Commit c37611a6cc82f38d785f33f4f1779696d750030f

Authored by Pavel Govyadinov
1 parent ef5cebe5

removed the time wasted in the cost function

stim/cuda/cuda_texture.cuh
... ... @@ -28,6 +28,7 @@ namespace stim
28 28 struct cudaResourceDesc resDesc;
29 29 struct cudaTextureDesc texDesc;
30 30 cudaTextureObject_t tObj;
  31 + float *result;
31 32  
32 33  
33 34 ///basic constructor that creates the texture with default parameters.
... ... @@ -41,6 +42,14 @@ namespace stim
41 42 texDesc.normalizedCoords = 0;
42 43 }
43 44  
  45 + ///basic destructor
  46 + ~cuda_texture()
  47 + {
  48 + UnmapCudaTexture();
  49 + if(result != NULL)
  50 + cudaFree(result);
  51 + }
  52 +
44 53  
45 54 ///Enable the normalized texture coordinates.
46 55 ///@param bool, 1 for on, 0 for off
... ... @@ -139,6 +148,13 @@ namespace stim
139 148 // );
140 149 }
141 150  
  151 + ///Allocate the auxiliary internal 1D float array
  152 + void
  153 + Alloc(int x)
  154 + {
  155 + cudaMalloc( (void**) &result, x*sizeof(float));
  156 + }
  157 +
142 158 //-------------------------------------------------------------------------//
143 159 //------------------------------GET/SET METHODS----------------------------//
144 160 //-------------------------------------------------------------------------//
... ... @@ -155,6 +171,12 @@ namespace stim
155 171 {
156 172 return srcArray;
157 173 }
  174 +
  175 + float*
  176 + getAuxArray()
  177 + {
  178 + return result;
  179 + }
158 180 };
159 181 }
160 182 }
... ...
stim/cuda/filter.cuh
... ... @@ -47,7 +47,7 @@ namespace stim
47 47 // checkCUDAerrors("Memory Allocation, Result");
48 48 }
49 49  
50   - void cleanUp(cudaGraphicsResource_t src)
  50 + void cleanUP()
51 51 {
52 52 HANDLE_ERROR(
53 53 cudaFree(gpuLoG)
... ...
stim/cuda/spider_cost.cuh
... ... @@ -16,26 +16,25 @@ namespace stim{
16 16 namespace cuda
17 17 {
18 18  
19   - stim::cuda::cuda_texture t; //texture object.
20   - float* result;
  19 +// float* result;
21 20 // float* print;
22 21  
23 22 ///Initialization function, allocates the memory and passes the necessary
24 23 ///handles from OpenGL and Cuda.
25 24 ///@param DIM_Y --integer controlling how much memory to allocate.
26   - void initArray(int DIM_Y)
27   - {
  25 +// void initArray(int DIM_Y)
  26 +// {
28 27 // cudaMalloc( (void**) &print, DIM_Y*16*sizeof(float)); ///temporary
29   - cudaMalloc( (void**) &result, DIM_Y*sizeof(float));
30   - }
  28 +// cudaMalloc( (void**) &result, DIM_Y*sizeof(float));
  29 +// }
31 30  
32 31 ///Deinit function that frees the memory used and releases the texture resource
33 32 ///back to OpenGL.
34   - void cleanUP()
35   - {
36   - cudaFree(result);
  33 +// void cleanUP()
  34 +// {
  35 +// cudaFree(result);
37 36 // cudaFree(print); ///temporary
38   - }
  37 +// }
39 38  
40 39 ///A virtual representation of a uniform template.
41 40 ///Returns the value of the template pixel.
... ... @@ -117,14 +116,15 @@ namespace stim{
117 116 /// may work with other gl texture types, but untested.
118 117 ///@param DIM_Y, the number of samples in the template.
119 118 extern "C"
120   - stim::vec<int> get_cost(GLint texbufferID, GLenum texType, int DIM_Y,int dx = 16, int dy = 8)
  119 + //stim::vec<int> get_cost(GLint texbufferID, GLenum texType, int DIM_Y,int dx = 16, int dy = 8)
  120 + stim::vec<int> get_cost(cudaTextureObject_t tObj, float* result, int DIM_Y,int dx = 16, int dy = 8)
121 121 {
122 122  
123 123 //Bind the Texture in GL and allow access to cuda.
124 124 // #ifdef TIMING
125 125 // gpuStartTimer();
126 126 // #endif
127   - t.MapCudaTexture(texbufferID, texType);
  127 +// t.MapCudaTexture(texbufferID, texType);
128 128 // #ifdef TIMING
129 129 // std::cout << " " << gpuStopTimer();
130 130 // #endif
... ... @@ -137,7 +137,7 @@ namespace stim{
137 137 output = (float* ) malloc(DIM_Y*sizeof(float));
138 138  
139 139 stim::vec<int> ret(0, 0);
140   - initArray(DIM_Y);
  140 +// initArray(DIM_Y);
141 141  
142 142  
143 143 //variables for finding the min.
... ... @@ -154,7 +154,7 @@ namespace stim{
154 154 dim3 numBlocks(1, DIM_Y);
155 155 dim3 threadsPerBlock(dx, dy);
156 156  
157   - get_diff <<< numBlocks, threadsPerBlock, dx*dy*sizeof(float) >>> (t.getTexture(), result, dx, dy);
  157 + get_diff <<< numBlocks, threadsPerBlock, dx*dy*sizeof(float) >>> (tObj, result, dx, dy);
158 158 cudaDeviceSynchronize();
159 159 // #ifdef TIMING
160 160 // std::cout << " " << gpuStopTimer();
... ... @@ -184,8 +184,8 @@ namespace stim{
184 184 // name << "Test.bmp";
185 185 // stim::gpu2image<float>(print, name.str(),16,218,0,256);
186 186  
187   - t.UnmapCudaTexture();
188   - cleanUP();
  187 +// t.UnmapCudaTexture();
  188 +// cleanUP();
189 189 ret[0] = idx; ret[1] = (int) output[idx];
190 190 // std::cout << "The cost is " << output[idx] << std::endl;
191 191 free(output);
... ...
stim/gl/gl_spider.h
... ... @@ -17,6 +17,7 @@
17 17 #include <stim/cuda/spider_cost.cuh>
18 18 #include <stim/cuda/cudatools/glbind.h>
19 19 #include <stim/cuda/arraymath.cuh>
  20 +#include <stim/cuda/cuda_texture.cuh>
20 21 #include <stim/cuda/cudatools.h>
21 22 #include <stim/cuda/ivote.cuh>
22 23 #include <stim/visualization/glObj.h>
... ... @@ -121,6 +122,12 @@ class gl_spider : public virtual gl_texture<T>
121 122 stim::vec3<float> ds;
122 123  
123 124 static const float t_length = 16.0;
  125 +
  126 +
  127 + //cuda texture variables that keep track of the binding.
  128 + stim::cuda::cuda_texture t_dir;
  129 + stim::cuda::cuda_texture t_pos;
  130 + stim::cuda::cuda_texture t_mag;
124 131  
125 132  
126 133 //--------------------------------------------------------------------------//
... ... @@ -138,7 +145,8 @@ class gl_spider : public virtual gl_texture<T>
138 145 #endif
139 146 setMatrix(); //create the transformation matrix.
140 147 glCallList(dList); //move the templates to p, d, m.
141   - glFlush();
  148 + glFinish();
  149 +// glFlush();
142 150 #ifdef TIMING
143 151 direction_time += gpuStopTimer();
144 152 #endif
... ... @@ -146,7 +154,7 @@ class gl_spider : public virtual gl_texture<T>
146 154 // test(texbufferID, GL_TEXTURE_2D,2*t_length,numSamples*t_length, "Final_Cost_Direction.bmp");
147 155 #endif
148 156  
149   - int best = getCost(texbufferID,numSamples); //find min cost.
  157 + int best = getCost(t_dir.getTexture(), t_dir.getAuxArray() ,numSamples); //find min cost.
150 158 stim::vec<float> next( //find next vector.
151 159 dV[best][0]*S[0]*R[0],
152 160 dV[best][1]*S[1]*R[1],
... ... @@ -171,7 +179,8 @@ class gl_spider : public virtual gl_texture<T>
171 179 #endif
172 180 setMatrix(); //create the transformation matrix.
173 181 glCallList(dList+1); //move the templates to p, d, m.
174   - glFlush();
  182 + glFinish();
  183 +// glFlush();
175 184 #ifdef TIMING
176 185 position_time += gpuStopTimer();
177 186 #endif
... ... @@ -179,7 +188,7 @@ class gl_spider : public virtual gl_texture<T>
179 188 #ifdef TESTING
180 189 // test(ptexbufferID, GL_TEXTURE_2D,2*t_length, numSamplesPos*t_length, "Final_Cost_Position.bmp");
181 190 #endif
182   - int best = getCost(ptexbufferID, numSamplesPos); //find min cost.
  191 + int best = getCost(t_pos.getTexture(), t_pos.getAuxArray(), numSamplesPos); //find min cost.
183 192 // std::cerr << best << std::endl;
184 193 stim::vec<float> next( //find next position.
185 194 pV[best][0],
... ... @@ -205,14 +214,15 @@ class gl_spider : public virtual gl_texture<T>
205 214 #endif
206 215 setMatrix(); //create the transformation.
207 216 glCallList(dList+2); //move the templates to p, d, m.
208   - glFlush();
  217 + glFinish();
  218 +// glFlush();
209 219 #ifdef TIMING
210 220 size_time += gpuStopTimer();
211 221 #endif
212 222 #ifdef TESTING
213 223 // test(mtexbufferID, GL_TEXTURE_2D, 2*t_length, numSamplesMag*t_length, "Final_Cost_Position.bmp");
214 224 #endif
215   - int best = getCost(mtexbufferID, numSamplesMag); //get best cost.
  225 + int best = getCost(t_mag.getTexture(), t_mag.getAuxArray(), numSamplesMag); //get best cost.
216 226 setMagnitude(m[0]*mV[best][0]); //adjust the magnitude.
217 227 }
218 228  
... ... @@ -575,7 +585,7 @@ class gl_spider : public virtual gl_texture<T>
575 585 glGenFramebuffers(1, &framebufferID);
576 586 glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);
577 587 int numChannels = 1;
578   - unsigned char* texels = new unsigned char[width * height * numChannels];
  588 +// unsigned char* texels = new unsigned char[width * height * numChannels];
579 589 glGenTextures(1, &textureID);
580 590 glBindTexture(GL_TEXTURE_2D, textureID);
581 591  
... ... @@ -585,8 +595,8 @@ class gl_spider : public virtual gl_texture<T>
585 595 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
586 596 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
587 597 glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE,
588   - width, height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, texels);
589   - delete[] texels;
  598 + width, height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);
  599 +// delete[] texels;
590 600 glBindFramebuffer(GL_FRAMEBUFFER, 0);
591 601 glBindTexture(GL_TEXTURE_2D, 0);
592 602 }
... ... @@ -600,7 +610,7 @@ class gl_spider : public virtual gl_texture<T>
600 610 glGenFramebuffers(1, &fboID);
601 611 glBindFramebuffer(GL_FRAMEBUFFER, fboID);
602 612 int numChannels = 1;
603   - unsigned char* texels = new unsigned char[width * height * numChannels];
  613 +// unsigned char* texels = new unsigned char[width * height * numChannels];
604 614 glGenTextures(1, &texbufferID);
605 615 glBindTexture(GL_TEXTURE_2D, texbufferID);
606 616  
... ... @@ -610,8 +620,8 @@ class gl_spider : public virtual gl_texture<T>
610 620 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
611 621 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
612 622 glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE,
613   - width, height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, texels);
614   - delete[] texels;
  623 + width, height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);
  624 +// delete[] texels;
615 625 glBindFramebuffer(GL_FRAMEBUFFER, 0);
616 626 glBindTexture(GL_TEXTURE_2D, 0);
617 627 CHECK_OPENGL_ERROR
... ... @@ -777,20 +787,35 @@ class gl_spider : public virtual gl_texture<T>
777 787 getCost()
778 788 {
779 789 stim::vec<int> cost =
780   - stim::cuda::get_cost(texbufferID, GL_TEXTURE_2D, numSamples);
  790 +// stim::cuda::get_cost(texbufferID, GL_TEXTURE_2D, numSamples);
781 791 cudaDeviceSynchronize();
782 792 current_cost = cost[1];
783 793 return cost[0];
784 794 }
785 795  
  796 +// int
  797 +// getCost(GLuint tID, int n)
  798 +// {
  799 +// #ifdef TIMING
  800 +// gpuStartTimer();
  801 +// #endif
  802 +// stim::vec<int> cost =
  803 +// stim::cuda::get_cost(tID, GL_TEXTURE_2D, n, 2*t_length, t_length);
  804 +// #ifdef TIMING
  805 +// cost_time += gpuStopTimer();
  806 +// #endif
  807 +// current_cost = cost[1];
  808 +// return cost[0];
  809 +// }
  810 +
786 811 int
787   - getCost(GLuint tID, int n)
  812 + getCost(cudaTextureObject_t tObj, float* result, int n)
788 813 {
789 814 #ifdef TIMING
790 815 gpuStartTimer();
791 816 #endif
792 817 stim::vec<int> cost =
793   - stim::cuda::get_cost(tID, GL_TEXTURE_2D, n, 2*t_length, t_length);
  818 + stim::cuda::get_cost(tObj, result, n, 2*t_length, t_length);
794 819 #ifdef TIMING
795 820 cost_time += gpuStopTimer();
796 821 #endif
... ... @@ -918,6 +943,12 @@ class gl_spider : public virtual gl_texture<T>
918 943 CHECK_OPENGL_ERROR
919 944 GenerateFBO(16, 216, btexbufferID, bfboID);
920 945 CHECK_OPENGL_ERROR
  946 + t_dir.MapCudaTexture(texbufferID, GL_TEXTURE_2D);
  947 + t_dir.Alloc(numSamples);
  948 + t_pos.MapCudaTexture(ptexbufferID, GL_TEXTURE_2D);
  949 + t_pos.Alloc(numSamplesPos);
  950 + t_mag.MapCudaTexture(mtexbufferID, GL_TEXTURE_2D);
  951 + t_mag.Alloc(numSamplesMag);
921 952 // setDims(0.6, 0.6, 1.0);
922 953 // setSize(1024.0, 1024.0, 1024.0);
923 954 setMatrix();
... ...