Commit c37611a6cc82f38d785f33f4f1779696d750030f
1 parent
ef5cebe5
removed the time wasted in the cost function
Showing
4 changed files
with
85 additions
and
32 deletions
Show diff stats
stim/cuda/cuda_texture.cuh
... | ... | @@ -28,6 +28,7 @@ namespace stim |
28 | 28 | struct cudaResourceDesc resDesc; |
29 | 29 | struct cudaTextureDesc texDesc; |
30 | 30 | cudaTextureObject_t tObj; |
31 | + float *result; | |
31 | 32 | |
32 | 33 | |
33 | 34 | ///basic constructor that creates the texture with default parameters. |
... | ... | @@ -41,6 +42,14 @@ namespace stim |
41 | 42 | texDesc.normalizedCoords = 0; |
42 | 43 | } |
43 | 44 | |
45 | + ///basic destructor | |
46 | + ~cuda_texture() | |
47 | + { | |
48 | + UnmapCudaTexture(); | |
49 | + if(result != NULL) | |
50 | + cudaFree(result); | |
51 | + } | |
52 | + | |
44 | 53 | |
45 | 54 | ///Enable the nromalized texture coordinates. |
46 | 55 | ///@param bool, 1 for on, 0 for off |
... | ... | @@ -139,6 +148,13 @@ namespace stim |
139 | 148 | // ); |
140 | 149 | } |
141 | 150 | |
151 | + ///Allocate the auxiliary internal 1D float array | |
152 | + void | |
153 | + Alloc(int x) | |
154 | + { | |
155 | + cudaMalloc( (void**) &result, x*sizeof(float)); | |
156 | + } | |
157 | + | |
142 | 158 | //-------------------------------------------------------------------------// |
143 | 159 | //------------------------------GET/SET METHODS----------------------------// |
144 | 160 | //-------------------------------------------------------------------------// |
... | ... | @@ -155,6 +171,12 @@ namespace stim |
155 | 171 | { |
156 | 172 | return srcArray; |
157 | 173 | } |
174 | + | |
175 | + float* | |
176 | + getAuxArray() | |
177 | + { | |
178 | + return result; | |
179 | + } | |
158 | 180 | }; |
159 | 181 | } |
160 | 182 | } | ... | ... |
stim/cuda/filter.cuh
stim/cuda/spider_cost.cuh
... | ... | @@ -16,26 +16,25 @@ namespace stim{ |
16 | 16 | namespace cuda |
17 | 17 | { |
18 | 18 | |
19 | - stim::cuda::cuda_texture t; //texture object. | |
20 | - float* result; | |
19 | +// float* result; | |
21 | 20 | // float* print; |
22 | 21 | |
23 | 22 | ///Initialization function, allocates the memory and passes the necessary |
24 | 23 | ///handles from OpenGL and Cuda. |
25 | 24 | ///@param DIM_Y --integer controlling how much memory to allocate. |
26 | - void initArray(int DIM_Y) | |
27 | - { | |
25 | +// void initArray(int DIM_Y) | |
26 | +// { | |
28 | 27 | // cudaMalloc( (void**) &print, DIM_Y*16*sizeof(float)); ///temporary |
29 | - cudaMalloc( (void**) &result, DIM_Y*sizeof(float)); | |
30 | - } | |
28 | +// cudaMalloc( (void**) &result, DIM_Y*sizeof(float)); | |
29 | +// } | |
31 | 30 | |
32 | 31 | ///Deinit function that frees the memery used and releases the texture resource |
33 | 32 | ///back to OpenGL. |
34 | - void cleanUP() | |
35 | - { | |
36 | - cudaFree(result); | |
33 | +// void cleanUP() | |
34 | +// { | |
35 | +// cudaFree(result); | |
37 | 36 | // cudaFree(print); ///temporary |
38 | - } | |
37 | +// } | |
39 | 38 | |
40 | 39 | ///A virtual representation of a uniform template. |
41 | 40 | ///Returns the value of the template pixel. |
... | ... | @@ -117,14 +116,15 @@ namespace stim{ |
117 | 116 | /// may work with other gl texture types, but untested. |
118 | 117 | ///@param DIM_Y, the number of samples in the template. |
119 | 118 | extern "C" |
120 | - stim::vec<int> get_cost(GLint texbufferID, GLenum texType, int DIM_Y,int dx = 16, int dy = 8) | |
119 | + //stim::vec<int> get_cost(GLint texbufferID, GLenum texType, int DIM_Y,int dx = 16, int dy = 8) | |
120 | + stim::vec<int> get_cost(cudaTextureObject_t tObj, float* result, int DIM_Y,int dx = 16, int dy = 8) | |
121 | 121 | { |
122 | 122 | |
123 | 123 | //Bind the Texture in GL and allow access to cuda. |
124 | 124 | // #ifdef TIMING |
125 | 125 | // gpuStartTimer(); |
126 | 126 | // #endif |
127 | - t.MapCudaTexture(texbufferID, texType); | |
127 | +// t.MapCudaTexture(texbufferID, texType); | |
128 | 128 | // #ifdef TIMING |
129 | 129 | // std::cout << " " << gpuStopTimer(); |
130 | 130 | // #endif |
... | ... | @@ -137,7 +137,7 @@ namespace stim{ |
137 | 137 | output = (float* ) malloc(DIM_Y*sizeof(float)); |
138 | 138 | |
139 | 139 | stim::vec<int> ret(0, 0); |
140 | - initArray(DIM_Y); | |
140 | +// initArray(DIM_Y); | |
141 | 141 | |
142 | 142 | |
143 | 143 | //variables for finding the min. |
... | ... | @@ -154,7 +154,7 @@ namespace stim{ |
154 | 154 | dim3 numBlocks(1, DIM_Y); |
155 | 155 | dim3 threadsPerBlock(dx, dy); |
156 | 156 | |
157 | - get_diff <<< numBlocks, threadsPerBlock, dx*dy*sizeof(float) >>> (t.getTexture(), result, dx, dy); | |
157 | + get_diff <<< numBlocks, threadsPerBlock, dx*dy*sizeof(float) >>> (tObj, result, dx, dy); | |
158 | 158 | cudaDeviceSynchronize(); |
159 | 159 | // #ifdef TIMING |
160 | 160 | // std::cout << " " << gpuStopTimer(); |
... | ... | @@ -184,8 +184,8 @@ namespace stim{ |
184 | 184 | // name << "Test.bmp"; |
185 | 185 | // stim::gpu2image<float>(print, name.str(),16,218,0,256); |
186 | 186 | |
187 | - t.UnmapCudaTexture(); | |
188 | - cleanUP(); | |
187 | +// t.UnmapCudaTexture(); | |
188 | +// cleanUP(); | |
189 | 189 | ret[0] = idx; ret[1] = (int) output[idx]; |
190 | 190 | // std::cout << "The cost is " << output[idx] << std::endl; |
191 | 191 | free(output); | ... | ... |
stim/gl/gl_spider.h
... | ... | @@ -17,6 +17,7 @@ |
17 | 17 | #include <stim/cuda/spider_cost.cuh> |
18 | 18 | #include <stim/cuda/cudatools/glbind.h> |
19 | 19 | #include <stim/cuda/arraymath.cuh> |
20 | +#include <stim/cuda/cuda_texture.cuh> | |
20 | 21 | #include <stim/cuda/cudatools.h> |
21 | 22 | #include <stim/cuda/ivote.cuh> |
22 | 23 | #include <stim/visualization/glObj.h> |
... | ... | @@ -121,6 +122,12 @@ class gl_spider : public virtual gl_texture<T> |
121 | 122 | stim::vec3<float> ds; |
122 | 123 | |
123 | 124 | static const float t_length = 16.0; |
125 | + | |
126 | + | |
127 | + //cuda texture variables that keep track of the binding. | |
128 | + stim::cuda::cuda_texture t_dir; | |
129 | + stim::cuda::cuda_texture t_pos; | |
130 | + stim::cuda::cuda_texture t_mag; | |
124 | 131 | |
125 | 132 | |
126 | 133 | //--------------------------------------------------------------------------// |
... | ... | @@ -138,7 +145,8 @@ class gl_spider : public virtual gl_texture<T> |
138 | 145 | #endif |
139 | 146 | setMatrix(); //create the transformation matrix. |
140 | 147 | glCallList(dList); //move the templates to p, d, m. |
141 | - glFlush(); | |
148 | + glFinish(); | |
149 | +// glFlush(); | |
142 | 150 | #ifdef TIMING |
143 | 151 | direction_time += gpuStopTimer(); |
144 | 152 | #endif |
... | ... | @@ -146,7 +154,7 @@ class gl_spider : public virtual gl_texture<T> |
146 | 154 | // test(texbufferID, GL_TEXTURE_2D,2*t_length,numSamples*t_length, "Final_Cost_Direction.bmp"); |
147 | 155 | #endif |
148 | 156 | |
149 | - int best = getCost(texbufferID,numSamples); //find min cost. | |
157 | + int best = getCost(t_dir.getTexture(), t_dir.getAuxArray() ,numSamples); //find min cost. | |
150 | 158 | stim::vec<float> next( //find next vector. |
151 | 159 | dV[best][0]*S[0]*R[0], |
152 | 160 | dV[best][1]*S[1]*R[1], |
... | ... | @@ -171,7 +179,8 @@ class gl_spider : public virtual gl_texture<T> |
171 | 179 | #endif |
172 | 180 | setMatrix(); //create the transformation matrix. |
173 | 181 | glCallList(dList+1); //move the templates to p, d, m. |
174 | - glFlush(); | |
182 | + glFinish(); | |
183 | +// glFlush(); | |
175 | 184 | #ifdef TIMING |
176 | 185 | position_time += gpuStopTimer(); |
177 | 186 | #endif |
... | ... | @@ -179,7 +188,7 @@ class gl_spider : public virtual gl_texture<T> |
179 | 188 | #ifdef TESTING |
180 | 189 | // test(ptexbufferID, GL_TEXTURE_2D,2*t_length, numSamplesPos*t_length, "Final_Cost_Position.bmp"); |
181 | 190 | #endif |
182 | - int best = getCost(ptexbufferID, numSamplesPos); //find min cost. | |
191 | + int best = getCost(t_pos.getTexture(), t_pos.getAuxArray(), numSamplesPos); //find min cost. | |
183 | 192 | // std::cerr << best << std::endl; |
184 | 193 | stim::vec<float> next( //find next position. |
185 | 194 | pV[best][0], |
... | ... | @@ -205,14 +214,15 @@ class gl_spider : public virtual gl_texture<T> |
205 | 214 | #endif |
206 | 215 | setMatrix(); //create the transformation. |
207 | 216 | glCallList(dList+2); //move the templates to p, d, m. |
208 | - glFlush(); | |
217 | + glFinish(); | |
218 | +// glFlush(); | |
209 | 219 | #ifdef TIMING |
210 | 220 | size_time += gpuStopTimer(); |
211 | 221 | #endif |
212 | 222 | #ifdef TESTING |
213 | 223 | // test(mtexbufferID, GL_TEXTURE_2D, 2*t_length, numSamplesMag*t_length, "Final_Cost_Position.bmp"); |
214 | 224 | #endif |
215 | - int best = getCost(mtexbufferID, numSamplesMag); //get best cost. | |
225 | + int best = getCost(t_mag.getTexture(), t_mag.getAuxArray(), numSamplesMag); //get best cost. | |
216 | 226 | setMagnitude(m[0]*mV[best][0]); //adjust the magnitude. |
217 | 227 | } |
218 | 228 | |
... | ... | @@ -575,7 +585,7 @@ class gl_spider : public virtual gl_texture<T> |
575 | 585 | glGenFramebuffers(1, &framebufferID); |
576 | 586 | glBindFramebuffer(GL_FRAMEBUFFER, framebufferID); |
577 | 587 | int numChannels = 1; |
578 | - unsigned char* texels = new unsigned char[width * height * numChannels]; | |
588 | +// unsigned char* texels = new unsigned char[width * height * numChannels]; | |
579 | 589 | glGenTextures(1, &textureID); |
580 | 590 | glBindTexture(GL_TEXTURE_2D, textureID); |
581 | 591 | |
... | ... | @@ -585,8 +595,8 @@ class gl_spider : public virtual gl_texture<T> |
585 | 595 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); |
586 | 596 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); |
587 | 597 | glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, |
588 | - width, height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, texels); | |
589 | - delete[] texels; | |
598 | + width, height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL); | |
599 | +// delete[] texels; | |
590 | 600 | glBindFramebuffer(GL_FRAMEBUFFER, 0); |
591 | 601 | glBindTexture(GL_TEXTURE_2D, 0); |
592 | 602 | } |
... | ... | @@ -600,7 +610,7 @@ class gl_spider : public virtual gl_texture<T> |
600 | 610 | glGenFramebuffers(1, &fboID); |
601 | 611 | glBindFramebuffer(GL_FRAMEBUFFER, fboID); |
602 | 612 | int numChannels = 1; |
603 | - unsigned char* texels = new unsigned char[width * height * numChannels]; | |
613 | +// unsigned char* texels = new unsigned char[width * height * numChannels]; | |
604 | 614 | glGenTextures(1, &texbufferID); |
605 | 615 | glBindTexture(GL_TEXTURE_2D, texbufferID); |
606 | 616 | |
... | ... | @@ -610,8 +620,8 @@ class gl_spider : public virtual gl_texture<T> |
610 | 620 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); |
611 | 621 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); |
612 | 622 | glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, |
613 | - width, height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, texels); | |
614 | - delete[] texels; | |
623 | + width, height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL); | |
624 | +// delete[] texels; | |
615 | 625 | glBindFramebuffer(GL_FRAMEBUFFER, 0); |
616 | 626 | glBindTexture(GL_TEXTURE_2D, 0); |
617 | 627 | CHECK_OPENGL_ERROR |
... | ... | @@ -777,20 +787,35 @@ class gl_spider : public virtual gl_texture<T> |
777 | 787 | getCost() |
778 | 788 | { |
779 | 789 | stim::vec<int> cost = |
780 | - stim::cuda::get_cost(texbufferID, GL_TEXTURE_2D, numSamples); | |
790 | +// stim::cuda::get_cost(texbufferID, GL_TEXTURE_2D, numSamples); | |
781 | 791 | cudaDeviceSynchronize(); |
782 | 792 | current_cost = cost[1]; |
783 | 793 | return cost[0]; |
784 | 794 | } |
785 | 795 | |
796 | +// int | |
797 | +// getCost(GLuint tID, int n) | |
798 | +// { | |
799 | +// #ifdef TIMING | |
800 | +// gpuStartTimer(); | |
801 | +// #endif | |
802 | +// stim::vec<int> cost = | |
803 | +// stim::cuda::get_cost(tID, GL_TEXTURE_2D, n, 2*t_length, t_length); | |
804 | +// #ifdef TIMING | |
805 | +// cost_time += gpuStopTimer(); | |
806 | +// #endif | |
807 | +// current_cost = cost[1]; | |
808 | +// return cost[0]; | |
809 | +// } | |
810 | + | |
786 | 811 | int |
787 | - getCost(GLuint tID, int n) | |
812 | + getCost(cudaTextureObject_t tObj, float* result, int n) | |
788 | 813 | { |
789 | 814 | #ifdef TIMING |
790 | 815 | gpuStartTimer(); |
791 | 816 | #endif |
792 | 817 | stim::vec<int> cost = |
793 | - stim::cuda::get_cost(tID, GL_TEXTURE_2D, n, 2*t_length, t_length); | |
818 | + stim::cuda::get_cost(tObj, result, n, 2*t_length, t_length); | |
794 | 819 | #ifdef TIMING |
795 | 820 | cost_time += gpuStopTimer(); |
796 | 821 | #endif |
... | ... | @@ -918,6 +943,12 @@ class gl_spider : public virtual gl_texture<T> |
918 | 943 | CHECK_OPENGL_ERROR |
919 | 944 | GenerateFBO(16, 216, btexbufferID, bfboID); |
920 | 945 | CHECK_OPENGL_ERROR |
946 | + t_dir.MapCudaTexture(texbufferID, GL_TEXTURE_2D); | |
947 | + t_dir.Alloc(numSamples); | |
948 | + t_pos.MapCudaTexture(ptexbufferID, GL_TEXTURE_2D); | |
949 | + t_pos.Alloc(numSamplesPos); | |
950 | + t_mag.MapCudaTexture(mtexbufferID, GL_TEXTURE_2D); | |
951 | + t_mag.Alloc(numSamplesMag); | |
921 | 952 | // setDims(0.6, 0.6, 1.0); |
922 | 953 | // setSize(1024.0, 1024.0, 1024.0); |
923 | 954 | setMatrix(); | ... | ... |