Commit 59781ee35001f96eb8a26a333796361193c0cf0f

Authored by Pavel Govyadinov
1 parent 84eff8b1

fixed a stask bug, added support for branch detection and minimized the number o…

…f call to branch detection as well as well as branching points added in the border regions (where the branch outpoint is outside of the search space)
stim/cuda/branch_detection.cuh
... ... @@ -117,7 +117,6 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
117 117 atan_2d(cpuTable, rmax);
118 118 cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);
119 119  
120   - test(texbufferID, texType);
121 120  
122 121 t.MapCudaTexture(texbufferID, texType);
123 122 cudaDeviceSynchronize();
... ... @@ -125,55 +124,41 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
125 124 gpuI, sigma, x, y, t.getTexture(), t.getArray()
126 125 );
127 126 cudaDeviceSynchronize();
128   - stim::gpu2image<float>(gpuI, "Blur.jpg", 16, 8*27, 0, 255);
129 127  
130 128  
131 129 stim::cuda::gpu_gradient_2d<float>(
132 130 gpuGrad, gpuI, x, y
133 131 );
134 132 cudaDeviceSynchronize();
135   -// stim::gpu2image<float>(gpuGrad, "Grad.jpg", 16, 8*27, 0, 1);
136 133  
137   - stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, M_PI);
  134 + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
138 135 cudaDeviceSynchronize();
139   -// stim::gpu2image<float>(gpuGrad, "Cart_2_polar.jpg", 16, 8*27, 0, 1);
140 136  
141 137 cudaDeviceSynchronize();
142 138 for (int i = 0; i < iter; i++)
143 139 {
144 140 stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
145   - name << "Vote" << i << ".bmp";
146   - stim::gpu2image<float>(gpuVote, name.str(), 16, 8*27, 0, 255);
147   - name.str("");
148   - name.clear();
149 141 cudaDeviceSynchronize();
150   -// std::cout << "got here7_a_" << i << std::endl;
151 142 stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
152 143 cudaDeviceSynchronize();
153   -// std::cout << "got here7_b_" << i << std::endl;
154 144 phi = phi - dphi;
155 145 }
156 146  
157 147 cudaDeviceSynchronize();
158 148 stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, final_t, conn, x, y);
159   -// std::cout << "got here_sdkfj" << std::endl;
160 149 cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);
161   - stim::gpu2image<float>(gpuCenters, "Centers.jpg", 16, 8*27, 0, 255);
162 150 for(int i = 0; i < pixels; i++)
163 151 {
164 152 int ix = (i % x);
165 153 int iy = (i / x);
166   -// std::cout << i << " : " << ix <<" : " << iy << std::endl;
167   - if((cpuCenters[i] == 1) && (ix > 4) && (ix < x-4))
  154 + if((cpuCenters[i] == 1) && (ix > 5) && (ix < x-5))
168 155 {
169   - std::cout << ix << " : " << iy << std::endl;
170 156  
171 157 float x_v = (float) ix;
172 158 float y_v = (float) iy;
173 159 output.push_back(stim::vec<float>((x_v/(float)x),
174 160 (y_v/(float)y), 0.0));
175 161  
176   - std::cout << x_v/16.0 << " : " << y_v/216.0 << std::endl;
177 162 }
178 163 }
179 164  
... ...
stim/cuda/ivote/update_dir.cuh
... ... @@ -164,6 +164,8 @@ namespace stim{
164 164 //free allocated memory
165 165 cudaFree(gpuDir);
166 166  
  167 + cudaDestroyTextureObject(texObj);
  168 +
167 169 }
168 170  
169 171 template<typename T>
... ... @@ -211,4 +213,4 @@ namespace stim{
211 213 }
212 214 }
213 215  
214   -#endif
215 216 \ No newline at end of file
  217 +#endif
... ...
stim/cuda/ivote/vote.cuh
... ... @@ -124,6 +124,9 @@ namespace stim{
124 124  
125 125 cuda_vote <<< blocks, threads,share_bytes >>>(gpuVote, texObj, gpuTable, phi, rmax, x , y);
126 126  
  127 + cudaDestroyTextureObject(texObj);
  128 + cudaFreeArray(cuArray);
  129 +
127 130 }
128 131  
129 132  
... ... @@ -169,4 +172,4 @@ namespace stim{
169 172 }
170 173 }
171 174  
172   -#endif
173 175 \ No newline at end of file
  176 +#endif
... ...
stim/cuda/sharedmem.cuh
... ... @@ -59,7 +59,7 @@ namespace stim{
59 59  
60 60 //perform the copy
61 61 if(sx < X && sy < Y)
62   - dest[sy * X + sx] = tex2D<D>(src, tx, ty);
  62 + dest[sy * X + sx] = abs(255 - tex2D<D>(src, tx, ty));
63 63 }
64 64 }
65 65 }
... ...
stim/cuda/spider_cost.cuh
... ... @@ -155,8 +155,6 @@ namespace stim{
155 155 t.UnmapCudaTexture();
156 156 cleanUP();
157 157 ret[0] = idx; ret[1] = (int) output[idx];
158   - std::cout << output[idx] << std::endl;
159   -
160 158 free(output);
161 159 return ret;
162 160 }
... ...
stim/cuda/templates/conv2.cuh
... ... @@ -107,6 +107,8 @@ namespace stim{
107 107 //call the kernel to do the multiplication
108 108 //cuda_conv2 <<< blocks, threads >>>(img, mask, copy, w, h, M);
109 109 cuda_conv2 <<< blocks, threads >>>(img, mask, copy, texObj, w, h, M);
  110 + cudaDestroyTextureObject(texObj);
  111 + cudaFreeArray(cuArray);
110 112  
111 113 }
112 114  
... ... @@ -143,4 +145,4 @@ namespace stim{
143 145 }
144 146  
145 147  
146   -#endif
147 148 \ No newline at end of file
  149 +#endif
... ...
stim/cuda/templates/conv2sep.cuh
... ... @@ -213,6 +213,8 @@ namespace stim{
213 213 //free allocated memory
214 214 cudaFree(cuArray);
215 215  
  216 + cudaDestroyTextureObject(texObj);
  217 +
216 218 }
217 219  
218 220 /// Applies a Gaussian blur to a 2D image stored on the CPU
... ...
stim/cuda/templates/gaussian_blur.cuh
... ... @@ -43,6 +43,7 @@ namespace stim{
43 43 stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, gpuKernel0, kwidth, gpuKernel0, kwidth);
44 44  
45 45 HANDLE_ERROR(cudaFree(gpuKernel0));
  46 + free(kernel0);
46 47  
47 48 }
48 49  
... ... @@ -58,6 +59,7 @@ namespace stim{
58 59  
59 60 //copy the kernel to the GPU
60 61 T* gpuKernel0;
  62 + HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth*sizeof(T)));
61 63 HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
62 64  
63 65 //perform the gaussian blur as a separable convolution
... ...
stim/cuda/testKernel.cuh
... ... @@ -39,7 +39,7 @@
39 39 int y = threadIdx.y + blockIdx.y * blockDim.y;
40 40 int idx = y*16+x;
41 41  
42   - float valIn = tex2D<unsigned char>(texIn, x, y)/255.0;
  42 + float valIn = tex2D<unsigned char>(texIn, x, y);
43 43  
44 44 print[idx] = abs(valIn); ///temporary
45 45  
... ... @@ -80,7 +80,7 @@
80 80 cudaDeviceSynchronize();
81 81 stringstream name; //for debugging
82 82 name << "FromTex.bmp";
83   - stim::gpu2image<float>(print, name.str(),16,216,0,1);
  83 + stim::gpu2image<float>(print, name.str(),16,216,0,255);
84 84  
85 85 tx.UnmapCudaTexture();
86 86 cleanUP();
... ...
stim/gl/gl_spider.h
... ... @@ -70,13 +70,14 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
70 70 GLuint btexbufferID;
71 71  
72 72 int numSamples; //The number of templates in the buffer.
73   - float stepsize = 6.0; //Step size.
  73 + float stepsize = 4.0; //Step size.
74 74 int current_cost;
75 75  
76 76  
77 77 //Tracing variables.
78 78 std::stack< stim::vec<float> > seeds; //Variables for tracing
79 79 std::stack< stim::vec<float> > seedsvecs;
  80 + std::stack< float > seedsmags;
80 81 std::vector< stim::vec<float> > cL; //Line currently being traced.
81 82 stim::glObj<float> sk;
82 83 stim::vec<float> rev; //reverse vector;
... ... @@ -84,6 +85,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
84 85 stim::vec<float> ps;
85 86 stim::vec<float> ups;
86 87 stim::vec<float> ds;
  88 + std::vector<stim::vec<float> > last3;
87 89  
88 90  
89 91  
... ... @@ -147,42 +149,43 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
147 149 {
148 150 setMatrix();
149 151 glCallList(dList+3);
150   - // test(btexbufferID, GL_TEXTURE_2D, 8*27);
151 152 std::vector< stim::vec<float> > result = find_branch(
152 153 btexbufferID, GL_TEXTURE_2D, 16, 216);
  154 + stim::vec<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]);
153 155 if(!result.empty())
154 156 {
155 157 for(int i = 1; i < result.size(); i++)
156 158 {
157   - std::cout << result[i] << std::endl;
158   -// std::cout <<"["<< result[i][0]*16.0 << ", "
159   -// << result[i][1]*16.0 << ", " <<
160   -// result[i][2]*216.0 <<"]" << std::endl;
161   -
162 159 stim::vec<float> cylp(
163 160 0.5 * cos(2*M_PI*(result[i][1])),
164 161 0.5 * sin(2*M_PI*(result[i][1])),
165 162 result[i][0]-0.5,
166 163 1.0);
167   - stim::vec<float> cylv(
168   - 0.5 * cos(2*M_PI*(result[i][1]))*S[0]*R[0],
169   - 0.5 * sin(2*M_PI*(result[i][1]))*S[1]*R[1],
170   - (result[i][0]-0.5)*S[2]*R[2],
171   - 0.0);
172   - std::cout << cylp << std::endl;
173 164 cylp = cT*cylp;
174   - cylv = cT*cylv;
175 165  
176   - std::cout << cylp[0]*S[0]*R[0] << ", "
177   - << cylp[1]*S[1]*R[1] << ", " <<
178   - cylp[2]*S[2]*R[2] << std::endl;
179   - setSeed(cylp[0]*S[0]*R[0],
  166 + stim::vec<float> vec(
  167 + cylp[0]*S[0]*R[0],
180 168 cylp[1]*S[1]*R[1],
181 169 cylp[2]*S[2]*R[2]);
182   - cylv.norm();
183   - setSeedVec(cylv[0],
184   - cylv[1],
185   - cylv[2]);
  170 + stim::vec<float> seeddir(-p[0] + cylp[0]*S[0]*R[0],
  171 + -p[1] + cylp[1]*S[1]*R[1],
  172 + -p[2] + cylp[2]*S[2]*R[2]);
  173 + seeddir = seeddir.norm();
  174 + float seedm = m[0]/2.0;
  175 + stim::vec<float> lSeed = getLastSeed();
  176 +
  177 + if(sqrt(pow((lSeed[0] - vec[0]),2)
  178 + + pow((lSeed[1] - vec[1]),2) +
  179 + pow((lSeed[2] - vec[2]),2)) > m[0]/4.0
  180 + &&
  181 + !(vec[0] > size[0] || vec[1] > size[1]
  182 + || vec[2] > size[2] || vec[0] < 0
  183 + || vec[1] < 0 || vec[2] < 0))
  184 + {
  185 + setSeed(vec);
  186 + setSeedVec(seeddir);
  187 + setSeedMag(seedm);
  188 + }
186 189 }
187 190 }
188 191  
... ... @@ -207,7 +210,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
207 210 ///Method for populating the vector arrays with sampled vectors.
208 211 ///uses the default d vector <0,0,1>
209 212 void
210   - genDirectionVectors(float solidAngle = 3*M_PI/2)
  213 + genDirectionVectors(float solidAngle = 5/M_PI*4)
211 214 {
212 215 //ofstream file;
213 216 //file.open("dvectors.txt");
... ... @@ -608,6 +611,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
608 611 #endif
609 612 stim::vec<int> cost =
610 613 stim::cuda::get_cost(texbufferID, GL_TEXTURE_2D, numSamples);
  614 + cudaDeviceSynchronize();
611 615 #ifdef TESTING
612 616 duration_cuda = duration_cuda +
613 617 (std::clock() - start) / (double) CLOCKS_PER_SEC;
... ... @@ -837,11 +841,18 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
837 841 }
838 842  
839 843 void
840   - setSeedVec(stim::vec<float> pos)
  844 + setSeedVec(stim::vec<float> dir)
  845 + {
  846 + seedsvecs.push(dir);
  847 + }
  848 +
  849 + void
  850 + setSeedMag(float mag)
841 851 {
842   - seedsvecs.push(pos);
  852 + seedsmags.push(mag);
843 853 }
844 854  
  855 +
845 856 ///@param x, y, z: variables for the x, y and z coordinate of the seed
846 857 ///Adds a seed to the seed list.
847 858 ///Assumes that the coordinates passes are in tissue space.
... ... @@ -864,6 +875,28 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
864 875 return tp;
865 876 }
866 877  
  878 + stim::vec<float>
  879 + getLastSeedVec()
  880 + {
  881 + stim::vec<float> tp = seedsvecs.top();
  882 + return tp;
  883 + }
  884 +
  885 + float
  886 + getLastSeedMag()
  887 + {
  888 + float tp = seedsmags.top();
  889 + return tp;
  890 + }
  891 +
  892 + void
  893 + popSeed()
  894 + {
  895 + seeds.pop();
  896 + seedsvecs.pop();
  897 + // seedsmags.pop();
  898 + }
  899 +
867 900 std::stack<stim::vec<float> >
868 901 getSeeds()
869 902 {
... ... @@ -894,9 +927,9 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
894 927 ((float) y),
895 928 ((float) z)));
896 929 seedsvecs.push(stim::vec<float>(
897   - ((float) x),
898   - ((float) y),
899   - ((float) z)));
  930 + ((float) u),
  931 + ((float) v),
  932 + ((float) w)));
900 933 }
901 934 myfile.close();
902 935 } else {
... ... @@ -956,12 +989,34 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
956 989 Unbind();
957 990 CHECK_OPENGL_ERROR
958 991  
  992 + #ifdef TESTING
  993 + duration_sampling = duration_sampling +
  994 + (std::clock() - start) / (double) CLOCKS_PER_SEC;
  995 + num_sampling = num_sampling + 1.0;
  996 + #endif
  997 + return current_cost;
  998 + }
  999 +
  1000 + int
  1001 + StepP()
  1002 + {
  1003 + Bind();
  1004 + CHECK_OPENGL_ERROR
  1005 + #ifdef TESTING
  1006 + start = std::clock();
  1007 + #endif
  1008 + findOptimalDirection();
  1009 + findOptimalPosition();
  1010 + findOptimalScale();
  1011 + Unbind();
  1012 + CHECK_OPENGL_ERROR
959 1013 Bind(btexbufferID, bfboId, 27);
960 1014 CHECK_OPENGL_ERROR
961 1015 branchDetection();
962 1016 CHECK_OPENGL_ERROR
963 1017 Unbind();
964 1018 CHECK_OPENGL_ERROR
  1019 +
965 1020 #ifdef TESTING
966 1021 duration_sampling = duration_sampling +
967 1022 (std::clock() - start) / (double) CLOCKS_PER_SEC;
... ... @@ -1032,33 +1087,34 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1032 1087 {
1033 1088 Bind();
1034 1089 rev = stim::vec<float>(0.0,0.0,1.0);
  1090 + bool sEmpty = true;
  1091 + float lastmag = 16.0;;
1035 1092 while(!seeds.empty())
1036 1093 {
1037 1094 //clear the currently traced line and start a new one.
1038 1095 cL.clear();
1039 1096 sk.Begin(stim::OBJ_LINE);
1040 1097 stim::vec<float> curSeed = seeds.top();
  1098 +// std::cout << "The current seeds is " << curSeed << std::endl;
1041 1099 stim::vec<float> curSeedVec = seedsvecs.top();
  1100 + seeds.pop();
  1101 + seedsvecs.pop();
  1102 +// std::cout << "The current seed Vector is " << curSeedVec << std::endl;
1042 1103 setPosition(curSeed);
1043 1104 setDirection(curSeedVec);
1044   - setMagnitude(16.0);
1045 1105 cL.push_back(curSeed);
1046 1106 sk.createFromSelf(GL_SELECT);
1047 1107 traceLine(min_cost);
1048 1108  
1049 1109 sk.rev();
  1110 + // std::cout << "reversed" << std::endl;
1050 1111 std::reverse(cL.begin(), cL.end());
1051 1112 setPosition(curSeed);
1052   - setDirection(rev);
  1113 + setDirection(-rev);
1053 1114 setMagnitude(16.0);
1054 1115 sk.createFromSelf(GL_SELECT);
1055 1116 traceLine(min_cost);
1056   -
1057   - //temporary glObj rendering code.
1058   -
1059 1117 sk.End();
1060   - seeds.pop();
1061   - seedsvecs.pop();
1062 1118 }
1063 1119 Unbind();
1064 1120 }
... ... @@ -1072,21 +1128,23 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1072 1128 stim::vec<float> mag;
1073 1129 bool h;
1074 1130 bool started = false;
  1131 + bool running = true;
1075 1132 stim::vec<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]);
1076   - while(1)
  1133 + while(running)
1077 1134 {
1078 1135 int cost = Step();
1079 1136 if (cost > min_cost){
1080   -// std::cout << "Min Cost" << std::endl;
  1137 + running = false;
1081 1138 break;
1082 1139 } else {
1083 1140 //Have we found an edge?
1084 1141 pos = getPosition();
1085 1142 if(pos[0] > size[0] || pos[1] > size[1]
1086 1143 || pos[2] > size[2] || pos[0] < 0
1087   - || p[1] < 0 || p[2] < 0)
  1144 + || pos[1] < 0 || pos[2] < 0)
1088 1145 {
1089 1146 // std::cout << "Found Edge" << std::endl;
  1147 + running = false;
1090 1148 break;
1091 1149 }
1092 1150 //If this is the first step in the trace,
... ... @@ -1101,6 +1159,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1101 1159 //Has the template size gotten unreasonable?
1102 1160 if(m[0] > 75 || m[0] < 1){
1103 1161 // std::cout << "Magnitude Limit" << std::endl;
  1162 + running = false;
1104 1163 break;
1105 1164 }
1106 1165 else
... ... @@ -1108,12 +1167,18 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1108 1167 h = selectObject(pos, getDirection(), m[0]);
1109 1168 //Have we hit something previously traced?
1110 1169 if(h){
1111   -// std::cout << "Hit a Previous Line" << std::endl;
  1170 + running = false;
1112 1171 break;
1113 1172 }
1114 1173 else {
1115 1174 sk.TexCoord(m[0]);
1116 1175 sk.Vertex(p[0], p[1], p[2]);
  1176 + Bind(btexbufferID, bfboId, 27);
  1177 + CHECK_OPENGL_ERROR
  1178 + branchDetection();
  1179 + CHECK_OPENGL_ERROR
  1180 + Unbind();
  1181 + CHECK_OPENGL_ERROR
1117 1182 }
1118 1183 }
1119 1184 }
... ... @@ -1132,7 +1197,6 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1132 1197 glSelectBuffer(2048, selectBuf);
1133 1198 glDisable(GL_CULL_FACE);
1134 1199 (void) glRenderMode(GL_SELECT);
1135   -
1136 1200 //Init Names stack
1137 1201  
1138 1202 glInitNames();
... ... @@ -1151,7 +1215,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1151 1215 glMatrixMode(GL_PROJECTION);
1152 1216 glPushMatrix();
1153 1217 glLoadIdentity();
1154   - glOrtho(-mag/s, mag/s, -mag/s, mag/s, 0.0, mag/s/4.0);
  1218 + glOrtho(-mag/s, mag/s, -mag/s, mag/s, 0.0, mag/s/2.0);
1155 1219 glMatrixMode(GL_MODELVIEW);
1156 1220 glPushMatrix();
1157 1221 glLoadIdentity();
... ... @@ -1173,7 +1237,7 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1173 1237 CHECK_OPENGL_ERROR
1174 1238 glPopMatrix();
1175 1239  
1176   - glEnable(GL_CULL_FACE);
  1240 + // glEnable(GL_CULL_FACE);
1177 1241 hits = glRenderMode(GL_RENDER);
1178 1242 bool found_hits = processHits(hits, selectBuf);
1179 1243 return found_hits;
... ... @@ -1185,19 +1249,19 @@ class gl_spider : public virtual gl_texture&lt;T&gt;
1185 1249 processHits(GLint hits, GLuint buffer[])
1186 1250 {
1187 1251 GLuint names, *ptr;
1188   - printf("hits = %u\n", hits);
  1252 + //printf("hits = %u\n", hits);
1189 1253 ptr = (GLuint *) buffer;
1190 1254 for (int i = 0; i < hits; i++) { /* for each hit */
1191 1255 names = *ptr;
1192   - printf (" number of names for hit = %u\n", names);
  1256 + // printf (" number of names for hit = %u\n", names);
1193 1257 ptr++;
1194 1258 ptr++; //Skip the minimum depth value.
1195 1259 ptr++; //Skip the maximum depth value.
1196   - printf (" the name is ");
1197   - for (int j = 0; j < names; j++) { /* for each name */
1198   - printf ("%u ", *ptr); ptr++;
1199   - }
1200   - printf ("\n");
  1260 + // printf (" the name is ");
  1261 + // for (int j = 0; j < names; j++) { /* for each name */
  1262 + // printf ("%u ", *ptr); ptr++;
  1263 + // }
  1264 + // printf ("\n");
1201 1265 }
1202 1266 if(hits == 0)
1203 1267 return 0;
... ...