Commit 1d1a38e1b6ecff84b62496c6e2702d1f37736330

Authored by David Mayerich
2 parents 0129eeb3 df0a759f

Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib

stim/cuda/branch_detection.cuh
@@ -11,7 +11,7 @@ typedef unsigned int uint; @@ -11,7 +11,7 @@ typedef unsigned int uint;
11 11
12 12
13 std::vector< stim::vec<float> > 13 std::vector< stim::vec<float> >
14 -find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) 14 +find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y, int iter = 0)
15 { 15 {
16 float sigma = 2.0; 16 float sigma = 2.0;
17 unsigned int conn = 7; 17 unsigned int conn = 7;
@@ -22,7 +22,7 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) @@ -22,7 +22,7 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
22 stringstream name; 22 stringstream name;
23 23
24 24
25 - cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold); 25 + cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold, iter);
26 cudaDeviceSynchronize(); 26 cudaDeviceSynchronize();
27 27
28 28
stim/cuda/filter.cuh
@@ -26,6 +26,11 @@ namespace stim @@ -26,6 +26,11 @@ namespace stim
26 float* LoG; 26 float* LoG;
27 float* res; 27 float* res;
28 float* centers; 28 float* centers;
  29 +
  30 +//#ifdef DEBUG
  31 + float* print;
  32 +//#endif
  33 +
29 stim::cuda::cuda_texture tx; 34 stim::cuda::cuda_texture tx;
30 35
31 36
@@ -44,6 +49,13 @@ namespace stim @@ -44,6 +49,13 @@ namespace stim
44 HANDLE_ERROR( 49 HANDLE_ERROR(
45 cudaMalloc( (void**) &centers, DIM_Y*DIM_X*sizeof(float)) 50 cudaMalloc( (void**) &centers, DIM_Y*DIM_X*sizeof(float))
46 ); 51 );
  52 +
  53 +//#ifdef DEBUG
  54 + HANDLE_ERROR(
  55 + cudaMalloc( (void**) &print, DIM_Y*DIM_X*sizeof(float))
  56 + );
  57 +//#endif
  58 +
47 // checkCUDAerrors("Memory Allocation, Result"); 59 // checkCUDAerrors("Memory Allocation, Result");
48 } 60 }
49 61
@@ -58,6 +70,11 @@ namespace stim @@ -58,6 +70,11 @@ namespace stim
58 HANDLE_ERROR( 70 HANDLE_ERROR(
59 cudaFree(centers) 71 cudaFree(centers)
60 ); 72 );
  73 +//#ifdef DEBUG
  74 + HANDLE_ERROR(
  75 + cudaFree(print)
  76 + );
  77 +//#endif
61 free(LoG); 78 free(LoG);
62 } 79 }
63 80
@@ -89,7 +106,7 @@ namespace stim @@ -89,7 +106,7 @@ namespace stim
89 //Shared memory would be better. 106 //Shared memory would be better.
90 __global__ 107 __global__
91 void 108 void
92 - applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG){ 109 + applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG, float* p){
93 //R = floor(size/2) 110 //R = floor(size/2)
94 //THIS IS A NAIVE WAY TO DO IT, and there is a better way) 111 //THIS IS A NAIVE WAY TO DO IT, and there is a better way)
95 112
@@ -101,16 +118,15 @@ namespace stim @@ -101,16 +118,15 @@ namespace stim
101 // float val = 0; 118 // float val = 0;
102 float tu = (x-kr+xi)/(float)DIM_X; 119 float tu = (x-kr+xi)/(float)DIM_X;
103 float tv = (y-kr+yi)/(float)DIM_Y; 120 float tv = (y-kr+yi)/(float)DIM_Y;
  121 + int idx = y*DIM_X+x;
104 shared[xi][yi] = gpuLoG[yi*kl+xi]*(255.0-(float)tex2D<unsigned char>(texIn, tu, tv)); 122 shared[xi][yi] = gpuLoG[yi*kl+xi]*(255.0-(float)tex2D<unsigned char>(texIn, tu, tv));
105 __syncthreads(); 123 __syncthreads();
106 -  
107 124
108 //x = max(0,x); 125 //x = max(0,x);
109 //x = min(x, width-1); 126 //x = min(x, width-1);
110 //y = max(y, 0); 127 //y = max(y, 0);
111 //y = min(y, height - 1); 128 //y = min(y, height - 1);
112 129
113 - int idx = y*DIM_X+x;  
114 // int k_idx; 130 // int k_idx;
115 for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1) 131 for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
116 { 132 {
@@ -135,11 +151,12 @@ namespace stim @@ -135,11 +151,12 @@ namespace stim
135 __syncthreads(); 151 __syncthreads();
136 if(xi == 0 && yi == 0) 152 if(xi == 0 && yi == 0)
137 res[idx] = shared[0][0]; 153 res[idx] = shared[0][0];
  154 +
138 } 155 }
139 156
140 extern "C" 157 extern "C"
141 float * 158 float *
142 - get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold) 159 + get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold, int iter = 0)
143 { 160 {
144 tx.SetTextureCoordinates(1); 161 tx.SetTextureCoordinates(1);
145 tx.SetAddressMode(1, 3); 162 tx.SetAddressMode(1, 3);
@@ -153,7 +170,14 @@ namespace stim @@ -153,7 +170,14 @@ namespace stim
153 dim3 numBlocks(DIM_X, DIM_Y); 170 dim3 numBlocks(DIM_X, DIM_Y);
154 dim3 threadsPerBlock(sizeK, sizeK); 171 dim3 threadsPerBlock(sizeK, sizeK);
155 172
156 - applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG); 173 + applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG, print);
  174 +
  175 + #ifdef DEBUG
  176 + stringstream name;
  177 + name.str("");
  178 + name << "Fiber Cylinder " << iter << ".bmp";
  179 + stim::gpu2image<float>(res, name.str(), DIM_X, DIM_Y, 0, 255);
  180 + #endif
157 181
158 182
159 stim::cuda::gpu_local_max<float>(centers, res, threshold, conn, DIM_X, DIM_Y); 183 stim::cuda::gpu_local_max<float>(centers, res, threshold, conn, DIM_X, DIM_Y);
stim/cuda/testKernel.cuh
@@ -53,11 +53,31 @@ @@ -53,11 +53,31 @@
53 53
54 float valIn = tex2D<unsigned char>(texIn, x, y); 54 float valIn = tex2D<unsigned char>(texIn, x, y);
55 float templa = templ(x, 32)*255.0; 55 float templa = templ(x, 32)*255.0;
56 - print[idx] = abs(valIn-templa); ///temporary 56 + //print[idx] = abs(valIn-templa); ///temporary
  57 + print[idx] = abs(valIn);
57 //print[idx] = abs(templa); ///temporary 58 //print[idx] = abs(templa); ///temporary
58 59
59 } 60 }
60 61
  62 + ///Find the difference of the given set of samples and the template
  63 + ///using cuda acceleration.
  64 + ///@param stim::cuda::cuda_texture t --stim texture that holds all the references
  65 + /// to the data.
  66 + ///@param float* result --a pointer to the memory that stores the result.
  67 + __global__
  68 + //void get_diff (float *result)
  69 + void get_diff2 (cudaTextureObject_t texIn, float *print, int dx)
  70 + {
  71 + int x = threadIdx.x + blockIdx.x * blockDim.x;
  72 + int y = threadIdx.y + blockIdx.y * blockDim.y;
  73 + int idx = y*dx+x;
  74 + // int idx = y*16+x;
  75 +
  76 + float valIn = tex2D<unsigned char>(texIn, x, y);
  77 + print[idx] = abs(valIn); ///temporary
  78 +
  79 + }
  80 +
61 void test(cudaTextureObject_t tObj, int x, int y, std::string nam) 81 void test(cudaTextureObject_t tObj, int x, int y, std::string nam)
62 { 82 {
63 83
@@ -86,3 +106,31 @@ @@ -86,3 +106,31 @@
86 cleanUP(); 106 cleanUP();
87 } 107 }
88 108
  109 +
  110 + void test(cudaTextureObject_t tObj, int x, int y, std::string nam, int iter)
  111 + {
  112 +
  113 + //Bind the Texture in GL and allow access to cuda.
  114 +
  115 + //initialize the return arrays.
  116 +
  117 + initArray(x,y);
  118 + dim3 numBlocks(1, y);
  119 + dim3 threadsPerBlock(x, 1);
  120 + int max_threads = stim::maxThreadsPerBlock();
  121 + //dim3 threads(max_threads, 1);
  122 + //dim3 blocks(x / threads.x + 1, y);
  123 + //dim3 numBlocks(2, 2);
  124 + //dim3 threadsPerBlock(8, 108);
  125 +
  126 +
  127 +// get_diff <<< blocks, threads >>> (tx.getTexture(), print);
  128 + get_diff2 <<< numBlocks, threadsPerBlock >>> (tObj, print, x);
  129 +
  130 + cudaDeviceSynchronize();
  131 + stringstream name; //for debugging
  132 + name << nam.c_str();
  133 + stim::gpu2image<float>(print, name.str(),x,y,0,255);
  134 +
  135 + cleanUP();
  136 + }
stim/envi/envi_header.h
@@ -8,6 +8,7 @@ @@ -8,6 +8,7 @@
8 #include <vector> 8 #include <vector>
9 #include <algorithm> 9 #include <algorithm>
10 #include <stdlib.h> 10 #include <stdlib.h>
  11 +#include <cmath>
11 12
12 //information from an ENVI header file 13 //information from an ENVI header file
13 //A good resource can be found here: http://www.exelisvis.com/docs/enviheaderfiles.html 14 //A good resource can be found here: http://www.exelisvis.com/docs/enviheaderfiles.html
stim/gl/gl_spider.h
1 -#ifndef STIM_GL_SPIDER_H 1 + #ifndef STIM_GL_SPIDER_H
2 #define STIM_GL_SPIDER_H 2 #define STIM_GL_SPIDER_H
3 3
4 //#include <GL/glew.h> 4 //#include <GL/glew.h>
@@ -27,7 +27,6 @@ @@ -27,7 +27,6 @@
27 #include <stim/cuda/branch_detection.cuh> 27 #include <stim/cuda/branch_detection.cuh>
28 #include "../../../volume-spider/glnetwork.h" 28 #include "../../../volume-spider/glnetwork.h"
29 #include <stim/visualization/cylinder.h> 29 #include <stim/visualization/cylinder.h>
30 -#include <stim/cuda/testKernel.cuh>  
31 #include <iostream> 30 #include <iostream>
32 #include <fstream> 31 #include <fstream>
33 #ifdef TIMING 32 #ifdef TIMING
@@ -40,6 +39,9 @@ @@ -40,6 +39,9 @@
40 #include <ctime> 39 #include <ctime>
41 #endif 40 #endif
42 41
  42 +#ifdef DEBUG
  43 + #include <stim/cuda/testKernel.cuh>
  44 +#endif
43 45
44 namespace stim 46 namespace stim
45 { 47 {
@@ -138,11 +140,13 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -138,11 +140,13 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
138 stim::cuda::cuda_texture t_pos; //cuda_texture object used as an interface between OpenGL and cuda for position vectors. 140 stim::cuda::cuda_texture t_pos; //cuda_texture object used as an interface between OpenGL and cuda for position vectors.
139 stim::cuda::cuda_texture t_mag; //cuda_texture object used as an interface between OpenGL and cuda for size vectors. 141 stim::cuda::cuda_texture t_mag; //cuda_texture object used as an interface between OpenGL and cuda for size vectors.
140 stim::cuda::cuda_texture t_len; //cuda_texture object used as an interface between OpenGL and cuda for size vectors. 142 stim::cuda::cuda_texture t_len; //cuda_texture object used as an interface between OpenGL and cuda for size vectors.
141 - 143 +
  144 + int last_fiber; //variable that tracks the last fiber hit during tracing. -1 if no fiber was hit.
  145 +
142 146
143 #ifdef DEBUG 147 #ifdef DEBUG
144 - stringstream name;  
145 int iter; 148 int iter;
  149 + stringstream name;
146 int iter_pos; 150 int iter_pos;
147 int iter_dir; 151 int iter_dir;
148 int iter_siz; 152 int iter_siz;
@@ -292,6 +296,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -292,6 +296,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
292 DrawLongCylinder(n, l_template, l_square); ///Draw the cylinder. 296 DrawLongCylinder(n, l_template, l_square); ///Draw the cylinder.
293 stim::cylinder<float> cyl(cL, cM); 297 stim::cylinder<float> cyl(cL, cM);
294 std::vector< stim::vec<float> > result = find_branch(cylinder_texID, GL_TEXTURE_2D, n*l_square, (cL.size()-1)*l_template); ///find all the centers in cuda 298 std::vector< stim::vec<float> > result = find_branch(cylinder_texID, GL_TEXTURE_2D, n*l_square, (cL.size()-1)*l_template); ///find all the centers in cuda
  299 +
295 stim::vec3<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]); ///the borders of the texture. 300 stim::vec3<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]); ///the borders of the texture.
296 float pval; //pvalue associated with the points on the cylinder. 301 float pval; //pvalue associated with the points on the cylinder.
297 if(!result.empty()) ///if we have any points 302 if(!result.empty()) ///if we have any points
@@ -315,7 +320,8 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -315,7 +320,8 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
315 } 320 }
316 stim::vec3<float> v = cyl.surf(pval, result[i][0]); ///find the coordinates of the point at pval on the surface in tissue space. 321 stim::vec3<float> v = cyl.surf(pval, result[i][0]); ///find the coordinates of the point at pval on the surface in tissue space.
317 stim::vec3<float> di = cyl.p(pval); ///find the coord of v in tissue space projected on the centerline. 322 stim::vec3<float> di = cyl.p(pval); ///find the coord of v in tissue space projected on the centerline.
318 - float rad = cyl.r(pval)/2; ///find the radius at the pvalue's location 323 + float rad = cyl.r(pval); ///find the radius at the pvalue's location
  324 + // float rad = cyl.r(pval)/2; ///find the radius at the pvalue's location
319 if( 325 if(
320 !(v[0] > size[0] || v[1] > size[1] 326 !(v[0] > size[0] || v[1] > size[1]
321 || v[2] > size[2] || v[0] < 0 327 || v[2] > size[2] || v[0] < 0
@@ -372,7 +378,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -372,7 +378,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
372 ///Stored in a display list. 378 ///Stored in a display list.
373 ///uses the default d vector <0,0,1> 379 ///uses the default d vector <0,0,1>
374 void 380 void
375 - genDirectionVectors(float solidAngle = stim::PI/2) 381 + genDirectionVectors(float solidAngle = 3*stim::PI/4)
376 { 382 {
377 383
378 //Set up the vectors necessary for Rectangle creation. 384 //Set up the vectors necessary for Rectangle creation.
@@ -954,7 +960,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -954,7 +960,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
954 iter_dir = 0; 960 iter_dir = 0;
955 iter_siz = 0; 961 iter_siz = 0;
956 #endif 962 #endif
957 - stepsize = 3.0; 963 + stepsize = 6.0;
958 n_pixels = 16.0; 964 n_pixels = 16.0;
959 965
960 srand(100); 966 srand(100);
@@ -1316,20 +1322,20 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1316,20 +1322,20 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1316 void 1322 void
1317 saveNetwork(std::string name) 1323 saveNetwork(std::string name)
1318 { 1324 {
1319 -/* stim::glObj<float> sk; 1325 + stim::glObj<float> sk1;
1320 for(int i = 0; i < nt.sizeE(); i++) 1326 for(int i = 0; i < nt.sizeE(); i++)
1321 { 1327 {
1322 - std::vector<stim::vec< float > > cm = nt.getEdgeCenterLineMag(i); 1328 + std::vector<float> cm = nt.getEdgeCenterLineMag(i);
1323 std::vector<stim::vec3< float > > ce = nt.getEdgeCenterLine(i); 1329 std::vector<stim::vec3< float > > ce = nt.getEdgeCenterLine(i);
1324 - sk.Begin(stim::OBJ_LINE); 1330 + sk1.Begin(stim::OBJ_LINE);
1325 for(int j = 0; j < ce.size(); j++) 1331 for(int j = 0; j < ce.size(); j++)
1326 { 1332 {
1327 - sk.TexCoord(cm[j][0]);  
1328 - sk.Vertex(ce[j][0], ce[j][1], ce[j][2]); 1333 + sk1.TexCoord(cm[j]);
  1334 + sk1.Vertex(ce[j][0], ce[j][1], ce[j][2]);
1329 } 1335 }
1330 - sk.End(); 1336 + sk1.End();
1331 } 1337 }
1332 -*/ sk.save(name); 1338 + sk1.save(name);
1333 } 1339 }
1334 1340
1335 ///Depreciated, but might be reused later() 1341 ///Depreciated, but might be reused later()
@@ -1377,20 +1383,31 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1377,20 +1383,31 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1377 Step() 1383 Step()
1378 { 1384 {
1379 #ifdef DEBUG 1385 #ifdef DEBUG
1380 - std::cerr << "Took a step" << std::endl; 1386 + std::cerr << "Took a step";
1381 #endif 1387 #endif
1382 Bind(direction_texID, direction_buffID, numSamples, n_pixels); 1388 Bind(direction_texID, direction_buffID, numSamples, n_pixels);
1383 CHECK_OPENGL_ERROR 1389 CHECK_OPENGL_ERROR
1384 findOptimalDirection(); 1390 findOptimalDirection();
1385 Unbind(); 1391 Unbind();
  1392 + #ifdef DEBUG
  1393 + std::cerr << " " << current_cost;
  1394 + #endif
1386 Bind(position_texID, position_buffID, numSamplesPos, n_pixels); 1395 Bind(position_texID, position_buffID, numSamplesPos, n_pixels);
1387 findOptimalPosition(); 1396 findOptimalPosition();
1388 Unbind(); 1397 Unbind();
  1398 + #ifdef DEBUG
  1399 + std::cerr << " " << current_cost;
  1400 + #endif
1389 Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels); 1401 Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels);
1390 findOptimalRadius(); 1402 findOptimalRadius();
1391 Unbind(); 1403 Unbind();
  1404 + #ifdef DEBUG
  1405 + std::cerr << " " << current_cost;
  1406 + #endif
1392 CHECK_OPENGL_ERROR 1407 CHECK_OPENGL_ERROR
1393 - 1408 + #ifdef DEBUG
  1409 + std::cerr << std::endl;
  1410 + #endif
1394 return current_cost; 1411 return current_cost;
1395 } 1412 }
1396 1413
@@ -1517,9 +1534,6 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1517,9 +1534,6 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1517 while(!Empty()) 1534 while(!Empty())
1518 { 1535 {
1519 //clear the currently traced line and start a new one. 1536 //clear the currently traced line and start a new one.
1520 - cL.clear();  
1521 - cM.clear();  
1522 - cD.clear();  
1523 curSeed = seeds.top(); 1537 curSeed = seeds.top();
1524 curSeedVec = seedsvecs.top(); 1538 curSeedVec = seedsvecs.top();
1525 curSeedMag = seedsmags.top(); 1539 curSeedMag = seedsmags.top();
@@ -1539,9 +1553,9 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1539,9 +1553,9 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1539 // findOptimalDirection(); 1553 // findOptimalDirection();
1540 // Unbind(); 1554 // Unbind();
1541 //THIS IS EXPERIMENTAL 1555 //THIS IS EXPERIMENTAL
1542 - Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels);  
1543 - findOptimalRadius();  
1544 - Unbind(); 1556 + // Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels);
  1557 + // findOptimalRadius();
  1558 + // Unbind();
1545 //THIS IS EXPERIMENTAL 1559 //THIS IS EXPERIMENTAL
1546 1560
1547 // cL.push_back(curSeed); 1561 // cL.push_back(curSeed);
@@ -1593,17 +1607,17 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1593,17 +1607,17 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1593 ds[0], ds[1], ds[2], 1607 ds[0], ds[1], ds[2],
1594 ups[0], ups[1], ups[2]); 1608 ups[0], ups[1], ups[2]);
1595 ///Set the look at distance 1609 ///Set the look at distance
1596 - sk.Render(); ///Render the network  
1597 -// nt.Render(); 1610 +// sk.Render(); ///Render the network
  1611 + nt.Render();
1598 1612
1599 CHECK_OPENGL_ERROR 1613 CHECK_OPENGL_ERROR
1600 1614
1601 1615
1602 - glLoadName((int) sk.numL()); ///Load all the names  
1603 -// glLoadName(nt.sizeE()); 1616 +// glLoadName((int) sk.numL()); ///Load all the names
  1617 + glLoadName(nt.sizeE());
1604 1618
1605 - sk.RenderLine(cL); ///Render the current line.  
1606 -// nt.RenderLine(cL); 1619 +// sk.RenderLine(cL); ///Render the current line.
  1620 + nt.RenderLine(cL);
1607 1621
1608 // glPopName(); 1622 // glPopName();
1609 glFlush(); ///Flush the buffer 1623 glFlush(); ///Flush the buffer
@@ -1654,55 +1668,51 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1654,55 +1668,51 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1654 cM.clear(); 1668 cM.clear();
1655 } 1669 }
1656 1670
1657 -/* 1671 +
1658 void 1672 void
1659 - addToNetwork(pair<stim::fiber<float>, int> in, stim::vec3<float> spos,  
1660 - stim::vec<float> smag, stim::vec3<float> sdir) 1673 + addToNetwork(std::vector<stim::vec3<float> > L, std::vector<float > M, stim::vec3<float> spos, stim::vec3<float> sdir, float smag)
1661 { 1674 {
1662 - #ifdef TIMING  
1663 - double s = std::clock();  
1664 - #endif  
1665 -  
1666 - std::vector<stim::vec3<float> > ce = in.first.centerline();  
1667 - std::vector<stim::vec<float> > cm = in.first.centerlinemag();  
1668 //if the fiber is longer than 2 steps (the number it takes to diverge) 1675 //if the fiber is longer than 2 steps (the number it takes to diverge)
1669 - if(ce.size() > 3) 1676 + if(L.size() > 3)
1670 { 1677 {
1671 //if we did not hit a fiber 1678 //if we did not hit a fiber
1672 - if(in.second == -1) 1679 + if(last_fiber == -1)
1673 { 1680 {
1674 - spos[0] = spos[0]-sdir[0]*smag[0]/2.;  
1675 - spos[1] = spos[1]-sdir[1]*smag[0]/2.;  
1676 - spos[2] = spos[2]-sdir[2]*smag[0]/2.;  
1677 - int h = selectObject(spos, -sdir, smag[0]); 1681 + spos[0] = spos[0]-sdir[0]*smag;
  1682 + spos[1] = spos[1]-sdir[1]*smag;
  1683 + spos[2] = spos[2]-sdir[2]*smag;
  1684 + int h = selectObject(spos, -sdir, smag);
1678 //did we start with a fiber? 1685 //did we start with a fiber?
1679 if(h != -1 && h < nt.sizeE()) 1686 if(h != -1 && h < nt.sizeE())
1680 - nt.addEdge(ce, cm, h, -1); 1687 + nt.addEdge(L, M, h, -1);
1681 else 1688 else
1682 - nt.addEdge(ce, cm, -1, -1); 1689 + nt.addEdge(L, M, -1, -1);
1683 } 1690 }
1684 //if we hit a fiber? 1691 //if we hit a fiber?
1685 - else if(in.second != -1) 1692 + else if(last_fiber != -1)
1686 { 1693 {
1687 - nt.addEdge(ce,cm,-1, in.second);  
1688 - spos[0] = spos[0]-sdir[0]*smag[0]/2.;  
1689 - spos[1] = spos[1]-sdir[1]*smag[0]/2.;  
1690 - spos[2] = spos[2]-sdir[2]*smag[0]/2.;  
1691 - int h = selectObject(spos, -sdir, smag[0]); 1694 + nt.addEdge(L, M, -1, last_fiber);
  1695 + spos[0] = spos[0]-sdir[0]*smag;
  1696 + spos[1] = spos[1]-sdir[1]*smag;
  1697 + spos[2] = spos[2]-sdir[2]*smag;
  1698 + int h = selectObject(spos, -sdir, smag);
1692 //did start with a fiber? 1699 //did start with a fiber?
1693 if(h != -1 && h < nt.sizeE()){ 1700 if(h != -1 && h < nt.sizeE()){
1694 // std::cout << "got here double" << smag.str() << std::endl; 1701 // std::cout << "got here double" << smag.str() << std::endl;
1695 - nt.addEdge(ce,cm, h, in.second);  
1696 - } else { nt.addEdge(ce,cm, -1, -1);} 1702 + nt.addEdge(L, M, h, last_fiber);
  1703 + }
  1704 + else
  1705 + {
  1706 + nt.addEdge(L, M, -1, -1);
  1707 + }
1697 } 1708 }
1698 } 1709 }
1699 1710
1700 - #ifdef TIMING  
1701 - double nt = (std::clock() - s) / (double) CLOCKS_PER_SEC;  
1702 - network_time += nt * 1000.0; 1711 + #ifdef DEBUG
  1712 + iter++;
1703 #endif 1713 #endif
1704 } 1714 }
1705 -*/ 1715 +/*
1706 void 1716 void
1707 addToNetwork(std::vector<stim::vec3<float> > L, std::vector<float > M) 1717 addToNetwork(std::vector<stim::vec3<float> > L, std::vector<float > M)
1708 { 1718 {
@@ -1722,7 +1732,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1722,7 +1732,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1722 #endif 1732 #endif
1723 } 1733 }
1724 } 1734 }
1725 - 1735 +*/
1726 1736
1727 void 1737 void
1728 printSizes() 1738 printSizes()
@@ -1735,22 +1745,31 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1735,22 +1745,31 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1735 traceLine(stim::vec3<float> pos, float mag, int min_cost) 1745 traceLine(stim::vec3<float> pos, float mag, int min_cost)
1736 { 1746 {
1737 //starting (seed) position and magnitude. 1747 //starting (seed) position and magnitude.
  1748 + last_fiber = -1;
  1749 + cL.clear();
  1750 + cM.clear();
  1751 + cD.clear();
  1752 +
1738 stim::vec3<float> spos = getPosition(); 1753 stim::vec3<float> spos = getPosition();
  1754 + stim::vec3<float> sdir = getDirection();
1739 float smag = getMagnitude(); 1755 float smag = getMagnitude();
1740 - stim::vec3<float> sdir = getDirection();  
1741 1756
1742 -// Bind();  
1743 -// sk.Begin(stim::OBJ_LINE); 1757 + setPosition(pos);
  1758 + setMagnitude(mag);
1744 1759
  1760 + cL.push_back(p);
  1761 + cD.push_back(d);
  1762 + cM.push_back(m);
  1763 +// stim::vec3<float> spos = getPosition();
  1764 +// float smag = getMagnitude();
  1765 +// stim::vec3<float> sdir = getDirection();
1745 1766
1746 - sk.createFromSelf(GL_SELECT);  
1747 -// nt.createFromSelf(GL_SELECT); 1767 +// Bind();
  1768 +// sk.Begin(stim::OBJ_LINE);
1748 1769
1749 - cL.push_back(pos);  
1750 - cM.push_back(mag);  
1751 1770
1752 -// setPosition(pos);  
1753 -// setMagnitude(mag); 1771 + //sk.createFromSelf(GL_SELECT);
  1772 + nt.createFromSelf(GL_SELECT);
1754 int h; 1773 int h;
1755 bool started = false; 1774 bool started = false;
1756 bool running = true; 1775 bool running = true;
@@ -1761,7 +1780,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1761,7 +1780,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1761 if (cost > min_cost){ 1780 if (cost > min_cost){
1762 running = false; 1781 running = false;
1763 branchDetection2(); 1782 branchDetection2();
1764 - addToNetwork(cL, cM); 1783 + addToNetwork(cL, cM, spos, sdir, smag);
1765 #ifdef DEBUG 1784 #ifdef DEBUG
1766 std::cerr << "the cost of " << cost << " > " << min_cost << std::endl; 1785 std::cerr << "the cost of " << cost << " > " << min_cost << std::endl;
1767 #endif 1786 #endif
@@ -1769,13 +1788,14 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1769,13 +1788,14 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1769 } else { 1788 } else {
1770 //Have we found the edge of the map? 1789 //Have we found the edge of the map?
1771 pos = getPosition(); 1790 pos = getPosition();
1772 - if(pos[0] > size[0] || pos[1] > size[1]  
1773 - || pos[2] > size[2] || pos[0] < 0  
1774 - || pos[1] < 0 || pos[2] < 0) 1791 + if(p[0] > size[0] || p[1] > size[1]
  1792 + || p[2] > size[2] || p[0] < 0
  1793 + || p[1] < 0 || p[2] < 0)
1775 { 1794 {
1776 running = false; 1795 running = false;
1777 branchDetection2(); 1796 branchDetection2();
1778 - addToNetwork(cL, cM); 1797 + // addToNetwork(cL, cM);
  1798 + addToNetwork(cL, cM, spos, sdir, smag);
1779 #ifdef DEBUG 1799 #ifdef DEBUG
1780 std::cerr << "I hit and edge" << std::endl; 1800 std::cerr << "I hit and edge" << std::endl;
1781 #endif 1801 #endif
@@ -1790,10 +1810,11 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1790,10 +1810,11 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1790 } 1810 }
1791 //Has the template size gotten unreasonable? 1811 //Has the template size gotten unreasonable?
1792 mag = getMagnitude(); 1812 mag = getMagnitude();
1793 - if(mag > 75 || mag < 1){ 1813 + if(m > 75 || m < 1){
1794 running = false; 1814 running = false;
1795 branchDetection2(); 1815 branchDetection2();
1796 - addToNetwork(cL, cM); 1816 + // addToNetwork(cL, cM);
  1817 + addToNetwork(cL, cM, spos, sdir, smag);
1797 #ifdef DEBUG 1818 #ifdef DEBUG
1798 std::cerr << "The templates are too big" << std::endl; 1819 std::cerr << "The templates are too big" << std::endl;
1799 #endif 1820 #endif
@@ -1807,13 +1828,16 @@ class gl_spider // : public virtual gl_texture&lt;T&gt; @@ -1807,13 +1828,16 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1807 #ifdef DEBUG 1828 #ifdef DEBUG
1808 std::cerr << "I hit the fiber " << h << std::endl; 1829 std::cerr << "I hit the fiber " << h << std::endl;
1809 #endif 1830 #endif
  1831 + last_fiber = h;
1810 running = false; 1832 running = false;
1811 branchDetection2(); 1833 branchDetection2();
1812 - addToNetwork(cL, cM); 1834 + // addToNetwork(cL, cM);
  1835 + addToNetwork(cL, cM, spos, sdir, smag);
1813 break; 1836 break;
1814 } 1837 }
1815 else { 1838 else {
1816 - cL.push_back(stim::vec3<float>(p[0], p[1],p[2])); 1839 + cL.push_back(p);
  1840 + cD.push_back(d);
1817 cM.push_back(m); 1841 cM.push_back(m);
1818 // Unbind(); 1842 // Unbind();
1819 CHECK_OPENGL_ERROR 1843 CHECK_OPENGL_ERROR
stim/gl/gl_texture.h
@@ -30,9 +30,9 @@ class gl_texture : public virtual image_stack&lt;T, F&gt; @@ -30,9 +30,9 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
30 GLenum cpu_type; 30 GLenum cpu_type;
31 GLenum gpu_type; 31 GLenum gpu_type;
32 GLenum format; //format for the texture (GL_RGBA, GL_LUMINANCE, etc.) 32 GLenum format; //format for the texture (GL_RGBA, GL_LUMINANCE, etc.)
33 - using image_stack<T>::R; 33 + using image_stack<T,F>::R;
34 //using image_stack<T>::S; 34 //using image_stack<T>::S;
35 - using image_stack<T>::ptr; 35 + using image_stack<T,F>::ptr;
36 36
37 /// Sets the internal texture_type, based on the data dimensions 37 /// Sets the internal texture_type, based on the data dimensions
38 void setTextureType(){ 38 void setTextureType(){
@@ -247,7 +247,7 @@ class gl_texture : public virtual image_stack&lt;T, F&gt; @@ -247,7 +247,7 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
247 } 247 }
248 248
249 ///returns the dimentions of the data in the x, y, z directions. 249 ///returns the dimentions of the data in the x, y, z directions.
250 - vec<int> getSize(){ 250 + stim::vec<int> getSize(){
251 stim::vec<int> size(R[1], R[2], R[3]); 251 stim::vec<int> size(R[1], R[2], R[3]);
252 return size; 252 return size;
253 } 253 }
@@ -282,7 +282,7 @@ class gl_texture : public virtual image_stack&lt;T, F&gt; @@ -282,7 +282,7 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
282 ///@param file_mask specifies the file(s) to be loaded 282 ///@param file_mask specifies the file(s) to be loaded
283 /// Sets the path and calls the loader on that path. 283 /// Sets the path and calls the loader on that path.
284 void load_images(std::string file_mask){ 284 void load_images(std::string file_mask){
285 - image_stack<T>::load_images(file_mask); //load images 285 + image_stack<T, F>::load_images(file_mask); //load images
286 guess_parameters(); 286 guess_parameters();
287 } 287 }
288 288
@@ -292,13 +292,18 @@ class gl_texture : public virtual image_stack&lt;T, F&gt; @@ -292,13 +292,18 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
292 return texID; 292 return texID;
293 } 293 }
294 294
295 - 295 +
296 T* getData(){ 296 T* getData(){
297 return ptr; 297 return ptr;
298 } 298 }
299 -  
300 - 299 +
  300 + void setData(T* rts)
  301 + {
  302 +
  303 + }
  304 +
301 }; 305 };
  306 +
302 } 307 }
303 308
304 309
stim/grids/image_stack.h
@@ -17,13 +17,13 @@ class image_stack : public virtual stim::grid&lt;T, 4, F&gt;{ @@ -17,13 +17,13 @@ class image_stack : public virtual stim::grid&lt;T, 4, F&gt;{
17 17
18 protected: 18 protected:
19 //using stim::grid<T, 4>::S; 19 //using stim::grid<T, 4>::S;
20 - using stim::grid<T, 4>::R;  
21 - using stim::grid<T, 4>::ptr;  
22 - using stim::grid<T, 4>::read; 20 + using stim::grid<T, 4, F>::R;
  21 + using stim::grid<T, 4, F>::ptr;
  22 + using stim::grid<T, 4, F>::read;
23 23
24 public: 24 public:
25 //default constructor 25 //default constructor
26 - image_stack() : grid<T, 4>() { 26 + image_stack() : grid<T, 4, F>() {
27 27
28 } 28 }
29 29
@@ -113,7 +113,8 @@ public: @@ -113,7 +113,8 @@ public:
113 /// @param i is the page to be saved 113 /// @param i is the page to be saved
114 void save_image(std::string file_name, unsigned int i){ 114 void save_image(std::string file_name, unsigned int i){
115 stim::image<T> I; //create an image 115 stim::image<T> I; //create an image
116 - I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid 116 + I.set_interleaved(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid
  117 +// I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid
117 I.save(file_name); 118 I.save(file_name);
118 } 119 }
119 120
stim/math/filters/gauss3.h 0 โ†’ 100644
  1 +#ifndef STIM_CUDA_GAUSS3_H
  2 +#define STIM_CUDA_GAUSS3_H
  3 +#include <stim/math/filters/sepconv3.h>
  4 +#include <stim/math/filters/gauss2.h>
  5 +#include <stim/math/constants.h>
  6 +
  7 +namespace stim
  8 +{
  9 + ///Perform a 3D gaussian convolution on an input image.
  10 + ///@param in is a pointer to the input data.
  11 + ///@param dimx is the size of in* in the x direction.
  12 + ///@param dimx is the size of in* in the y direction.
  13 + ///@param dimx is the size of in* in the z direction.
  14 + ///@param stdx is the standard deviation (in pixels) along the x axis.
  15 + ///@param stdy is the standard deviation (in pixels) along the y axis.
  16 + ///@param nstds specifies the number of standard deviations of the Gaussian that will be k ept in the kernel.
  17 + template<typename T, typename K>
  18 + void cpu_gauss3(T* in, K dimx, K dimy, K dimz, K stdx, K stdy, K stdz, size_t nstds = 3)
  19 + {
  20 + //Set up the sizes of the gaussian Kernels.
  21 + size_t kx = stdx * nstds * 2;
  22 + size_t ky = stdy * nstds * 2;
  23 + size_t kz = stdz * nstds * 2;
  24 +
  25 + //Set up the sizes of the new output, which will be kx, ky, kz, smaller than the input.
  26 + size_t X = dimx - kx +1;
  27 + size_t Y = dimy - ky +1;
  28 + size_t Z = dimz - kz +1;
  29 + T* out = (T*) malloc(X*Y*Z* sizeof(T));
  30 +
  31 + ///Set up the memory that will store the gaussians
  32 + K* gaussx = (K*)malloc(kx *sizeof(K));
  33 + K* gaussy = (K*)malloc(ky *sizeof(K));
  34 + K* gaussz = (K*)malloc(kz *sizeof(K));
  35 +
  36 + ///Set up the midpoints of the gaussians.
  37 + K midgaussx = (K) kx/ (K)2;
  38 + K midgaussy = (K) ky/ (K)2;
  39 + K midgaussz = (K) kz/ (K)2;
  40 +
  41 + ///Evaluate the kernels in each cardinal direction.
  42 + for(size_t i = 0; i < kx; i++)
  43 + gaussx[i] = gauss1d((K) i, midgaussx, stdx);
  44 +
  45 + for(size_t i = 0; i < kx; i++)
  46 + gaussy[i] = gauss1d((K) i, midgaussy, stdy);
  47 +
  48 + for(size_t i = 0; i < kx; i++)
  49 + gaussz[i] = gauss1d((K) i, midgaussz, stdz);
  50 +
  51 + cpu_sepconv3(out, in, gaussx, gaussy, gaussz, dimx, dimy, dimz, kx, ky, kz);
  52 +
  53 + }
  54 +}
  55 +#endif
stim/math/filters/sepconv2.h
@@ -86,4 +86,4 @@ namespace stim { @@ -86,4 +86,4 @@ namespace stim {
86 } 86 }
87 } 87 }
88 88
89 -#endif  
90 \ No newline at end of file 89 \ No newline at end of file
  90 +#endif
stim/math/filters/sepconv3.h 0 โ†’ 100644
  1 +#ifndef STIM_CUDA_SEPCONV3_H
  2 +#define STIM_CUDA_SEPCONV3_H
  3 +
  4 +#include <stim/math/filters/conv2.h>
  5 +#include <stim/math/filters/sepconv2.h>
  6 +#ifdef __CUDACC__
  7 + #include <stim/cuda/cudatools.h>
  8 + #include <stim/cuda/sharedmem.cuh>
  9 +#endif
  10 +
  11 +namespace stim
  12 +{
  13 +#ifdef __CUDACC__
  14 + template<typename T, typename K>
  15 + void gpu_sepconv3(T* out, T* in, K* k0, K* k1, K* k2, size_t dimx, size_t dimy, size_t dimz, size_t kx, size_t ky, size_t kz)
  16 +{
  17 +
  18 + size_t X = dimx - kx + 1;
  19 + size_t Y = dimy - ky + 1;
  20 + size_t Z = dimz - kz + 1;
  21 +
  22 + T* temp_out;
  23 + int idx_IN;
  24 + int idx_OUT;
  25 + HANDLE_ERROR(cudaMalloc(&temp_out, X*Y*dimz*sizeof(T)));
  26 +
  27 + for(int i = 0; i < dimz; i++)
  28 + {
  29 + idx_IN = (dimx*dimy)*i-i;
  30 + idx_OUT = (X*Y)*i-i;
  31 + gpu_sepconv2(&temp_out[idx_OUT], &in[idx_IN], k0, k1, dimx, dimy, kx, ky);
  32 + }
  33 +
  34 + cudaDeviceProp p;
  35 + HANDLE_ERROR(cudaGetDeviceProperties(&p, 0));
  36 + size_t tmax = p.maxThreadsPerBlock;
  37 +
  38 + dim3 numThreads(sqrt(tmax), sqrt(tmax));
  39 + dim3 numBlocks(X*Y/numThreads.x +1, dimz/numThreads.y + 1);
  40 + size_t sharedMem = (numThreads.x + kz - 1) * numThreads.y * sizeof(T);
  41 + if(sharedMem > p.sharedMemPerBlock)
  42 + {
  43 + std::cout << "Error in stim::gpu_sepconv3() - insufficient shared memory for this kernel." << std::endl;
  44 + exit(1);
  45 + }
  46 + kernel_conv2 <<< numBlocks, numThreads, sharedMem >>> (out, temp_out, k2, X*Y, dimz, 1, kz);
  47 + HANDLE_ERROR(cudaFree(temp_out));
  48 +
  49 +
  50 +}
  51 +#endif
  52 +
  53 + //Performs a separable convolution of a 3D image. Only valid pixels based on the kernel ar e returned.
  54 + // As a result, the output image will be smaller than the input image by (kx-1, ky-1 , kz-1)
  55 + //@param out is a pointer to the output image
  56 + //@param in is a pointer to the input image
  57 + //@param kx is the x-axis convolution filter
  58 + //@param ky is the y-axis convolution filter
  59 + //@param kz is the z-axis convolution filter
  60 + //@param dimx is the size of the input image along X
  61 + //@param dimy is the size of the input image along Y
  62 + //@param dimz is the size of the input image along Z
  63 + //@param kx is the size of the kernel along X
  64 + //@param ky is the size of the kernel along Y
  65 + //@param kz is the size of the kernel along Z
  66 +
  67 + template <typename T, typename K>
  68 + void cpu_sepconv3(T* out, T* in, K* k0, K* k1, K* k2, size_t dimx, size_t dimy, size_t dimz, size_t kx, size_t ky, size_t kz)
  69 + {
  70 + //Set up the sizes of the new output, which will be kx, ky, kz, smaller than the i nput.
  71 + size_t X = dimx - kx + 1;
  72 + size_t Y = dimy - ky + 1;
  73 + size_t Z = dimz - kz + 1;
  74 +
  75 +#ifdef __CUDACC__
  76 + ///Set up all of the memory on the GPU
  77 + T* gpu_in;
  78 + HANDLE_ERROR(cudaMalloc(&gpu_in, dimx*dimy*dimz*sizeof(T)));
  79 + HANDLE_ERROR(cudaMemcpy(gpu_in, in, dimx*dimy*dimz*sizeof(T),cudaMemcpyHostToDevice));
  80 + K* gpu_kx;
  81 + HANDLE_ERROR(cudaMalloc(&gpu_kx, kx*sizeof(K)));
  82 + HANDLE_ERROR(cudaMemcpy(gpu_kx, k0, kx*sizeof(K),cudaMemcpyHostToDevice));
  83 + K* gpu_ky;
  84 + HANDLE_ERROR(cudaMalloc(&gpu_ky, ky*sizeof(K)));
  85 + HANDLE_ERROR(cudaMemcpy(gpu_ky, k1, ky*sizeof(K),cudaMemcpyHostToDevice));
  86 + K* gpu_kz;
  87 + HANDLE_ERROR(cudaMalloc(&gpu_kz, kz*sizeof(K)));
  88 + HANDLE_ERROR(cudaMemcpy(gpu_kz, k2, kz*sizeof(K),cudaMemcpyHostToDevice));
  89 + T* gpu_out;
  90 + HANDLE_ERROR(cudaMalloc(&gpu_out, X * Y * Z*sizeof(T)));
  91 +
  92 + ///run the kernel
  93 + gpu_sepconv3(gpu_out, gpu_in, gpu_kx, gpu_ky, gpu_kz, dimx, dimy, dimz, kx, ky, kz);
  94 +
  95 + ///Copy the output
  96 + HANDLE_ERROR(cudaMemcpy(out, gpu_out, X*Y*Z*sizeof(T), cudaMemcpyDeviceToHost));
  97 +
  98 + ///Free all the memory used.
  99 + HANDLE_ERROR(cudaFree(gpu_in));
  100 + HANDLE_ERROR(cudaFree(gpu_kx));
  101 + HANDLE_ERROR(cudaFree(gpu_ky));
  102 + HANDLE_ERROR(cudaFree(gpu_kz));
  103 + HANDLE_ERROR(cudaFree(gpu_out));
  104 +#else
  105 + T* temp = (T*) malloc(X * dimy * sizeof(T));
  106 + T* temp3 = (T*) malloc(X * Y * dimz * sizeof(T));
  107 + for(int i = 0; i < dimz; i++)
  108 + {
  109 + idx_IN = (dimx*dimy)*i-i;
  110 + idx_OUT = (X*Y)*i-i;
  111 + cpu_conv2(temp, &in[idx_IN], k0, dimx, dimy, kx, 1)
  112 + cpu_conv2(&temp3[idx_OUT], temp, k1, X, dimy, 1, ky);
  113 + }
  114 + cpu_conv2(out, temp, k2, X*Y, dimz, 1, kz);
  115 + free(temp);
  116 + free(temp3);
  117 +
  118 +#endif
  119 + }
  120 +}
  121 +
  122 +
  123 +#endif
@@ -68,7 +68,7 @@ public: @@ -68,7 +68,7 @@ public:
68 } 68 }
69 69
70 70
71 - /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi]) 71 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [-PI, PI])
72 CUDA_CALLABLE vec3<T> cart2sph() const{ 72 CUDA_CALLABLE vec3<T> cart2sph() const{
73 vec3<T> sph; 73 vec3<T> sph;
74 sph.ptr[0] = len(); 74 sph.ptr[0] = len();
@@ -236,6 +236,13 @@ public: @@ -236,6 +236,13 @@ public:
236 return result; 236 return result;
237 } 237 }
238 238
  239 + CUDA_CALLABLE bool operator==(vec3<T> rhs) const{
  240 + if(rhs[0] == ptr[0] && rhs[1] == ptr[1] && rhs[2] == ptr[2])
  241 + return true;
  242 + else
  243 + return false;
  244 + }
  245 +
239 //#ifndef __NVCC__ 246 //#ifndef __NVCC__
240 /// Outputs the vector as a string 247 /// Outputs the vector as a string
241 std::string str() const{ 248 std::string str() const{