Commit df0a759f9242e7b3c8c48b051cec1d35a89fda4e

Authored by Pavel Govyadinov
2 parents 8b7d6f9a bb7275b6

Merged Graph into master and fixed merge conflicts

stim/cuda/branch_detection.cuh
... ... @@ -11,7 +11,7 @@ typedef unsigned int uint;
11 11  
12 12  
13 13 std::vector< stim::vec<float> >
14   -find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
  14 +find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y, int iter = 0)
15 15 {
16 16 float sigma = 2.0;
17 17 unsigned int conn = 7;
... ... @@ -22,7 +22,7 @@ find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
22 22 stringstream name;
23 23  
24 24  
25   - cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold);
  25 + cpuCenters = stim::cuda::get_centers(texbufferID, texType, x, y, sizek, sigma, conn, threshold, iter);
26 26 cudaDeviceSynchronize();
27 27  
28 28  
... ...
stim/cuda/filter.cuh
... ... @@ -26,6 +26,11 @@ namespace stim
26 26 float* LoG;
27 27 float* res;
28 28 float* centers;
  29 +
  30 +//#ifdef DEBUG
  31 + float* print;
  32 +//#endif
  33 +
29 34 stim::cuda::cuda_texture tx;
30 35  
31 36  
... ... @@ -44,6 +49,13 @@ namespace stim
44 49 HANDLE_ERROR(
45 50 cudaMalloc( (void**) &centers, DIM_Y*DIM_X*sizeof(float))
46 51 );
  52 +
  53 +//#ifdef DEBUG
  54 + HANDLE_ERROR(
  55 + cudaMalloc( (void**) &print, DIM_Y*DIM_X*sizeof(float))
  56 + );
  57 +//#endif
  58 +
47 59 // checkCUDAerrors("Memory Allocation, Result");
48 60 }
49 61  
... ... @@ -58,6 +70,11 @@ namespace stim
58 70 HANDLE_ERROR(
59 71 cudaFree(centers)
60 72 );
  73 +//#ifdef DEBUG
  74 + HANDLE_ERROR(
  75 + cudaFree(print)
  76 + );
  77 +//#endif
61 78 free(LoG);
62 79 }
63 80  
... ... @@ -89,7 +106,7 @@ namespace stim
89 106 //Shared memory would be better.
90 107 __global__
91 108 void
92   - applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG){
  109 + applyFilter(cudaTextureObject_t texIn, unsigned int DIM_X, unsigned int DIM_Y, int kr, int kl, float *res, float* gpuLoG, float* p){
93 110 //R = floor(size/2)
94 111 //THIS IS A NAIVE WAY TO DO IT, and there is a better way)
95 112  
... ... @@ -101,16 +118,15 @@ namespace stim
101 118 // float val = 0;
102 119 float tu = (x-kr+xi)/(float)DIM_X;
103 120 float tv = (y-kr+yi)/(float)DIM_Y;
  121 + int idx = y*DIM_X+x;
104 122 shared[xi][yi] = gpuLoG[yi*kl+xi]*(255.0-(float)tex2D<unsigned char>(texIn, tu, tv));
105 123 __syncthreads();
106   -
107 124  
108 125 //x = max(0,x);
109 126 //x = min(x, width-1);
110 127 //y = max(y, 0);
111 128 //y = min(y, height - 1);
112 129  
113   - int idx = y*DIM_X+x;
114 130 // int k_idx;
115 131 for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
116 132 {
... ... @@ -135,11 +151,12 @@ namespace stim
135 151 __syncthreads();
136 152 if(xi == 0 && yi == 0)
137 153 res[idx] = shared[0][0];
  154 +
138 155 }
139 156  
140 157 extern "C"
141 158 float *
142   - get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold)
  159 + get_centers(GLint texbufferID, GLenum texType, int DIM_X, int DIM_Y, int sizeK, float sigma, float conn, float threshold, int iter = 0)
143 160 {
144 161 tx.SetTextureCoordinates(1);
145 162 tx.SetAddressMode(1, 3);
... ... @@ -153,7 +170,14 @@ namespace stim
153 170 dim3 numBlocks(DIM_X, DIM_Y);
154 171 dim3 threadsPerBlock(sizeK, sizeK);
155 172  
156   - applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG);
  173 + applyFilter <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), DIM_X, DIM_Y, floor(sizeK/2), sizeK, res, gpuLoG, print);
  174 +
  175 + #ifdef DEBUG
  176 + stringstream name;
  177 + name.str("");
  178 + name << "Fiber Cylinder " << iter << ".bmp";
  179 + stim::gpu2image<float>(res, name.str(), DIM_X, DIM_Y, 0, 255);
  180 + #endif
157 181  
158 182  
159 183 stim::cuda::gpu_local_max<float>(centers, res, threshold, conn, DIM_X, DIM_Y);
... ...
stim/cuda/testKernel.cuh
... ... @@ -53,11 +53,31 @@
53 53  
54 54 float valIn = tex2D<unsigned char>(texIn, x, y);
55 55 float templa = templ(x, 32)*255.0;
56   - print[idx] = abs(valIn-templa); ///temporary
  56 + //print[idx] = abs(valIn-templa); ///temporary
  57 + print[idx] = abs(valIn);
57 58 //print[idx] = abs(templa); ///temporary
58 59  
59 60 }
60 61  
  62 + ///Find the difference of the given set of samples and the template
  63 + ///using cuda acceleration.
  64 + ///@param stim::cuda::cuda_texture t --stim texture that holds all the references
  65 + /// to the data.
  66 + ///@param float* result --a pointer to the memory that stores the result.
  67 + __global__
  68 + //void get_diff (float *result)
  69 + void get_diff2 (cudaTextureObject_t texIn, float *print, int dx)
  70 + {
  71 + int x = threadIdx.x + blockIdx.x * blockDim.x;
  72 + int y = threadIdx.y + blockIdx.y * blockDim.y;
  73 + int idx = y*dx+x;
  74 + // int idx = y*16+x;
  75 +
  76 + float valIn = tex2D<unsigned char>(texIn, x, y);
  77 + print[idx] = abs(valIn); ///temporary
  78 +
  79 + }
  80 +
61 81 void test(cudaTextureObject_t tObj, int x, int y, std::string nam)
62 82 {
63 83  
... ... @@ -86,3 +106,31 @@
86 106 cleanUP();
87 107 }
88 108  
  109 +
  110 + void test(cudaTextureObject_t tObj, int x, int y, std::string nam, int iter)
  111 + {
  112 +
  113 + //Bind the Texture in GL and allow access to cuda.
  114 +
  115 + //initialize the return arrays.
  116 +
  117 + initArray(x,y);
  118 + dim3 numBlocks(1, y);
  119 + dim3 threadsPerBlock(x, 1);
  120 + int max_threads = stim::maxThreadsPerBlock();
  121 + //dim3 threads(max_threads, 1);
  122 + //dim3 blocks(x / threads.x + 1, y);
  123 + //dim3 numBlocks(2, 2);
  124 + //dim3 threadsPerBlock(8, 108);
  125 +
  126 +
  127 +// get_diff <<< blocks, threads >>> (tx.getTexture(), print);
  128 + get_diff2 <<< numBlocks, threadsPerBlock >>> (tObj, print, x);
  129 +
  130 + cudaDeviceSynchronize();
  131 + stringstream name; //for debugging
  132 + name << nam.c_str();
  133 + stim::gpu2image<float>(print, name.str(),x,y,0,255);
  134 +
  135 + cleanUP();
  136 + }
... ...
stim/envi/envi_header.h
... ... @@ -8,6 +8,7 @@
8 8 #include <vector>
9 9 #include <algorithm>
10 10 #include <stdlib.h>
  11 +#include <cmath>
11 12  
12 13 //information from an ENVI header file
13 14 //A good resource can be found here: http://www.exelisvis.com/docs/enviheaderfiles.html
... ...
stim/gl/gl_spider.h
1   -#ifndef STIM_GL_SPIDER_H
  1 + #ifndef STIM_GL_SPIDER_H
2 2 #define STIM_GL_SPIDER_H
3 3  
4 4 //#include <GL/glew.h>
... ... @@ -27,7 +27,6 @@
27 27 #include <stim/cuda/branch_detection.cuh>
28 28 #include "../../../volume-spider/glnetwork.h"
29 29 #include <stim/visualization/cylinder.h>
30   -#include <stim/cuda/testKernel.cuh>
31 30 #include <iostream>
32 31 #include <fstream>
33 32 #ifdef TIMING
... ... @@ -40,6 +39,9 @@
40 39 #include <ctime>
41 40 #endif
42 41  
  42 +#ifdef DEBUG
  43 + #include <stim/cuda/testKernel.cuh>
  44 +#endif
43 45  
44 46 namespace stim
45 47 {
... ... @@ -138,11 +140,13 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
138 140 stim::cuda::cuda_texture t_pos; //cuda_texture object used as an interface between OpenGL and cuda for position vectors.
139 141 stim::cuda::cuda_texture t_mag; //cuda_texture object used as an interface between OpenGL and cuda for size vectors.
140 142 stim::cuda::cuda_texture t_len; //cuda_texture object used as an interface between OpenGL and cuda for size vectors.
141   -
  143 +
  144 + int last_fiber; //variable that tracks the last fiber hit during tracing. -1 if no fiber was hit.
  145 +
142 146  
143 147 #ifdef DEBUG
144   - stringstream name;
145 148 int iter;
  149 + stringstream name;
146 150 int iter_pos;
147 151 int iter_dir;
148 152 int iter_siz;
... ... @@ -292,6 +296,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
292 296 DrawLongCylinder(n, l_template, l_square); ///Draw the cylinder.
293 297 stim::cylinder<float> cyl(cL, cM);
294 298 std::vector< stim::vec<float> > result = find_branch(cylinder_texID, GL_TEXTURE_2D, n*l_square, (cL.size()-1)*l_template); ///find all the centers in cuda
  299 +
295 300 stim::vec3<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]); ///the borders of the texture.
296 301 float pval; //pvalue associated with the points on the cylinder.
297 302 if(!result.empty()) ///if we have any points
... ... @@ -315,7 +320,8 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
315 320 }
316 321 stim::vec3<float> v = cyl.surf(pval, result[i][0]); ///find the coordinates of the point at pval on the surface in tissue space.
317 322 stim::vec3<float> di = cyl.p(pval); ///find the coord of v in tissue space projected on the centerline.
318   - float rad = cyl.r(pval)/2; ///find the radius at the pvalue's location
  323 + float rad = cyl.r(pval); ///find the radius at the pvalue's location
  324 + // float rad = cyl.r(pval)/2; ///find the radius at the pvalue's location
319 325 if(
320 326 !(v[0] > size[0] || v[1] > size[1]
321 327 || v[2] > size[2] || v[0] < 0
... ... @@ -372,7 +378,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
372 378 ///Stored in a display list.
373 379 ///uses the default d vector <0,0,1>
374 380 void
375   - genDirectionVectors(float solidAngle = stim::PI/2)
  381 + genDirectionVectors(float solidAngle = 3*stim::PI/4)
376 382 {
377 383  
378 384 //Set up the vectors necessary for Rectangle creation.
... ... @@ -954,7 +960,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
954 960 iter_dir = 0;
955 961 iter_siz = 0;
956 962 #endif
957   - stepsize = 3.0;
  963 + stepsize = 6.0;
958 964 n_pixels = 16.0;
959 965  
960 966 srand(100);
... ... @@ -1316,20 +1322,20 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1316 1322 void
1317 1323 saveNetwork(std::string name)
1318 1324 {
1319   -/* stim::glObj<float> sk;
  1325 + stim::glObj<float> sk1;
1320 1326 for(int i = 0; i < nt.sizeE(); i++)
1321 1327 {
1322   - std::vector<stim::vec< float > > cm = nt.getEdgeCenterLineMag(i);
  1328 + std::vector<float> cm = nt.getEdgeCenterLineMag(i);
1323 1329 std::vector<stim::vec3< float > > ce = nt.getEdgeCenterLine(i);
1324   - sk.Begin(stim::OBJ_LINE);
  1330 + sk1.Begin(stim::OBJ_LINE);
1325 1331 for(int j = 0; j < ce.size(); j++)
1326 1332 {
1327   - sk.TexCoord(cm[j][0]);
1328   - sk.Vertex(ce[j][0], ce[j][1], ce[j][2]);
  1333 + sk1.TexCoord(cm[j]);
  1334 + sk1.Vertex(ce[j][0], ce[j][1], ce[j][2]);
1329 1335 }
1330   - sk.End();
  1336 + sk1.End();
1331 1337 }
1332   -*/ sk.save(name);
  1338 + sk1.save(name);
1333 1339 }
1334 1340  
1335 1341 ///Depreciated, but might be reused later()
... ... @@ -1377,20 +1383,31 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1377 1383 Step()
1378 1384 {
1379 1385 #ifdef DEBUG
1380   - std::cerr << "Took a step" << std::endl;
  1386 + std::cerr << "Took a step";
1381 1387 #endif
1382 1388 Bind(direction_texID, direction_buffID, numSamples, n_pixels);
1383 1389 CHECK_OPENGL_ERROR
1384 1390 findOptimalDirection();
1385 1391 Unbind();
  1392 + #ifdef DEBUG
  1393 + std::cerr << " " << current_cost;
  1394 + #endif
1386 1395 Bind(position_texID, position_buffID, numSamplesPos, n_pixels);
1387 1396 findOptimalPosition();
1388 1397 Unbind();
  1398 + #ifdef DEBUG
  1399 + std::cerr << " " << current_cost;
  1400 + #endif
1389 1401 Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels);
1390 1402 findOptimalRadius();
1391 1403 Unbind();
  1404 + #ifdef DEBUG
  1405 + std::cerr << " " << current_cost;
  1406 + #endif
1392 1407 CHECK_OPENGL_ERROR
1393   -
  1408 + #ifdef DEBUG
  1409 + std::cerr << std::endl;
  1410 + #endif
1394 1411 return current_cost;
1395 1412 }
1396 1413  
... ... @@ -1517,9 +1534,6 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1517 1534 while(!Empty())
1518 1535 {
1519 1536 //clear the currently traced line and start a new one.
1520   - cL.clear();
1521   - cM.clear();
1522   - cD.clear();
1523 1537 curSeed = seeds.top();
1524 1538 curSeedVec = seedsvecs.top();
1525 1539 curSeedMag = seedsmags.top();
... ... @@ -1539,9 +1553,9 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1539 1553 // findOptimalDirection();
1540 1554 // Unbind();
1541 1555 //THIS IS EXPERIMENTAL
1542   - Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels);
1543   - findOptimalRadius();
1544   - Unbind();
  1556 + // Bind(radius_texID, radius_buffID, numSamplesMag, n_pixels);
  1557 + // findOptimalRadius();
  1558 + // Unbind();
1545 1559 //THIS IS EXPERIMENTAL
1546 1560  
1547 1561 // cL.push_back(curSeed);
... ... @@ -1593,17 +1607,17 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1593 1607 ds[0], ds[1], ds[2],
1594 1608 ups[0], ups[1], ups[2]);
1595 1609 ///Set the look at distance
1596   - sk.Render(); ///Render the network
1597   -// nt.Render();
  1610 +// sk.Render(); ///Render the network
  1611 + nt.Render();
1598 1612  
1599 1613 CHECK_OPENGL_ERROR
1600 1614  
1601 1615  
1602   - glLoadName((int) sk.numL()); ///Load all the names
1603   -// glLoadName(nt.sizeE());
  1616 +// glLoadName((int) sk.numL()); ///Load all the names
  1617 + glLoadName(nt.sizeE());
1604 1618  
1605   - sk.RenderLine(cL); ///Render the current line.
1606   -// nt.RenderLine(cL);
  1619 +// sk.RenderLine(cL); ///Render the current line.
  1620 + nt.RenderLine(cL);
1607 1621  
1608 1622 // glPopName();
1609 1623 glFlush(); ///Flush the buffer
... ... @@ -1654,55 +1668,51 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1654 1668 cM.clear();
1655 1669 }
1656 1670  
1657   -/*
  1671 +
1658 1672 void
1659   - addToNetwork(pair<stim::fiber<float>, int> in, stim::vec3<float> spos,
1660   - stim::vec<float> smag, stim::vec3<float> sdir)
  1673 + addToNetwork(std::vector<stim::vec3<float> > L, std::vector<float > M, stim::vec3<float> spos, stim::vec3<float> sdir, float smag)
1661 1674 {
1662   - #ifdef TIMING
1663   - double s = std::clock();
1664   - #endif
1665   -
1666   - std::vector<stim::vec3<float> > ce = in.first.centerline();
1667   - std::vector<stim::vec<float> > cm = in.first.centerlinemag();
1668 1675 //if the fiber is longer than 2 steps (the number it takes to diverge)
1669   - if(ce.size() > 3)
  1676 + if(L.size() > 3)
1670 1677 {
1671 1678 //if we did not hit a fiber
1672   - if(in.second == -1)
  1679 + if(last_fiber == -1)
1673 1680 {
1674   - spos[0] = spos[0]-sdir[0]*smag[0]/2.;
1675   - spos[1] = spos[1]-sdir[1]*smag[0]/2.;
1676   - spos[2] = spos[2]-sdir[2]*smag[0]/2.;
1677   - int h = selectObject(spos, -sdir, smag[0]);
  1681 + spos[0] = spos[0]-sdir[0]*smag;
  1682 + spos[1] = spos[1]-sdir[1]*smag;
  1683 + spos[2] = spos[2]-sdir[2]*smag;
  1684 + int h = selectObject(spos, -sdir, smag);
1678 1685 //did we start with a fiber?
1679 1686 if(h != -1 && h < nt.sizeE())
1680   - nt.addEdge(ce, cm, h, -1);
  1687 + nt.addEdge(L, M, h, -1);
1681 1688 else
1682   - nt.addEdge(ce, cm, -1, -1);
  1689 + nt.addEdge(L, M, -1, -1);
1683 1690 }
1684 1691 //if we hit a fiber?
1685   - else if(in.second != -1)
  1692 + else if(last_fiber != -1)
1686 1693 {
1687   - nt.addEdge(ce,cm,-1, in.second);
1688   - spos[0] = spos[0]-sdir[0]*smag[0]/2.;
1689   - spos[1] = spos[1]-sdir[1]*smag[0]/2.;
1690   - spos[2] = spos[2]-sdir[2]*smag[0]/2.;
1691   - int h = selectObject(spos, -sdir, smag[0]);
  1694 + nt.addEdge(L, M, -1, last_fiber);
  1695 + spos[0] = spos[0]-sdir[0]*smag;
  1696 + spos[1] = spos[1]-sdir[1]*smag;
  1697 + spos[2] = spos[2]-sdir[2]*smag;
  1698 + int h = selectObject(spos, -sdir, smag);
1692 1699 //did start with a fiber?
1693 1700 if(h != -1 && h < nt.sizeE()){
1694 1701 // std::cout << "got here double" << smag.str() << std::endl;
1695   - nt.addEdge(ce,cm, h, in.second);
1696   - } else { nt.addEdge(ce,cm, -1, -1);}
  1702 + nt.addEdge(L, M, h, last_fiber);
  1703 + }
  1704 + else
  1705 + {
  1706 + nt.addEdge(L, M, -1, -1);
  1707 + }
1697 1708 }
1698 1709 }
1699 1710  
1700   - #ifdef TIMING
1701   - double nt = (std::clock() - s) / (double) CLOCKS_PER_SEC;
1702   - network_time += nt * 1000.0;
  1711 + #ifdef DEBUG
  1712 + iter++;
1703 1713 #endif
1704 1714 }
1705   -*/
  1715 +/*
1706 1716 void
1707 1717 addToNetwork(std::vector<stim::vec3<float> > L, std::vector<float > M)
1708 1718 {
... ... @@ -1722,7 +1732,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1722 1732 #endif
1723 1733 }
1724 1734 }
1725   -
  1735 +*/
1726 1736  
1727 1737 void
1728 1738 printSizes()
... ... @@ -1735,22 +1745,31 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1735 1745 traceLine(stim::vec3<float> pos, float mag, int min_cost)
1736 1746 {
1737 1747 //starting (seed) position and magnitude.
  1748 + last_fiber = -1;
  1749 + cL.clear();
  1750 + cM.clear();
  1751 + cD.clear();
  1752 +
1738 1753 stim::vec3<float> spos = getPosition();
  1754 + stim::vec3<float> sdir = getDirection();
1739 1755 float smag = getMagnitude();
1740   - stim::vec3<float> sdir = getDirection();
1741 1756  
1742   -// Bind();
1743   -// sk.Begin(stim::OBJ_LINE);
  1757 + setPosition(pos);
  1758 + setMagnitude(mag);
1744 1759  
  1760 + cL.push_back(p);
  1761 + cD.push_back(d);
  1762 + cM.push_back(m);
  1763 +// stim::vec3<float> spos = getPosition();
  1764 +// float smag = getMagnitude();
  1765 +// stim::vec3<float> sdir = getDirection();
1745 1766  
1746   - sk.createFromSelf(GL_SELECT);
1747   -// nt.createFromSelf(GL_SELECT);
  1767 +// Bind();
  1768 +// sk.Begin(stim::OBJ_LINE);
1748 1769  
1749   - cL.push_back(pos);
1750   - cM.push_back(mag);
1751 1770  
1752   -// setPosition(pos);
1753   -// setMagnitude(mag);
  1771 + //sk.createFromSelf(GL_SELECT);
  1772 + nt.createFromSelf(GL_SELECT);
1754 1773 int h;
1755 1774 bool started = false;
1756 1775 bool running = true;
... ... @@ -1761,7 +1780,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1761 1780 if (cost > min_cost){
1762 1781 running = false;
1763 1782 branchDetection2();
1764   - addToNetwork(cL, cM);
  1783 + addToNetwork(cL, cM, spos, sdir, smag);
1765 1784 #ifdef DEBUG
1766 1785 std::cerr << "the cost of " << cost << " > " << min_cost << std::endl;
1767 1786 #endif
... ... @@ -1769,13 +1788,14 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1769 1788 } else {
1770 1789 //Have we found the edge of the map?
1771 1790 pos = getPosition();
1772   - if(pos[0] > size[0] || pos[1] > size[1]
1773   - || pos[2] > size[2] || pos[0] < 0
1774   - || pos[1] < 0 || pos[2] < 0)
  1791 + if(p[0] > size[0] || p[1] > size[1]
  1792 + || p[2] > size[2] || p[0] < 0
  1793 + || p[1] < 0 || p[2] < 0)
1775 1794 {
1776 1795 running = false;
1777 1796 branchDetection2();
1778   - addToNetwork(cL, cM);
  1797 + // addToNetwork(cL, cM);
  1798 + addToNetwork(cL, cM, spos, sdir, smag);
1779 1799 #ifdef DEBUG
1780 1800 std::cerr << "I hit and edge" << std::endl;
1781 1801 #endif
... ... @@ -1790,10 +1810,11 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1790 1810 }
1791 1811 //Has the template size gotten unreasonable?
1792 1812 mag = getMagnitude();
1793   - if(mag > 75 || mag < 1){
  1813 + if(m > 75 || m < 1){
1794 1814 running = false;
1795 1815 branchDetection2();
1796   - addToNetwork(cL, cM);
  1816 + // addToNetwork(cL, cM);
  1817 + addToNetwork(cL, cM, spos, sdir, smag);
1797 1818 #ifdef DEBUG
1798 1819 std::cerr << "The templates are too big" << std::endl;
1799 1820 #endif
... ... @@ -1807,13 +1828,16 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1807 1828 #ifdef DEBUG
1808 1829 std::cerr << "I hit the fiber " << h << std::endl;
1809 1830 #endif
  1831 + last_fiber = h;
1810 1832 running = false;
1811 1833 branchDetection2();
1812   - addToNetwork(cL, cM);
  1834 + // addToNetwork(cL, cM);
  1835 + addToNetwork(cL, cM, spos, sdir, smag);
1813 1836 break;
1814 1837 }
1815 1838 else {
1816   - cL.push_back(stim::vec3<float>(p[0], p[1],p[2]));
  1839 + cL.push_back(p);
  1840 + cD.push_back(d);
1817 1841 cM.push_back(m);
1818 1842 // Unbind();
1819 1843 CHECK_OPENGL_ERROR
... ...
stim/gl/gl_texture.h
... ... @@ -30,9 +30,9 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
30 30 GLenum cpu_type;
31 31 GLenum gpu_type;
32 32 GLenum format; //format for the texture (GL_RGBA, GL_LUMINANCE, etc.)
33   - using image_stack<T>::R;
  33 + using image_stack<T,F>::R;
34 34 //using image_stack<T>::S;
35   - using image_stack<T>::ptr;
  35 + using image_stack<T,F>::ptr;
36 36  
37 37 /// Sets the internal texture_type, based on the data dimensions
38 38 void setTextureType(){
... ... @@ -247,7 +247,7 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
247 247 }
248 248  
249 249 ///returns the dimentions of the data in the x, y, z directions.
250   - vec<int> getSize(){
  250 + stim::vec<int> getSize(){
251 251 stim::vec<int> size(R[1], R[2], R[3]);
252 252 return size;
253 253 }
... ... @@ -282,7 +282,7 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
282 282 ///@param file_mask specifies the file(s) to be loaded
283 283 /// Sets the path and calls the loader on that path.
284 284 void load_images(std::string file_mask){
285   - image_stack<T>::load_images(file_mask); //load images
  285 + image_stack<T, F>::load_images(file_mask); //load images
286 286 guess_parameters();
287 287 }
288 288  
... ... @@ -292,13 +292,18 @@ class gl_texture : public virtual image_stack&lt;T, F&gt;
292 292 return texID;
293 293 }
294 294  
295   -
  295 +
296 296 T* getData(){
297 297 return ptr;
298 298 }
299   -
300   -
  299 +
  300 + void setData(T* rts)
  301 + {
  302 +
  303 + }
  304 +
301 305 };
  306 +
302 307 }
303 308  
304 309  
... ...
stim/grids/image_stack.h
... ... @@ -17,13 +17,13 @@ class image_stack : public virtual stim::grid&lt;T, 4, F&gt;{
17 17  
18 18 protected:
19 19 //using stim::grid<T, 4>::S;
20   - using stim::grid<T, 4>::R;
21   - using stim::grid<T, 4>::ptr;
22   - using stim::grid<T, 4>::read;
  20 + using stim::grid<T, 4, F>::R;
  21 + using stim::grid<T, 4, F>::ptr;
  22 + using stim::grid<T, 4, F>::read;
23 23  
24 24 public:
25 25 //default constructor
26   - image_stack() : grid<T, 4>() {
  26 + image_stack() : grid<T, 4, F>() {
27 27  
28 28 }
29 29  
... ... @@ -113,7 +113,8 @@ public:
113 113 /// @param i is the page to be saved
114 114 void save_image(std::string file_name, unsigned int i){
115 115 stim::image<T> I; //create an image
116   - I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid
  116 + I.set_interleaved(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid
  117 +// I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //retrieve the interlaced data from the image - store it in the grid
117 118 I.save(file_name);
118 119 }
119 120  
... ...
stim/math/filters/gauss3.h 0 → 100644
  1 +#ifndef STIM_CUDA_GAUSS3_H
  2 +#define STIM_CUDA_GAUSS3_H
  3 +#include <stim/math/filters/sepconv3.h>
  4 +#include <stim/math/filters/gauss2.h>
  5 +#include <stim/math/constants.h>
  6 +
  7 +namespace stim
  8 +{
  9 + ///Perform a 3D gaussian convolution on an input image.
  10 + ///@param in is a pointer to the input data.
  11 + ///@param dimx is the size of in* in the x direction.
  12 + ///@param dimx is the size of in* in the y direction.
  13 + ///@param dimx is the size of in* in the z direction.
  14 + ///@param stdx is the standard deviation (in pixels) along the x axis.
  15 + ///@param stdy is the standard deviation (in pixels) along the y axis.
  16 + ///@param nstds specifies the number of standard deviations of the Gaussian that will be k ept in the kernel.
  17 + template<typename T, typename K>
  18 + void cpu_gauss3(T* in, K dimx, K dimy, K dimz, K stdx, K stdy, K stdz, size_t nstds = 3)
  19 + {
  20 + //Set up the sizes of the gaussian Kernels.
  21 + size_t kx = stdx * nstds * 2;
  22 + size_t ky = stdy * nstds * 2;
  23 + size_t kz = stdz * nstds * 2;
  24 +
  25 + //Set up the sizes of the new output, which will be kx, ky, kz, smaller than the input.
  26 + size_t X = dimx - kx +1;
  27 + size_t Y = dimy - ky +1;
  28 + size_t Z = dimz - kz +1;
  29 + T* out = (T*) malloc(X*Y*Z* sizeof(T));
  30 +
  31 + ///Set up the memory that will store the gaussians
  32 + K* gaussx = (K*)malloc(kx *sizeof(K));
  33 + K* gaussy = (K*)malloc(ky *sizeof(K));
  34 + K* gaussz = (K*)malloc(kz *sizeof(K));
  35 +
  36 + ///Set up the midpoints of the gaussians.
  37 + K midgaussx = (K) kx/ (K)2;
  38 + K midgaussy = (K) ky/ (K)2;
  39 + K midgaussz = (K) kz/ (K)2;
  40 +
  41 + ///Evaluate the kernels in each cardinal direction.
  42 + for(size_t i = 0; i < kx; i++)
  43 + gaussx[i] = gauss1d((K) i, midgaussx, stdx);
  44 +
  45 + for(size_t i = 0; i < kx; i++)
  46 + gaussy[i] = gauss1d((K) i, midgaussy, stdy);
  47 +
  48 + for(size_t i = 0; i < kx; i++)
  49 + gaussz[i] = gauss1d((K) i, midgaussz, stdz);
  50 +
  51 + cpu_sepconv3(out, in, gaussx, gaussy, gaussz, dimx, dimy, dimz, kx, ky, kz);
  52 +
  53 + }
  54 +}
  55 +#endif
... ...
stim/math/filters/sepconv2.h
... ... @@ -86,4 +86,4 @@ namespace stim {
86 86 }
87 87 }
88 88  
89   -#endif
90 89 \ No newline at end of file
  90 +#endif
... ...
stim/math/filters/sepconv3.h 0 → 100644
  1 +#ifndef STIM_CUDA_SEPCONV3_H
  2 +#define STIM_CUDA_SEPCONV3_H
  3 +
  4 +#include <stim/math/filters/conv2.h>
  5 +#include <stim/math/filters/sepconv2.h>
  6 +#ifdef __CUDACC__
  7 + #include <stim/cuda/cudatools.h>
  8 + #include <stim/cuda/sharedmem.cuh>
  9 +#endif
  10 +
  11 +namespace stim
  12 +{
  13 +#ifdef __CUDACC__
  14 + template<typename T, typename K>
  15 + void gpu_sepconv3(T* out, T* in, K* k0, K* k1, K* k2, size_t dimx, size_t dimy, size_t dimz, size_t kx, size_t ky, size_t kz)
  16 +{
  17 +
  18 + size_t X = dimx - kx + 1;
  19 + size_t Y = dimy - ky + 1;
  20 + size_t Z = dimz - kz + 1;
  21 +
  22 + T* temp_out;
  23 + int idx_IN;
  24 + int idx_OUT;
  25 + HANDLE_ERROR(cudaMalloc(&temp_out, X*Y*dimz*sizeof(T)));
  26 +
  27 + for(int i = 0; i < dimz; i++)
  28 + {
  29 + idx_IN = (dimx*dimy)*i-i;
  30 + idx_OUT = (X*Y)*i-i;
  31 + gpu_sepconv2(&temp_out[idx_OUT], &in[idx_IN], k0, k1, dimx, dimy, kx, ky);
  32 + }
  33 +
  34 + cudaDeviceProp p;
  35 + HANDLE_ERROR(cudaGetDeviceProperties(&p, 0));
  36 + size_t tmax = p.maxThreadsPerBlock;
  37 +
  38 + dim3 numThreads(sqrt(tmax), sqrt(tmax));
  39 + dim3 numBlocks(X*Y/numThreads.x +1, dimz/numThreads.y + 1);
  40 + size_t sharedMem = (numThreads.x + kz - 1) * numThreads.y * sizeof(T);
  41 + if(sharedMem > p.sharedMemPerBlock)
  42 + {
  43 + std::cout << "Error in stim::gpu_sepconv3() - insufficient shared memory for this kernel." << std::endl;
  44 + exit(1);
  45 + }
  46 + kernel_conv2 <<< numBlocks, numThreads, sharedMem >>> (out, temp_out, k2, X*Y, dimz, 1, kz);
  47 + HANDLE_ERROR(cudaFree(temp_out));
  48 +
  49 +
  50 +}
  51 +#endif
  52 +
  53 + //Performs a separable convolution of a 3D image. Only valid pixels based on the kernel ar e returned.
  54 + // As a result, the output image will be smaller than the input image by (kx-1, ky-1 , kz-1)
  55 + //@param out is a pointer to the output image
  56 + //@param in is a pointer to the input image
  57 + //@param kx is the x-axis convolution filter
  58 + //@param ky is the y-axis convolution filter
  59 + //@param kz is the z-axis convolution filter
  60 + //@param dimx is the size of the input image along X
  61 + //@param dimy is the size of the input image along Y
  62 + //@param dimz is the size of the input image along Z
  63 + //@param kx is the size of the kernel along X
  64 + //@param ky is the size of the kernel along Y
  65 + //@param kz is the size of the kernel along Z
  66 +
  67 + template <typename T, typename K>
  68 + void cpu_sepconv3(T* out, T* in, K* k0, K* k1, K* k2, size_t dimx, size_t dimy, size_t dimz, size_t kx, size_t ky, size_t kz)
  69 + {
  70 + //Set up the sizes of the new output, which will be kx, ky, kz, smaller than the i nput.
  71 + size_t X = dimx - kx + 1;
  72 + size_t Y = dimy - ky + 1;
  73 + size_t Z = dimz - kz + 1;
  74 +
  75 +#ifdef __CUDACC__
  76 + ///Set up all of the memory on the GPU
  77 + T* gpu_in;
  78 + HANDLE_ERROR(cudaMalloc(&gpu_in, dimx*dimy*dimz*sizeof(T)));
  79 + HANDLE_ERROR(cudaMemcpy(gpu_in, in, dimx*dimy*dimz*sizeof(T),cudaMemcpyHostToDevice));
  80 + K* gpu_kx;
  81 + HANDLE_ERROR(cudaMalloc(&gpu_kx, kx*sizeof(K)));
  82 + HANDLE_ERROR(cudaMemcpy(gpu_kx, k0, kx*sizeof(K),cudaMemcpyHostToDevice));
  83 + K* gpu_ky;
  84 + HANDLE_ERROR(cudaMalloc(&gpu_ky, ky*sizeof(K)));
  85 + HANDLE_ERROR(cudaMemcpy(gpu_ky, k1, ky*sizeof(K),cudaMemcpyHostToDevice));
  86 + K* gpu_kz;
  87 + HANDLE_ERROR(cudaMalloc(&gpu_kz, kz*sizeof(K)));
  88 + HANDLE_ERROR(cudaMemcpy(gpu_kz, k2, kz*sizeof(K),cudaMemcpyHostToDevice));
  89 + T* gpu_out;
  90 + HANDLE_ERROR(cudaMalloc(&gpu_out, X * Y * Z*sizeof(T)));
  91 +
  92 + ///run the kernel
  93 + gpu_sepconv3(gpu_out, gpu_in, gpu_kx, gpu_ky, gpu_kz, dimx, dimy, dimz, kx, ky, kz);
  94 +
  95 + ///Copy the output
  96 + HANDLE_ERROR(cudaMemcpy(out, gpu_out, X*Y*Z*sizeof(T), cudaMemcpyDeviceToHost));
  97 +
  98 + ///Free all the memory used.
  99 + HANDLE_ERROR(cudaFree(gpu_in));
  100 + HANDLE_ERROR(cudaFree(gpu_kx));
  101 + HANDLE_ERROR(cudaFree(gpu_ky));
  102 + HANDLE_ERROR(cudaFree(gpu_kz));
  103 + HANDLE_ERROR(cudaFree(gpu_out));
  104 +#else
  105 + T* temp = (T*) malloc(X * dimy * sizeof(T));
  106 + T* temp3 = (T*) malloc(X * Y * dimz * sizeof(T));
  107 + for(int i = 0; i < dimz; i++)
  108 + {
  109 + idx_IN = (dimx*dimy)*i-i;
  110 + idx_OUT = (X*Y)*i-i;
  111 + cpu_conv2(temp, &in[idx_IN], k0, dimx, dimy, kx, 1)
  112 + cpu_conv2(&temp3[idx_OUT], temp, k1, X, dimy, 1, ky);
  113 + }
  114 + cpu_conv2(out, temp, k2, X*Y, dimz, 1, kz);
  115 + free(temp);
  116 + free(temp3);
  117 +
  118 +#endif
  119 + }
  120 +}
  121 +
  122 +
  123 +#endif
... ...
stim/math/vec3.h
... ... @@ -68,7 +68,7 @@ public:
68 68 }
69 69  
70 70  
71   - /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  71 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [-PI, PI])
72 72 CUDA_CALLABLE vec3<T> cart2sph() const{
73 73 vec3<T> sph;
74 74 sph.ptr[0] = len();
... ... @@ -236,6 +236,13 @@ public:
236 236 return result;
237 237 }
238 238  
  239 + CUDA_CALLABLE bool operator==(vec3<T> rhs) const{
  240 + if(rhs[0] == ptr[0] && rhs[1] == ptr[1] && rhs[2] == ptr[2])
  241 + return true;
  242 + else
  243 + return false;
  244 + }
  245 +
239 246 //#ifndef __NVCC__
240 247 /// Outputs the vector as a string
241 248 std::string str() const{
... ...