Commit 88088186b35fbdc00bb2b844c351a647b922ea6c

Authored by David Mayerich
2 parents 5bccf89d bf23ee36

Merge branch 'Glnetwork'

stim/cuda/arraymath/array_cart2polar.cuh
... ... @@ -4,7 +4,7 @@
4 4 namespace stim{
5 5 namespace cuda{
6 6 template<typename T>
7   - __global__ void cuda_cart2polar(T* a, int x, int y){
  7 + __global__ void cuda_cart2polar(T* a, int x, int y, float rotation){
8 8  
9 9  
10 10 // calculate the 2D coordinates for this current thread.
... ... @@ -20,21 +20,21 @@ namespace stim{
20 20 float yl = a[i * 2 + 1];
21 21 float theta = atan2( yl, xl ) ;
22 22 float r = sqrt(xl * xl + yl * yl);
23   - a[i * 2 + 0] = theta;
  23 + a[i * 2 + 0] = theta + rotation;
24 24 a[i * 2 + 1] = r;
25 25  
26 26 }
27 27  
28 28  
29 29 template<typename T>
30   - void gpu_cart2polar(T* gpuGrad, unsigned int x, unsigned int y){
  30 + void gpu_cart2polar(T* gpuGrad, unsigned int x, unsigned int y, float rotation = 0){
31 31  
32 32 unsigned int max_threads = stim::maxThreadsPerBlock();
33 33 dim3 threads(max_threads, 1);
34 34 dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);
35 35  
36 36 //call the kernel to do the Cartesian-to-polar conversion
37   - cuda_cart2polar <<< blocks, threads >>>(gpuGrad, x, y);
  37 + cuda_cart2polar <<< blocks, threads >>>(gpuGrad, x, y, rotation);
38 38  
39 39 }
40 40  
... ... @@ -67,4 +67,4 @@ namespace stim{
67 67 }
68 68 }
69 69  
70   -#endif
71 70 \ No newline at end of file
  71 +#endif
... ...
stim/cuda/branch_detection.cuh 0 → 100644
  1 +#include <iostream>
  2 +#include <fstream>
  3 +#include <cuda_runtime.h>
  4 +#include <stim/math/vector.h>
  5 +//#include <math.h>
  6 +#include <stim/visualization/colormap.h>
  7 +#include <stim/cuda/cuda_texture.cuh>
  8 +#include <stim/cuda/templates/gradient.cuh>
  9 +#include <stim/cuda/templates/gaussian_blur.cuh>
  10 +#include <stim/cuda/arraymath.cuh>
  11 +#include <stim/cuda/ivote.cuh>
  12 +#include <stim/cuda/testKernel.cuh>
  13 +typedef unsigned int uint;
  14 +typedef unsigned int uchar; // NOTE(review): aliases unsigned int (not unsigned char) despite the name -- confirm intent
  15 + // File-scope state shared by initCuda(), cleanCuda() and find_branch().
  16 +stim::cuda::cuda_texture t; // texture wrapper mapped/unmapped by find_branch()
  17 +float* gpuTable; // device buffer for the atan2 lookup table (bytes_table bytes, see initCuda)
  18 +float* gpuGrad; // device buffer of 2*bytes_ds bytes; holds the gradient, then (theta, r) after gpu_cart2polar
  19 +float* gpuVote; // device buffer of bytes_ds bytes passed to gpu_vote / gpu_update_dir / gpu_local_max
  20 +float* gpuI; // device buffer of bytes_ds bytes receiving the blurred image
  21 +float* gpuCenters; // device buffer of bytes_ds bytes written by gpu_local_max
  22 +
  23 +void atan_2d(float* cpuTable, unsigned int rmax) // Fills cpuTable with an atan2 lookup table for a square window of radius rmax.
  24 +{ // cpuTable is written at indices 0 .. (2*rmax+1)*(2*rmax+1)-1, so it must hold at least that many floats.
  25 + //initialize the width and height of the window in which atan2 is computed.
  26 + int xsize = 2*rmax +1;
  27 + int ysize = 2*rmax +1;
  28 +
  29 + // assign the center coordinates of the atan2 window to yi and xi
  30 + int yi = rmax;
  31 + int xi = rmax;
  32 +
  33 + // visit every (xt, yt) cell of the window
  34 + for (int xt = 0; xt < xsize; xt++){
  35 +
  36 + for(int yt = 0; yt < ysize; yt++){
  37 +
  38 + //convert the current 2D coordinates to a 1D index (row yt, column xt, row width xsize)
  39 + int id = yt * xsize + xt;
  40 + // calculate the signed offset from the pixel to the center of the atan2 window (center minus pixel)
  41 + float xd = xi - xt;
  42 + float yd = yi - yt;
  43 +
  44 + // calculate the angle of that offset with atan2 and store the result.
  45 + float atan_2d_vote = atan2(yd, xd);
  46 + cpuTable[id] = atan_2d_vote;
  47 + }
  48 + }
  49 +
  50 +}
  51 +
  52 +void initCuda(unsigned int bytes_table, unsigned int bytes_ds) // Allocates the five file-scope device buffers; each cudaMalloc is wrapped in HANDLE_ERROR.
  53 +{ // bytes_table: size in bytes of gpuTable; bytes_ds: size in bytes of gpuI, gpuVote and gpuCenters (gpuGrad gets twice that).
  54 + HANDLE_ERROR(
  55 + cudaMalloc((void**) &gpuTable, bytes_table)
  56 + );
  57 + HANDLE_ERROR(
  58 + cudaMalloc((void**) &gpuI, bytes_ds)
  59 + );
  60 + HANDLE_ERROR(
  61 + cudaMalloc((void**) &gpuGrad, bytes_ds*2) // two floats per pixel
  62 + );
  63 + HANDLE_ERROR(
  64 + cudaMalloc((void**) &gpuVote, bytes_ds)
  65 + );
  66 + HANDLE_ERROR(
  67 + cudaMalloc((void**) &gpuCenters, bytes_ds)
  68 + );
  69 +}
  70 +
  71 +void cleanCuda() // Frees the five file-scope device buffers allocated by initCuda().
  72 +{ // NOTE(review): the pointers are not reset after cudaFree, so they dangle until the next initCuda() call.
  73 + HANDLE_ERROR(
  74 + cudaFree(gpuTable)
  75 + );
  76 + HANDLE_ERROR(
  77 + cudaFree(gpuGrad)
  78 + );
  79 + HANDLE_ERROR(
  80 + cudaFree(gpuVote)
  81 + );
  82 + HANDLE_ERROR(
  83 + cudaFree(gpuCenters)
  84 + );
  85 + HANDLE_ERROR(
  86 + cudaFree(gpuI)
  87 + );
  88 +}
  89 +
  90 +std::vector< stim::vec<float> > // returns one (ix/x, iy/y, 0) vector per pixel flagged in the centers buffer
  91 +find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y) // texbufferID/texType: GL texture handed to MapCudaTexture; x, y: image width and height in pixels
  92 +{
  93 + float phi = 15.1*M_PI/180; // 15.1 degrees expressed in radians
  94 + int iter = 5; // number of vote / update-direction passes
  95 + float dphi = phi/iter; // amount phi is reduced by after each pass
  96 + float rmax = 10; // radius of the atan2 table window
  97 + float sigma = 4; // passed to tex_gaussian_blur2
  98 + unsigned int pixels = x * y;
  99 + unsigned int bytes = sizeof(float) * pixels; // NOTE(review): never used below
  100 + unsigned int bytes_table = sizeof(float) * (2*rmax + 1) * (2*rmax + 1);
  101 + unsigned int x_ds = (x + (x % 1 == 0 ? 0:1)); // x % 1 is always 0, so x_ds == x
  102 + unsigned int y_ds = (y + (x % 1 == 0 ? 0:1)); // NOTE(review): tests x rather than y; harmless today because % 1 is always 0
  103 + unsigned int bytes_ds = sizeof(float) * x_ds * y_ds;
  104 + unsigned int conn = 5; // passed to gpu_local_max
  105 + float final_t = 200.0; // passed to gpu_local_max
  106 + float* cpuTable = (float*) malloc(bytes_table); // NOTE(review): malloc results are not checked for NULL
  107 + float* cpuCenters = (float*) malloc(bytes_ds);
  108 +
  109 + stringstream name; // NOTE(review): never used below; unqualified name relies on a using-declaration not visible here
  110 +
  111 +
  112 +
  113 +
  114 + std::vector<stim::vec<float> > output;
  115 + initCuda(bytes_table, bytes_ds);
  116 + // build the atan2 table on the host and upload it
  117 + atan_2d(cpuTable, rmax);
  118 + cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice); // NOTE(review): return code ignored
  119 +
  120 + // map the GL texture for CUDA access and blur it into gpuI
  121 + t.MapCudaTexture(texbufferID, texType);
  122 + cudaDeviceSynchronize();
  123 + stim::cuda::tex_gaussian_blur2<float>(
  124 + gpuI, sigma, x, y, t.getTexture(), t.getArray()
  125 + );
  126 + cudaDeviceSynchronize();
  127 +
  128 + // gradient of the blurred image into gpuGrad
  129 + stim::cuda::gpu_gradient_2d<float>(
  130 + gpuGrad, gpuI, x, y
  131 + );
  132 + cudaDeviceSynchronize();
  133 + // convert gpuGrad in place to polar form (rotation defaults to 0)
  134 + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
  135 + cudaDeviceSynchronize();
  136 +
  137 + cudaDeviceSynchronize(); // NOTE(review): back-to-back with the synchronize above
  138 + for (int i = 0; i < iter; i++) // iter passes, shrinking phi by dphi after each one
  139 + {
  140 + stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  141 + cudaDeviceSynchronize();
  142 + stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  143 + cudaDeviceSynchronize();
  144 + phi = phi - dphi;
  145 + }
  146 +
  147 + cudaDeviceSynchronize();
  148 + stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, final_t, conn, x, y);
  149 + cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost); // NOTE(review): return code ignored
  150 + for(int i = 0; i < pixels; i++) // NOTE(review): signed i compared against unsigned pixels
  151 + {
  152 + int ix = (i % x);
  153 + int iy = (i / x);
  154 + if((cpuCenters[i] == 1) && (ix > 4) && (ix < x-4)) // keeps flagged pixels away from the left/right edges only; iy is not filtered. NOTE(review): x-4 wraps when x < 4
  155 + {
  156 +
  157 + float x_v = (float) ix;
  158 + float y_v = (float) iy;
  159 + output.push_back(stim::vec<float>((x_v/(float)x), // coordinates divided by the image size
  160 + (y_v/(float)y), 0.0));
  161 +
  162 + }
  163 + }
  164 +
  165 + // release the texture mapping, device buffers and host buffers
  166 + t.UnmapCudaTexture();
  167 + cleanCuda();
  168 + free(cpuTable);
  169 + free(cpuCenters);
  170 + return output;
  171 +}
... ...
stim/cuda/branch_detection2.cuh 0 → 100644
  1 +#include <stim/cuda/templates/gaussian_blur.cuh>
  2 +#include <stim/cuda/templates/gradient.cuh>
  3 +#include <stim/cuda/arraymath.cuh>
  4 +#include <stim/cuda/ivote.cuh>
  5 +
  6 +
  7 +
  8 +
  9 +
  10 +
  11 +
  12 +
  13 +
  14 +
  15 +void atan_2(float* cpuTable, unsigned int rmax){ // Fills cpuTable with an atan2 lookup table for a square window of radius rmax.
  16 + // cpuTable is written at indices 0 .. (2*rmax+1)*(2*rmax+1)-1. NOTE(review): same body as atan_2d in branch_detection.cuh.
  17 + //initialize the width and height of the window in which atan2 is computed.
  18 + int xsize = 2*rmax +1;
  19 + int ysize = 2*rmax +1;
  20 +
  21 + // assign the center coordinates of the atan2 window to yi and xi
  22 + int yi = rmax;
  23 + int xi = rmax;
  24 +
  25 + // visit every (xt, yt) cell of the window
  26 + for (int xt = 0; xt < xsize; xt++){
  27 +
  28 + for(int yt = 0; yt < ysize; yt++){
  29 +
  30 + //convert the current 2D coordinates to a 1D index (row yt, column xt, row width xsize)
  31 + int id = yt * xsize + xt;
  32 + // calculate the signed offset from the pixel to the center of the atan2 window (center minus pixel)
  33 + float xd = xi - xt;
  34 + float yd = yi - yt;
  35 +
  36 + // calculate the angle of that offset with atan2 and store the result.
  37 + float atan_2d_vote = atan2(yd, xd);
  38 + cpuTable[id] = atan_2d_vote;
  39 + }
  40 + }
  41 +
  42 +}
  43 +std::vector<stim::vec<float> >
  44 +find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
  45 +{
  46 + // NOTE(review): this function looks unfinished -- the next statement is truncated, many names used below are not declared in this file as shown, and nothing is returned.
  47 + float* cpuTable = (float
  48 +
  49 + unsigned int pixels = x * y;
  50 + unsigned int bytes = sizeof(float) * pixels;
  51 +
  52 + //calculate the number of bytes in the atan2 table
  53 + // NOTE(review): rmax is not declared in the visible code
  54 + unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(float);
  55 +
  56 +
  57 +
  58 + //allocate space on the GPU for the atan2 table
  59 +
  60 + float* gpuTable;
  61 +
  62 + cudaMalloc(&gpuTable, bytes_table);
  63 +
  64 +
  65 + // NOTE(review): atan_2() is never called in the visible code, so cpuTable is not filled before this upload
  66 + cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);
  67 + // NOTE(review): resize is not declared in the visible code
  68 + unsigned int sigma_ds = 1/resize;
  69 + unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1)); // ceiling of x / sigma_ds
  70 + unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1)); // ceiling of y / sigma_ds
  71 + unsigned int bytes_ds = sizeof(float) * x_ds * y_ds;
  72 +
  73 + // device buffers sized for the down-sampled image
  74 + float* gpuI;
  75 + cudaMalloc(&gpuI, bytes_ds);
  76 +
  77 +
  78 + float* gpuGrad;
  79 + cudaMalloc(&gpuGrad, bytes_ds*2); // two floats per pixel
  80 +
  81 + float* gpuVote;
  82 + cudaMalloc(&gpuVote, bytes_ds);
  83 +
  84 + // allocate space on the GPU for the detected cell centers
  85 +
  86 + float* gpuCenters;
  87 +
  88 + cudaMalloc(&gpuCenters, bytes_ds);
  89 +
  90 + // NOTE(review): gpuI0 and cpuResize are not declared in the visible code
  91 + stim::cuda::gpu_down_sample<float>(gpuI, gpuI0, resize, x , y);
  92 + cudaMemcpy(cpuResize, gpuI, bytes_ds, cudaMemcpyDeviceToHost);
  93 + // from here on x and y are the down-sampled dimensions
  94 +x = x_ds;
  95 + y = y_ds;
  96 + t = t * resize; // NOTE(review): t is not declared in the visible code
  97 + //sigma = sigma * resize;
  98 +
  99 + cudaDeviceSynchronize();
  100 + stim::cuda::gpu_gaussian_blur2<float>(gpuI,sigma, x, y);
  101 + cudaDeviceSynchronize();
  102 + cudaMemcpy(cpuBlur, gpuI, bytes_ds, cudaMemcpyDeviceToHost);
  103 + cudaDeviceSynchronize();
  104 +
  105 + stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y);
  106 + cudaDeviceSynchronize();
  107 + cudaMemcpy(cpuGradient, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);
  108 +
  109 + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
  110 + cudaDeviceSynchronize();
  111 + cudaMemcpy(cpuCart2Polar, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);
  112 +
  113 +
  114 + //multiply the host copy of the polar gradient by a constant and calculate the absolute value (to save an image)
  115 +
  116 + stim::cuda::cpu_multiply<float>(cpuCart2Polar, 40, x * y * 2);
  117 +
  118 + cudaDeviceSynchronize();
  119 +
  120 + stim::cuda::cpu_abs<float>(cpuCart2Polar, x * y * 2);
  121 +
  122 + cudaDeviceSynchronize();
  123 +
  124 + // NOTE(review): iter, phi, dphi and cpuVote1..cpuVote5 are not declared in the visible code
  125 + for (int i =0; i<iter; i++){
  126 +
  127 + stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  128 + cudaDeviceSynchronize();
  129 + stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  130 + cudaDeviceSynchronize();
  131 + switch (i){ // copy the vote buffer of pass i to its own host buffer; passes beyond the fifth overwrite cpuVote5
  132 + case 0 : cudaMemcpy(cpuVote1, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  133 + break;
  134 + case 1 : cudaMemcpy(cpuVote2, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  135 + break;
  136 + case 2 : cudaMemcpy(cpuVote3, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  137 + break;
  138 + case 3 : cudaMemcpy(cpuVote4, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  139 + break;
  140 + case 4 : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  141 + break;
  142 + default : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  143 + break;
  144 + }
  145 + phi = phi - dphi;
  146 + }
  147 + // NOTE(review): conn and cpuCenters are not declared in the visible code
  148 + stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, t, conn, x, y);
  149 + cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);
  150 + // NOTE(review): no return statement despite the std::vector return type, and none of the cudaMalloc'd buffers above are freed
  151 +}
... ...
stim/cuda/cuda_texture.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_TEXTURE_H
  2 +#define STIM_CUDA_TEXTURE_H
  3 +
  4 +#include <assert.h>
  5 +#include <stim/cuda/cudatools/error.h>
  6 +#include <cuda.h>
  7 +#include <cuda_runtime.h>
  8 +#include <cublas_v2.h>
  9 +#include <stdio.h>
  10 +#include <GL/glew.h>
  11 +#include <GL/glut.h>
  12 +#include <sstream>
  13 +#include <stim/visualization/colormap.h>
  14 +#include <stim/cuda/cudatools/devices.h>
  15 +#include <stim/cuda/cudatools/threads.h>
  16 +#include <stim/math/vector.h>
  17 +
  18 +///A container for the texture based methods used by the spider class.
  19 +namespace stim
  20 +{
  21 + namespace cuda
  22 + {
  23 + class cuda_texture
  24 + {
  25 + public:
  26 + cudaArray* srcArray; // array obtained from the mapped GL resource (set in MapCudaTexture)
  27 + cudaGraphicsResource_t resource; // handle returned by cudaGraphicsGLRegisterImage
  28 + struct cudaResourceDesc resDesc; // resource descriptor pointing at srcArray
  29 + struct cudaTextureDesc texDesc; // sampling parameters, filled in by the constructor
  30 + cudaTextureObject_t tObj; // texture object created over srcArray
  31 +
  32 +
  33 + ///basic constructor that fills the texture descriptor with default parameters:
  34 + cuda_texture() // wrap addressing on both axes, point filtering, element-type reads, unnormalized coordinates
  35 + {
  36 + memset(&texDesc, 0, sizeof(texDesc));
  37 + texDesc.addressMode[0] = cudaAddressModeWrap;
  38 + texDesc.addressMode[1] = cudaAddressModeWrap;
  39 + texDesc.filterMode = cudaFilterModePoint;
  40 + texDesc.readMode = cudaReadModeElementType;
  41 + texDesc.normalizedCoords = 0;
  42 + }
  43 +
  44 +//-------------------------------------------------------------------------//
  45 +//-------------------------------CUDA_MAPPING------------------------------//
  46 +//-------------------------------------------------------------------------//
  47 +//Methods for creating the cuda texture.
  48 + ///@param GLuint tex -- GLtexture (must be contained in a frame buffer object)
  49 + /// that holds the data that will be handed to cuda.
  50 + ///@param GLenum target -- either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
  51 + /// may work with other gl texture types but untested.
  52 + ///Registers and maps the gl texture for cuda, fetches the mapped cuda array, and creates
  53 + ///a cuda texture object over that array.
  54 + void
  55 + MapCudaTexture(GLuint tex, GLenum target)
  56 + {
  57 + HANDLE_ERROR(
  58 + cudaGraphicsGLRegisterImage(
  59 + &resource,
  60 + tex,
  61 + target,
  62 +// cudaGraphicsMapFlagsReadOnly
  63 + cudaGraphicsRegisterFlagsNone
  64 + )
  65 + );
  66 +
  67 + HANDLE_ERROR(
  68 + cudaGraphicsMapResources(1, &resource)
  69 + );
  70 +
  71 + HANDLE_ERROR(
  72 + cudaGraphicsSubResourceGetMappedArray(&srcArray, resource, 0, 0) // array index 0, mip level 0
  73 + );
  74 +
  75 + memset(&resDesc, 0, sizeof(resDesc));
  76 + resDesc.resType = cudaResourceTypeArray;
  77 + resDesc.res.array.array = srcArray;
  78 + HANDLE_ERROR(
  79 + cudaCreateTextureObject(&tObj, &resDesc, &texDesc, NULL)
  80 + );
  81 + }
  82 +
  83 + ///Unmaps and unregisters the gl resource, then destroys the cuda texture object.
  84 + ///NOTE(review): the texture object is destroyed after its backing resource is released -- confirm this order is intended.
  85 + void
  86 + UnmapCudaTexture()
  87 + {
  88 + HANDLE_ERROR(
  89 + cudaGraphicsUnmapResources(1, &resource)
  90 + );
  91 + HANDLE_ERROR(
  92 + cudaGraphicsUnregisterResource(resource)
  93 + );
  94 + HANDLE_ERROR(
  95 + cudaDestroyTextureObject(tObj)
  96 + );
  97 + }
  98 +
  99 +//-------------------------------------------------------------------------//
  100 +//------------------------------GET/SET METHODS----------------------------//
  101 +//-------------------------------------------------------------------------//
  102 +
  103 +///Returns the texture object created by MapCudaTexture.
  104 + cudaTextureObject_t
  105 + getTexture()
  106 + {
  107 + return tObj;
  108 + }
  109 + ///Returns the cuda array obtained by MapCudaTexture.
  110 + cudaArray*
  111 + getArray()
  112 + {
  113 + return srcArray;
  114 + }
  115 + };
  116 +}
  117 +}
  118 +
  119 +
  120 +#endif
... ...
stim/cuda/cudatools/devices.h
... ... @@ -4,7 +4,7 @@
4 4 #include <cuda.h>
5 5  
6 6 namespace stim{
7   -
  7 +extern "C"
8 8 int maxThreadsPerBlock()
9 9 {
10 10 int device;
... ... @@ -14,6 +14,7 @@ int maxThreadsPerBlock()
14 14 return props.maxThreadsPerBlock;
15 15 }
16 16  
  17 +extern "C"
17 18 int sharedMemPerBlock()
18 19 {
19 20 int device;
... ...
stim/cuda/ivote/update_dir.cuh
... ... @@ -164,6 +164,9 @@ namespace stim{
164 164 //free allocated memory
165 165 cudaFree(gpuDir);
166 166  
  167 + cudaDestroyTextureObject(texObj);
  168 + cudaFreeArray(cuArray);
  169 +
167 170 }
168 171  
169 172 template<typename T>
... ... @@ -211,4 +214,4 @@ namespace stim{
211 214 }
212 215 }
213 216  
214   -#endif
215 217 \ No newline at end of file
  218 +#endif
... ...
stim/cuda/ivote/vote.cuh
... ... @@ -124,6 +124,9 @@ namespace stim{
124 124  
125 125 cuda_vote <<< blocks, threads,share_bytes >>>(gpuVote, texObj, gpuTable, phi, rmax, x , y);
126 126  
  127 + cudaDestroyTextureObject(texObj);
  128 + cudaFreeArray(cuArray);
  129 +
127 130 }
128 131  
129 132  
... ... @@ -169,4 +172,4 @@ namespace stim{
169 172 }
170 173 }
171 174  
172   -#endif
173 175 \ No newline at end of file
  176 +#endif
... ...
stim/cuda/sharedmem.cuh
... ... @@ -34,9 +34,38 @@ namespace stim{
34 34 }
35 35 }
36 36 }
  37 +
  38 + template<typename T, typename D> // T: element type of dest; D: type fetched from the texture with tex2D<D>
  39 + __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
  40 + unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
  41 + dim3 threadIdx, dim3 blockDim){
  42 + // Copies an X-by-Y window of texture src, starting at texel (x, y), into dest (row width X), tiled across the block's threads.
  43 + //calculate the number of iterations required for the copy
  44 + unsigned int xI, yI;
  45 + xI = X/blockDim.x + 1; //number of iterations along X
  46 + yI = Y/blockDim.y + 1; //number of iterations along Y
  47 + // NOTE(review): x and y are unsigned; the conv2sep.cuh callers pass expressions like (bxi - kr), which would wrap if negative -- confirm
  48 + //for each iteration
  49 + for(unsigned int xi = 0; xi < xI; xi++){
  50 + for(unsigned int yi = 0; yi < yI; yi++){
  51 +
  52 + //calculate the index into the destination window
  53 + unsigned int sx = xi * blockDim.x + threadIdx.x;
  54 + unsigned int sy = yi * blockDim.y + threadIdx.y;
  55 +
  56 + //calculate the index into the texture
  57 + unsigned int tx = x + sx;
  58 + unsigned int ty = y + sy;
  59 +
  60 + //perform the copy (only for indices inside the X-by-Y window); the stored value is abs(255 - texel), not the raw texel
  61 + if(sx < X && sy < Y)
  62 + dest[sy * X + sx] = abs(255 - tex2D<D>(src, tx, ty)); // NOTE(review): hard-coded 255 inversion in a generic copy helper -- confirm intent
  63 + }
  64 + }
  65 + }
37 66  
38 67 }
39 68 }
40 69  
41 70  
42   -#endif
43 71 \ No newline at end of file
  72 +#endif
... ...
stim/cuda/spider_cost.cuh 0 → 100644
  1 +#ifndef STIM_SPIDER_COST_H
  2 +#define STIM_SPIDER_COST_H
  3 +
  4 +#include <assert.h>
  5 +#include <cuda.h>
  6 +#include <cuda_runtime.h>
  7 +#include <stdio.h>
  8 +#include <stim/visualization/colormap.h>
  9 +#include <sstream>
  10 +#include <stim/math/vector.h>
  11 +#include <stim/cuda/cudatools/devices.h>
  12 +#include <stim/cuda/cudatools/threads.h>
  13 +#include <stim/cuda/cuda_texture.cuh>
  14 +namespace stim{
  15 + namespace cuda
  16 + {
  17 +
  18 + stim::cuda::cuda_texture t; //texture object.
  19 + float* result;
  20 + float* print;
  21 +
  22 + ///Initialization function, allocates the memory and passes the necessary
  23 + ///handles from OpenGL and Cuda.
  24 + ///@param DIM_Y --integer controlling how much memory to allocate.
  25 + void initArray(int DIM_Y) // allocates DIM_Y floats on the device for the namespace-level result pointer
  26 + {
  27 +// cudaMalloc( (void**) &print, DIM_Y*16*sizeof(float)); ///temporary
  28 + cudaMalloc( (void**) &result, DIM_Y*sizeof(float)); // NOTE(review): return code is not checked
  29 + }
  30 +
  30 + ///Deinit function that frees the memory used. The texture resource is released
  31 + ///back to OpenGL separately, by get_cost() calling UnmapCudaTexture().
  33 + void cleanUP() // frees the device buffer allocated by initArray()
  34 + {
  35 + cudaFree(result); // NOTE(review): return code is not checked
  36 +// cudaFree(print); ///temporary
  37 + }
  38 +
  39 + ///A virtual representation of a uniform template.
  40 + ///Returns the value of the template pixel.
  41 + ///@param int x --location of a pixel.
  42 + __device__
  43 + float Template(int x)
  44 + { // The bounds use integer division: 16/6 == 2, 16*5/6 == 13, 16*2/6 == 5, 16*4/6 == 10.
  45 + if(x < 16/6 || x > 16*5/6 || (x > 16*2/6 && x < 16*4/6)){ // i.e. x < 2, x > 13, or 5 < x < 10
  46 + return 1.0;
  47 + }else{
  48 + return 0.0;
  49 + }
  50 +
  51 + }
  52 +
  53 + ///Find the difference of the given set of samples and the template
  54 + ///using cuda acceleration.
  55 + ///@param cudaTextureObject_t texIn --cuda texture object that holds
  56 + /// the sample data.
  57 + ///@param float* result --a pointer to the memory that stores the result.
  58 + __global__
  59 + //void get_diff (float *result)
  60 + void get_diff (cudaTextureObject_t texIn, float *result)
  61 + { // Each block sums |texel/255 - Template(x)| over its threads and writes the sum to result[blockIdx.y].
  62 + __shared__ float shared[16][8]; // indexed [threadIdx.x][threadIdx.y]; sized for the 16x8 block used by get_cost
  63 + int x = threadIdx.x + blockIdx.x * blockDim.x;
  64 + int y = threadIdx.y + blockIdx.y * blockDim.y;
  65 + int x_t = threadIdx.x;
  66 + int y_t = threadIdx.y;
  67 +// int idx = y*16+x;
  68 + int g_idx = blockIdx.y; // one output slot per block row
  69 +
  70 + float valIn = tex2D<unsigned char>(texIn, x, y)/255.0; // texel scaled by 1/255
  71 + float valTemp = Template(x);
  72 +
  73 +// print[idx] = abs(valIn); ///temporary
  74 +
  75 + shared[x_t][y_t] = abs(valIn-valTemp); // per-thread absolute difference
  76 +
  77 + __syncthreads();
  78 + // halving-step reduction along the first (x) index
  79 + for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
  80 + {
  81 + __syncthreads();
  82 + if (x_t < step)
  83 + {
  84 + shared[x_t][y_t] += shared[x_t + step][y_t];
  85 + }
  86 + __syncthreads();
  87 + }
  88 + __syncthreads();
  89 + // halving-step reduction along the second (y) index
  90 + for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1)
  91 + {
  92 + __syncthreads();
  93 + if(y_t < step)
  94 + {
  95 + shared[x_t][y_t] += shared[x_t][y_t + step];
  96 + }
  97 + __syncthreads();
  98 + }
  99 + __syncthreads();
  100 + if(x_t == 0 && y_t == 0) // a single thread per block publishes the block total
  101 + result[g_idx] = shared[0][0];
  102 +
  103 +
  104 + // //result[idx] = abs(valIn);
  105 + }
  106 +
  107 +
  108 + ///External access-point to the cuda function
  109 + ///@param GLuint texbufferID --GLtexture (must be contained in a framebuffer object)
  110 + /// that holds the data that will be handed to cuda.
  111 + ///@param GLenum texType --either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
  112 + /// may work with other gl texture types, but untested.
  113 + ///@param DIM_Y, the number of samples in the template.
  114 + extern "C"
  115 + stim::vec<int> get_cost(GLint texbufferID, GLenum texType, int DIM_Y) // returns (index of the smallest per-block cost, that cost truncated to int)
  116 + {
  117 +
  118 + //Bind the Texture in GL and allow access to cuda.
  119 + t.MapCudaTexture(texbufferID, texType);
  120 +
  121 + //initialize the return arrays.
  122 + float* output;
  123 + output = (float* ) malloc(DIM_Y*sizeof(float)); // NOTE(review): not checked for NULL
  124 +
  125 + stim::vec<int> ret(0, 0);
  126 + initArray(DIM_Y);
  127 +
  128 +
  129 + //variables for finding the min.
  130 + float mini = 10000000000000000.0; // starting value for the running minimum
  131 + int idx = 0;
  132 +
  133 + //cuda launch variables: DIM_Y blocks stacked along y, 16x8 threads each.
  134 + dim3 numBlocks(1, DIM_Y);
  135 + dim3 threadsPerBlock(16, 8);
  136 +
  137 +
  138 + get_diff <<< numBlocks, threadsPerBlock >>> (t.getTexture(), result);
  139 + // copy the DIM_Y per-block sums back to the host
  140 + HANDLE_ERROR(
  141 + cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost)
  142 + );
  143 + // linear scan for the smallest cost; the first occurrence wins on ties
  144 + for( int i = 0; i<DIM_Y; i++){
  145 + if(output[i] < mini){
  146 + mini = output[i];
  147 + idx = i;
  148 + }
  149 + }
  150 +
  151 +// stringstream name; //for debugging
  152 +// name << "Test.bmp";
  153 +// stim::gpu2image<float>(print, name.str(),16,218,0,256);
  154 +
  155 + t.UnmapCudaTexture();
  156 + cleanUP();
  157 + ret[0] = idx; ret[1] = (int) output[idx]; // NOTE(review): the float cost is truncated to int; output[0] is read even when DIM_Y <= 0
  158 + free(output);
  159 + return ret;
  160 + }
  161 +
  162 + }
  163 +}
  164 +
  165 +
  166 +#endif
... ...
stim/cuda/templates/conv2.cuh
... ... @@ -102,8 +102,10 @@ namespace stim{
102 102 dim3 blocks(w / threads + 1, h);
103 103  
104 104 //call the kernel to do the multiplication
105   - cuda_conv2 <<< blocks, threads >>>(mask, copy, texObj, w, h, M);
106   -
  105 + //cuda_conv2 <<< blocks, threads >>>(img, mask, copy, w, h, M);
  106 + cuda_conv2 <<< blocks, threads >>>(img, mask, copy, texObj, w, h, M);
  107 + cudaDestroyTextureObject(texObj);
  108 + cudaFreeArray(cuArray);
107 109 }
108 110  
109 111 template<typename T>
... ... @@ -139,4 +141,4 @@ namespace stim{
139 141 }
140 142  
141 143  
142   -#endif
143 144 \ No newline at end of file
  145 +#endif
... ...
stim/cuda/templates/conv2sep.cuh
... ... @@ -30,7 +30,7 @@ namespace stim{
30 30 int byi = blockIdx.y;
31 31  
32 32 //copy the portion of the image necessary for this block to shared memory
33   - stim::cuda::sharedMemcpy_tex2D(s, in, bxi - kr, byi, 2 * kr + blockDim.x, 1, threadIdx, blockDim);
  33 + stim::cuda::sharedMemcpy_tex2D<float, unsigned char>(s, in, bxi - kr, byi, 2 * kr + blockDim.x, 1, threadIdx, blockDim);
34 34  
35 35 //calculate the thread index
36 36 int ti = threadIdx.x;
... ... @@ -88,7 +88,7 @@ namespace stim{
88 88 int byi = blockIdx.y * blockDim.y;
89 89  
90 90 //copy the portion of the image necessary for this block to shared memory
91   - stim::cuda::sharedMemcpy_tex2D(s, in, bxi, byi - kr, 1, 2 * kr + blockDim.y, threadIdx, blockDim);
  91 + stim::cuda::sharedMemcpy_tex2D<float, unsigned char>(s, in, bxi, byi - kr, 1, 2 * kr + blockDim.y, threadIdx, blockDim);
92 92  
93 93 //calculate the thread index
94 94 int ti = threadIdx.y;
... ... @@ -213,6 +213,8 @@ namespace stim{
213 213 //free allocated memory
214 214 cudaFree(cuArray);
215 215  
  216 + cudaDestroyTextureObject(texObj);
  217 +
216 218 }
217 219  
218 220 /// Applies a Gaussian blur to a 2D image stored on the CPU
... ... @@ -257,4 +259,4 @@ namespace stim{
257 259 };
258 260 };
259 261  
260   -#endif
261 262 \ No newline at end of file
  263 +#endif
... ...
stim/cuda/templates/gaussian_blur.cuh
... ... @@ -7,7 +7,6 @@
7 7 #include <stim/cuda/sharedmem.cuh>
8 8 #include <stim/cuda/templates/conv2sep.cuh> //GPU-based separable convolution algorithm
9 9  
10   -#define pi 3.14159
11 10  
12 11 namespace stim{
13 12 namespace cuda{
... ... @@ -37,12 +36,14 @@ namespace stim{
37 36  
38 37 //copy the kernel to the GPU
39 38 T* gpuKernel0;
  39 + HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth*sizeof(T)));
40 40 HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
41 41  
42 42 //perform the gaussian blur as a separable convolution
43 43 stim::cuda::tex_conv2sep(out, x, y, texObj, cuArray, gpuKernel0, kwidth, gpuKernel0, kwidth);
44 44  
45 45 HANDLE_ERROR(cudaFree(gpuKernel0));
  46 + free(kernel0);
46 47  
47 48 }
48 49  
... ... @@ -58,7 +59,7 @@ namespace stim{
58 59  
59 60 //copy the kernel to the GPU
60 61 T* gpuKernel0;
61   - HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
  62 + HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth*sizeof(T)));
62 63 HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
63 64  
64 65 //perform the gaussian blur as a separable convolution
... ... @@ -87,4 +88,4 @@ namespace stim{
87 88 };
88 89 };
89 90  
90   -#endif
91 91 \ No newline at end of file
  92 +#endif
... ...
stim/cuda/testKernel.cuh 0 → 100644
  1 +#include <assert.h>
  2 +#include <cuda.h>
  3 +#include <cuda_runtime.h>
  4 +#include <stdio.h>
  5 +#include <stim/visualization/colormap.h>
  6 +#include <sstream>
  7 +#include <stim/math/vector.h>
  8 +#include <stim/cuda/cudatools/devices.h>
  9 +#include <stim/cuda/cudatools/threads.h>
  10 +#include <stim/cuda/cuda_texture.cuh>
  11 + stim::cuda::cuda_texture tx; //texture object.
  12 + float* print;
  13 +
  14 + ///Initialization function, allocates the device memory used for the
  15 + ///debug output buffer (print).
  16 + ///Takes no parameters; the buffer size is fixed at 216*16 floats.
  17 + void initArray() // allocates the file-scope print buffer on the device
  18 + {
  19 + cudaMalloc( (void**) &print, 216*16*sizeof(float)); ///temporary. NOTE(review): test() launches 1089 blocks of 16x8 threads and reads back 16 x 1089*8 values, which exceeds 216*16 floats -- confirm size
  20 + }
  21 +
  22 + ///Deinit function that frees the memory used. The texture resource is released
  23 + ///back to OpenGL separately, by test() calling UnmapCudaTexture().
  24 + void cleanUP() // frees the buffer allocated by initArray()
  25 + {
  26 + cudaFree(print); ///temporary
  27 + }
  28 +
  29 + __device__
  30 + float templ(int x)
  31 + { // The bounds use integer division: 16/6 == 2, 16*5/6 == 13, 16*2/6 == 5, 16*4/6 == 10.
  32 + if(x < 16/6 || x > 16*5/6 || (x > 16*2/6 && x < 16*4/6)){ // i.e. x < 2, x > 13, or 5 < x < 10
  33 + return 1.0;
  34 + }else{
  35 + return 0.0;
  36 + }
  37 +
  38 + }
  39 +
  40 + ///Find the difference of the given set of samples and the template
  41 + ///using cuda acceleration.
  42 + ///@param cudaTextureObject_t texIn --cuda texture object that holds
  43 + /// the sample data.
  44 + ///@param float* print --a pointer to the memory that stores the result.
  45 + __global__
  46 + //void get_diff (float *result)
  47 + void get_diff (cudaTextureObject_t texIn, float *print)
  48 + { // Debug kernel: each thread writes abs(templ(x)) to print[y*16 + x]; the texel is fetched but not stored.
  49 + int x = threadIdx.x + blockIdx.x * blockDim.x;
  50 + int y = threadIdx.y + blockIdx.y * blockDim.y;
  51 + int idx = y*16+x; // row width of 16 is hard-coded; no bounds check against the size of print
  52 +
  53 + float valIn = tex2D<unsigned char>(texIn, x, y); // only used by the commented-out line below
  54 + float templa = templ(x);
  55 + //print[idx] = abs(valIn); ///temporary
  56 + print[idx] = abs(templa); ///temporary
  57 +
  58 + }
  59 +
  60 +
  61 + ///External access-point to the cuda function
  62 + ///@param GLuint texbufferID --GLtexture (must be contained in a framebuffer object)
  63 + /// that holds the data that will be handed to cuda.
  64 + ///@param GLenum texType --either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
  65 + /// may work with other gl texture types, but untested.
  66 + ///The launch size (1089 blocks of 16x8 threads) is hard-coded in the function body.
  67 + void test(GLint texbufferID, GLenum texType) // Debug driver: runs get_diff over the mapped texture and saves the print buffer as "FromTex.bmp".
  68 + {
  69 +
  70 + //Bind the Texture in GL and allow access to cuda.
  71 + tx.MapCudaTexture(texbufferID, texType);
  72 +
  73 + //allocate the debug output buffer.
  74 +
  75 + initArray();
  76 + // NOTE(review): x, y and max_threads below are only referenced by commented-out code
  77 + int x = 16;
  78 + int y = 27*8;
  79 + y = 8* 1089;
  80 + int max_threads = stim::maxThreadsPerBlock();
  81 + //dim3 threads(max_threads, 1);
  82 + //dim3 blocks(x / threads.x + 1, y);
  83 + dim3 numBlocks(1, 1089);
  84 + dim3 threadsPerBlock(16, 8);
  85 + //dim3 numBlocks(2, 2);
  86 + //dim3 threadsPerBlock(8, 108);
  87 +
  88 + // NOTE(review): this launch writes 16 * 8 * 1089 floats, but initArray() allocates only 216*16 -- looks like an out-of-bounds write
  89 +// get_diff <<< blocks, threads >>> (tx.getTexture(), print);
  90 + get_diff <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), print);
  91 +
  92 + cudaDeviceSynchronize();
  93 + stringstream name; //for debugging
  94 + name << "FromTex.bmp";
  95 + stim::gpu2image<float>(print, name.str(),16,1089*8,0,1.0);
  96 +
  97 + tx.UnmapCudaTexture();
  98 + cleanUP();
  99 + }
  100 +
... ...
stim/gl/gl_spider.h
... ... @@ -13,50 +13,101 @@
13 13 #include "stim/math/vector.h"
14 14 #include "stim/math/rect.h"
15 15 #include "stim/math/matrix.h"
16   -#include "stim/cuda/cost.h"
  16 +#include "stim/cuda/spider_cost.cuh"
17 17 #include <stim/cuda/cudatools/glbind.h>
18   -#include <stim/visualization/obj.h>
  18 +#include <stim/cuda/arraymath.cuh>
  19 +#include <stim/cuda/cudatools.h>
  20 +#include <stim/cuda/ivote.cuh>
  21 +#include <stim/visualization/glObj.h>
19 22 #include <vector>
  23 +#include <stim/cuda/branch_detection.cuh>
  24 +#include "../../../volume-spider/fiber.h"
  25 +#include "../../../volume-spider/glnetwork.h"
  26 +//#include <stim/cuda/testKernel.cuh>
  27 +
  28 +//#include <stim/cuda/testKernel.cuh>
20 29  
21 30 #include <iostream>
22 31 #include <fstream>
  32 +#ifdef TESTING
  33 + #include <iostream>
  34 + #include <cstdio>
  35 + #include <ctime>
  36 +#endif
23 37  
24 38  
25   -
26   -/* Technically since gl_spider inherits from gl_texture, we could
27   - call the init with a path to an image stack, and upload
28   - the images while creating the spider (calling init) */
29 39 namespace stim
30 40 {
31 41  
32 42 template<typename T>
33   -class gl_spider
  43 +class gl_spider : public virtual gl_texture<T>
34 44 {
35 45 //doesn't use gl_texture really, just needs the GLuint id.
36 46 //doesn't even need the texture iD really.
37 47 private:
  48 +
  49 + //
38 50 stim::vec<float> p; //vector designating the position of the spider.
39 51 stim::vec<float> d; //vector designating the orientation of the spider
40 52 //always a unit vector.
41 53 stim::vec<float> m; //magnitude of the spider vector.
42 54 //mag[0] = length.
43 55 //mag[1] = width.
44   - std::vector<stim::vec<float> > dV;
45   - std::vector<stim::vec<float> > pV;
46   - std::vector<stim::vec<float> > mV;
47   - //currentTransform
48   - stim::matrix<float, 4> cT;
  56 + std::vector<stim::vec<float> > dV; //A list of all the direction vectors.
  57 + std::vector<stim::vec<float> > pV; //A list of all the position vectors.
  58 + std::vector<stim::vec<float> > mV; //A list of all the size vectors.
  59 +
  60 + stim::matrix<float, 4> cT; //current Transformation matrix
  61 + //From tissue space to texture space.
49 62 GLuint texID;
50   - stim::vec<float> S;
51   - stim::vec<float> R;
52   - cudaGraphicsResource_t resource;
  63 + stim::vec<float> S; //Size of a voxel in the volume.
  64 + stim::vec<float> R; //Dimensions of the volume.
  65 +
  66 +
  67 + //GL and Cuda variables
  68 + GLuint dList; //displaylist ID
  69 + GLuint fboID; //framebuffer ID
  70 + GLuint texbufferID; //texbuffer ID, only necessary for
  71 + //cuda aspect of the calculation.
  72 + GLuint pfboID; //buffer object for position tracking.
  73 + GLuint ptexbufferID; //texture object for position tracking.
  74 +
  75 + GLuint mfboID; //buffer object for magnitude adjustment.
  76 + GLuint mtexbufferID; //texture object for magnitude adjustment.
  77 + GLuint bfboID; //buffer object for position adjustment.
  78 + GLuint btexbufferID; //buffer object for position adjustment.
  79 +
  80 + int numSamples; //The number of templates in the buffer.
  81 + int numSamplesPos;
  82 + int numSamplesMag;
  83 +
  84 +// float stepsize = 4.0; //Step size.
  85 + float stepsize = 3.0; //Step size.
  86 + int current_cost; //variable to store the cost of the current step.
  87 +
  88 +
  89 + //Tracing variables.
  90 + std::stack< stim::vec<float> > seeds; //seed positions.
  91 + std::stack< stim::vec<float> > seedsvecs; //seed directions.
  92 + std::stack< float > seedsmags; //seed magnitudes.
53 93  
54   - GLuint dList;
55   - GLuint fboID;
56   - GLuint texbufferID;
57   - int numSamples;
58   - float stepsize = 3.0;
59   - int current_cost;
  94 + std::vector< stim::vec<float> > cL; //Positions of line currently being traced.
  95 + std::vector< stim::vec<float> > cD; //Direction of line currently being traced.
  96 + std::vector< stim::vec<float> > cM; //Magnitude of line currently being traced.
  97 +
  98 + stim::glObj<float> sk; //object to store the skeleton.
  99 + stim::glnetwork<float> nt; //object for storing the network.
  100 +
  101 + stim::vec<float> rev; //reverse vector;
  102 + stim::camera camSel;
  103 + stim::vec<float> ps;
  104 + stim::vec<float> ups;
  105 + stim::vec<float> ds;
  106 +
  107 +
  108 +//--------------------------------------------------------------------------//
  109 +//-------------------------------PRIVATE METHODS----------------------------//
  110 +//--------------------------------------------------------------------------//
60 111  
61 112 /// Method for finding the best scale for the spider.
62 113 /// changes the x, y, z size of the spider to minimize the cost
... ... @@ -64,42 +115,43 @@ class gl_spider
64 115 void
65 116 findOptimalDirection()
66 117 {
67   - setMatrix();
68   - glCallList(dList);
69   - int best = getCost();
70   - stim::vec<float> next(
  118 + setMatrix(); //create the transformation matrix.
  119 + glCallList(dList); //move the templates to p, d, m.
  120 + int best = getCost(texbufferID,numSamples); //find min cost.
  121 + stim::vec<float> next( //find next vector.
71 122 dV[best][0]*S[0]*R[0],
72 123 dV[best][1]*S[1]*R[1],
73 124 dV[best][2]*S[2]*R[2],
74 125 0);
75   - next = (cT*next).norm();
76   - //next = (cT*next);
  126 + next = (cT*next).norm(); //find next vector.
77 127 setPosition( p[0]+next[0]*m[0]/stepsize,
78 128 p[1]+next[1]*m[0]/stepsize,
79 129 p[2]+next[2]*m[0]/stepsize);
80 130 setDirection(next[0], next[1], next[2]);
  131 + //move forward and change direction.
81 132 }
82 133  
83   - /// Method for finding the best d for the spider.
84   - /// Not sure if necessary since the next p for the spider
  134 + /// Method for finding the best d (direction) for the spider.
  135 + /// Not sure if necessary since the next p (position) for the spider
85 136 /// will be at d * m.
86 137 void
87 138 findOptimalPosition()
88 139 {
89   - setMatrix();
90   - glCallList(dList+1);
91   - int best = getCost();
92   - stim::vec<float> next(
93   - pV[best][0],
94   - pV[best][1],
95   - pV[best][2],
96   - 1);
97   - next = cT*next;
  140 + setMatrix(); //create the transformation matrix.
  141 + glCallList(dList+1); //move the templates to p, d, m.
  142 + int best = getCost(ptexbufferID, numSamplesPos); //find min cost.
  143 + std::cerr << best << std::endl;
  144 + stim::vec<float> next( //find next position.
  145 + pV[best][0],
  146 + pV[best][1],
  147 + pV[best][2],
  148 + 1);
  149 + next = cT*next; //find next position.
98 150 setPosition(
99 151 next[0]*S[0]*R[0],
100 152 next[1]*S[1]*R[1],
101 153 next[2]*S[2]*R[2]
102   - );
  154 + ); //adjust position.
103 155 }
104 156  
105 157 /// Method for finding the best scale for the spider.
... ... @@ -108,33 +160,64 @@ class gl_spider
108 160 void
109 161 findOptimalScale()
110 162 {
111   - setMatrix();
112   - glCallList(dList+2);
113   - int best = getCost();
114   - setMagnitude(m[0]*mV[best][0]);
  163 + setMatrix(); //create the transformation.
  164 + glCallList(dList+2); //move the templates to p, d, m.
  165 + int best = getCost(mtexbufferID, numSamplesMag); //get best cost.
  166 + setMagnitude(m[0]*mV[best][0]); //adjust the magnitude.
115 167 }
116 168  
  169 +
  170 + ///subject to change.
  171 + ///finds branches.
117 172 void
118 173 branchDetection()
119 174 {
120   - Bind();
121 175 setMatrix();
122 176 glCallList(dList+3);
123   -
124   - // int best = getCost();
  177 + std::vector< stim::vec<float> > result = find_branch(
  178 + btexbufferID, GL_TEXTURE_2D, 16, 216);
  179 + stim::vec<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]);
  180 + if(!result.empty())
  181 + {
  182 + for(int i = 1; i < result.size(); i++)
  183 + {
  184 + stim::vec<float> cylp(
  185 + 0.5 * cos(2*M_PI*(result[i][1])),
  186 + 0.5 * sin(2*M_PI*(result[i][1])),
  187 + result[i][0]-0.5,
  188 + 1.0);
  189 + cylp = cT*cylp;
  190 +
  191 + stim::vec<float> vec(
  192 + cylp[0]*S[0]*R[0],
  193 + cylp[1]*S[1]*R[1],
  194 + cylp[2]*S[2]*R[2]);
  195 + stim::vec<float> seeddir(-p[0] + cylp[0]*S[0]*R[0],
  196 + -p[1] + cylp[1]*S[1]*R[1],
  197 + -p[2] + cylp[2]*S[2]*R[2]);
  198 + seeddir = seeddir.norm();
  199 +// float seedm = m[0]/2.0;
  200 + float seedm = m[0];
  201 +// Uncomment for global run
  202 +/* stim::vec<float> lSeed = getLastSeed();
  203 + if(sqrt(pow((lSeed[0] - vec[0]),2)
  204 + + pow((lSeed[1] - vec[1]),2) +
  205 + pow((lSeed[2] - vec[2]),2)) > m[0]/4.0
  206 + && */
  207 + if(
  208 + !(vec[0] > size[0] || vec[1] > size[1]
  209 + || vec[2] > size[2] || vec[0] < 0
  210 + || vec[1] < 0 || vec[2] < 0))
  211 + {
  212 + setSeed(vec);
  213 + setSeedVec(seeddir);
  214 + setSeedMag(seedm);
  215 + }
  216 + }
  217 + }
125 218  
126 219 }
127 220  
128   -
129   -
130   - void
131   - Optimize()
132   - {
133   - /*find the optimum d and scale */
134   - }
135   -
136   -
137   -
138 221  
139 222 //--------------------------------------------------------------------------//
140 223 //---------------------TEMPLATE CREATION METHODS----------------------------//
... ... @@ -142,14 +225,15 @@ class gl_spider
142 225  
143 226 ///@param solidAngle, the size of the arc to sample.
144 227 ///Method for populating the vector arrays with sampled vectors.
  228 + ///Objects created are rectangles the with the created directions.
  229 + ///All points are sampled from a texture.
  230 + ///Stored in a display list.
145 231 ///uses the default d vector <0,0,1>
146 232 void
147   - genDirectionVectors(float solidAngle = 3*M_PI/2)
  233 + genDirectionVectors(float solidAngle = 5/M_PI*4)
148 234 {
149   - //ofstream file;
150   - //file.open("dvectors.txt");
151 235 //Set up the vectors necessary for Rectangle creation.
152   - vec<float> Y(1.0,0.0,0.0);
  236 + vec<float> Y(1.0,0.0,0.0); //orthogonal vec.
153 237 vec<float> pos(0.0,0.0,0.0);
154 238 vec<float> mag(1.0, 1.0, 1.0);
155 239 vec<float> dir(0.0, 0.0, 1.0);
... ... @@ -158,12 +242,12 @@ class gl_spider
158 242 vec<float> d_s = d.cart2sph().norm();
159 243 vec<float> temp(0,0,0);
160 244 int dim = (sqrt(numSamples)-1)/2;
161   - float p0 = -M_PI;
162   - float dt = solidAngle/(2.0 * ((float)dim + 1.0));
163   - float dp = p0/(2.0*((float)dim + 1.0));
  245 + float p0 = -M_PI; //phi angle in spherical coordinates.
  246 + float dt = solidAngle/(2.0 * ((float)dim + 1.0)); //step size in Theta.
  247 + float dp = p0/(2.0*((float)dim + 1.0)); //step size in Phi.
164 248  
165   - glNewList(dList, GL_COMPILE);
166   - //Loop over the space
  249 + glNewList(dList, GL_COMPILE);
  250 + //Loop over the above defined space creating distinct vectors.
167 251 int idx = 0;
168 252 for(int i = -dim; i <= dim; i++){
169 253 for(int j = -dim; j <= dim; j++){
... ... @@ -192,28 +276,30 @@ class gl_spider
192 276 glEndList();
193 277 }
194 278  
195   - ///@param solidAngle, the size of the arc to sample.
  279 + ///@param float delta, How much the rectangles vary in position.
196 280 ///Method for populating the buffer with the sampled texture.
  281 + ///Objects created are rectangles the with the created positions.
  282 + ///All points are sampled from a texture.
  283 + ///Stored in a display list.
197 284 ///uses the default vector <0,0,0>
198 285 void
199 286 genPositionVectors(float delta = 0.4)
200 287 {
201 288 //Set up the vectors necessary for Rectangle creation.
202   - vec<float> Y(1.0,0.0,0.0);
  289 + vec<float> Y(1.0,0.0,0.0); //orthogonal vec.
203 290 vec<float> pos(0.0,0.0,0.0);
204 291 vec<float> mag(1.0, 1.0, 1.0);
205 292 vec<float> dir(0.0, 0.0, 1.0);
206 293  
207 294 //Set up the variable necessary for vector creation.
208 295 vec<float> temp(0,0,0);
209   - int dim = (sqrt(numSamples)-1)/2;
210   - stim::rect<float> samplingPlane =
  296 + int dim = (sqrt(numSamplesPos)-1)/2; //number of position vectors.
  297 + stim::rect<float> samplingPlane = //plane from which we pull position samples
211 298 stim::rect<float>(p, d);
212 299 samplingPlane.scale(mag[0]*delta, mag[0]*delta);
213   - float step = 1.0/(dim);
  300 + float step = 1.0/(dim); //step size.
214 301  
215   - //Loop over the samples, keeping the original p sample
216   - //in the center of the resulting texture.
  302 + //Loop over the samples, keeping the original p samples in the center of the resulting texture to create a large number of position vectors.
217 303 int idx;
218 304 glNewList(dList+1, GL_COMPILE);
219 305 for(int i = -dim; i <= dim; i++){
... ... @@ -240,30 +326,32 @@ class gl_spider
240 326 glEndList();
241 327 }
242 328  
243   - ///@param solidAngle, the size of the arc to sample.
  329 + ///@param float delta, How much the rectangles are allowed to expand.
244 330 ///Method for populating the buffer with the sampled texture.
  331 + ///Objects created are rectangles the with the created sizes.
  332 + ///All points are sampled from a texture.
  333 + ///Stored in a display list.
245 334 ///uses the default m <1,1,0>
246 335 void
247 336 genMagnitudeVectors(float delta = 0.70)
248   -// genMagnitudeVectors(float delta = 0.50)
249 337 {
250 338  
251 339 //Set up the vectors necessary for Rectangle creation.
252   - vec<float> Y(1.0,0.0,0.0);
  340 + vec<float> Y(1.0,0.0,0.0); //orthogonal vec.
253 341 vec<float> pos(0.0,0.0,0.0);
254 342 vec<float> mag(1.0, 1.0, 1.0);
255 343 vec<float> dir(0.0, 0.0, 1.0);
256 344  
257 345 //Set up the variable necessary for vector creation.
258   - int dim = (sqrt(numSamples)-1)/2;
  346 + int dim = (sqrt(numSamplesMag)-1)/2;
259 347 float min = 1.0-delta;
260 348 float max = 1.0+delta;
261   - float step = (max-min)/(numSamples-1);
  349 + float step = (max-min)/(numSamplesMag-1);
262 350 float factor;
263 351 vec<float> temp(0.0,0.0,0.0);
264 352  
265 353 glNewList(dList+2, GL_COMPILE);
266   - for(int i = 0; i < numSamples; i++){
  354 + for(int i = 0; i < numSamplesMag; i++){
267 355 //Create linear index
268 356 factor = (min+step*i)*mag[0];
269 357 temp = factor;
... ... @@ -280,10 +368,11 @@ class gl_spider
280 368 }
281 369 glEndList();
282 370 }
283   - ///@param v_x x-coordinate in buffer-space,
284   - ///@param v_y y-coordinate in buffer-space.
285   - ///Samples the texturespace and places a sample in the provided coordinates
286   - ///of bufferspace.
  371 +
  372 + ///@param float v_x x-coordinate in buffer-space,
  373 + ///@param float v_y y-coordinate in buffer-space.
  374 + ///Samples the texture space.
  375 + ///places a sample in the provided coordinates of bufferspace.
287 376 void
288 377 UpdateBuffer(float v_x, float v_y)
289 378 {
... ... @@ -361,8 +450,37 @@ class gl_spider
361 450 //--------------------------------GL METHODS--------------------------------//
362 451 //--------------------------------------------------------------------------//
363 452  
364   - ///@param width sets the width of the buffer.
365   - ///@param height sets the height of the buffer.
  453 + ///@param uint width sets the width of the buffer.
  454 + ///@param uint height sets the height of the buffer.
  455 + ///@param GLuint &textureID gives the texture ID of the texture to be initialized.
  456 + ///@param GLuint &framebufferID gives the buffer ID of the texture to be initialized.
  457 + ///Function for setting up the 2D buffer that stores the samples.
  458 + ///Initiates and sets parameters.
  459 + void
  460 + GenerateFBO(unsigned int width, unsigned int height, GLuint &textureID, GLuint &framebufferID)
  461 + {
  462 + glGenFramebuffers(1, &framebufferID);
  463 + glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);
  464 + int numChannels = 1;
  465 + unsigned char* texels = new unsigned char[width * height * numChannels];
  466 + glGenTextures(1, &textureID);
  467 + glBindTexture(GL_TEXTURE_2D, textureID);
  468 +
  469 + //Textures repeat and use linear interpolation, luminance format.
  470 + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
  471 + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
  472 + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
  473 + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
  474 + glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE,
  475 + width, height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, texels);
  476 + delete[] texels;
  477 + glBindFramebuffer(GL_FRAMEBUFFER, 0);
  478 + glBindTexture(GL_TEXTURE_2D, 0);
  479 + CHECK_OPENGL_ERROR
  480 + }
  481 +
  482 + ///@param uint width sets the width of the buffer.
  483 + ///@param uint height sets the height of the buffer.
366 484 ///Function for setting up the 2D buffer that stores the samples.
367 485 void
368 486 GenerateFBO(unsigned int width, unsigned int height)
... ... @@ -373,6 +491,8 @@ class gl_spider
373 491 unsigned char* texels = new unsigned char[width * height * numChannels];
374 492 glGenTextures(1, &texbufferID);
375 493 glBindTexture(GL_TEXTURE_2D, texbufferID);
  494 +
  495 + //Textures repeat and use linear interpolation, luminance format.
376 496 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
377 497 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
378 498 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
... ... @@ -382,39 +502,150 @@ class gl_spider
382 502 delete[] texels;
383 503 glBindFramebuffer(GL_FRAMEBUFFER, 0);
384 504 glBindTexture(GL_TEXTURE_2D, 0);
  505 + CHECK_OPENGL_ERROR
385 506 }
386 507  
387 508  
388   - ///Method for using the gl manipulation to alighn templates from
  509 + ///Method for using the gl manipulation to align templates from
389 510 ///Template space (-0.5 0.5) to Texture space (0.0, 1.0),
390 511 ///Based on the p of the spider in real space (arbitrary).
  512 + ///All transformation happen in glMatrixMode(GL_TEXTURE).
391 513 void setMatrix()
392 514 {
393   - float curTrans[16];
394   - stim::vec<float> rot = getRotation(d);
  515 + float curTrans[16]; //array to store the matrix values.
  516 + stim::vec<float> rot = getRotation(d); //get the rotation parameters for the current direction vector.
395 517 glMatrixMode(GL_TEXTURE);
396 518 glLoadIdentity();
397   - glScalef(1.0/S[0]/R[0], 1.0/S[1]/R[1], 1.0/S[2]/R[2]);
398   -
399 519  
  520 + //Scale by the voxel size and number of slices.
  521 + glScalef(1.0/S[0]/R[0], 1.0/S[1]/R[1], 1.0/S[2]/R[2]);
  522 + //translate to the current position of the spider in the texture.
400 523 glTranslatef(p[0],
401 524 p[1],
402 525 p[2]);
403   -
  526 + //rotate to the current direction of the spider.
404 527 glRotatef(rot[0], rot[1], rot[2], rot[3]);
405   -
  528 + //scale to the magnitude of the spider.
406 529 glScalef(m[0],
407 530 m[0],
408 531 m[0]);
409   -
  532 + //get and store the current transformation matrix for later use.
410 533 glGetFloatv(GL_TEXTURE_MATRIX, curTrans);
411 534 cT.set(curTrans);
412   -// printTransform();
  535 + // printTransform();
413 536  
414 537 CHECK_OPENGL_ERROR
  538 + //revert back to default gl mode.
415 539 glMatrixMode(GL_MODELVIEW);
416 540 }
  541 +
  542 + ///Method for controling the buffer and texture binding.
  543 + ///Clears the buffer upon binding.
  544 + void
  545 + Bind()
  546 + {
  547 + float len = 8.0;
  548 + glBindFramebuffer(GL_FRAMEBUFFER, fboID);//set up GL buffer
  549 + glFramebufferTexture2D(
  550 + GL_FRAMEBUFFER,
  551 + GL_COLOR_ATTACHMENT0,
  552 + GL_TEXTURE_2D,
  553 + texbufferID,
  554 + 0);
  555 + glBindFramebuffer(GL_FRAMEBUFFER, fboID);
  556 + GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
  557 + glDrawBuffers(1, DrawBuffers);
  558 + glBindTexture(GL_TEXTURE_2D, texbufferID);
  559 + glClearColor(1,1,1,1);
  560 + glClear(GL_COLOR_BUFFER_BIT);
  561 + glMatrixMode(GL_PROJECTION);
  562 + glLoadIdentity();
  563 + glMatrixMode(GL_MODELVIEW);
  564 + glLoadIdentity();
  565 + glViewport(0,0,2.0*len, numSamples*len);
  566 + gluOrtho2D(0.0,2.0*len,0.0,numSamples*len);
  567 + glEnable(GL_TEXTURE_3D);
  568 + glBindTexture(GL_TEXTURE_3D, texID);
  569 +
  570 + CHECK_OPENGL_ERROR
  571 + }
417 572  
  573 + ///Method for controling the buffer and texture binding.
  574 + ///Clears the buffer upon binding.
  575 + ///@param GLuint &textureID, texture to be bound.
  576 + ///@param GLuint &framebufferID, framebuffer used for storage.
  577 + ///@param int nSamples, number of rectanges to create.
  578 + void
  579 + Bind(GLuint &textureID, GLuint &framebufferID, int nSamples)
  580 + {
  581 + float len = 8.0;
  582 + glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);//set up GL buffer
  583 + glFramebufferTexture2D(
  584 + GL_FRAMEBUFFER,
  585 + GL_COLOR_ATTACHMENT0,
  586 + GL_TEXTURE_2D,
  587 + textureID,
  588 + 0);
  589 + glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);
  590 + GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
  591 + glDrawBuffers(1, DrawBuffers);
  592 + glBindTexture(GL_TEXTURE_2D, textureID);
  593 +// glClearColor(1,1,1,1);
  594 +// glClear(GL_COLOR_BUFFER_BIT);
  595 + glMatrixMode(GL_PROJECTION);
  596 + glLoadIdentity();
  597 + glMatrixMode(GL_MODELVIEW);
  598 + glLoadIdentity();
  599 + glViewport(0,0,2.0*len, nSamples*len);
  600 + gluOrtho2D(0.0,2.0*len,0.0,nSamples*len);
  601 + glEnable(GL_TEXTURE_3D);
  602 + glBindTexture(GL_TEXTURE_3D, texID);
  603 +
  604 + CHECK_OPENGL_ERROR
  605 + }
  606 +
  607 + ///Unbinds all texture resources.
  608 + void
  609 + Unbind()
  610 + {
  611 + //Finalize GL_buffer
  612 + glBindTexture(GL_TEXTURE_3D, 0);
  613 + CHECK_OPENGL_ERROR
  614 + glBindTexture(GL_TEXTURE_2D, 0);
  615 + CHECK_OPENGL_ERROR
  616 + glBindFramebuffer(GL_FRAMEBUFFER, 0);
  617 + CHECK_OPENGL_ERROR
  618 + glDisable(GL_TEXTURE_3D);
  619 + CHECK_OPENGL_ERROR
  620 + }
  621 +
  622 + ///Makes the spider take a step.
  623 + ///starting with the current p, d, m, find the next optimal p, d, m.
  624 + ///Performs the branch detection on each step.
  625 + int
  626 + StepP()
  627 + {
  628 + Bind();
  629 + CHECK_OPENGL_ERROR
  630 + #ifdef TESTING
  631 + start = std::clock();
  632 + #endif
  633 + findOptimalDirection();
  634 + findOptimalPosition();
  635 + findOptimalScale();
  636 + Unbind();
  637 + Bind(btexbufferID, bfboID, 27);
  638 + branchDetection();
  639 + Unbind();
  640 +
  641 + #ifdef TESTING
  642 + duration_sampling = duration_sampling +
  643 + (std::clock() - start) / (double) CLOCKS_PER_SEC;
  644 + num_sampling = num_sampling + 1.0;
  645 + #endif
  646 + return current_cost;
  647 + }
  648 +
418 649  
419 650  
420 651  
... ... @@ -422,95 +653,150 @@ class gl_spider
422 653 //--------------------------------CUDA METHODS------------------------------//
423 654 //--------------------------------------------------------------------------//
424 655  
425   - /// Method for registering the texture with Cuda for shared
426   - /// access.
427   - void
428   - createResource()
429   - {
430   - HANDLE_ERROR(
431   - cudaGraphicsGLRegisterImage(
432   - &resource,
433   - texbufferID,
434   - GL_TEXTURE_2D,
435   - //CU_GRAPHICS_REGISTER_FLAGS_NONE)
436   - cudaGraphicsMapFlagsReadOnly)
437   - );
438   - }
439   -
440   - ///Method for freeing the texture from Cuda for gl access.
441   - void
442   - destroyResource()
  656 +
  657 + ///Entry-point into the cuda code for calculating the cost of a given samples array (in texture form)
  658 + ///finds the minimum cost and sets the current_cost to that value.
  659 + /// and returns the index of the template with the minimal cost.
  660 + int
  661 + getCost()
443 662 {
444   - HANDLE_ERROR(
445   - cudaGraphicsUnregisterResource(resource)
446   - );
  663 + #ifdef TESTING
  664 + start = std::clock();
  665 + #endif
  666 + stim::vec<int> cost =
  667 + stim::cuda::get_cost(texbufferID, GL_TEXTURE_2D, numSamples);
  668 + cudaDeviceSynchronize();
  669 + #ifdef TESTING
  670 + duration_cuda = duration_cuda +
  671 + (std::clock() - start) / (double) CLOCKS_PER_SEC;
  672 + num_cuda = num_cuda + 1.0;
  673 + #endif
  674 + current_cost = cost[1];
  675 + return cost[0];
447 676 }
448 677  
449   - ///Entry-point into the cuda code for calculating the cost
450   - /// of a given samples array (in texture form)
451 678 int
452   - getCost()
  679 + getCost(GLuint tID, int n)
453 680 {
454   - createResource();
455   - stim::vec<int> cost = get_cost(resource, numSamples);
456   - destroyResource();
457   -// if (cost[1] >= 80)
458   -// exit(0);
  681 + #ifdef TESTING
  682 + start = std::clock();
  683 + #endif
  684 + stim::vec<int> cost =
  685 + stim::cuda::get_cost(tID, GL_TEXTURE_2D, n);
  686 + cudaDeviceSynchronize();
  687 + #ifdef TESTING
  688 + duration_cuda = duration_cuda +
  689 + (std::clock() - start) / (double) CLOCKS_PER_SEC;
  690 + num_cuda = num_cuda + 1.0;
  691 + #endif
459 692 current_cost = cost[1];
460 693 return cost[0];
461 694 }
462 695  
463 696 public:
  697 +
  698 + ///ininializes the cuda device and environment.
  699 + void
  700 + initCuda()
  701 + {
  702 + stim::cudaSetDevice();
  703 + //GLint max;
  704 + //glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max);
  705 + //std::cout << max << std::endl;
  706 + }
  707 +
  708 + //horizontal rectangle forming the spider.
464 709 stim::rect<float> hor;
  710 + //vertical rectangle forming the spider.
465 711 stim::rect<float> ver;
466 712  
  713 + //Testing and Timing variables.
  714 + #ifdef TESTING
  715 + std::clock_t start;
  716 + double duration_sampling = 0.0;
  717 + double duration_cuda = 0.0;
  718 + double num_sampling = 0.0;
  719 + double num_cuda = 0.0;
  720 + #endif
  721 +
467 722 //--------------------------------------------------------------------------//
468 723 //-----------------------------CONSTRUCTORS---------------------------------//
469 724 //--------------------------------------------------------------------------//
470 725  
471 726  
472   - ///@param samples, the number of samples this spider is going to use.
473   - ///best results if samples is can create a perfect root.
  727 + ///@param int samples, the number of samples this spider is going to use.
  728 + ///Best results if samples is can create a perfect root.
474 729 ///Default Constructor
475 730 gl_spider
476   - (int samples = 1089)
  731 + (int samples = 1089, int samplespos = 441,int samplesmag = 144)
477 732 {
478 733 p = vec<float>(0.0, 0.0, 0.0);
479 734 d = vec<float>(0.0, 0.0, 1.0);
480 735 m = vec<float>(1.0, 1.0);
481 736 S = vec<float>(1.0, 1.0, 1.0);
482 737 R = vec<float>(1.0, 1.0, 1.0);
483   - //setPosition(0.0,0.0,0.0);
484   - //setDirection(0.0,0.0,1.0);
485   - //setMagnitude(1.0);
486 738 numSamples = samples;
  739 + numSamplesPos = samplespos;
  740 + numSamplesMag = samplesmag;
487 741 }
488 742  
489   - ///temporary constructor for convenience, will be removed in further updates.
  743 + ///Position constructor: floats.
  744 + ///@param float pos_x, position x.
  745 + ///@param float pos_y, position y.
  746 + ///@param float pos_z, position z.
  747 + ///@param float dir_x, direction x.
  748 + ///@param float dir_y, direction y.
  749 + ///@param float dir_z, direction z.
  750 + ///@param float mag_x, size of the vector.
  751 + ///@param int samples, number of templates this spider is going to use.
490 752 gl_spider
491 753 (float pos_x, float pos_y, float pos_z, float dir_x, float dir_y, float dir_z,
492   - float mag_x, int numSamples = 1089)
  754 + float mag_x, int numsamples = 1089, int numsamplespos = 441, int numsamplesmag =144)
493 755 {
494 756 p = vec<float>(pos_x, pos_y, pos_z);
495 757 d = vec<float>(dir_x, dir_y, dir_z);
496 758 m = vec<float>(mag_x, mag_x, mag_x);
497 759 S = vec<float>(1.0,1.0,1.0);
498 760 R = vec<float>(1.0,1.0,1.0);
499   - //setPosition(pos_x, pos_y, pos_z);
500   - //setDirection(dir_x, dir_y, dir_z);
501   - //setMagnitude(mag_x);
502   -
  761 + numSamples = numsamples;
  762 + numSamplesPos = numsamplespos;
  763 + numSamplesMag = numsamplesmag;
503 764 }
504   -
  765 +
  766 + ///Position constructor: vecs of floats.
  767 + ///@param stim::vec<float> pos, position.
  768 + ///@param stim::vec<float> dir, direction.
  769 + ///@param float mag, size of the vector.
  770 + ///@param int samples, number of templates this spider is going to use.
  771 + gl_spider
  772 + (stim::vec<float> pos, stim::vec<float> dir, float mag, int samples = 1089, int samplesPos = 441, int samplesMag = 144)
  773 + {
  774 + p = pos;
  775 + d = dir;
  776 + m = vec<float>(mag, mag, mag);
  777 + S = vec<float>(1.0,1.0,1.0);
  778 + R = vec<float>(1.0,1.0,1.0);
  779 + numSamples = samples;
  780 + numSamplesPos = samplesPos;
  781 + numSamplesMag = samplesMag;
  782 + }
  783 +
  784 + ///destructor
505 785 ~gl_spider
506 786 (void)
507 787 {
508 788 Unbind();
509 789 glDeleteTextures(1, &texbufferID);
510 790 glDeleteBuffers(1, &fboID);
  791 + glDeleteTextures(1, &ptexbufferID);
  792 + glDeleteBuffers(1, &pfboID);
  793 + glDeleteTextures(1, &mtexbufferID);
  794 + glDeleteBuffers(1, &mfboID);
  795 + glDeleteTextures(1, &btexbufferID);
  796 + glDeleteBuffers(1, &bfboID);
511 797 }
512 798  
513   - ///@param GLuint id texture that is going to be sampled.
  799 + ///@param GLuint id, texture that is going to be sampled.
514 800 ///Attached the spider to the texture with the given GLuint ID.
515 801 ///Samples in the default d acting as the init method.
516 802 ///Also acts an init.
... ... @@ -518,16 +804,26 @@ class gl_spider
518 804 attachSpider(GLuint id)
519 805 {
520 806 texID = id;
521   - GenerateFBO(16, numSamples*8);
  807 + //GenerateFBO(16, numSamples*8);
  808 + GenerateFBO(16, numSamples*8, texbufferID, fboID);
  809 + GenerateFBO(16, numSamplesPos*8, ptexbufferID, pfboID);
  810 + GenerateFBO(16, numSamplesMag*8, mtexbufferID, mfboID);
  811 + GenerateFBO(16, 216, btexbufferID, bfboID);
522 812 setDims(0.6, 0.6, 1.0);
523 813 setSize(512.0, 512.0, 426.0);
524 814 setMatrix();
525 815 dList = glGenLists(3);
526 816