Commit 84eff8b1dce372c5bf4eba569dcf26cc8b575936

Authored by Pavel Govyadinov
1 parent bf731970

Merged only the necessary parts of Branch detection into master

stim/cuda/arraymath/array_cart2polar.cuh
@@ -4,7 +4,7 @@ @@ -4,7 +4,7 @@
4 namespace stim{ 4 namespace stim{
5 namespace cuda{ 5 namespace cuda{
6 template<typename T> 6 template<typename T>
7 - __global__ void cuda_cart2polar(T* a, int x, int y){ 7 + __global__ void cuda_cart2polar(T* a, int x, int y, float rotation){
8 8
9 9
10 // calculate the 2D coordinates for this current thread. 10 // calculate the 2D coordinates for this current thread.
@@ -20,14 +20,14 @@ namespace stim{ @@ -20,14 +20,14 @@ namespace stim{
20 float yl = a[i * 2 + 1]; 20 float yl = a[i * 2 + 1];
21 float theta = atan2( yl, xl ) ; 21 float theta = atan2( yl, xl ) ;
22 float r = sqrt(xl * xl + yl * yl); 22 float r = sqrt(xl * xl + yl * yl);
23 - a[i * 2 + 0] = theta; 23 + a[i * 2 + 0] = theta + rotation;
24 a[i * 2 + 1] = r; 24 a[i * 2 + 1] = r;
25 25
26 } 26 }
27 27
28 28
29 template<typename T> 29 template<typename T>
30 - void gpu_cart2polar(T* gpuGrad, unsigned int x, unsigned int y){ 30 + void gpu_cart2polar(T* gpuGrad, unsigned int x, unsigned int y, float rotation = 0){
31 31
32 unsigned int max_threads = stim::maxThreadsPerBlock(); 32 unsigned int max_threads = stim::maxThreadsPerBlock();
33 dim3 threads(max_threads, 1); 33 dim3 threads(max_threads, 1);
@@ -35,7 +35,7 @@ namespace stim{ @@ -35,7 +35,7 @@ namespace stim{
35 35
36 36
37 //call the kernel to do the multiplication 37 //call the kernel to do the multiplication
38 - cuda_cart2polar <<< blocks, threads >>>(gpuGrad, x, y); 38 + cuda_cart2polar <<< blocks, threads >>>(gpuGrad, x, y, rotation);
39 39
40 } 40 }
41 41
@@ -68,4 +68,4 @@ namespace stim{ @@ -68,4 +68,4 @@ namespace stim{
68 } 68 }
69 } 69 }
70 70
71 -#endif  
72 \ No newline at end of file 71 \ No newline at end of file
  72 +#endif
stim/cuda/branch_detection.cuh 0 → 100644
  1 +#include <iostream>
  2 +#include <fstream>
  3 +#include <cuda_runtime.h>
  4 +#include <stim/math/vector.h>
  5 +//#include <math.h>
  6 +#include <stim/visualization/colormap.h>
  7 +#include <stim/cuda/cuda_texture.cuh>
  8 +#include <stim/cuda/templates/gradient.cuh>
  9 +#include <stim/cuda/templates/gaussian_blur.cuh>
  10 +#include <stim/cuda/arraymath.cuh>
  11 +#include <stim/cuda/ivote.cuh>
  12 +#include <stim/cuda/testKernel.cuh>
  13 +typedef unsigned int uint;
  14 +typedef unsigned int uchar;
  15 +
  16 +stim::cuda::cuda_texture t;
  17 +float* gpuTable;
  18 +float* gpuGrad;
  19 +float* gpuVote;
  20 +float* gpuI;
  21 +float* gpuCenters;
  22 +
  23 +void atan_2d(float* cpuTable, unsigned int rmax)
  24 +{
  25 + //initialize the width and height of the window which atan2 are computed in.
  26 + int xsize = 2*rmax +1;
  27 + int ysize = 2*rmax +1;
  28 +
  29 + // assign the center coordinates of the atan2 window to yi and xi
  30 + int yi = rmax;
  31 + int xi = rmax;
  32 +
  33 +
  34 + for (int xt = 0; xt < xsize; xt++){
  35 +
  36 + for(int yt = 0; yt < ysize; yt++){
  37 +
  38 + //convert the current 2D coordinates to 1D
  39 + int id = yt * xsize + xt;
  40 + // calculate the distance between the pixel and the center of the atan2 window
  41 + float xd = xi - xt;
  42 + float yd = yi - yt;
  43 +
  44 + // calculate the angle between the pixel and the center of the atan2 window and store the result.
  45 + float atan_2d_vote = atan2(yd, xd);
  46 + cpuTable[id] = atan_2d_vote;
  47 + }
  48 + }
  49 +
  50 +}
  51 +
  52 +void initCuda(unsigned int bytes_table, unsigned int bytes_ds)
  53 +{
  54 + HANDLE_ERROR(
  55 + cudaMalloc((void**) &gpuTable, bytes_table)
  56 + );
  57 + HANDLE_ERROR(
  58 + cudaMalloc((void**) &gpuI, bytes_ds)
  59 + );
  60 + HANDLE_ERROR(
  61 + cudaMalloc((void**) &gpuGrad, bytes_ds*2)
  62 + );
  63 + HANDLE_ERROR(
  64 + cudaMalloc((void**) &gpuVote, bytes_ds)
  65 + );
  66 + HANDLE_ERROR(
  67 + cudaMalloc((void**) &gpuCenters, bytes_ds)
  68 + );
  69 +}
  70 +
  71 +void cleanCuda()
  72 +{
  73 + HANDLE_ERROR(
  74 + cudaFree(gpuTable)
  75 + );
  76 + HANDLE_ERROR(
  77 + cudaFree(gpuGrad)
  78 + );
  79 + HANDLE_ERROR(
  80 + cudaFree(gpuVote)
  81 + );
  82 + HANDLE_ERROR(
  83 + cudaFree(gpuCenters)
  84 + );
  85 + HANDLE_ERROR(
  86 + cudaFree(gpuI)
  87 + );
  88 +}
  89 +
  90 +std::vector< stim::vec<float> >
  91 +find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
  92 +{
  93 + float phi = 15.1*M_PI/180;
  94 + int iter = 5;
  95 + float dphi = phi/iter;
  96 + float rmax = 10;
  97 + float sigma = 4;
  98 + unsigned int pixels = x * y;
  99 + unsigned int bytes = sizeof(float) * pixels;
  100 + unsigned int bytes_table = sizeof(float) * (2*rmax + 1) * (2*rmax + 1);
  101 + unsigned int x_ds = (x + (x % 1 == 0 ? 0:1));
  102 + unsigned int y_ds = (y + (x % 1 == 0 ? 0:1));
  103 + unsigned int bytes_ds = sizeof(float) * x_ds * y_ds;
  104 + unsigned int conn = 5;
  105 + float final_t = 200.0;
  106 + float* cpuTable = (float*) malloc(bytes_table);
  107 + float* cpuCenters = (float*) malloc(bytes_ds);
  108 +
  109 + stringstream name;
  110 +
  111 +
  112 +
  113 +
  114 + std::vector<stim::vec<float> > output;
  115 + initCuda(bytes_table, bytes_ds);
  116 +
  117 + atan_2d(cpuTable, rmax);
  118 + cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);
  119 +
  120 + test(texbufferID, texType);
  121 +
  122 + t.MapCudaTexture(texbufferID, texType);
  123 + cudaDeviceSynchronize();
  124 + stim::cuda::tex_gaussian_blur2<float>(
  125 + gpuI, sigma, x, y, t.getTexture(), t.getArray()
  126 + );
  127 + cudaDeviceSynchronize();
  128 + stim::gpu2image<float>(gpuI, "Blur.jpg", 16, 8*27, 0, 255);
  129 +
  130 +
  131 + stim::cuda::gpu_gradient_2d<float>(
  132 + gpuGrad, gpuI, x, y
  133 + );
  134 + cudaDeviceSynchronize();
  135 +// stim::gpu2image<float>(gpuGrad, "Grad.jpg", 16, 8*27, 0, 1);
  136 +
  137 + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, M_PI);
  138 + cudaDeviceSynchronize();
  139 +// stim::gpu2image<float>(gpuGrad, "Cart_2_polar.jpg", 16, 8*27, 0, 1);
  140 +
  141 + cudaDeviceSynchronize();
  142 + for (int i = 0; i < iter; i++)
  143 + {
  144 + stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  145 + name << "Vote" << i << ".bmp";
  146 + stim::gpu2image<float>(gpuVote, name.str(), 16, 8*27, 0, 255);
  147 + name.str("");
  148 + name.clear();
  149 + cudaDeviceSynchronize();
  150 +// std::cout << "got here7_a_" << i << std::endl;
  151 + stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  152 + cudaDeviceSynchronize();
  153 +// std::cout << "got here7_b_" << i << std::endl;
  154 + phi = phi - dphi;
  155 + }
  156 +
  157 + cudaDeviceSynchronize();
  158 + stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, final_t, conn, x, y);
  159 +// std::cout << "got here_sdkfj" << std::endl;
  160 + cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);
  161 + stim::gpu2image<float>(gpuCenters, "Centers.jpg", 16, 8*27, 0, 255);
  162 + for(int i = 0; i < pixels; i++)
  163 + {
  164 + int ix = (i % x);
  165 + int iy = (i / x);
  166 +// std::cout << i << " : " << ix <<" : " << iy << std::endl;
  167 + if((cpuCenters[i] == 1) && (ix > 4) && (ix < x-4))
  168 + {
  169 + std::cout << ix << " : " << iy << std::endl;
  170 +
  171 + float x_v = (float) ix;
  172 + float y_v = (float) iy;
  173 + output.push_back(stim::vec<float>((x_v/(float)x),
  174 + (y_v/(float)y), 0.0));
  175 +
  176 + std::cout << x_v/16.0 << " : " << y_v/216.0 << std::endl;
  177 + }
  178 + }
  179 +
  180 +
  181 + t.UnmapCudaTexture();
  182 + cleanCuda();
  183 + free(cpuTable);
  184 + free(cpuCenters);
  185 + return output;
  186 +}
stim/cuda/branch_detection2.cuh 0 → 100644
  1 +#include <stim/cuda/templates/gaussian_blur.cuh>
  2 +#include <stim/cuda/templates/gradient.cuh>
  3 +#include <stim/cuda/arraymath.cuh>
  4 +#include <stim/cuda/ivote.cuh>
  5 +
  6 +
  7 +
  8 +
  9 +
  10 +
  11 +
  12 +
  13 +
  14 +
  15 +void atan_2(float* cpuTable, unsigned int rmax){
  16 +
  17 + //initialize the width and height of the window which atan2 are computed in.
  18 + int xsize = 2*rmax +1;
  19 + int ysize = 2*rmax +1;
  20 +
  21 + // assign the center coordinates of the atan2 window to yi and xi
  22 + int yi = rmax;
  23 + int xi = rmax;
  24 +
  25 +
  26 + for (int xt = 0; xt < xsize; xt++){
  27 +
  28 + for(int yt = 0; yt < ysize; yt++){
  29 +
  30 + //convert the current 2D coordinates to 1D
  31 + int id = yt * xsize + xt;
  32 + // calculate the distance between the pixel and the center of the atan2 window
  33 + float xd = xi - xt;
  34 + float yd = yi - yt;
  35 +
  36 + // calculate the angle between the pixel and the center of the atan2 window and store the result.
  37 + float atan_2d_vote = atan2(yd, xd);
  38 + cpuTable[id] = atan_2d_vote;
  39 + }
  40 + }
  41 +
  42 +}
  43 +std::vector<stim::vec<float> >
  44 +find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
  45 +{
  46 +
  47 + float* cpuTable = (float
  48 +
  49 + unsigned int pixels = x * y;
  50 + unsigned int bytes = sizeof(float) * pixels;
  51 +
  52 + //calculate the number of bytes in the atan2 table
  53 +
  54 + unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(float);
  55 +
  56 +
  57 +
  58 + //allocate space on the GPU for the atan2 table
  59 +
  60 + float* gpuTable;
  61 +
  62 + cudaMalloc(&gpuTable, bytes_table);
  63 +
  64 +
  65 +
  66 + cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);
  67 +
  68 + unsigned int sigma_ds = 1/resize;
  69 + unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
  70 + unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
  71 + unsigned int bytes_ds = sizeof(float) * x_ds * y_ds;
  72 +
  73 +
  74 + float* gpuI;
  75 + cudaMalloc(&gpuI, bytes_ds);
  76 +
  77 +
  78 + float* gpuGrad;
  79 + cudaMalloc(&gpuGrad, bytes_ds*2);
  80 +
  81 + float* gpuVote;
  82 + cudaMalloc(&gpuVote, bytes_ds);
  83 +
  84 + // allocate space on the GPU for the detected cell centes
  85 +
  86 + float* gpuCenters;
  87 +
  88 + cudaMalloc(&gpuCenters, bytes_ds);
  89 +
  90 +
  91 + stim::cuda::gpu_down_sample<float>(gpuI, gpuI0, resize, x , y);
  92 + cudaMemcpy(cpuResize, gpuI, bytes_ds, cudaMemcpyDeviceToHost);
  93 +
  94 +x = x_ds;
  95 + y = y_ds;
  96 + t = t * resize;
  97 + //sigma = sigma * resize;
  98 +
  99 + cudaDeviceSynchronize();
  100 + stim::cuda::gpu_gaussian_blur2<float>(gpuI,sigma, x, y);
  101 + cudaDeviceSynchronize();
  102 + cudaMemcpy(cpuBlur, gpuI, bytes_ds, cudaMemcpyDeviceToHost);
  103 + cudaDeviceSynchronize();
  104 +
  105 + stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y);
  106 + cudaDeviceSynchronize();
  107 + cudaMemcpy(cpuGradient, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);
  108 +
  109 + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
  110 + cudaDeviceSynchronize();
  111 + cudaMemcpy(cpuCart2Polar, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);
  112 +
  113 +
  114 + //multiply the gradient by a constant and calculate the absolute value (to save an image)
  115 +
  116 + stim::cuda::cpu_multiply<float>(cpuCart2Polar, 40, x * y * 2);
  117 +
  118 + cudaDeviceSynchronize();
  119 +
  120 + stim::cuda::cpu_abs<float>(cpuCart2Polar, x * y * 2);
  121 +
  122 + cudaDeviceSynchronize();
  123 +
  124 +
  125 + for (int i =0; i<iter; i++){
  126 +
  127 + stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  128 + cudaDeviceSynchronize();
  129 + stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  130 + cudaDeviceSynchronize();
  131 + switch (i){
  132 + case 0 : cudaMemcpy(cpuVote1, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  133 + break;
  134 + case 1 : cudaMemcpy(cpuVote2, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  135 + break;
  136 + case 2 : cudaMemcpy(cpuVote3, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  137 + break;
  138 + case 3 : cudaMemcpy(cpuVote4, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  139 + break;
  140 + case 4 : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  141 + break;
  142 + default : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  143 + break;
  144 + }
  145 + phi = phi - dphi;
  146 + }
  147 +
  148 + stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, t, conn, x, y);
  149 + cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);
  150 +
  151 +}
stim/cuda/cuda_texture.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_TEXTURE_H
  2 +#define STIM_CUDA_TEXTURE_H
  3 +
  4 +#include <assert.h>
  5 +#include <stim/cuda/cudatools/error.h>
  6 +#include <cuda.h>
  7 +#include <cuda_runtime.h>
  8 +#include <cublas_v2.h>
  9 +#include <stdio.h>
  10 +#include <GL/glew.h>
  11 +#include <GL/glut.h>
  12 +#include <sstream>
  13 +#include <stim/visualization/colormap.h>
  14 +#include <stim/cuda/cudatools/devices.h>
  15 +#include <stim/cuda/cudatools/threads.h>
  16 +#include <stim/math/vector.h>
  17 +
  18 +///A container for the texture based methods used by the spider class.
  19 +namespace stim
  20 +{
  21 + namespace cuda
  22 + {
  23 + class cuda_texture
  24 + {
  25 + public:
  26 + cudaArray* srcArray;
  27 + cudaGraphicsResource_t resource;
  28 + struct cudaResourceDesc resDesc;
  29 + struct cudaTextureDesc texDesc;
  30 + cudaTextureObject_t tObj;
  31 +
  32 +
  33 + ///basic constructor that creates the texture with default parameters.
  34 + cuda_texture()
  35 + {
  36 + memset(&texDesc, 0, sizeof(texDesc));
  37 + texDesc.addressMode[0] = cudaAddressModeWrap;
  38 + texDesc.addressMode[1] = cudaAddressModeWrap;
  39 + texDesc.filterMode = cudaFilterModePoint;
  40 + texDesc.readMode = cudaReadModeElementType;
  41 + texDesc.normalizedCoords = 0;
  42 + }
  43 +
  44 +//-------------------------------------------------------------------------//
  45 +//-------------------------------CUDA_MAPPING------------------------------//
  46 +//-------------------------------------------------------------------------//
  47 +//Methods for creating the cuda texture.
  48 + ///@param GLuint tex -- GLtexture (must be contained in a frame buffer object)
  49 + /// that holds that data that will be handed to cuda.
  50 + ///@param GLenum target -- either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
  51 + /// map work with other gl texture types but untested.
  52 + ///Maps the gl texture in cuda memory, binds that data to a cuda array, and binds the cuda
  53 + ///array to a cuda texture.
  54 + void
  55 + MapCudaTexture(GLuint tex, GLenum target)
  56 + {
  57 + HANDLE_ERROR(
  58 + cudaGraphicsGLRegisterImage(
  59 + &resource,
  60 + tex,
  61 + target,
  62 +// cudaGraphicsMapFlagsReadOnly
  63 + cudaGraphicsRegisterFlagsNone
  64 + )
  65 + );
  66 +
  67 + HANDLE_ERROR(
  68 + cudaGraphicsMapResources(1, &resource)
  69 + );
  70 +
  71 + HANDLE_ERROR(
  72 + cudaGraphicsSubResourceGetMappedArray(&srcArray, resource, 0, 0)
  73 + );
  74 +
  75 + memset(&resDesc, 0, sizeof(resDesc));
  76 + resDesc.resType = cudaResourceTypeArray;
  77 + resDesc.res.array.array = srcArray;
  78 + HANDLE_ERROR(
  79 + cudaCreateTextureObject(&tObj, &resDesc, &texDesc, NULL)
  80 + );
  81 + }
  82 +
  83 + ///Unmaps the gl texture, binds that data to a cuda array, and binds the cuda
  84 + ///array to a cuda texture.
  85 + void
  86 + UnmapCudaTexture()
  87 + {
  88 + HANDLE_ERROR(
  89 + cudaGraphicsUnmapResources(1, &resource)
  90 + );
  91 + HANDLE_ERROR(
  92 + cudaGraphicsUnregisterResource(resource)
  93 + );
  94 + HANDLE_ERROR(
  95 + cudaDestroyTextureObject(tObj)
  96 + );
  97 + }
  98 +
  99 +//-------------------------------------------------------------------------//
  100 +//------------------------------GET/SET METHODS----------------------------//
  101 +//-------------------------------------------------------------------------//
  102 +
  103 +///Returns the bound texture object.
  104 + cudaTextureObject_t
  105 + getTexture()
  106 + {
  107 + return tObj;
  108 + }
  109 +
  110 + cudaArray*
  111 + getArray()
  112 + {
  113 + return srcArray;
  114 + }
  115 + };
  116 +}
  117 +}
  118 +
  119 +
  120 +#endif
stim/cuda/sharedmem.cuh
@@ -34,9 +34,38 @@ namespace stim{ @@ -34,9 +34,38 @@ namespace stim{
34 } 34 }
35 } 35 }
36 } 36 }
  37 +
  38 + template<typename T, typename D>
  39 + __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
  40 + unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
  41 + dim3 threadIdx, dim3 blockDim){
  42 +
  43 + //calculate the number of iterations required for the copy
  44 + unsigned int xI, yI;
  45 + xI = X/blockDim.x + 1; //number of iterations along X
  46 + yI = Y/blockDim.y + 1; //number of iterations along Y
  47 +
  48 + //for each iteration
  49 + for(unsigned int xi = 0; xi < xI; xi++){
  50 + for(unsigned int yi = 0; yi < yI; yi++){
  51 +
  52 + //calculate the index into shared memory
  53 + unsigned int sx = xi * blockDim.x + threadIdx.x;
  54 + unsigned int sy = yi * blockDim.y + threadIdx.y;
  55 +
  56 + //calculate the index into the texture
  57 + unsigned int tx = x + sx;
  58 + unsigned int ty = y + sy;
  59 +
  60 + //perform the copy
  61 + if(sx < X && sy < Y)
  62 + dest[sy * X + sx] = tex2D<D>(src, tx, ty);
  63 + }
  64 + }
  65 + }
37 66
38 } 67 }
39 } 68 }
40 69
41 70
42 -#endif  
43 \ No newline at end of file 71 \ No newline at end of file
  72 +#endif
stim/cuda/spider_cost.cuh 0 → 100644
  1 +#ifndef STIM_SPIDER_COST_H
  2 +#define STIM_SPIDER_COST_H
  3 +
  4 +#include <assert.h>
  5 +#include <cuda.h>
  6 +#include <cuda_runtime.h>
  7 +#include <stdio.h>
  8 +#include <stim/visualization/colormap.h>
  9 +#include <sstream>
  10 +#include <stim/math/vector.h>
  11 +#include <stim/cuda/cudatools/devices.h>
  12 +#include <stim/cuda/cudatools/threads.h>
  13 +#include <stim/cuda/cuda_texture.cuh>
  14 +namespace stim{
  15 + namespace cuda
  16 + {
  17 +
  18 + stim::cuda::cuda_texture t; //texture object.
  19 + float* result;
  20 + float* print;
  21 +
  22 + ///Initialization function, allocates the memory and passes the necessary
  23 + ///handles from OpenGL and Cuda.
  24 + ///@param DIM_Y --integer controlling how much memory to allocate.
  25 + void initArray(int DIM_Y)
  26 + {
  27 +// cudaMalloc( (void**) &print, DIM_Y*16*sizeof(float)); ///temporary
  28 + cudaMalloc( (void**) &result, DIM_Y*sizeof(float));
  29 + }
  30 +
  31 + ///Deinit function that frees the memery used and releases the texture resource
  32 + ///back to OpenGL.
  33 + void cleanUP()
  34 + {
  35 + cudaFree(result);
  36 +// cudaFree(print); ///temporary
  37 + }
  38 +
  39 + ///A virtual representation of a uniform template.
  40 + ///Returns the value of the template pixel.
  41 + ///@param int x --location of a pixel.
  42 + __device__
  43 + float Template(int x)
  44 + {
  45 + if(x < 16/6 || x > 16*5/6 || (x > 16*2/6 && x < 16*4/6)){
  46 + return 1.0;
  47 + }else{
  48 + return 0.0;
  49 + }
  50 +
  51 + }
  52 +
  53 + ///Find the difference of the given set of samples and the template
  54 + ///using cuda acceleration.
  55 + ///@param stim::cuda::cuda_texture t --stim texture that holds all the references
  56 + /// to the data.
  57 + ///@param float* result --a pointer to the memory that stores the result.
  58 + __global__
  59 + //void get_diff (float *result)
  60 + void get_diff (cudaTextureObject_t texIn, float *result)
  61 + {
  62 + __shared__ float shared[16][8];
  63 + int x = threadIdx.x + blockIdx.x * blockDim.x;
  64 + int y = threadIdx.y + blockIdx.y * blockDim.y;
  65 + int x_t = threadIdx.x;
  66 + int y_t = threadIdx.y;
  67 +// int idx = y*16+x;
  68 + int g_idx = blockIdx.y;
  69 +
  70 + float valIn = tex2D<unsigned char>(texIn, x, y)/255.0;
  71 + float valTemp = Template(x);
  72 +
  73 +// print[idx] = abs(valIn); ///temporary
  74 +
  75 + shared[x_t][y_t] = abs(valIn-valTemp);
  76 +
  77 + __syncthreads();
  78 +
  79 + for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
  80 + {
  81 + __syncthreads();
  82 + if (x_t < step)
  83 + {
  84 + shared[x_t][y_t] += shared[x_t + step][y_t];
  85 + }
  86 + __syncthreads();
  87 + }
  88 + __syncthreads();
  89 +
  90 + for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1)
  91 + {
  92 + __syncthreads();
  93 + if(y_t < step)
  94 + {
  95 + shared[x_t][y_t] += shared[x_t][y_t + step];
  96 + }
  97 + __syncthreads();
  98 + }
  99 + __syncthreads();
  100 + if(x_t == 0 && y_t == 0)
  101 + result[g_idx] = shared[0][0];
  102 +
  103 +
  104 + // //result[idx] = abs(valIn);
  105 + }
  106 +
  107 +
  108 + ///External access-point to the cuda function
  109 + ///@param GLuint texbufferID --GLtexture (most be contained in a framebuffer object)
  110 + /// that holds the data that will be handed to cuda.
  111 + ///@param GLenum texType --either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
  112 + /// may work with other gl texture types, but untested.
  113 + ///@param DIM_Y, the number of samples in the template.
  114 + extern "C"
  115 + stim::vec<int> get_cost(GLint texbufferID, GLenum texType, int DIM_Y)
  116 + {
  117 +
  118 + //Bind the Texture in GL and allow access to cuda.
  119 + t.MapCudaTexture(texbufferID, texType);
  120 +
  121 + //initialize the return arrays.
  122 + float* output;
  123 + output = (float* ) malloc(DIM_Y*sizeof(float));
  124 +
  125 + stim::vec<int> ret(0, 0);
  126 + initArray(DIM_Y);
  127 +
  128 +
  129 + //variables for finding the min.
  130 + float mini = 10000000000000000.0;
  131 + int idx = 0;
  132 +
  133 + //cuda launch variables.
  134 + dim3 numBlocks(1, DIM_Y);
  135 + dim3 threadsPerBlock(16, 8);
  136 +
  137 +
  138 + get_diff <<< numBlocks, threadsPerBlock >>> (t.getTexture(), result);
  139 +
  140 + HANDLE_ERROR(
  141 + cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost)
  142 + );
  143 +
  144 + for( int i = 0; i<DIM_Y; i++){
  145 + if(output[i] < mini){
  146 + mini = output[i];
  147 + idx = i;
  148 + }
  149 + }
  150 +
  151 +// stringstream name; //for debugging
  152 +// name << "Test.bmp";
  153 +// stim::gpu2image<float>(print, name.str(),16,218,0,1);
  154 +
  155 + t.UnmapCudaTexture();
  156 + cleanUP();
  157 + ret[0] = idx; ret[1] = (int) output[idx];
  158 + std::cout << output[idx] << std::endl;
  159 +
  160 + free(output);
  161 + return ret;
  162 + }
  163 +
  164 + }
  165 +}
  166 +
  167 +
  168 +#endif
stim/cuda/templates/conv2sep.cuh
@@ -30,7 +30,7 @@ namespace stim{ @@ -30,7 +30,7 @@ namespace stim{
30 int byi = blockIdx.y; 30 int byi = blockIdx.y;
31 31
32 //copy the portion of the image necessary for this block to shared memory 32 //copy the portion of the image necessary for this block to shared memory
33 - stim::cuda::sharedMemcpy_tex2D(s, in, bxi - kr, byi, 2 * kr + blockDim.x, 1, threadIdx, blockDim); 33 + stim::cuda::sharedMemcpy_tex2D<float, unsigned char>(s, in, bxi - kr, byi, 2 * kr + blockDim.x, 1, threadIdx, blockDim);
34 34
35 //calculate the thread index 35 //calculate the thread index
36 int ti = threadIdx.x; 36 int ti = threadIdx.x;
@@ -88,7 +88,7 @@ namespace stim{ @@ -88,7 +88,7 @@ namespace stim{
88 int byi = blockIdx.y * blockDim.y; 88 int byi = blockIdx.y * blockDim.y;
89 89
90 //copy the portion of the image necessary for this block to shared memory 90 //copy the portion of the image necessary for this block to shared memory
91 - stim::cuda::sharedMemcpy_tex2D(s, in, bxi, byi - kr, 1, 2 * kr + blockDim.y, threadIdx, blockDim); 91 + stim::cuda::sharedMemcpy_tex2D<float, unsigned char>(s, in, bxi, byi - kr, 1, 2 * kr + blockDim.y, threadIdx, blockDim);
92 92
93 //calculate the thread index 93 //calculate the thread index
94 int ti = threadIdx.y; 94 int ti = threadIdx.y;
@@ -257,4 +257,4 @@ namespace stim{ @@ -257,4 +257,4 @@ namespace stim{
257 }; 257 };
258 }; 258 };
259 259
260 -#endif  
261 \ No newline at end of file 260 \ No newline at end of file
  261 +#endif
stim/cuda/templates/gaussian_blur.cuh
@@ -7,7 +7,6 @@ @@ -7,7 +7,6 @@
7 #include <stim/cuda/sharedmem.cuh> 7 #include <stim/cuda/sharedmem.cuh>
8 #include <stim/cuda/templates/conv2sep.cuh> //GPU-based separable convolution algorithm 8 #include <stim/cuda/templates/conv2sep.cuh> //GPU-based separable convolution algorithm
9 9
10 -#define pi 3.14159  
11 10
12 namespace stim{ 11 namespace stim{
13 namespace cuda{ 12 namespace cuda{
@@ -37,6 +36,7 @@ namespace stim{ @@ -37,6 +36,7 @@ namespace stim{
37 36
38 //copy the kernel to the GPU 37 //copy the kernel to the GPU
39 T* gpuKernel0; 38 T* gpuKernel0;
  39 + HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth*sizeof(T)));
40 HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice)); 40 HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
41 41
42 //perform the gaussian blur as a separable convolution 42 //perform the gaussian blur as a separable convolution
@@ -58,7 +58,6 @@ namespace stim{ @@ -58,7 +58,6 @@ namespace stim{
58 58
59 //copy the kernel to the GPU 59 //copy the kernel to the GPU
60 T* gpuKernel0; 60 T* gpuKernel0;
61 - HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));  
62 HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice)); 61 HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
63 62
64 //perform the gaussian blur as a separable convolution 63 //perform the gaussian blur as a separable convolution
@@ -87,4 +86,4 @@ namespace stim{ @@ -87,4 +86,4 @@ namespace stim{
87 }; 86 };
88 }; 87 };
89 88
90 -#endif  
91 \ No newline at end of file 89 \ No newline at end of file
  90 +#endif
stim/cuda/testKernel.cuh 0 → 100644
  1 +#include <assert.h>
  2 +#include <cuda.h>
  3 +#include <cuda_runtime.h>
  4 +#include <stdio.h>
  5 +#include <stim/visualization/colormap.h>
  6 +#include <sstream>
  7 +#include <stim/math/vector.h>
  8 +#include <stim/cuda/cudatools/devices.h>
  9 +#include <stim/cuda/cudatools/threads.h>
  10 +#include <stim/cuda/cuda_texture.cuh>
  11 + stim::cuda::cuda_texture tx; //texture object.
  12 + float* print;
  13 +
  14 + ///Initialization function, allocates the memory and passes the necessary
  15 + ///handles from OpenGL and Cuda.
  16 + ///@param DIM_Y --integer controlling how much memory to allocate.
  17 + void initArray()
  18 + {
  19 + cudaMalloc( (void**) &print, 216*16*sizeof(float)); ///temporary
  20 + }
  21 +
  22 + ///Deinit function that frees the memery used and releases the texture resource
  23 + ///back to OpenGL.
  24 + void cleanUP()
  25 + {
  26 + cudaFree(print); ///temporary
  27 + }
  28 +
  29 + ///Find the difference of the given set of samples and the template
  30 + ///using cuda acceleration.
  31 + ///@param stim::cuda::cuda_texture t --stim texture that holds all the references
  32 + /// to the data.
  33 + ///@param float* result --a pointer to the memory that stores the result.
  34 + __global__
  35 + //void get_diff (float *result)
  36 + void get_diff (cudaTextureObject_t texIn, float *print)
  37 + {
  38 + int x = threadIdx.x + blockIdx.x * blockDim.x;
  39 + int y = threadIdx.y + blockIdx.y * blockDim.y;
  40 + int idx = y*16+x;
  41 +
  42 + float valIn = tex2D<unsigned char>(texIn, x, y)/255.0;
  43 +
  44 + print[idx] = abs(valIn); ///temporary
  45 +
  46 + }
  47 +
  48 +
  49 + ///External access-point to the cuda function
  50 + ///@param GLuint texbufferID --GLtexture (most be contained in a framebuffer object)
  51 + /// that holds the data that will be handed to cuda.
  52 + ///@param GLenum texType --either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
  53 + /// may work with other gl texture types, but untested.
  54 + ///@param DIM_Y, the number of samples in the template.
  55 + extern "C"
  56 + void test(GLint texbufferID, GLenum texType)
  57 + {
  58 +
  59 + //Bind the Texture in GL and allow access to cuda.
  60 + tx.MapCudaTexture(texbufferID, texType);
  61 +
  62 + //initialize the return arrays.
  63 +
  64 + initArray();
  65 +
  66 + int x = 16;
  67 + int y = 27*8;
  68 + int max_threads = stim::maxThreadsPerBlock();
  69 + dim3 threads(max_threads, 1);
  70 + dim3 blocks(x / threads.x +1, y);
  71 + //dim3 numBlocks(1, 1);
  72 + //dim3 threadsPerBlock(16, 216);
  73 + dim3 numBlocks(2, 2);
  74 + dim3 threadsPerBlock(8, 108);
  75 +
  76 +
  77 +// get_diff <<< blocks, threads >>> (tx.getTexture(), print);
  78 + get_diff <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), print);
  79 +
  80 + cudaDeviceSynchronize();
  81 + stringstream name; //for debugging
  82 + name << "FromTex.bmp";
  83 + stim::gpu2image<float>(print, name.str(),16,216,0,1);
  84 +
  85 + tx.UnmapCudaTexture();
  86 + cleanUP();
  87 + }
  88 +
stim/gl/gl_spider.h
@@ -13,50 +13,79 @@ @@ -13,50 +13,79 @@
13 #include "stim/math/vector.h" 13 #include "stim/math/vector.h"
14 #include "stim/math/rect.h" 14 #include "stim/math/rect.h"
15 #include "stim/math/matrix.h" 15 #include "stim/math/matrix.h"
16 -#include "stim/cuda/cost.h" 16 +#include "stim/cuda/spider_cost.cuh"
17 #include <stim/cuda/cudatools/glbind.h> 17 #include <stim/cuda/cudatools/glbind.h>
18 -#include <stim/visualization/obj.h> 18 +#include <stim/cuda/arraymath.cuh>
  19 +#include <stim/cuda/cudatools.h>
  20 +#include <stim/cuda/ivote.cuh>
  21 +#include <stim/visualization/glObj.h>
19 #include <vector> 22 #include <vector>
  23 +#include <stim/cuda/branch_detection.cuh>
  24 +
  25 +//#include <stim/cuda/testKernel.cuh>
20 26
21 #include <iostream> 27 #include <iostream>
22 #include <fstream> 28 #include <fstream>
  29 +#ifdef TESTING
  30 + #include <iostream>
  31 + #include <cstdio>
  32 + #include <ctime>
  33 +#endif
23 34
24 35
25 -  
26 -/* Technically since gl_spider inherits from gl_texture, we could  
27 - call the init with a path to an image stack, and upload  
28 - the images while creating the spider (calling init) */  
29 namespace stim 36 namespace stim
30 { 37 {
31 38
32 template<typename T> 39 template<typename T>
33 -class gl_spider 40 +class gl_spider : public virtual gl_texture<T>
34 { 41 {
35 //doen't use gl_texture really, just needs the GLuint id. 42 //doen't use gl_texture really, just needs the GLuint id.
36 //doesn't even need the texture iD really. 43 //doesn't even need the texture iD really.
37 private: 44 private:
  45 +
  46 + //
38 stim::vec<float> p; //vector designating the position of the spider. 47 stim::vec<float> p; //vector designating the position of the spider.
39 stim::vec<float> d; //vector designating the orientation of the spider 48 stim::vec<float> d; //vector designating the orientation of the spider
40 //always a unit vector. 49 //always a unit vector.
41 stim::vec<float> m; //magnitude of the spider vector. 50 stim::vec<float> m; //magnitude of the spider vector.
42 //mag[0] = length. 51 //mag[0] = length.
43 //mag[1] = width. 52 //mag[1] = width.
44 - std::vector<stim::vec<float> > dV;  
45 - std::vector<stim::vec<float> > pV;  
46 - std::vector<stim::vec<float> > mV;  
47 - //currentTransform  
48 - stim::matrix<float, 4> cT; 53 + std::vector<stim::vec<float> > dV; //A list of all the direction vectors.
  54 + std::vector<stim::vec<float> > pV; //A list of all the position vectors.
  55 + std::vector<stim::vec<float> > mV; //A list of all the size vectors.
  56 +
  57 + stim::matrix<float, 4> cT; //current Transformation matrix
  58 + //From tissue space to texture space.
49 GLuint texID; 59 GLuint texID;
50 - stim::vec<float> S;  
51 - stim::vec<float> R;  
52 - cudaGraphicsResource_t resource;  
53 -  
54 - GLuint dList;  
55 - GLuint fboID;  
56 - GLuint texbufferID;  
57 - int numSamples;  
58 - float stepsize = 3.0; 60 + stim::vec<float> S; //Size of a voxel in the volume.
  61 + stim::vec<float> R; //Dimensions of the volume.
  62 +
  63 +
  64 + //GL and Cuda variables
  65 + GLuint dList; //displaylist ID
  66 + GLuint fboID; //framebuffer ID
  67 + GLuint texbufferID; //texbuffer ID, only necessary for
  68 + //cuda aspect of the calculation.
  69 + GLuint bfboId;
  70 + GLuint btexbufferID;
  71 +
  72 + int numSamples; //The number of templates in the buffer.
  73 + float stepsize = 6.0; //Step size.
59 int current_cost; 74 int current_cost;
  75 +
  76 +
  77 + //Tracing variables.
  78 + std::stack< stim::vec<float> > seeds; //Variables for tracing
  79 + std::stack< stim::vec<float> > seedsvecs;
  80 + std::vector< stim::vec<float> > cL; //Line currently being traced.
  81 + stim::glObj<float> sk;
  82 + stim::vec<float> rev; //reverse vector;
  83 + stim::camera camSel;
  84 + stim::vec<float> ps;
  85 + stim::vec<float> ups;
  86 + stim::vec<float> ds;
  87 +
  88 +
60 89
61 /// Method for finding the best scale for the spider. 90 /// Method for finding the best scale for the spider.
62 /// changes the x, y, z size of the spider to minimize the cost 91 /// changes the x, y, z size of the spider to minimize the cost
@@ -73,7 +102,6 @@ class gl_spider @@ -73,7 +102,6 @@ class gl_spider
73 dV[best][2]*S[2]*R[2], 102 dV[best][2]*S[2]*R[2],
74 0); 103 0);
75 next = (cT*next).norm(); 104 next = (cT*next).norm();
76 - //next = (cT*next);  
77 setPosition( p[0]+next[0]*m[0]/stepsize, 105 setPosition( p[0]+next[0]*m[0]/stepsize,
78 p[1]+next[1]*m[0]/stepsize, 106 p[1]+next[1]*m[0]/stepsize,
79 p[2]+next[2]*m[0]/stepsize); 107 p[2]+next[2]*m[0]/stepsize);
@@ -117,11 +145,46 @@ class gl_spider @@ -117,11 +145,46 @@ class gl_spider
117 void 145 void
118 branchDetection() 146 branchDetection()
119 { 147 {
120 - Bind();  
121 setMatrix(); 148 setMatrix();
122 glCallList(dList+3); 149 glCallList(dList+3);
123 -  
124 - // int best = getCost(); 150 + // test(btexbufferID, GL_TEXTURE_2D, 8*27);
  151 + std::vector< stim::vec<float> > result = find_branch(
  152 + btexbufferID, GL_TEXTURE_2D, 16, 216);
  153 + if(!result.empty())
  154 + {
  155 + for(int i = 1; i < result.size(); i++)
  156 + {
  157 + std::cout << result[i] << std::endl;
  158 +// std::cout <<"["<< result[i][0]*16.0 << ", "
  159 +// << result[i][1]*16.0 << ", " <<
  160 +// result[i][2]*216.0 <<"]" << std::endl;
  161 +
  162 + stim::vec<float> cylp(
  163 + 0.5 * cos(2*M_PI*(result[i][1])),
  164 + 0.5 * sin(2*M_PI*(result[i][1])),
  165 + result[i][0]-0.5,
  166 + 1.0);
  167 + stim::vec<float> cylv(
  168 + 0.5 * cos(2*M_PI*(result[i][1]))*S[0]*R[0],
  169 + 0.5 * sin(2*M_PI*(result[i][1]))*S[1]*R[1],
  170 + (result[i][0]-0.5)*S[2]*R[2],
  171 + 0.0);
  172 + std::cout << cylp << std::endl;
  173 + cylp = cT*cylp;
  174 + cylv = cT*cylv;
  175 +
  176 + std::cout << cylp[0]*S[0]*R[0] << ", "
  177 + << cylp[1]*S[1]*R[1] << ", " <<
  178 + cylp[2]*S[2]*R[2] << std::endl;
  179 + setSeed(cylp[0]*S[0]*R[0],
  180 + cylp[1]*S[1]*R[1],
  181 + cylp[2]*S[2]*R[2]);
  182 + cylv.norm();
  183 + setSeedVec(cylv[0],
  184 + cylv[1],
  185 + cylv[2]);
  186 + }
  187 + }
125 188
126 } 189 }
127 190
@@ -365,6 +428,30 @@ class gl_spider @@ -365,6 +428,30 @@ class gl_spider
365 ///@param height sets the height of the buffer. 428 ///@param height sets the height of the buffer.
366 ///Function for setting up the 2D buffer that stores the samples. 429 ///Function for setting up the 2D buffer that stores the samples.
367 void 430 void
  431 + GenerateFBO(unsigned int width, unsigned int height, GLuint &textureID, GLuint &framebufferID)
  432 + {
  433 + glGenFramebuffers(1, &framebufferID);
  434 + glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);
  435 + int numChannels = 1;
  436 + unsigned char* texels = new unsigned char[width * height * numChannels];
  437 + glGenTextures(1, &textureID);
  438 + glBindTexture(GL_TEXTURE_2D, textureID);
  439 + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
  440 + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
  441 + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
  442 + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
  443 + glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE,
  444 + width, height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, texels);
  445 + delete[] texels;
  446 + glBindFramebuffer(GL_FRAMEBUFFER, 0);
  447 + glBindTexture(GL_TEXTURE_2D, 0);
  448 + CHECK_OPENGL_ERROR
  449 + }
  450 +
  451 + ///@param width sets the width of the buffer.
  452 + ///@param height sets the height of the buffer.
  453 + ///Function for setting up the 2D buffer that stores the samples.
  454 + void
368 GenerateFBO(unsigned int width, unsigned int height) 455 GenerateFBO(unsigned int width, unsigned int height)
369 { 456 {
370 glGenFramebuffers(1, &fboID); 457 glGenFramebuffers(1, &fboID);
@@ -382,6 +469,7 @@ class gl_spider @@ -382,6 +469,7 @@ class gl_spider
382 delete[] texels; 469 delete[] texels;
383 glBindFramebuffer(GL_FRAMEBUFFER, 0); 470 glBindFramebuffer(GL_FRAMEBUFFER, 0);
384 glBindTexture(GL_TEXTURE_2D, 0); 471 glBindTexture(GL_TEXTURE_2D, 0);
  472 + CHECK_OPENGL_ERROR
385 } 473 }
386 474
387 475
@@ -409,12 +497,99 @@ class gl_spider @@ -409,12 +497,99 @@ class gl_spider
409 497
410 glGetFloatv(GL_TEXTURE_MATRIX, curTrans); 498 glGetFloatv(GL_TEXTURE_MATRIX, curTrans);
411 cT.set(curTrans); 499 cT.set(curTrans);
412 -// printTransform(); 500 + // printTransform();
413 501
414 CHECK_OPENGL_ERROR 502 CHECK_OPENGL_ERROR
415 glMatrixMode(GL_MODELVIEW); 503 glMatrixMode(GL_MODELVIEW);
416 } 504 }
  505 +
  506 + ///Method for controling the buffer and texture binding in order to properly
  507 + ///do the render to texture.
  508 + void
  509 + Bind()
  510 + {
  511 + float len = 8.0;
  512 + glBindFramebuffer(GL_FRAMEBUFFER, fboID);//set up GL buffer
  513 + glFramebufferTexture2D(
  514 + GL_FRAMEBUFFER,
  515 + GL_COLOR_ATTACHMENT0,
  516 + GL_TEXTURE_2D,
  517 + texbufferID,
  518 + 0);
  519 + glBindFramebuffer(GL_FRAMEBUFFER, fboID);
  520 + GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
  521 + glDrawBuffers(1, DrawBuffers);
  522 + glBindTexture(GL_TEXTURE_2D, texbufferID);
  523 + glClearColor(1,1,1,1);
  524 + glClear(GL_COLOR_BUFFER_BIT);
  525 + glMatrixMode(GL_PROJECTION);
  526 + glLoadIdentity();
  527 + glMatrixMode(GL_MODELVIEW);
  528 + glLoadIdentity();
  529 + glViewport(0,0,2.0*len, numSamples*len);
  530 + gluOrtho2D(0.0,2.0*len,0.0,numSamples*len);
  531 + glEnable(GL_TEXTURE_3D);
  532 + glBindTexture(GL_TEXTURE_3D, texID);
  533 +
  534 + CHECK_OPENGL_ERROR
  535 + }
  536 +
  537 + ///Method for controling the buffer and texture binding in order to properly
  538 + ///do the render to texture.
  539 + ///@param GLuint tbID
  540 + void
  541 + Bind(GLuint &textureID, GLuint &framebufferID, int nSamples)
  542 + {
  543 + float len = 8.0;
  544 + glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);//set up GL buffer
  545 + CHECK_OPENGL_ERROR
  546 +
  547 + glFramebufferTexture2D(
  548 + GL_FRAMEBUFFER,
  549 + GL_COLOR_ATTACHMENT0,
  550 + GL_TEXTURE_2D,
  551 + textureID,
  552 + 0);
  553 + CHECK_OPENGL_ERROR
  554 +
  555 + glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);
  556 + CHECK_OPENGL_ERROR
  557 +
  558 + GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
  559 + glDrawBuffers(1, DrawBuffers);
  560 + CHECK_OPENGL_ERROR
  561 +
  562 + glBindTexture(GL_TEXTURE_2D, textureID);
  563 + CHECK_OPENGL_ERROR
  564 +
  565 + glClearColor(1,1,1,1);
  566 + glClear(GL_COLOR_BUFFER_BIT);
  567 + glMatrixMode(GL_PROJECTION);
  568 + glLoadIdentity();
  569 + glMatrixMode(GL_MODELVIEW);
  570 + glLoadIdentity();
  571 + glViewport(0,0,2.0*len, nSamples*len);
  572 + gluOrtho2D(0.0,2.0*len,0.0,nSamples*len);
  573 + glEnable(GL_TEXTURE_3D);
  574 + glBindTexture(GL_TEXTURE_3D, texID);
  575 +
  576 + CHECK_OPENGL_ERROR
  577 + }
417 578
  579 + ///Method for Unbinding all of the texture resources
  580 + void
  581 + Unbind()
  582 + {
  583 + //Finalize GL_buffer
  584 + glBindTexture(GL_TEXTURE_3D, 0);
  585 + CHECK_OPENGL_ERROR
  586 + glBindTexture(GL_TEXTURE_2D, 0);
  587 + CHECK_OPENGL_ERROR
  588 + glBindFramebuffer(GL_FRAMEBUFFER, 0);
  589 + CHECK_OPENGL_ERROR
  590 + glDisable(GL_TEXTURE_3D);
  591 + CHECK_OPENGL_ERROR
  592 + }
418 593
419 594
420 595
@@ -422,40 +597,22 @@ class gl_spider @@ -422,40 +597,22 @@ class gl_spider
422 //--------------------------------CUDA METHODS------------------------------// 597 //--------------------------------CUDA METHODS------------------------------//
423 //--------------------------------------------------------------------------// 598 //--------------------------------------------------------------------------//
424 599
425 - /// Method for registering the texture with Cuda for shared  
426 - /// access.  
427 - void  
428 - createResource()  
429 - {  
430 - HANDLE_ERROR(  
431 - cudaGraphicsGLRegisterImage(  
432 - &resource,  
433 - texbufferID,  
434 - GL_TEXTURE_2D,  
435 - //CU_GRAPHICS_REGISTER_FLAGS_NONE)  
436 - cudaGraphicsMapFlagsReadOnly)  
437 - );  
438 - }  
439 -  
440 - ///Method for freeing the texture from Cuda for gl access.  
441 - void  
442 - destroyResource()  
443 - {  
444 - HANDLE_ERROR(  
445 - cudaGraphicsUnregisterResource(resource)  
446 - );  
447 - }  
448 600
449 ///Entry-point into the cuda code for calculating the cost 601 ///Entry-point into the cuda code for calculating the cost
450 /// of a given samples array (in texture form) 602 /// of a given samples array (in texture form)
451 int 603 int
452 getCost() 604 getCost()
453 { 605 {
454 - createResource();  
455 - stim::vec<int> cost = get_cost(resource, numSamples);  
456 - destroyResource();  
457 -// if (cost[1] >= 80)  
458 -// exit(0); 606 + #ifdef TESTING
  607 + start = std::clock();
  608 + #endif
  609 + stim::vec<int> cost =
  610 + stim::cuda::get_cost(texbufferID, GL_TEXTURE_2D, numSamples);
  611 + #ifdef TESTING
  612 + duration_cuda = duration_cuda +
  613 + (std::clock() - start) / (double) CLOCKS_PER_SEC;
  614 + num_cuda = num_cuda + 1.0;
  615 + #endif
459 current_cost = cost[1]; 616 current_cost = cost[1];
460 return cost[0]; 617 return cost[0];
461 } 618 }
@@ -464,6 +621,15 @@ class gl_spider @@ -464,6 +621,15 @@ class gl_spider
464 stim::rect<float> hor; 621 stim::rect<float> hor;
465 stim::rect<float> ver; 622 stim::rect<float> ver;
466 623
  624 + //Testing and Timing variables.
  625 + #ifdef TESTING
  626 + std::clock_t start;
  627 + double duration_sampling = 0.0;
  628 + double duration_cuda = 0.0;
  629 + double num_sampling = 0.0;
  630 + double num_cuda = 0.0;
  631 + #endif
  632 +
467 //--------------------------------------------------------------------------// 633 //--------------------------------------------------------------------------//
468 //-----------------------------CONSTRUCTORS---------------------------------// 634 //-----------------------------CONSTRUCTORS---------------------------------//
469 //--------------------------------------------------------------------------// 635 //--------------------------------------------------------------------------//
@@ -508,6 +674,8 @@ class gl_spider @@ -508,6 +674,8 @@ class gl_spider
508 Unbind(); 674 Unbind();
509 glDeleteTextures(1, &texbufferID); 675 glDeleteTextures(1, &texbufferID);
510 glDeleteBuffers(1, &fboID); 676 glDeleteBuffers(1, &fboID);
  677 + glDeleteTextures(1, &btexbufferID);
  678 + glDeleteBuffers(1, &bfboId);
511 } 679 }
512 680
513 ///@param GLuint id texture that is going to be sampled. 681 ///@param GLuint id texture that is going to be sampled.
@@ -519,6 +687,7 @@ class gl_spider @@ -519,6 +687,7 @@ class gl_spider
519 { 687 {
520 texID = id; 688 texID = id;
521 GenerateFBO(16, numSamples*8); 689 GenerateFBO(16, numSamples*8);
  690 + GenerateFBO(16, 216, btexbufferID, bfboId);
522 setDims(0.6, 0.6, 1.0); 691 setDims(0.6, 0.6, 1.0);
523 setSize(512.0, 512.0, 426.0); 692 setSize(512.0, 512.0, 426.0);
524 setMatrix(); 693 setMatrix();
@@ -528,6 +697,9 @@ class gl_spider @@ -528,6 +697,9 @@ class gl_spider
528 genDirectionVectors(5*M_PI/4); 697 genDirectionVectors(5*M_PI/4);
529 genPositionVectors(); 698 genPositionVectors();
530 genMagnitudeVectors(); 699 genMagnitudeVectors();
  700 + Unbind();
  701 + ///temporarily changed to 216
  702 + Bind(btexbufferID, bfboId, 27);
531 DrawCylinder(); 703 DrawCylinder();
532 Unbind(); 704 Unbind();
533 } 705 }
@@ -612,7 +784,6 @@ class gl_spider @@ -612,7 +784,6 @@ class gl_spider
612 { 784 {
613 m[0] = mag; 785 m[0] = mag;
614 m[1] = mag; 786 m[1] = mag;
615 - // m[2] = mag;  
616 } 787 }
617 788
618 789
@@ -655,56 +826,104 @@ class gl_spider @@ -655,56 +826,104 @@ class gl_spider
655 } 826 }
656 return out; 827 return out;
657 } 828 }
658 -  
659 - ///Function to get back the framebuffer Object attached to the spider.  
660 - ///For external access.  
661 - GLuint  
662 - getFB() 829 +
  830 + ///@param pos, the position of the seed to be added.
  831 + ///Adds a seed to the seed list.
  832 + ///Assumes that the coordinates passes are in tissue space.
  833 + void
  834 + setSeed(stim::vec<float> pos)
663 { 835 {
664 - return fboID; 836 + seeds.push(pos);
665 } 837 }
666 838
667 - ///Method for controling the buffer and texture binding in order to properly  
668 - ///do the render to texture.  
669 void 839 void
670 - Bind() 840 + setSeedVec(stim::vec<float> pos)
671 { 841 {
672 - float len = 8.0;  
673 - glBindFramebuffer(GL_FRAMEBUFFER, fboID);//set up GL buffer  
674 - glFramebufferTexture2D(  
675 - GL_FRAMEBUFFER,  
676 - GL_COLOR_ATTACHMENT0,  
677 - GL_TEXTURE_2D,  
678 - texbufferID,  
679 - 0);  
680 - glBindFramebuffer(GL_FRAMEBUFFER, fboID);  
681 - GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};  
682 - glDrawBuffers(1, DrawBuffers);  
683 - glBindTexture(GL_TEXTURE_2D, texbufferID);  
684 - glClearColor(1,1,1,1);  
685 - glClear(GL_COLOR_BUFFER_BIT);  
686 - glMatrixMode(GL_PROJECTION);  
687 - glLoadIdentity();  
688 - glMatrixMode(GL_MODELVIEW);