Commit 84eff8b1dce372c5bf4eba569dcf26cc8b575936

Authored by Pavel Govyadinov
1 parent bf731970

Merged only the necessary parts of Branch detection into master

stim/cuda/arraymath/array_cart2polar.cuh
... ... @@ -4,7 +4,7 @@
4 4 namespace stim{
5 5 namespace cuda{
6 6 template<typename T>
7   - __global__ void cuda_cart2polar(T* a, int x, int y){
  7 + __global__ void cuda_cart2polar(T* a, int x, int y, float rotation){
8 8  
9 9  
10 10 // calculate the 2D coordinates for this current thread.
... ... @@ -20,14 +20,14 @@ namespace stim{
20 20 float yl = a[i * 2 + 1];
21 21 float theta = atan2( yl, xl ) ;
22 22 float r = sqrt(xl * xl + yl * yl);
23   - a[i * 2 + 0] = theta;
  23 + a[i * 2 + 0] = theta + rotation;
24 24 a[i * 2 + 1] = r;
25 25  
26 26 }
27 27  
28 28  
29 29 template<typename T>
30   - void gpu_cart2polar(T* gpuGrad, unsigned int x, unsigned int y){
  30 + void gpu_cart2polar(T* gpuGrad, unsigned int x, unsigned int y, float rotation = 0){
31 31  
32 32 unsigned int max_threads = stim::maxThreadsPerBlock();
33 33 dim3 threads(max_threads, 1);
... ... @@ -35,7 +35,7 @@ namespace stim{
35 35  
36 36  
37 37 //call the kernel to do the multiplication
38   - cuda_cart2polar <<< blocks, threads >>>(gpuGrad, x, y);
  38 + cuda_cart2polar <<< blocks, threads >>>(gpuGrad, x, y, rotation);
39 39  
40 40 }
41 41  
... ... @@ -68,4 +68,4 @@ namespace stim{
68 68 }
69 69 }
70 70  
71   -#endif
72 71 \ No newline at end of file
  72 +#endif
... ...
stim/cuda/branch_detection.cuh 0 → 100644
  1 +#include <iostream>
  2 +#include <fstream>
  3 +#include <cuda_runtime.h>
  4 +#include <stim/math/vector.h>
  5 +//#include <math.h>
  6 +#include <stim/visualization/colormap.h>
  7 +#include <stim/cuda/cuda_texture.cuh>
  8 +#include <stim/cuda/templates/gradient.cuh>
  9 +#include <stim/cuda/templates/gaussian_blur.cuh>
  10 +#include <stim/cuda/arraymath.cuh>
  11 +#include <stim/cuda/ivote.cuh>
  12 +#include <stim/cuda/testKernel.cuh>
  13 +typedef unsigned int uint;
  14 +typedef unsigned int uchar;
  15 +
  16 +stim::cuda::cuda_texture t;
  17 +float* gpuTable;
  18 +float* gpuGrad;
  19 +float* gpuVote;
  20 +float* gpuI;
  21 +float* gpuCenters;
  22 +
  23 +void atan_2d(float* cpuTable, unsigned int rmax)
  24 +{
  25 + //initialize the width and height of the window which atan2 are computed in.
  26 + int xsize = 2*rmax +1;
  27 + int ysize = 2*rmax +1;
  28 +
  29 + // assign the center coordinates of the atan2 window to yi and xi
  30 + int yi = rmax;
  31 + int xi = rmax;
  32 +
  33 +
  34 + for (int xt = 0; xt < xsize; xt++){
  35 +
  36 + for(int yt = 0; yt < ysize; yt++){
  37 +
  38 + //convert the current 2D coordinates to 1D
  39 + int id = yt * xsize + xt;
  40 + // calculate the distance between the pixel and the center of the atan2 window
  41 + float xd = xi - xt;
  42 + float yd = yi - yt;
  43 +
  44 + // calculate the angle between the pixel and the center of the atan2 window and store the result.
  45 + float atan_2d_vote = atan2(yd, xd);
  46 + cpuTable[id] = atan_2d_vote;
  47 + }
  48 + }
  49 +
  50 +}
  51 +
  52 +void initCuda(unsigned int bytes_table, unsigned int bytes_ds)
  53 +{
  54 + HANDLE_ERROR(
  55 + cudaMalloc((void**) &gpuTable, bytes_table)
  56 + );
  57 + HANDLE_ERROR(
  58 + cudaMalloc((void**) &gpuI, bytes_ds)
  59 + );
  60 + HANDLE_ERROR(
  61 + cudaMalloc((void**) &gpuGrad, bytes_ds*2)
  62 + );
  63 + HANDLE_ERROR(
  64 + cudaMalloc((void**) &gpuVote, bytes_ds)
  65 + );
  66 + HANDLE_ERROR(
  67 + cudaMalloc((void**) &gpuCenters, bytes_ds)
  68 + );
  69 +}
  70 +
  71 +void cleanCuda()
  72 +{
  73 + HANDLE_ERROR(
  74 + cudaFree(gpuTable)
  75 + );
  76 + HANDLE_ERROR(
  77 + cudaFree(gpuGrad)
  78 + );
  79 + HANDLE_ERROR(
  80 + cudaFree(gpuVote)
  81 + );
  82 + HANDLE_ERROR(
  83 + cudaFree(gpuCenters)
  84 + );
  85 + HANDLE_ERROR(
  86 + cudaFree(gpuI)
  87 + );
  88 +}
  89 +
  90 +std::vector< stim::vec<float> >
  91 +find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
  92 +{
  93 + float phi = 15.1*M_PI/180;
  94 + int iter = 5;
  95 + float dphi = phi/iter;
  96 + float rmax = 10;
  97 + float sigma = 4;
  98 + unsigned int pixels = x * y;
  99 + unsigned int bytes = sizeof(float) * pixels;
  100 + unsigned int bytes_table = sizeof(float) * (2*rmax + 1) * (2*rmax + 1);
  101 + unsigned int x_ds = (x + (x % 1 == 0 ? 0:1));
  102 + unsigned int y_ds = (y + (x % 1 == 0 ? 0:1));
  103 + unsigned int bytes_ds = sizeof(float) * x_ds * y_ds;
  104 + unsigned int conn = 5;
  105 + float final_t = 200.0;
  106 + float* cpuTable = (float*) malloc(bytes_table);
  107 + float* cpuCenters = (float*) malloc(bytes_ds);
  108 +
  109 + stringstream name;
  110 +
  111 +
  112 +
  113 +
  114 + std::vector<stim::vec<float> > output;
  115 + initCuda(bytes_table, bytes_ds);
  116 +
  117 + atan_2d(cpuTable, rmax);
  118 + cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);
  119 +
  120 + test(texbufferID, texType);
  121 +
  122 + t.MapCudaTexture(texbufferID, texType);
  123 + cudaDeviceSynchronize();
  124 + stim::cuda::tex_gaussian_blur2<float>(
  125 + gpuI, sigma, x, y, t.getTexture(), t.getArray()
  126 + );
  127 + cudaDeviceSynchronize();
  128 + stim::gpu2image<float>(gpuI, "Blur.jpg", 16, 8*27, 0, 255);
  129 +
  130 +
  131 + stim::cuda::gpu_gradient_2d<float>(
  132 + gpuGrad, gpuI, x, y
  133 + );
  134 + cudaDeviceSynchronize();
  135 +// stim::gpu2image<float>(gpuGrad, "Grad.jpg", 16, 8*27, 0, 1);
  136 +
  137 + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y, M_PI);
  138 + cudaDeviceSynchronize();
  139 +// stim::gpu2image<float>(gpuGrad, "Cart_2_polar.jpg", 16, 8*27, 0, 1);
  140 +
  141 + cudaDeviceSynchronize();
  142 + for (int i = 0; i < iter; i++)
  143 + {
  144 + stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  145 + name << "Vote" << i << ".bmp";
  146 + stim::gpu2image<float>(gpuVote, name.str(), 16, 8*27, 0, 255);
  147 + name.str("");
  148 + name.clear();
  149 + cudaDeviceSynchronize();
  150 +// std::cout << "got here7_a_" << i << std::endl;
  151 + stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  152 + cudaDeviceSynchronize();
  153 +// std::cout << "got here7_b_" << i << std::endl;
  154 + phi = phi - dphi;
  155 + }
  156 +
  157 + cudaDeviceSynchronize();
  158 + stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, final_t, conn, x, y);
  159 +// std::cout << "got here_sdkfj" << std::endl;
  160 + cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);
  161 + stim::gpu2image<float>(gpuCenters, "Centers.jpg", 16, 8*27, 0, 255);
  162 + for(int i = 0; i < pixels; i++)
  163 + {
  164 + int ix = (i % x);
  165 + int iy = (i / x);
  166 +// std::cout << i << " : " << ix <<" : " << iy << std::endl;
  167 + if((cpuCenters[i] == 1) && (ix > 4) && (ix < x-4))
  168 + {
  169 + std::cout << ix << " : " << iy << std::endl;
  170 +
  171 + float x_v = (float) ix;
  172 + float y_v = (float) iy;
  173 + output.push_back(stim::vec<float>((x_v/(float)x),
  174 + (y_v/(float)y), 0.0));
  175 +
  176 + std::cout << x_v/16.0 << " : " << y_v/216.0 << std::endl;
  177 + }
  178 + }
  179 +
  180 +
  181 + t.UnmapCudaTexture();
  182 + cleanCuda();
  183 + free(cpuTable);
  184 + free(cpuCenters);
  185 + return output;
  186 +}
... ...
stim/cuda/branch_detection2.cuh 0 → 100644
  1 +#include <stim/cuda/templates/gaussian_blur.cuh>
  2 +#include <stim/cuda/templates/gradient.cuh>
  3 +#include <stim/cuda/arraymath.cuh>
  4 +#include <stim/cuda/ivote.cuh>
  5 +
  6 +
  7 +
  8 +
  9 +
  10 +
  11 +
  12 +
  13 +
  14 +
  15 +void atan_2(float* cpuTable, unsigned int rmax){
  16 +
  17 + //initialize the width and height of the window which atan2 are computed in.
  18 + int xsize = 2*rmax +1;
  19 + int ysize = 2*rmax +1;
  20 +
  21 + // assign the center coordinates of the atan2 window to yi and xi
  22 + int yi = rmax;
  23 + int xi = rmax;
  24 +
  25 +
  26 + for (int xt = 0; xt < xsize; xt++){
  27 +
  28 + for(int yt = 0; yt < ysize; yt++){
  29 +
  30 + //convert the current 2D coordinates to 1D
  31 + int id = yt * xsize + xt;
  32 + // calculate the distance between the pixel and the center of the atan2 window
  33 + float xd = xi - xt;
  34 + float yd = yi - yt;
  35 +
  36 + // calculate the angle between the pixel and the center of the atan2 window and store the result.
  37 + float atan_2d_vote = atan2(yd, xd);
  38 + cpuTable[id] = atan_2d_vote;
  39 + }
  40 + }
  41 +
  42 +}
  43 +std::vector<stim::vec<float> >
  44 +find_branch(GLint texbufferID, GLenum texType, unsigned int x, unsigned int y)
  45 +{
  46 +
  47 + float* cpuTable = (float
  48 +
  49 + unsigned int pixels = x * y;
  50 + unsigned int bytes = sizeof(float) * pixels;
  51 +
  52 + //calculate the number of bytes in the atan2 table
  53 +
  54 + unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(float);
  55 +
  56 +
  57 +
  58 + //allocate space on the GPU for the atan2 table
  59 +
  60 + float* gpuTable;
  61 +
  62 + cudaMalloc(&gpuTable, bytes_table);
  63 +
  64 +
  65 +
  66 + cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice);
  67 +
  68 + unsigned int sigma_ds = 1/resize;
  69 + unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
  70 + unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
  71 + unsigned int bytes_ds = sizeof(float) * x_ds * y_ds;
  72 +
  73 +
  74 + float* gpuI;
  75 + cudaMalloc(&gpuI, bytes_ds);
  76 +
  77 +
  78 + float* gpuGrad;
  79 + cudaMalloc(&gpuGrad, bytes_ds*2);
  80 +
  81 + float* gpuVote;
  82 + cudaMalloc(&gpuVote, bytes_ds);
  83 +
  84 + // allocate space on the GPU for the detected cell centes
  85 +
  86 + float* gpuCenters;
  87 +
  88 + cudaMalloc(&gpuCenters, bytes_ds);
  89 +
  90 +
  91 + stim::cuda::gpu_down_sample<float>(gpuI, gpuI0, resize, x , y);
  92 + cudaMemcpy(cpuResize, gpuI, bytes_ds, cudaMemcpyDeviceToHost);
  93 +
  94 +x = x_ds;
  95 + y = y_ds;
  96 + t = t * resize;
  97 + //sigma = sigma * resize;
  98 +
  99 + cudaDeviceSynchronize();
  100 + stim::cuda::gpu_gaussian_blur2<float>(gpuI,sigma, x, y);
  101 + cudaDeviceSynchronize();
  102 + cudaMemcpy(cpuBlur, gpuI, bytes_ds, cudaMemcpyDeviceToHost);
  103 + cudaDeviceSynchronize();
  104 +
  105 + stim::cuda::gpu_gradient_2d<float>(gpuGrad, gpuI, x, y);
  106 + cudaDeviceSynchronize();
  107 + cudaMemcpy(cpuGradient, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);
  108 +
  109 + stim::cuda::gpu_cart2polar<float>(gpuGrad, x, y);
  110 + cudaDeviceSynchronize();
  111 + cudaMemcpy(cpuCart2Polar, gpuGrad, bytes_ds*2, cudaMemcpyDeviceToHost);
  112 +
  113 +
  114 + //multiply the gradient by a constant and calculate the absolute value (to save an image)
  115 +
  116 + stim::cuda::cpu_multiply<float>(cpuCart2Polar, 40, x * y * 2);
  117 +
  118 + cudaDeviceSynchronize();
  119 +
  120 + stim::cuda::cpu_abs<float>(cpuCart2Polar, x * y * 2);
  121 +
  122 + cudaDeviceSynchronize();
  123 +
  124 +
  125 + for (int i =0; i<iter; i++){
  126 +
  127 + stim::cuda::gpu_vote<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  128 + cudaDeviceSynchronize();
  129 + stim::cuda::gpu_update_dir<float>(gpuVote, gpuGrad, gpuTable, phi, rmax, x, y);
  130 + cudaDeviceSynchronize();
  131 + switch (i){
  132 + case 0 : cudaMemcpy(cpuVote1, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  133 + break;
  134 + case 1 : cudaMemcpy(cpuVote2, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  135 + break;
  136 + case 2 : cudaMemcpy(cpuVote3, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  137 + break;
  138 + case 3 : cudaMemcpy(cpuVote4, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  139 + break;
  140 + case 4 : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  141 + break;
  142 + default : cudaMemcpy(cpuVote5, gpuVote, bytes_ds, cudaMemcpyDeviceToHost);
  143 + break;
  144 + }
  145 + phi = phi - dphi;
  146 + }
  147 +
  148 + stim::cuda::gpu_local_max<float>(gpuCenters, gpuVote, t, conn, x, y);
  149 + cudaMemcpy(cpuCenters, gpuCenters, bytes_ds, cudaMemcpyDeviceToHost);
  150 +
  151 +}
... ...
stim/cuda/cuda_texture.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_TEXTURE_H
  2 +#define STIM_CUDA_TEXTURE_H
  3 +
  4 +#include <assert.h>
  5 +#include <stim/cuda/cudatools/error.h>
  6 +#include <cuda.h>
  7 +#include <cuda_runtime.h>
  8 +#include <cublas_v2.h>
  9 +#include <stdio.h>
  10 +#include <GL/glew.h>
  11 +#include <GL/glut.h>
  12 +#include <sstream>
  13 +#include <stim/visualization/colormap.h>
  14 +#include <stim/cuda/cudatools/devices.h>
  15 +#include <stim/cuda/cudatools/threads.h>
  16 +#include <stim/math/vector.h>
  17 +
  18 +///A container for the texture based methods used by the spider class.
  19 +namespace stim
  20 +{
  21 + namespace cuda
  22 + {
  23 + class cuda_texture
  24 + {
  25 + public:
  26 + cudaArray* srcArray;
  27 + cudaGraphicsResource_t resource;
  28 + struct cudaResourceDesc resDesc;
  29 + struct cudaTextureDesc texDesc;
  30 + cudaTextureObject_t tObj;
  31 +
  32 +
  33 + ///basic constructor that creates the texture with default parameters.
  34 + cuda_texture()
  35 + {
  36 + memset(&texDesc, 0, sizeof(texDesc));
  37 + texDesc.addressMode[0] = cudaAddressModeWrap;
  38 + texDesc.addressMode[1] = cudaAddressModeWrap;
  39 + texDesc.filterMode = cudaFilterModePoint;
  40 + texDesc.readMode = cudaReadModeElementType;
  41 + texDesc.normalizedCoords = 0;
  42 + }
  43 +
  44 +//-------------------------------------------------------------------------//
  45 +//-------------------------------CUDA_MAPPING------------------------------//
  46 +//-------------------------------------------------------------------------//
  47 +//Methods for creating the cuda texture.
  48 + ///@param GLuint tex -- GLtexture (must be contained in a frame buffer object)
  49 + /// that holds that data that will be handed to cuda.
  50 + ///@param GLenum target -- either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
  51 + /// map work with other gl texture types but untested.
  52 + ///Maps the gl texture in cuda memory, binds that data to a cuda array, and binds the cuda
  53 + ///array to a cuda texture.
  54 + void
  55 + MapCudaTexture(GLuint tex, GLenum target)
  56 + {
  57 + HANDLE_ERROR(
  58 + cudaGraphicsGLRegisterImage(
  59 + &resource,
  60 + tex,
  61 + target,
  62 +// cudaGraphicsMapFlagsReadOnly
  63 + cudaGraphicsRegisterFlagsNone
  64 + )
  65 + );
  66 +
  67 + HANDLE_ERROR(
  68 + cudaGraphicsMapResources(1, &resource)
  69 + );
  70 +
  71 + HANDLE_ERROR(
  72 + cudaGraphicsSubResourceGetMappedArray(&srcArray, resource, 0, 0)
  73 + );
  74 +
  75 + memset(&resDesc, 0, sizeof(resDesc));
  76 + resDesc.resType = cudaResourceTypeArray;
  77 + resDesc.res.array.array = srcArray;
  78 + HANDLE_ERROR(
  79 + cudaCreateTextureObject(&tObj, &resDesc, &texDesc, NULL)
  80 + );
  81 + }
  82 +
  83 + ///Unmaps the gl texture, binds that data to a cuda array, and binds the cuda
  84 + ///array to a cuda texture.
  85 + void
  86 + UnmapCudaTexture()
  87 + {
  88 + HANDLE_ERROR(
  89 + cudaGraphicsUnmapResources(1, &resource)
  90 + );
  91 + HANDLE_ERROR(
  92 + cudaGraphicsUnregisterResource(resource)
  93 + );
  94 + HANDLE_ERROR(
  95 + cudaDestroyTextureObject(tObj)
  96 + );
  97 + }
  98 +
  99 +//-------------------------------------------------------------------------//
  100 +//------------------------------GET/SET METHODS----------------------------//
  101 +//-------------------------------------------------------------------------//
  102 +
  103 +///Returns the bound texture object.
  104 + cudaTextureObject_t
  105 + getTexture()
  106 + {
  107 + return tObj;
  108 + }
  109 +
  110 + cudaArray*
  111 + getArray()
  112 + {
  113 + return srcArray;
  114 + }
  115 + };
  116 +}
  117 +}
  118 +
  119 +
  120 +#endif
... ...
stim/cuda/sharedmem.cuh
... ... @@ -34,9 +34,38 @@ namespace stim{
34 34 }
35 35 }
36 36 }
  37 +
  38 + template<typename T, typename D>
  39 + __device__ void sharedMemcpy_tex2D(T* dest, cudaTextureObject_t src,
  40 + unsigned int x, unsigned int y, unsigned int X, unsigned int Y,
  41 + dim3 threadIdx, dim3 blockDim){
  42 +
  43 + //calculate the number of iterations required for the copy
  44 + unsigned int xI, yI;
  45 + xI = X/blockDim.x + 1; //number of iterations along X
  46 + yI = Y/blockDim.y + 1; //number of iterations along Y
  47 +
  48 + //for each iteration
  49 + for(unsigned int xi = 0; xi < xI; xi++){
  50 + for(unsigned int yi = 0; yi < yI; yi++){
  51 +
  52 + //calculate the index into shared memory
  53 + unsigned int sx = xi * blockDim.x + threadIdx.x;
  54 + unsigned int sy = yi * blockDim.y + threadIdx.y;
  55 +
  56 + //calculate the index into the texture
  57 + unsigned int tx = x + sx;
  58 + unsigned int ty = y + sy;
  59 +
  60 + //perform the copy
  61 + if(sx < X && sy < Y)
  62 + dest[sy * X + sx] = tex2D<D>(src, tx, ty);
  63 + }
  64 + }
  65 + }
37 66  
38 67 }
39 68 }
40 69  
41 70  
42   -#endif
43 71 \ No newline at end of file
  72 +#endif
... ...
stim/cuda/spider_cost.cuh 0 → 100644
  1 +#ifndef STIM_SPIDER_COST_H
  2 +#define STIM_SPIDER_COST_H
  3 +
  4 +#include <assert.h>
  5 +#include <cuda.h>
  6 +#include <cuda_runtime.h>
  7 +#include <stdio.h>
  8 +#include <stim/visualization/colormap.h>
  9 +#include <sstream>
  10 +#include <stim/math/vector.h>
  11 +#include <stim/cuda/cudatools/devices.h>
  12 +#include <stim/cuda/cudatools/threads.h>
  13 +#include <stim/cuda/cuda_texture.cuh>
  14 +namespace stim{
  15 + namespace cuda
  16 + {
  17 +
  18 + stim::cuda::cuda_texture t; //texture object.
  19 + float* result;
  20 + float* print;
  21 +
  22 + ///Initialization function, allocates the memory and passes the necessary
  23 + ///handles from OpenGL and Cuda.
  24 + ///@param DIM_Y --integer controlling how much memory to allocate.
  25 + void initArray(int DIM_Y)
  26 + {
  27 +// cudaMalloc( (void**) &print, DIM_Y*16*sizeof(float)); ///temporary
  28 + cudaMalloc( (void**) &result, DIM_Y*sizeof(float));
  29 + }
  30 +
  31 + ///Deinit function that frees the memery used and releases the texture resource
  32 + ///back to OpenGL.
  33 + void cleanUP()
  34 + {
  35 + cudaFree(result);
  36 +// cudaFree(print); ///temporary
  37 + }
  38 +
  39 + ///A virtual representation of a uniform template.
  40 + ///Returns the value of the template pixel.
  41 + ///@param int x --location of a pixel.
  42 + __device__
  43 + float Template(int x)
  44 + {
  45 + if(x < 16/6 || x > 16*5/6 || (x > 16*2/6 && x < 16*4/6)){
  46 + return 1.0;
  47 + }else{
  48 + return 0.0;
  49 + }
  50 +
  51 + }
  52 +
  53 + ///Find the difference of the given set of samples and the template
  54 + ///using cuda acceleration.
  55 + ///@param stim::cuda::cuda_texture t --stim texture that holds all the references
  56 + /// to the data.
  57 + ///@param float* result --a pointer to the memory that stores the result.
  58 + __global__
  59 + //void get_diff (float *result)
  60 + void get_diff (cudaTextureObject_t texIn, float *result)
  61 + {
  62 + __shared__ float shared[16][8];
  63 + int x = threadIdx.x + blockIdx.x * blockDim.x;
  64 + int y = threadIdx.y + blockIdx.y * blockDim.y;
  65 + int x_t = threadIdx.x;
  66 + int y_t = threadIdx.y;
  67 +// int idx = y*16+x;
  68 + int g_idx = blockIdx.y;
  69 +
  70 + float valIn = tex2D<unsigned char>(texIn, x, y)/255.0;
  71 + float valTemp = Template(x);
  72 +
  73 +// print[idx] = abs(valIn); ///temporary
  74 +
  75 + shared[x_t][y_t] = abs(valIn-valTemp);
  76 +
  77 + __syncthreads();
  78 +
  79 + for(unsigned int step = blockDim.x/2; step >= 1; step >>= 1)
  80 + {
  81 + __syncthreads();
  82 + if (x_t < step)
  83 + {
  84 + shared[x_t][y_t] += shared[x_t + step][y_t];
  85 + }
  86 + __syncthreads();
  87 + }
  88 + __syncthreads();
  89 +
  90 + for(unsigned int step = blockDim.y/2; step >= 1; step >>= 1)
  91 + {
  92 + __syncthreads();
  93 + if(y_t < step)
  94 + {
  95 + shared[x_t][y_t] += shared[x_t][y_t + step];
  96 + }
  97 + __syncthreads();
  98 + }
  99 + __syncthreads();
  100 + if(x_t == 0 && y_t == 0)
  101 + result[g_idx] = shared[0][0];
  102 +
  103 +
  104 + // //result[idx] = abs(valIn);
  105 + }
  106 +
  107 +
  108 + ///External access-point to the cuda function
  109 + ///@param GLuint texbufferID --GLtexture (most be contained in a framebuffer object)
  110 + /// that holds the data that will be handed to cuda.
  111 + ///@param GLenum texType --either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
  112 + /// may work with other gl texture types, but untested.
  113 + ///@param DIM_Y, the number of samples in the template.
  114 + extern "C"
  115 + stim::vec<int> get_cost(GLint texbufferID, GLenum texType, int DIM_Y)
  116 + {
  117 +
  118 + //Bind the Texture in GL and allow access to cuda.
  119 + t.MapCudaTexture(texbufferID, texType);
  120 +
  121 + //initialize the return arrays.
  122 + float* output;
  123 + output = (float* ) malloc(DIM_Y*sizeof(float));
  124 +
  125 + stim::vec<int> ret(0, 0);
  126 + initArray(DIM_Y);
  127 +
  128 +
  129 + //variables for finding the min.
  130 + float mini = 10000000000000000.0;
  131 + int idx = 0;
  132 +
  133 + //cuda launch variables.
  134 + dim3 numBlocks(1, DIM_Y);
  135 + dim3 threadsPerBlock(16, 8);
  136 +
  137 +
  138 + get_diff <<< numBlocks, threadsPerBlock >>> (t.getTexture(), result);
  139 +
  140 + HANDLE_ERROR(
  141 + cudaMemcpy(output, result, DIM_Y*sizeof(float), cudaMemcpyDeviceToHost)
  142 + );
  143 +
  144 + for( int i = 0; i<DIM_Y; i++){
  145 + if(output[i] < mini){
  146 + mini = output[i];
  147 + idx = i;
  148 + }
  149 + }
  150 +
  151 +// stringstream name; //for debugging
  152 +// name << "Test.bmp";
  153 +// stim::gpu2image<float>(print, name.str(),16,218,0,1);
  154 +
  155 + t.UnmapCudaTexture();
  156 + cleanUP();
  157 + ret[0] = idx; ret[1] = (int) output[idx];
  158 + std::cout << output[idx] << std::endl;
  159 +
  160 + free(output);
  161 + return ret;
  162 + }
  163 +
  164 + }
  165 +}
  166 +
  167 +
  168 +#endif
... ...
stim/cuda/templates/conv2sep.cuh
... ... @@ -30,7 +30,7 @@ namespace stim{
30 30 int byi = blockIdx.y;
31 31  
32 32 //copy the portion of the image necessary for this block to shared memory
33   - stim::cuda::sharedMemcpy_tex2D(s, in, bxi - kr, byi, 2 * kr + blockDim.x, 1, threadIdx, blockDim);
  33 + stim::cuda::sharedMemcpy_tex2D<float, unsigned char>(s, in, bxi - kr, byi, 2 * kr + blockDim.x, 1, threadIdx, blockDim);
34 34  
35 35 //calculate the thread index
36 36 int ti = threadIdx.x;
... ... @@ -88,7 +88,7 @@ namespace stim{
88 88 int byi = blockIdx.y * blockDim.y;
89 89  
90 90 //copy the portion of the image necessary for this block to shared memory
91   - stim::cuda::sharedMemcpy_tex2D(s, in, bxi, byi - kr, 1, 2 * kr + blockDim.y, threadIdx, blockDim);
  91 + stim::cuda::sharedMemcpy_tex2D<float, unsigned char>(s, in, bxi, byi - kr, 1, 2 * kr + blockDim.y, threadIdx, blockDim);
92 92  
93 93 //calculate the thread index
94 94 int ti = threadIdx.y;
... ... @@ -257,4 +257,4 @@ namespace stim{
257 257 };
258 258 };
259 259  
260   -#endif
261 260 \ No newline at end of file
  261 +#endif
... ...
stim/cuda/templates/gaussian_blur.cuh
... ... @@ -7,7 +7,6 @@
7 7 #include <stim/cuda/sharedmem.cuh>
8 8 #include <stim/cuda/templates/conv2sep.cuh> //GPU-based separable convolution algorithm
9 9  
10   -#define pi 3.14159
11 10  
12 11 namespace stim{
13 12 namespace cuda{
... ... @@ -37,6 +36,7 @@ namespace stim{
37 36  
38 37 //copy the kernel to the GPU
39 38 T* gpuKernel0;
  39 + HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth*sizeof(T)));
40 40 HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
41 41  
42 42 //perform the gaussian blur as a separable convolution
... ... @@ -58,7 +58,6 @@ namespace stim{
58 58  
59 59 //copy the kernel to the GPU
60 60 T* gpuKernel0;
61   - HANDLE_ERROR(cudaMalloc(&gpuKernel0, kwidth * sizeof(T)));
62 61 HANDLE_ERROR(cudaMemcpy(gpuKernel0, kernel0, kwidth * sizeof(T), cudaMemcpyHostToDevice));
63 62  
64 63 //perform the gaussian blur as a separable convolution
... ... @@ -87,4 +86,4 @@ namespace stim{
87 86 };
88 87 };
89 88  
90   -#endif
91 89 \ No newline at end of file
  90 +#endif
... ...
stim/cuda/testKernel.cuh 0 → 100644
  1 +#include <assert.h>
  2 +#include <cuda.h>
  3 +#include <cuda_runtime.h>
  4 +#include <stdio.h>
  5 +#include <stim/visualization/colormap.h>
  6 +#include <sstream>
  7 +#include <stim/math/vector.h>
  8 +#include <stim/cuda/cudatools/devices.h>
  9 +#include <stim/cuda/cudatools/threads.h>
  10 +#include <stim/cuda/cuda_texture.cuh>
  11 + stim::cuda::cuda_texture tx; //texture object.
  12 + float* print;
  13 +
  14 + ///Initialization function, allocates the memory and passes the necessary
  15 + ///handles from OpenGL and Cuda.
  16 + ///@param DIM_Y --integer controlling how much memory to allocate.
  17 + void initArray()
  18 + {
  19 + cudaMalloc( (void**) &print, 216*16*sizeof(float)); ///temporary
  20 + }
  21 +
  22 + ///Deinit function that frees the memery used and releases the texture resource
  23 + ///back to OpenGL.
  24 + void cleanUP()
  25 + {
  26 + cudaFree(print); ///temporary
  27 + }
  28 +
  29 + ///Find the difference of the given set of samples and the template
  30 + ///using cuda acceleration.
  31 + ///@param stim::cuda::cuda_texture t --stim texture that holds all the references
  32 + /// to the data.
  33 + ///@param float* result --a pointer to the memory that stores the result.
  34 + __global__
  35 + //void get_diff (float *result)
  36 + void get_diff (cudaTextureObject_t texIn, float *print)
  37 + {
  38 + int x = threadIdx.x + blockIdx.x * blockDim.x;
  39 + int y = threadIdx.y + blockIdx.y * blockDim.y;
  40 + int idx = y*16+x;
  41 +
  42 + float valIn = tex2D<unsigned char>(texIn, x, y)/255.0;
  43 +
  44 + print[idx] = abs(valIn); ///temporary
  45 +
  46 + }
  47 +
  48 +
  49 + ///External access-point to the cuda function
  50 + ///@param GLuint texbufferID --GLtexture (most be contained in a framebuffer object)
  51 + /// that holds the data that will be handed to cuda.
  52 + ///@param GLenum texType --either GL_TEXTURE_1D, GL_TEXTURE_2D or GL_TEXTURE_3D
  53 + /// may work with other gl texture types, but untested.
  54 + ///@param DIM_Y, the number of samples in the template.
  55 + extern "C"
  56 + void test(GLint texbufferID, GLenum texType)
  57 + {
  58 +
  59 + //Bind the Texture in GL and allow access to cuda.
  60 + tx.MapCudaTexture(texbufferID, texType);
  61 +
  62 + //initialize the return arrays.
  63 +
  64 + initArray();
  65 +
  66 + int x = 16;
  67 + int y = 27*8;
  68 + int max_threads = stim::maxThreadsPerBlock();
  69 + dim3 threads(max_threads, 1);
  70 + dim3 blocks(x / threads.x +1, y);
  71 + //dim3 numBlocks(1, 1);
  72 + //dim3 threadsPerBlock(16, 216);
  73 + dim3 numBlocks(2, 2);
  74 + dim3 threadsPerBlock(8, 108);
  75 +
  76 +
  77 +// get_diff <<< blocks, threads >>> (tx.getTexture(), print);
  78 + get_diff <<< numBlocks, threadsPerBlock >>> (tx.getTexture(), print);
  79 +
  80 + cudaDeviceSynchronize();
  81 + stringstream name; //for debugging
  82 + name << "FromTex.bmp";
  83 + stim::gpu2image<float>(print, name.str(),16,216,0,1);
  84 +
  85 + tx.UnmapCudaTexture();
  86 + cleanUP();
  87 + }
  88 +
... ...
stim/gl/gl_spider.h
... ... @@ -13,50 +13,79 @@
13 13 #include "stim/math/vector.h"
14 14 #include "stim/math/rect.h"
15 15 #include "stim/math/matrix.h"
16   -#include "stim/cuda/cost.h"
  16 +#include "stim/cuda/spider_cost.cuh"
17 17 #include <stim/cuda/cudatools/glbind.h>
18   -#include <stim/visualization/obj.h>
  18 +#include <stim/cuda/arraymath.cuh>
  19 +#include <stim/cuda/cudatools.h>
  20 +#include <stim/cuda/ivote.cuh>
  21 +#include <stim/visualization/glObj.h>
19 22 #include <vector>
  23 +#include <stim/cuda/branch_detection.cuh>
  24 +
  25 +//#include <stim/cuda/testKernel.cuh>
20 26  
21 27 #include <iostream>
22 28 #include <fstream>
  29 +#ifdef TESTING
  30 + #include <iostream>
  31 + #include <cstdio>
  32 + #include <ctime>
  33 +#endif
23 34  
24 35  
25   -
26   -/* Technically since gl_spider inherits from gl_texture, we could
27   - call the init with a path to an image stack, and upload
28   - the images while creating the spider (calling init) */
29 36 namespace stim
30 37 {
31 38  
32 39 template<typename T>
33   -class gl_spider
  40 +class gl_spider : public virtual gl_texture<T>
34 41 {
35 42 //doen't use gl_texture really, just needs the GLuint id.
36 43 //doesn't even need the texture iD really.
37 44 private:
  45 +
  46 + //
38 47 stim::vec<float> p; //vector designating the position of the spider.
39 48 stim::vec<float> d; //vector designating the orientation of the spider
40 49 //always a unit vector.
41 50 stim::vec<float> m; //magnitude of the spider vector.
42 51 //mag[0] = length.
43 52 //mag[1] = width.
44   - std::vector<stim::vec<float> > dV;
45   - std::vector<stim::vec<float> > pV;
46   - std::vector<stim::vec<float> > mV;
47   - //currentTransform
48   - stim::matrix<float, 4> cT;
  53 + std::vector<stim::vec<float> > dV; //A list of all the direction vectors.
  54 + std::vector<stim::vec<float> > pV; //A list of all the position vectors.
  55 + std::vector<stim::vec<float> > mV; //A list of all the size vectors.
  56 +
  57 + stim::matrix<float, 4> cT; //current Transformation matrix
  58 + //From tissue space to texture space.
49 59 GLuint texID;
50   - stim::vec<float> S;
51   - stim::vec<float> R;
52   - cudaGraphicsResource_t resource;
53   -
54   - GLuint dList;
55   - GLuint fboID;
56   - GLuint texbufferID;
57   - int numSamples;
58   - float stepsize = 3.0;
  60 + stim::vec<float> S; //Size of a voxel in the volume.
  61 + stim::vec<float> R; //Dimensions of the volume.
  62 +
  63 +
  64 + //GL and Cuda variables
  65 + GLuint dList; //displaylist ID
  66 + GLuint fboID; //framebuffer ID
  67 + GLuint texbufferID; //texbuffer ID, only necessary for
  68 + //cuda aspect of the calculation.
  69 + GLuint bfboId;
  70 + GLuint btexbufferID;
  71 +
  72 + int numSamples; //The number of templates in the buffer.
  73 + float stepsize = 6.0; //Step size.
59 74 int current_cost;
  75 +
  76 +
  77 + //Tracing variables.
  78 + std::stack< stim::vec<float> > seeds; //Variables for tracing
  79 + std::stack< stim::vec<float> > seedsvecs;
  80 + std::vector< stim::vec<float> > cL; //Line currently being traced.
  81 + stim::glObj<float> sk;
  82 + stim::vec<float> rev; //reverse vector;
  83 + stim::camera camSel;
  84 + stim::vec<float> ps;
  85 + stim::vec<float> ups;
  86 + stim::vec<float> ds;
  87 +
  88 +
60 89  
61 90 /// Method for finding the best scale for the spider.
62 91 /// changes the x, y, z size of the spider to minimize the cost
... ... @@ -73,7 +102,6 @@ class gl_spider
73 102 dV[best][2]*S[2]*R[2],
74 103 0);
75 104 next = (cT*next).norm();
76   - //next = (cT*next);
77 105 setPosition( p[0]+next[0]*m[0]/stepsize,
78 106 p[1]+next[1]*m[0]/stepsize,
79 107 p[2]+next[2]*m[0]/stepsize);
... ... @@ -117,11 +145,46 @@ class gl_spider
117 145 void
118 146 branchDetection()
119 147 {
120   - Bind();
121 148 setMatrix();
122 149 glCallList(dList+3);
123   -
124   - // int best = getCost();
  150 + // test(btexbufferID, GL_TEXTURE_2D, 8*27);
  151 + std::vector< stim::vec<float> > result = find_branch(
  152 + btexbufferID, GL_TEXTURE_2D, 16, 216);
  153 + if(!result.empty())
  154 + {
  155 + for(int i = 1; i < result.size(); i++)
  156 + {
  157 + std::cout << result[i] << std::endl;
  158 +// std::cout <<"["<< result[i][0]*16.0 << ", "
  159 +// << result[i][1]*16.0 << ", " <<
  160 +// result[i][2]*216.0 <<"]" << std::endl;
  161 +
  162 + stim::vec<float> cylp(
  163 + 0.5 * cos(2*M_PI*(result[i][1])),
  164 + 0.5 * sin(2*M_PI*(result[i][1])),
  165 + result[i][0]-0.5,
  166 + 1.0);
  167 + stim::vec<float> cylv(
  168 + 0.5 * cos(2*M_PI*(result[i][1]))*S[0]*R[0],
  169 + 0.5 * sin(2*M_PI*(result[i][1]))*S[1]*R[1],
  170 + (result[i][0]-0.5)*S[2]*R[2],
  171 + 0.0);
  172 + std::cout << cylp << std::endl;
  173 + cylp = cT*cylp;
  174 + cylv = cT*cylv;
  175 +
  176 + std::cout << cylp[0]*S[0]*R[0] << ", "
  177 + << cylp[1]*S[1]*R[1] << ", " <<
  178 + cylp[2]*S[2]*R[2] << std::endl;
  179 + setSeed(cylp[0]*S[0]*R[0],
  180 + cylp[1]*S[1]*R[1],
  181 + cylp[2]*S[2]*R[2]);
  182 + cylv.norm();
  183 + setSeedVec(cylv[0],
  184 + cylv[1],
  185 + cylv[2]);
  186 + }
  187 + }
125 188  
126 189 }
127 190  
... ... @@ -365,6 +428,30 @@ class gl_spider
365 428 ///@param height sets the height of the buffer.
366 429 ///Function for setting up the 2D buffer that stores the samples.
367 430 void
  431 + GenerateFBO(unsigned int width, unsigned int height, GLuint &textureID, GLuint &framebufferID)
  432 + {
  433 + glGenFramebuffers(1, &framebufferID);
  434 + glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);
  435 + int numChannels = 1;
  436 + unsigned char* texels = new unsigned char[width * height * numChannels];
  437 + glGenTextures(1, &textureID);
  438 + glBindTexture(GL_TEXTURE_2D, textureID);
  439 + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
  440 + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
  441 + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
  442 + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
  443 + glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE,
  444 + width, height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, texels);
  445 + delete[] texels;
  446 + glBindFramebuffer(GL_FRAMEBUFFER, 0);
  447 + glBindTexture(GL_TEXTURE_2D, 0);
  448 + CHECK_OPENGL_ERROR
  449 + }
  450 +
  451 + ///@param width sets the width of the buffer.
  452 + ///@param height sets the height of the buffer.
  453 + ///Function for setting up the 2D buffer that stores the samples.
  454 + void
368 455 GenerateFBO(unsigned int width, unsigned int height)
369 456 {
370 457 glGenFramebuffers(1, &fboID);
... ... @@ -382,6 +469,7 @@ class gl_spider
382 469 delete[] texels;
383 470 glBindFramebuffer(GL_FRAMEBUFFER, 0);
384 471 glBindTexture(GL_TEXTURE_2D, 0);
  472 + CHECK_OPENGL_ERROR
385 473 }
386 474  
387 475  
... ... @@ -409,12 +497,99 @@ class gl_spider
409 497  
410 498 glGetFloatv(GL_TEXTURE_MATRIX, curTrans);
411 499 cT.set(curTrans);
412   -// printTransform();
  500 + // printTransform();
413 501  
414 502 CHECK_OPENGL_ERROR
415 503 glMatrixMode(GL_MODELVIEW);
416 504 }
  505 +
  506 + ///Method for controling the buffer and texture binding in order to properly
  507 + ///do the render to texture.
  508 + void
  509 + Bind()
  510 + {
  511 + float len = 8.0;
  512 + glBindFramebuffer(GL_FRAMEBUFFER, fboID);//set up GL buffer
  513 + glFramebufferTexture2D(
  514 + GL_FRAMEBUFFER,
  515 + GL_COLOR_ATTACHMENT0,
  516 + GL_TEXTURE_2D,
  517 + texbufferID,
  518 + 0);
  519 + glBindFramebuffer(GL_FRAMEBUFFER, fboID);
  520 + GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
  521 + glDrawBuffers(1, DrawBuffers);
  522 + glBindTexture(GL_TEXTURE_2D, texbufferID);
  523 + glClearColor(1,1,1,1);
  524 + glClear(GL_COLOR_BUFFER_BIT);
  525 + glMatrixMode(GL_PROJECTION);
  526 + glLoadIdentity();
  527 + glMatrixMode(GL_MODELVIEW);
  528 + glLoadIdentity();
  529 + glViewport(0,0,2.0*len, numSamples*len);
  530 + gluOrtho2D(0.0,2.0*len,0.0,numSamples*len);
  531 + glEnable(GL_TEXTURE_3D);
  532 + glBindTexture(GL_TEXTURE_3D, texID);
  533 +
  534 + CHECK_OPENGL_ERROR
  535 + }
  536 +
  537 + ///Method for controling the buffer and texture binding in order to properly
  538 + ///do the render to texture.
  539 + ///@param GLuint tbID
  540 + void
  541 + Bind(GLuint &textureID, GLuint &framebufferID, int nSamples)
  542 + {
  543 + float len = 8.0;
  544 + glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);//set up GL buffer
  545 + CHECK_OPENGL_ERROR
  546 +
  547 + glFramebufferTexture2D(
  548 + GL_FRAMEBUFFER,
  549 + GL_COLOR_ATTACHMENT0,
  550 + GL_TEXTURE_2D,
  551 + textureID,
  552 + 0);
  553 + CHECK_OPENGL_ERROR
  554 +
  555 + glBindFramebuffer(GL_FRAMEBUFFER, framebufferID);
  556 + CHECK_OPENGL_ERROR
  557 +
  558 + GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
  559 + glDrawBuffers(1, DrawBuffers);
  560 + CHECK_OPENGL_ERROR
  561 +
  562 + glBindTexture(GL_TEXTURE_2D, textureID);
  563 + CHECK_OPENGL_ERROR
  564 +
  565 + glClearColor(1,1,1,1);
  566 + glClear(GL_COLOR_BUFFER_BIT);
  567 + glMatrixMode(GL_PROJECTION);
  568 + glLoadIdentity();
  569 + glMatrixMode(GL_MODELVIEW);
  570 + glLoadIdentity();
  571 + glViewport(0,0,2.0*len, nSamples*len);
  572 + gluOrtho2D(0.0,2.0*len,0.0,nSamples*len);
  573 + glEnable(GL_TEXTURE_3D);
  574 + glBindTexture(GL_TEXTURE_3D, texID);
  575 +
  576 + CHECK_OPENGL_ERROR
  577 + }
417 578  
  579 + ///Method for Unbinding all of the texture resources
  580 + void
  581 + Unbind()
  582 + {
  583 + //Finalize GL_buffer
  584 + glBindTexture(GL_TEXTURE_3D, 0);
  585 + CHECK_OPENGL_ERROR
  586 + glBindTexture(GL_TEXTURE_2D, 0);
  587 + CHECK_OPENGL_ERROR
  588 + glBindFramebuffer(GL_FRAMEBUFFER, 0);
  589 + CHECK_OPENGL_ERROR
  590 + glDisable(GL_TEXTURE_3D);
  591 + CHECK_OPENGL_ERROR
  592 + }
418 593  
419 594  
420 595  
... ... @@ -422,40 +597,22 @@ class gl_spider
422 597 //--------------------------------CUDA METHODS------------------------------//
423 598 //--------------------------------------------------------------------------//
424 599  
425   - /// Method for registering the texture with Cuda for shared
426   - /// access.
427   - void
428   - createResource()
429   - {
430   - HANDLE_ERROR(
431   - cudaGraphicsGLRegisterImage(
432   - &resource,
433   - texbufferID,
434   - GL_TEXTURE_2D,
435   - //CU_GRAPHICS_REGISTER_FLAGS_NONE)
436   - cudaGraphicsMapFlagsReadOnly)
437   - );
438   - }
439   -
440   - ///Method for freeing the texture from Cuda for gl access.
441   - void
442   - destroyResource()
443   - {
444   - HANDLE_ERROR(
445   - cudaGraphicsUnregisterResource(resource)
446   - );
447   - }
448 600  
449 601 ///Entry-point into the cuda code for calculating the cost
450 602 /// of a given samples array (in texture form)
451 603 int
452 604 getCost()
453 605 {
454   - createResource();
455   - stim::vec<int> cost = get_cost(resource, numSamples);
456   - destroyResource();
457   -// if (cost[1] >= 80)
458   -// exit(0);
  606 + #ifdef TESTING
  607 + start = std::clock();
  608 + #endif
  609 + stim::vec<int> cost =
  610 + stim::cuda::get_cost(texbufferID, GL_TEXTURE_2D, numSamples);
  611 + #ifdef TESTING
  612 + duration_cuda = duration_cuda +
  613 + (std::clock() - start) / (double) CLOCKS_PER_SEC;
  614 + num_cuda = num_cuda + 1.0;
  615 + #endif
459 616 current_cost = cost[1];
460 617 return cost[0];
461 618 }
... ... @@ -464,6 +621,15 @@ class gl_spider
464 621 stim::rect<float> hor;
465 622 stim::rect<float> ver;
466 623  
  624 + //Testing and Timing variables.
  625 + #ifdef TESTING
  626 + std::clock_t start;
  627 + double duration_sampling = 0.0;
  628 + double duration_cuda = 0.0;
  629 + double num_sampling = 0.0;
  630 + double num_cuda = 0.0;
  631 + #endif
  632 +
467 633 //--------------------------------------------------------------------------//
468 634 //-----------------------------CONSTRUCTORS---------------------------------//
469 635 //--------------------------------------------------------------------------//
... ... @@ -508,6 +674,8 @@ class gl_spider
508 674 Unbind();
509 675 glDeleteTextures(1, &texbufferID);
510 676 glDeleteBuffers(1, &fboID);
  677 + glDeleteTextures(1, &btexbufferID);
  678 + glDeleteBuffers(1, &bfboId);
511 679 }
512 680  
513 681 ///@param GLuint id texture that is going to be sampled.
... ... @@ -519,6 +687,7 @@ class gl_spider
519 687 {
520 688 texID = id;
521 689 GenerateFBO(16, numSamples*8);
  690 + GenerateFBO(16, 216, btexbufferID, bfboId);
522 691 setDims(0.6, 0.6, 1.0);
523 692 setSize(512.0, 512.0, 426.0);
524 693 setMatrix();
... ... @@ -528,6 +697,9 @@ class gl_spider
528 697 genDirectionVectors(5*M_PI/4);
529 698 genPositionVectors();
530 699 genMagnitudeVectors();
  700 + Unbind();
  701 + ///temporarily changed to 216
  702 + Bind(btexbufferID, bfboId, 27);
531 703 DrawCylinder();
532 704 Unbind();
533 705 }
... ... @@ -612,7 +784,6 @@ class gl_spider
612 784 {
613 785 m[0] = mag;
614 786 m[1] = mag;
615   - // m[2] = mag;
616 787 }
617 788  
618 789  
... ... @@ -655,56 +826,104 @@ class gl_spider
655 826 }
656 827 return out;
657 828 }
658   -
659   - ///Function to get back the framebuffer Object attached to the spider.
660   - ///For external access.
661   - GLuint
662   - getFB()
  829 +
  830 + ///@param pos, the position of the seed to be added.
  831 + ///Adds a seed to the seed list.
  832 + ///Assumes that the coordinates passes are in tissue space.
  833 + void
  834 + setSeed(stim::vec<float> pos)
663 835 {
664   - return fboID;
  836 + seeds.push(pos);
665 837 }
666 838  
667   - ///Method for controling the buffer and texture binding in order to properly
668   - ///do the render to texture.
669 839 void
670   - Bind()
  840 + setSeedVec(stim::vec<float> pos)
671 841 {
672   - float len = 8.0;
673   - glBindFramebuffer(GL_FRAMEBUFFER, fboID);//set up GL buffer
674   - glFramebufferTexture2D(
675   - GL_FRAMEBUFFER,
676   - GL_COLOR_ATTACHMENT0,
677   - GL_TEXTURE_2D,
678   - texbufferID,
679   - 0);
680   - glBindFramebuffer(GL_FRAMEBUFFER, fboID);
681   - GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
682   - glDrawBuffers(1, DrawBuffers);
683   - glBindTexture(GL_TEXTURE_2D, texbufferID);
684   - glClearColor(1,1,1,1);
685   - glClear(GL_COLOR_BUFFER_BIT);
686   - glMatrixMode(GL_PROJECTION);
687   - glLoadIdentity();
688   - glMatrixMode(GL_MODELVIEW);
689   - glLoadIdentity();
690   - glViewport(0,0,2.0*len, numSamples*len);
691   - gluOrtho2D(0.0,2.0*len,0.0,numSamples*len);
692   - glEnable(GL_TEXTURE_3D);
693   - glBindTexture(GL_TEXTURE_3D, texID);
  842 + seedsvecs.push(pos);
  843 + }
694 844  
695   - CHECK_OPENGL_ERROR
  845 + ///@param x, y, z: variables for the x, y and z coordinate of the seed
  846 + ///Adds a seed to the seed list.
  847 + ///Assumes that the coordinates passes are in tissue space.
  848 + void
  849 + setSeed(float x, float y, float z)
  850 + {
  851 + seeds.push(stim::vec<float>(x, y, z));
  852 + }
  853 +
  854 + void
  855 + setSeedVec(float x, float y, float z)
  856 + {
  857 + seedsvecs.push(stim::vec<float>(x, y, z));
  858 + }
  859 +
  860 + stim::vec<float>
  861 + getLastSeed()
  862 + {
  863 + stim::vec<float> tp = seeds.top();
  864 + return tp;
  865 + }
  866 +
  867 + std::stack<stim::vec<float> >
  868 + getSeeds()
  869 + {
  870 + return seeds;
  871 + }
  872 +
  873 + bool
  874 + Empty()
  875 + {
  876 + return (seeds.empty());
  877 + }
  878 + ///@param string file: variables for the x, y and z coordinate of the seed
  879 + ///Adds a seed to the seed list.
  880 + ///Assumes that the coordinates passes are in tissue space.
  881 + void
  882 + setSeeds(std::string file)
  883 + {
  884 + std::ifstream myfile(file.c_str());
  885 + string line;
  886 + if(myfile.is_open())
  887 + {
  888 + while (getline(myfile, line))
  889 + {
  890 + float x, y, z, u, v, w;
  891 + myfile >> x >> y >> z >> u >> v >> w;
  892 + seeds.push(stim::vec<float>(
  893 + ((float) x),
  894 + ((float) y),
  895 + ((float) z)));
  896 + seedsvecs.push(stim::vec<float>(
  897 + ((float) x),
  898 + ((float) y),
  899 + ((float) z)));
  900 + }
  901 + myfile.close();
  902 + } else {
  903 + std::cerr<<"failed" << std::endl;
  904 + }
696 905 }
697 906  
698   - ///Method for Unbinding all of the texture resources
699 907 void
700   - Unbind()
  908 + saveNetwork(std::string name)
701 909 {
702   - //Finalize GL_buffer
703   - glBindTexture(GL_TEXTURE_3D, 0);
704   - glDisable(GL_TEXTURE_3D);
705   - glBindFramebuffer(GL_FRAMEBUFFER,0);
706   - glBindTexture(GL_TEXTURE_2D, 0);
  910 + sk.save(name);
707 911 }
  912 +
  913 + stim::glObj<float>
  914 + getNetwork()
  915 + {
  916 + return sk;
  917 + }
  918 +
  919 + ///Function to get back the framebuffer Object attached to the spider.
  920 + ///For external access.
  921 + GLuint
  922 + getFB()
  923 + {
  924 + return bfboId;
  925 + }
  926 +
708 927 //--------------------------------------------------------------------------//
709 928 //-----------------------------TEMPORARY METHODS----------------------------//
710 929 //--------------------------------------------------------------------------//
... ... @@ -726,12 +945,28 @@ class gl_spider
726 945 int
727 946 Step()
728 947 {
729   - // Bind();
  948 + Bind();
  949 + CHECK_OPENGL_ERROR
  950 + #ifdef TESTING
  951 + start = std::clock();
  952 + #endif
730 953 findOptimalDirection();
731 954 findOptimalPosition();
732 955 findOptimalScale();
  956 + Unbind();
  957 + CHECK_OPENGL_ERROR
  958 +
  959 + Bind(btexbufferID, bfboId, 27);
  960 + CHECK_OPENGL_ERROR
733 961 branchDetection();
734   - // Unbind();
  962 + CHECK_OPENGL_ERROR
  963 + Unbind();
  964 + CHECK_OPENGL_ERROR
  965 + #ifdef TESTING
  966 + duration_sampling = duration_sampling +
  967 + (std::clock() - start) / (double) CLOCKS_PER_SEC;
  968 + num_sampling = num_sampling + 1.0;
  969 + #endif
735 970 return current_cost;
736 971 }
737 972  
... ... @@ -760,36 +995,217 @@ class gl_spider
760 995 void
761 996 DrawCylinder()
762 997 {
763   - Bind();
764 998 glNewList(dList+3, GL_COMPILE);
765 999 float z0 = -0.5; float z1 = 0.5; float r0 = 0.5;
766 1000 float x,y;
767   - float xold = 0.5; float yold = 0.5;
  1001 + float xold = 0.5; float yold = 0.0;
768 1002 float step = 360.0/numSamples;
769   - int j = 0;
770 1003 glEnable(GL_TEXTURE_3D);
771 1004 glBindTexture(GL_TEXTURE_3D, texID);
772 1005 glBegin(GL_QUAD_STRIP);
  1006 + int j = 0;
773 1007 for(float i = step; i <= 360.0; i += step)
774 1008 {
775 1009 x=r0*cos(i*2.0*M_PI/360.0);
776 1010 y=r0*sin(i*2.0*M_PI/360.0);
777 1011 glTexCoord3f(x,y,z0);
778   - glVertex2f(0.0, j*0.1+0.1);
  1012 + glVertex2f(0.0, j*0.2+0.2);
779 1013 glTexCoord3f(x,y,z1);
780   - glVertex2f(16.0, j*0.1+0.1);
  1014 + glVertex2f(16.0, j*0.2+0.2);
781 1015 glTexCoord3f(xold,yold,z1);
782   - glVertex2f(16.0, j*0.1);
  1016 + glVertex2f(16.0, j*0.2);
783 1017 glTexCoord3f(xold,yold,z0);
784   - glVertex2f(0.0, j*0.1);
  1018 + glVertex2f(0.0, j*0.2);
785 1019 xold=x;
786 1020 yold=y;
787 1021 j++;
788 1022 }
789 1023 glEnd();
790 1024 glEndList();
791   - Unbind();
792 1025 }
  1026 +
  1027 +
  1028 + ///@param min_cost the cost value used for tracing
  1029 + ///traces out each seedpoint in the seeds queue to completion in both directions.
  1030 + void
  1031 + trace(int min_cost)
  1032 + {
  1033 + Bind();
  1034 + rev = stim::vec<float>(0.0,0.0,1.0);
  1035 + while(!seeds.empty())
  1036 + {
  1037 + //clear the currently traced line and start a new one.
  1038 + cL.clear();
  1039 + sk.Begin(stim::OBJ_LINE);
  1040 + stim::vec<float> curSeed = seeds.top();
  1041 + stim::vec<float> curSeedVec = seedsvecs.top();
  1042 + setPosition(curSeed);
  1043 + setDirection(curSeedVec);
  1044 + setMagnitude(16.0);
  1045 + cL.push_back(curSeed);
  1046 + sk.createFromSelf(GL_SELECT);
  1047 + traceLine(min_cost);
  1048 +
  1049 + sk.rev();
  1050 + std::reverse(cL.begin(), cL.end());
  1051 + setPosition(curSeed);
  1052 + setDirection(rev);
  1053 + setMagnitude(16.0);
  1054 + sk.createFromSelf(GL_SELECT);
  1055 + traceLine(min_cost);
  1056 +
  1057 + //temporary glObj rendering code.
  1058 +
  1059 + sk.End();
  1060 + seeds.pop();
  1061 + seedsvecs.pop();
  1062 + }
  1063 + Unbind();
  1064 + }
  1065 +
  1066 + ///@param min_cost the cost value used for tracing
  1067 + ///traces the seedpoint passed to completion in one directions.
  1068 + void
  1069 + traceLine(int min_cost)
  1070 + {
  1071 + stim::vec<float> pos;
  1072 + stim::vec<float> mag;
  1073 + bool h;
  1074 + bool started = false;
  1075 + stim::vec<float> size(S[0]*R[0], S[1]*R[1], S[2]*R[2]);
  1076 + while(1)
  1077 + {
  1078 + int cost = Step();
  1079 + if (cost > min_cost){
  1080 +// std::cout << "Min Cost" << std::endl;
  1081 + break;
  1082 + } else {
  1083 + //Have we found an edge?
  1084 + pos = getPosition();
  1085 + if(pos[0] > size[0] || pos[1] > size[1]
  1086 + || pos[2] > size[2] || pos[0] < 0
  1087 + || p[1] < 0 || p[2] < 0)
  1088 + {
  1089 +// std::cout << "Found Edge" << std::endl;
  1090 + break;
  1091 + }
  1092 + //If this is the first step in the trace,
  1093 + // save the direction
  1094 + //(to be used later to trace the fiber in the opposite direction)
  1095 + if(started == false){
  1096 + rev = -getDirection();
  1097 + started = true;
  1098 + }
  1099 +// std::cout << i << p << std::endl;
  1100 + m = getMagnitude();
  1101 + //Has the template size gotten unreasonable?
  1102 + if(m[0] > 75 || m[0] < 1){
  1103 +// std::cout << "Magnitude Limit" << std::endl;
  1104 + break;
  1105 + }
  1106 + else
  1107 + {
  1108 + h = selectObject(pos, getDirection(), m[0]);
  1109 + //Have we hit something previously traced?
  1110 + if(h){
  1111 +// std::cout << "Hit a Previous Line" << std::endl;
  1112 + break;
  1113 + }
  1114 + else {
  1115 + sk.TexCoord(m[0]);
  1116 + sk.Vertex(p[0], p[1], p[2]);
  1117 + }
  1118 + }
  1119 + }
  1120 + }
  1121 + }
  1122 +
  1123 +
  1124 + bool
  1125 + selectObject(stim::vec<float> loc, stim::vec<float> dir, float mag)
  1126 + {
  1127 + //Define the varibles and turn on Selection Mode
  1128 +
  1129 + float s = 3.0;
  1130 + GLuint selectBuf[2048];
  1131 + GLint hits;
  1132 + glSelectBuffer(2048, selectBuf);
  1133 + glDisable(GL_CULL_FACE);
  1134 + (void) glRenderMode(GL_SELECT);
  1135 +
  1136 + //Init Names stack
  1137 +
  1138 + glInitNames();
  1139 + glPushName(1);
  1140 +
  1141 + CHECK_OPENGL_ERROR
  1142 + //What would that vessel see in front of it.
  1143 + camSel.setPosition(loc);
  1144 + camSel.setFocalDistance(mag/s);
  1145 + camSel.LookAt((loc[0]+dir[0]*mag/s),
  1146 + (loc[1]+dir[1]*mag/s),
  1147 + (loc[2]+dir[2]*mag/s));
  1148 + ps = camSel.getPosition();
  1149 + ups = camSel.getUp();
  1150 + ds = camSel.getLookAt();
  1151 + glMatrixMode(GL_PROJECTION);
  1152 + glPushMatrix();
  1153 + glLoadIdentity();
  1154 + glOrtho(-mag/s, mag/s, -mag/s, mag/s, 0.0, mag/s/4.0);
  1155 + glMatrixMode(GL_MODELVIEW);
  1156 + glPushMatrix();
  1157 + glLoadIdentity();
  1158 +
  1159 + CHECK_OPENGL_ERROR
  1160 + gluLookAt(ps[0], ps[1], ps[2],
  1161 + ds[0], ds[1], ds[2],
  1162 + ups[0], ups[1], ups[2]);
  1163 + sk.Render();
  1164 + CHECK_OPENGL_ERROR
  1165 + glLoadName((int) sk.numL());
  1166 + sk.RenderLine(cL);
  1167 +// glPopName();
  1168 + glFlush();
  1169 +
  1170 + glMatrixMode(GL_PROJECTION);
  1171 + glPopMatrix();
  1172 + glMatrixMode(GL_MODELVIEW);
  1173 + CHECK_OPENGL_ERROR
  1174 + glPopMatrix();
  1175 +
  1176 + glEnable(GL_CULL_FACE);
  1177 + hits = glRenderMode(GL_RENDER);
  1178 + bool found_hits = processHits(hits, selectBuf);
  1179 + return found_hits;
  1180 + }
  1181 +
  1182 + //Given a size of the array (hits) and the memory holding it (buffer)
  1183 + //returns whether a hit tool place or not.
  1184 + bool
  1185 + processHits(GLint hits, GLuint buffer[])
  1186 + {
  1187 + GLuint names, *ptr;
  1188 + printf("hits = %u\n", hits);
  1189 + ptr = (GLuint *) buffer;
  1190 + for (int i = 0; i < hits; i++) { /* for each hit */
  1191 + names = *ptr;
  1192 + printf (" number of names for hit = %u\n", names);
  1193 + ptr++;
  1194 + ptr++; //Skip the minimum depth value.
  1195 + ptr++; //Skip the maximum depth value.
  1196 + printf (" the name is ");
  1197 + for (int j = 0; j < names; j++) { /* for each name */
  1198 + printf ("%u ", *ptr); ptr++;
  1199 + }
  1200 + printf ("\n");
  1201 + }
  1202 + if(hits == 0)
  1203 + return 0;
  1204 + else
  1205 + return 1;
  1206 + }
  1207 +
  1208 +
793 1209 };
794 1210 }
795 1211 #endif
... ...
stim/visualization/glObj.h
... ... @@ -29,6 +29,8 @@ private:
29 29 void
30 30 init()
31 31 {
  32 + if(glIsList(dList))
  33 + glDeleteLists(dList, 1);
32 34 dList = glGenLists(1);
33 35 glListBase(dList);
34 36  
... ... @@ -40,15 +42,23 @@ private:
40 42 }
41 43  
42 44 void
43   - Create()
  45 + Create(GLenum mode)
44 46 {
  47 +// GLuint selectBuf[2048];
  48 +// GLint hits;
  49 +// glSelectBuffer(2048, selectBuf);
  50 +
45 51 int len = (int) stim::obj<T>::numL();
46 52 std::vector< stim::vec<float> > line;
47 53 glNewList(dList, GL_COMPILE);
48 54 // glColor3f(0.0, 1.0, 0.0);
49   - glLineWidth(2.5);
  55 + glLineWidth(3.5);
50 56 for(int i = 0; i < len; i++){
51 57 line = stim::obj<T>::getL_V(i);
  58 + if(mode == GL_SELECT)
  59 + {
  60 + glLoadName(i);
  61 + }
52 62 glColor3ub(rand()%255, rand()%255, rand()%255);
53 63 glBegin(GL_LINE_STRIP);
54 64 for(int j = 0; j < line.size(); j++){
... ... @@ -71,21 +81,21 @@ public:
71 81 }
72 82  
73 83 void
74   - createFromSelf()
  84 + createFromSelf(GLenum mode = GL_RENDER)
75 85 {
76 86 // glPopMatrix();
77 87 init();
78   - Create();
  88 + Create(mode);
79 89 // glPushMatrix();
80 90 }
81 91  
82 92 void
83   - createFromFile(std::string filename)
  93 + createFromFile(std::string filename, GLenum mode = GL_RENDER)
84 94 {
85 95 stim::obj<T>::load(filename);
86 96 glPushMatrix(); //Safety Operation to avoid changing the current matrix.
87 97 init();
88   - Create();
  98 + Create(mode);
89 99 glPopMatrix();
90 100 CHECK_OPENGL_ERROR
91 101 }
... ...
stim/visualization/obj.h
... ... @@ -651,14 +651,14 @@ public:
651 651 l.resize(nP);
652 652  
653 653 //copy the points from the point list to the stim vector
654   - unsigned int pi;
  654 + unsigned int pie;
655 655 for(unsigned int p = 0; p < nP; p++){
656 656  
657 657 //get the index of the geometry point
658   - pi = L[i][p][0] - 1;
  658 + pie = L[i][p][0] - 1;
659 659  
660 660 //get the coordinates of the current point
661   - stim::vec<T> newP = V[pi];
  661 + stim::vec<T> newP = V[pie];
662 662  
663 663 //copy the point into the vector
664 664 l[p] = newP;
... ... @@ -705,14 +705,14 @@ public:
705 705 l.resize(nP);
706 706  
707 707 //copy the points from the point list to the stim vector
708   - unsigned int pi;
  708 + unsigned int pie;
709 709 for(unsigned int p = 0; p < nP; p++){
710 710  
711 711 //get the index of the geometry point
712   - pi = L[i][p][1] - 1;
  712 + pie = L[i][p][1] - 1;
713 713  
714 714 //get the coordinates of the current point
715   - stim::vec<T> newP = VT[pi];
  715 + stim::vec<T> newP = VT[pie];
716 716  
717 717 //copy the point into the vector
718 718 l[p] = newP;
... ...