Commit c4887649e0747372189697e6d0ee71097d7865fd
1 parent
ac788020
Fixed a significant memory leak in cost.h; minor bug fixes.
Showing
2 changed files
with
38 additions
and
16 deletions
Show diff stats
stim/cuda/cost.h
... | ... | @@ -6,7 +6,8 @@ |
6 | 6 | #include <stim/visualization/colormap.h> |
7 | 7 | #include <sstream> |
8 | 8 | #include <stim/math/mathvec.h> |
9 | - | |
9 | +#include <stim/cuda/devices.h> | |
10 | +#include <stim/cuda/threads.h> | |
10 | 11 | |
11 | 12 | ///Cost function that works with the gl-spider class to find index of the item with min-cost. |
12 | 13 | typedef unsigned char uchar; |
... | ... | @@ -38,6 +39,7 @@ float get_sum(float *diff) |
38 | 39 | ret = cublasSetVector(20*10, sizeof(*diff), diff, 1, v_dif, 1); |
39 | 40 | float out; |
40 | 41 | ret = cublasSasum(handle, 20*10, v_dif, 1, &out); |
42 | + cublasDestroy(ret); | |
41 | 43 | cublasDestroy(handle); |
42 | 44 | return out; |
43 | 45 | } |
... | ... | @@ -90,7 +92,7 @@ void initArray(cudaGraphicsResource_t src, int DIM_Y) |
90 | 92 | cudaGraphicsMapResources(1, &src) |
91 | 93 | ); |
92 | 94 | HANDLE_ERROR( |
93 | - cudaGraphicsSubResourceGetMappedArray(&srcArray, src,0,0) | |
95 | + cudaGraphicsSubResourceGetMappedArray(&srcArray, src, 0, 0) | |
94 | 96 | ); |
95 | 97 | HANDLE_ERROR( |
96 | 98 | cudaBindTextureToArray(texIn, srcArray) |
... | ... | @@ -109,9 +111,6 @@ void initArray(cudaGraphicsResource_t src, int DIM_Y) |
109 | 111 | void cleanUP(cudaGraphicsResource_t src) |
110 | 112 | { |
111 | 113 | HANDLE_ERROR( |
112 | - cudaUnbindTexture(texIn) | |
113 | - ); | |
114 | - HANDLE_ERROR( | |
115 | 114 | cudaFree(result) |
116 | 115 | ); |
117 | 116 | HANDLE_ERROR( |
... | ... | @@ -120,7 +119,13 @@ void cleanUP(cudaGraphicsResource_t src) |
120 | 119 | HANDLE_ERROR( |
121 | 120 | cudaFree(v_dif) |
122 | 121 | ); |
122 | + HANDLE_ERROR( | |
123 | + cudaUnbindTexture(texIn) | |
124 | + ); | |
123 | 125 | } |
126 | + | |
127 | + | |
128 | + | |
124 | 129 | ///External access-point to the cuda function |
125 | 130 | ///@param src, cudaGraphicsResource that handles the shared OpenGL/Cuda Texture |
126 | 131 | ///@param DIM_Y, the number of samples in the template. |
... | ... | @@ -128,17 +133,30 @@ void cleanUP(cudaGraphicsResource_t src) |
128 | 133 | extern "C" |
129 | 134 | stim::vec<int> get_cost(cudaGraphicsResource_t src, int DIM_Y) |
130 | 135 | { |
136 | +// int minGridSize; | |
137 | +// int blockSize; | |
138 | + | |
139 | +// cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, get_diff, 0, 20*DIM_Y*10); | |
140 | +// std::cout << blockSize << std::endl; | |
141 | +// std::cout << minGridSize << std::endl; | |
142 | + | |
131 | 143 | float output[DIM_Y]; |
132 | 144 | stim::vec<int> ret(0, 0); |
133 | 145 | float mini = 10000000000000000.0; |
134 | 146 | int idx; |
135 | 147 | stringstream name; //for debugging |
136 | - name << "Test.bmp"; | |
148 | +// name << "Test.bmp"; | |
137 | 149 | initArray(src, DIM_Y*10); |
138 | - dim3 grid(20, DIM_Y*10); | |
139 | - dim3 block(1, 1); | |
150 | + dim3 grid(20/2, DIM_Y*10/2); | |
151 | +// dim3 block(4,4); | |
152 | +// dim3 grid(20/4, DIM_Y*10/4); | |
153 | +// int gridSize = (DIM_Y*10*20 + 1024 - 1)/1024; | |
154 | +// dim3 grid(26, 26); | |
155 | + dim3 block(2, 2); | |
156 | +// dim3 grid = GenGrid1D(DIM_Y*10*20); | |
157 | + | |
140 | 158 | get_diff <<< grid, block >>> (result); |
141 | - stim::gpu2image<float>(result, name.str(), 20,DIM_Y*10,0,1); | |
159 | +// stim::gpu2image<float>(result, name.str(), 20,DIM_Y*10,0,1); | |
142 | 160 | for (int i = 0; i < DIM_Y; i++){ |
143 | 161 | output[i] = get_sum(result+(20*10*i)); |
144 | 162 | if(output[i] <= mini){ | ... | ... |
stim/gl/gl_spider.h
... | ... | @@ -15,6 +15,7 @@ |
15 | 15 | #include "stim/math/matrix.h" |
16 | 16 | #include "stim/cuda/cost.h" |
17 | 17 | #include "stim/cuda/glbind.h" |
18 | +#include <stim/visualization/obj.h> | |
18 | 19 | #include <vector> |
19 | 20 | |
20 | 21 | #include <iostream> |
... | ... | @@ -55,6 +56,7 @@ class gl_spider |
55 | 56 | GLuint texbufferID; |
56 | 57 | int numSamples; |
57 | 58 | float stepsize = 3.0; |
59 | + int current_cost; | |
58 | 60 | |
59 | 61 | /// Method for finding the best scale for the spider. |
60 | 62 | /// changes the x, y, z size of the spider to minimize the cost |
... | ... | @@ -119,7 +121,7 @@ class gl_spider |
119 | 121 | setMatrix(); |
120 | 122 | glCallList(dList+3); |
121 | 123 | |
122 | - int best = getCost(); | |
124 | +// int best = getCost(); | |
123 | 125 | |
124 | 126 | } |
125 | 127 | |
... | ... | @@ -492,8 +494,9 @@ class gl_spider |
492 | 494 | createResource(); |
493 | 495 | stim::vec<int> cost = get_cost(resource, numSamples); |
494 | 496 | destroyResource(); |
495 | - if (cost[1] >= 80) | |
496 | - exit(0); | |
497 | +// if (cost[1] >= 80) | |
498 | +// exit(0); | |
499 | + current_cost = cost[1]; | |
497 | 500 | return cost[0]; |
498 | 501 | } |
499 | 502 | |
... | ... | @@ -562,7 +565,7 @@ class gl_spider |
562 | 565 | dList = glGenLists(3); |
563 | 566 | glListBase(dList); |
564 | 567 | Bind(); |
565 | - genDirectionVectors(M_PI); | |
568 | + genDirectionVectors(5*M_PI/4); | |
566 | 569 | genPositionVectors(); |
567 | 570 | genMagnitudeVectors(); |
568 | 571 | DrawCylinder(); |
... | ... | @@ -649,7 +652,7 @@ class gl_spider |
649 | 652 | { |
650 | 653 | m[0] = mag; |
651 | 654 | m[1] = mag; |
652 | - m[2] = mag; | |
655 | + // m[2] = mag; | |
653 | 656 | } |
654 | 657 | |
655 | 658 | |
... | ... | @@ -719,15 +722,16 @@ class gl_spider |
719 | 722 | } |
720 | 723 | |
721 | 724 | |
722 | - void | |
725 | + int | |
723 | 726 | Step() |
724 | 727 | { |
725 | 728 | Bind(); |
726 | 729 | findOptimalDirection(); |
727 | 730 | findOptimalPosition(); |
728 | 731 | findOptimalScale(); |
729 | -// branchDetection(); | |
732 | + // branchDetection(); | |
730 | 733 | Unbind(); |
734 | + return current_cost; | |
731 | 735 | } |
732 | 736 | |
733 | 737 | ... | ... |