Commit 24aab7c9a3fce4b4e78bf210874c7efc0d1e66f8
1 parent
ecfd14df
added field and complexfield classes
Showing
4 changed files
with
340 additions
and
5 deletions
Show diff stats
1 | +#ifndef RTS_COMPLEXFIELD_H | ||
2 | +#define RTS_COMPLEXFIELD_H | ||
3 | + | ||
4 | +#include "cublas_v2.h" | ||
5 | +#include <cuda_runtime.h> | ||
6 | + | ||
7 | +#include "../math/field.cuh" | ||
8 | +#include "../math/complex.h" | ||
9 | + | ||
10 | +namespace rts{ | ||
11 | + | ||
12 | +/*This class stores functions for saving images of complex fields | ||
13 | +*/ | ||
14 | +template<typename T, unsigned int D> | ||
15 | +class complexfield : public field< rts::complex<T>, D >{ | ||
16 | + using field< rts::complex<T>, D >::R; | ||
17 | + using field< rts::complex<T>, D >::X; | ||
18 | + using field< rts::complex<T>, D >::shape; | ||
19 | + | ||
20 | +public: | ||
21 | + | ||
22 | + //find the maximum value of component n | ||
23 | + rts::complex<T> find_max(unsigned int n){ | ||
24 | + cublasStatus_t stat; | ||
25 | + cublasHandle_t handle; | ||
26 | + | ||
27 | + //create a CUBLAS handle | ||
28 | + stat = cublasCreate(&handle); | ||
29 | + if(stat != CUBLAS_STATUS_SUCCESS){ | ||
30 | + std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | ||
31 | + exit(1); | ||
32 | + } | ||
33 | + | ||
34 | + int L = R[0] * R[1]; //compute the number of discrete points in a slice | ||
35 | + int index; //result of the max operation | ||
36 | + rts::complex<T> result; | ||
37 | + | ||
38 | + if(sizeof(T) == 4) | ||
39 | + stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index); | ||
40 | + else | ||
41 | + stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index); | ||
42 | + | ||
43 | + index -= 1; //adjust for 1-based indexing | ||
44 | + | ||
45 | + //if there was a GPU error, terminate | ||
46 | + if(stat != CUBLAS_STATUS_SUCCESS){ | ||
47 | + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | ||
48 | + exit(1); | ||
49 | + } | ||
50 | + | ||
51 | + //retrieve the maximum value for this slice and store it in the maxVal array | ||
52 | + std::cout<<X[n]<<std::endl; | ||
53 | + HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(rts::complex<T>), cudaMemcpyDeviceToHost)); | ||
54 | + return result; | ||
55 | + } | ||
56 | + | ||
57 | +public: | ||
58 | + | ||
59 | + //constructor (no parameters) | ||
60 | + complexfield() : field<rts::complex<T>, D>(){}; | ||
61 | + | ||
62 | + //constructor (resolution specified) | ||
63 | + complexfield(unsigned int r0, unsigned int r1) : field<rts::complex<T>, D>(r0, r1){}; | ||
64 | + | ||
65 | + //assignment operator (scalar value) | ||
66 | + complexfield & operator= (const complex<T> rhs){ | ||
67 | + | ||
68 | + field< complex<T>, D >::operator=(rhs); | ||
69 | + return *this; | ||
70 | + } | ||
71 | + | ||
72 | + //assignment operator (vector value) | ||
73 | + complexfield & operator= (const vec< complex<T>, D > rhs){ | ||
74 | + | ||
75 | + field< complex<T>, D >::operator=(rhs); | ||
76 | + return *this; | ||
77 | + } | ||
78 | + | ||
79 | + void toImage(std::string filename, unsigned int n){ | ||
80 | + | ||
81 | + } | ||
82 | + | ||
83 | + | ||
84 | +}; | ||
85 | + | ||
86 | + | ||
87 | +} //end namespace rts | ||
88 | + | ||
89 | + | ||
90 | +#endif | ||
0 | \ No newline at end of file | 91 | \ No newline at end of file |
1 | +#ifndef RTS_FIELD_CUH | ||
2 | +#define RTS_FIELD_CUH | ||
3 | + | ||
4 | +#include <vector> | ||
5 | +#include <string> | ||
6 | +#include <sstream> | ||
7 | + | ||
8 | +#include "../math/rect.h" | ||
9 | +#include "../cuda/threads.h" | ||
10 | +#include "../cuda/error.h" | ||
11 | +#include "../cuda/devices.h" | ||
12 | + | ||
13 | + | ||
14 | +namespace rts{ | ||
15 | + | ||
16 | +//multiply R = X * Y | ||
17 | +template<typename T> | ||
18 | +__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){ | ||
19 | + | ||
20 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
21 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
22 | + | ||
23 | + //make sure that the thread indices are in-bounds | ||
24 | + if(iu >= r0 || iv >= r1) return; | ||
25 | + | ||
26 | + //compute the index into the field | ||
27 | + int i = iv*r0 + iu; | ||
28 | + | ||
29 | + //calculate and store the result | ||
30 | + R[i] = X[i] * Y[i]; | ||
31 | +} | ||
32 | + | ||
33 | +//assign a constant value to all points | ||
34 | +template<typename T> | ||
35 | +__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){ | ||
36 | + | ||
37 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | ||
38 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | ||
39 | + | ||
40 | + //make sure that the thread indices are in-bounds | ||
41 | + if(iu >= r0 || iv >= r1) return; | ||
42 | + | ||
43 | + //compute the index into the field | ||
44 | + int i = iv*r0 + iu; | ||
45 | + | ||
46 | + //calculate and store the result | ||
47 | + ptr[i] = val; | ||
48 | +} | ||
49 | + | ||
50 | +template<typename T, unsigned int D> | ||
51 | +class field{ | ||
52 | + | ||
53 | +protected: | ||
54 | + | ||
55 | + T* X[D]; //pointer to the field data | ||
56 | + unsigned int R[2]; //field resolution | ||
57 | + rts::rect<T> shape; //position and shape of the field slice | ||
58 | + | ||
59 | +public: | ||
60 | + | ||
61 | + //returns a list of file names given an input string with wild cards | ||
62 | + std::vector<std::string> process_filename(std::string name){ | ||
63 | + std::stringstream ss(name); | ||
64 | + std::string item; | ||
65 | + std::vector<std::string> elems; | ||
66 | + while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension) | ||
67 | + { | ||
68 | + elems.push_back(item); | ||
69 | + } | ||
70 | + | ||
71 | + std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters) | ||
72 | + std::string ext = elems[1]; //file extension (ex. .bmp, .png) | ||
73 | + ext = std::string(".") + ext; //add a period back into the extension | ||
74 | + | ||
75 | + size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'') | ||
76 | + size_t i1 = prefix.find_last_of("?"); | ||
77 | + | ||
78 | + std::string postfix = prefix.substr(i1+1); | ||
79 | + prefix = prefix.substr(0, i0); | ||
80 | + | ||
81 | + unsigned int digits = i1 - i0 + 1; //compute the number of wildcards | ||
82 | + | ||
83 | + std::vector<std::string> flist; //create a vector of file names | ||
84 | + //fill the list | ||
85 | + for(unsigned int d=0; d<D; d++){ | ||
86 | + std::stringstream ss; //assemble the file name | ||
87 | + ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext; | ||
88 | + flist.push_back(ss.str()); | ||
89 | + } | ||
90 | + | ||
91 | + return flist; | ||
92 | + } | ||
93 | + | ||
94 | + void init(){ | ||
95 | + for(unsigned int n=0; n<D; n++) | ||
96 | + X[n] = NULL; | ||
97 | + } | ||
98 | + void destroy(){ | ||
99 | + for(unsigned int n=0; n<D; n++) | ||
100 | + if(X[n] != NULL) | ||
101 | + HANDLE_ERROR(cudaFree(X[n])); | ||
102 | + } | ||
103 | + | ||
104 | + | ||
105 | +public: | ||
106 | + //field constructor | ||
107 | + field(){ | ||
108 | + R[0] = R[1] = 0; | ||
109 | + init(); | ||
110 | + } | ||
111 | + | ||
112 | + field(unsigned int x, unsigned int y){ | ||
113 | + //set the resolution | ||
114 | + R[0] = x; | ||
115 | + R[1] = y; | ||
116 | + //allocate memory on the GPU | ||
117 | + for(unsigned int n=0; n<D; n++){ | ||
118 | + HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] )); | ||
119 | + } | ||
120 | + clear(); //zero the field | ||
121 | + } | ||
122 | + | ||
123 | + ///copy constructor | ||
124 | + field(const field &rhs){ | ||
125 | + //first make a shallow copy | ||
126 | + R[0] = rhs.R[0]; | ||
127 | + R[1] = rhs.R[1]; | ||
128 | + | ||
129 | + for(unsigned int n=0; n<D; n++){ | ||
130 | + //do we have to make a deep copy? | ||
131 | + if(rhs.X[n] == NULL) | ||
132 | + X[n] = NULL; //no | ||
133 | + else{ | ||
134 | + //allocate the necessary memory | ||
135 | + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | ||
136 | + | ||
137 | + //copy the slice | ||
138 | + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | ||
139 | + } | ||
140 | + } | ||
141 | + } | ||
142 | + | ||
143 | + ~field(){ | ||
144 | + destroy(); | ||
145 | + } | ||
146 | + | ||
147 | + //assignment operator | ||
148 | + field & operator= (const field & rhs){ | ||
149 | + | ||
150 | + //de-allocate any existing GPU memory | ||
151 | + destroy(); | ||
152 | + | ||
153 | + //copy the slice resolution | ||
154 | + R[0] = rhs.R[0]; | ||
155 | + R[1] = rhs.R[1]; | ||
156 | + | ||
157 | + for(unsigned int n=0; n<D; n++) | ||
158 | + { | ||
159 | + //allocate the necessary memory | ||
160 | + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | ||
161 | + //copy the slice | ||
162 | + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | ||
163 | + } | ||
164 | + return *this; | ||
165 | + } | ||
166 | + | ||
167 | + field & operator= (const T rhs){ | ||
168 | + | ||
169 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | ||
170 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
171 | + | ||
172 | + //create one thread for each detector pixel | ||
173 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
174 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
175 | + | ||
176 | + //assign the constant value to all positions and dimensions | ||
177 | + for(int n=0; n<D; n++) | ||
178 | + rts::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]); | ||
179 | + | ||
180 | + return *this; | ||
181 | + } | ||
182 | + | ||
183 | + //assignment of vector component | ||
184 | + field & operator= (const vec<T, D> rhs){ | ||
185 | + | ||
186 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | ||
187 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
188 | + | ||
189 | + //create one thread for each detector pixel | ||
190 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
191 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
192 | + | ||
193 | + //assign the constant value to all positions and dimensions | ||
194 | + for(unsigned int n=0; n<D; n++) | ||
195 | + rts::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]); | ||
196 | + | ||
197 | + return *this; | ||
198 | + | ||
199 | + } | ||
200 | + | ||
201 | + //multiply two fields (element-wise multiplication) | ||
202 | + field<T, D> operator* (const field & rhs){ | ||
203 | + | ||
204 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | ||
205 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | ||
206 | + | ||
207 | + //create one thread for each detector pixel | ||
208 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | ||
209 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | ||
210 | + | ||
211 | + //create a scalar field to store the result | ||
212 | + field<T, D> result(R[0], R[1]); | ||
213 | + | ||
214 | + for(int n=0; n<D; n++) | ||
215 | + rts::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]); | ||
216 | + | ||
217 | + return result; | ||
218 | + } | ||
219 | + | ||
220 | + T* ptr(unsigned int n = 0){ | ||
221 | + if(n < D) | ||
222 | + return X[n]; | ||
223 | + else return NULL; | ||
224 | + } | ||
225 | + | ||
226 | + //return the vector component at position (u, v) | ||
227 | + vec<T, D> get(unsigned int u, unsigned int v){ | ||
228 | + | ||
229 | + vec<T, D> result; | ||
230 | + for(unsigned int d=0; d<D; d++){ | ||
231 | + HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost)); | ||
232 | + } | ||
233 | + | ||
234 | + return result; | ||
235 | + } | ||
236 | + | ||
237 | + //set all components of the field to zero | ||
238 | + void clear(){ | ||
239 | + for(unsigned int n=0; n<D; n++) | ||
240 | + if(X[n] != NULL) | ||
241 | + HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1])); | ||
242 | + } | ||
243 | + | ||
244 | +}; | ||
245 | + | ||
246 | +} //end namespace rts | ||
247 | +#endif | ||
0 | \ No newline at end of file | 248 | \ No newline at end of file |
math/realfield.cuh
@@ -247,8 +247,6 @@ public: | @@ -247,8 +247,6 @@ public: | ||
247 | rts::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]); | 247 | rts::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]); |
248 | 248 | ||
249 | return result; | 249 | return result; |
250 | - | ||
251 | - | ||
252 | } | 250 | } |
253 | 251 | ||
254 | ///copy constructor | 252 | ///copy constructor |
math/rect.h
@@ -154,9 +154,9 @@ public: | @@ -154,9 +154,9 @@ public: | ||
154 | { | 154 | { |
155 | std::stringstream ss; | 155 | std::stringstream ss; |
156 | vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; | 156 | vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; |
157 | - ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; | ||
158 | - ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl; | ||
159 | - ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X; | 157 | + ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; |
158 | + ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl; | ||
159 | + ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X; | ||
160 | 160 | ||
161 | return ss.str(); | 161 | return ss.str(); |
162 | 162 |