Commit 24aab7c9a3fce4b4e78bf210874c7efc0d1e66f8
1 parent
ecfd14df
added field and complexfield classes
Showing
4 changed files
with
340 additions
and
5 deletions
Show diff stats
1 | +#ifndef RTS_COMPLEXFIELD_H | |
2 | +#define RTS_COMPLEXFIELD_H | |
3 | + | |
4 | +#include "cublas_v2.h" | |
5 | +#include <cuda_runtime.h> | |
6 | + | |
7 | +#include "../math/field.cuh" | |
8 | +#include "../math/complex.h" | |
9 | + | |
10 | +namespace rts{ | |
11 | + | |
12 | +/*This class stores functions for saving images of complex fields | |
13 | +*/ | |
14 | +template<typename T, unsigned int D> | |
15 | +class complexfield : public field< rts::complex<T>, D >{ | |
16 | + using field< rts::complex<T>, D >::R; | |
17 | + using field< rts::complex<T>, D >::X; | |
18 | + using field< rts::complex<T>, D >::shape; | |
19 | + | |
20 | +public: | |
21 | + | |
22 | + //find the maximum value of component n | |
23 | + rts::complex<T> find_max(unsigned int n){ | |
24 | + cublasStatus_t stat; | |
25 | + cublasHandle_t handle; | |
26 | + | |
27 | + //create a CUBLAS handle | |
28 | + stat = cublasCreate(&handle); | |
29 | + if(stat != CUBLAS_STATUS_SUCCESS){ | |
30 | + std::cout<<"CUBLAS Error: initialization failed"<<std::endl; | |
31 | + exit(1); | |
32 | + } | |
33 | + | |
34 | + int L = R[0] * R[1]; //compute the number of discrete points in a slice | |
35 | + int index; //result of the max operation | |
36 | + rts::complex<T> result; | |
37 | + | |
38 | + if(sizeof(T) == 4) | |
39 | + stat = cublasIcamax(handle, L, (const cuComplex*)X[n], 1, &index); | |
40 | + else | |
41 | + stat = cublasIzamax(handle, L, (const cuDoubleComplex*)X[n], 1, &index); | |
42 | + | |
43 | + index -= 1; //adjust for 1-based indexing | |
44 | + | |
45 | + //if there was a GPU error, terminate | |
46 | + if(stat != CUBLAS_STATUS_SUCCESS){ | |
47 | + std::cout<<"CUBLAS Error: failure finding maximum value."<<std::endl; | |
48 | + exit(1); | |
49 | + } | |
50 | + | |
51 | + //retrieve the maximum value for this slice and store it in the maxVal array | |
52 | + std::cout<<X[n]<<std::endl; | |
53 | + HANDLE_ERROR(cudaMemcpy(&result, X[n] + index, sizeof(rts::complex<T>), cudaMemcpyDeviceToHost)); | |
54 | + return result; | |
55 | + } | |
56 | + | |
57 | +public: | |
58 | + | |
59 | + //constructor (no parameters) | |
60 | + complexfield() : field<rts::complex<T>, D>(){}; | |
61 | + | |
62 | + //constructor (resolution specified) | |
63 | + complexfield(unsigned int r0, unsigned int r1) : field<rts::complex<T>, D>(r0, r1){}; | |
64 | + | |
65 | + //assignment operator (scalar value) | |
66 | + complexfield & operator= (const complex<T> rhs){ | |
67 | + | |
68 | + field< complex<T>, D >::operator=(rhs); | |
69 | + return *this; | |
70 | + } | |
71 | + | |
72 | + //assignment operator (vector value) | |
73 | + complexfield & operator= (const vec< complex<T>, D > rhs){ | |
74 | + | |
75 | + field< complex<T>, D >::operator=(rhs); | |
76 | + return *this; | |
77 | + } | |
78 | + | |
79 | + void toImage(std::string filename, unsigned int n){ | |
80 | + | |
81 | + } | |
82 | + | |
83 | + | |
84 | +}; | |
85 | + | |
86 | + | |
87 | +} //end namespace rts | |
88 | + | |
89 | + | |
90 | +#endif | |
0 | 91 | \ No newline at end of file | ... | ... |
1 | +#ifndef RTS_FIELD_CUH | |
2 | +#define RTS_FIELD_CUH | |
3 | + | |
4 | +#include <vector> | |
5 | +#include <string> | |
6 | +#include <sstream> | |
7 | + | |
8 | +#include "../math/rect.h" | |
9 | +#include "../cuda/threads.h" | |
10 | +#include "../cuda/error.h" | |
11 | +#include "../cuda/devices.h" | |
12 | + | |
13 | + | |
14 | +namespace rts{ | |
15 | + | |
16 | +//multiply R = X * Y | |
17 | +template<typename T> | |
18 | +__global__ void gpu_field_multiply(T* R, T* X, T* Y, unsigned int r0, unsigned int r1){ | |
19 | + | |
20 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
21 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
22 | + | |
23 | + //make sure that the thread indices are in-bounds | |
24 | + if(iu >= r0 || iv >= r1) return; | |
25 | + | |
26 | + //compute the index into the field | |
27 | + int i = iv*r0 + iu; | |
28 | + | |
29 | + //calculate and store the result | |
30 | + R[i] = X[i] * Y[i]; | |
31 | +} | |
32 | + | |
33 | +//assign a constant value to all points | |
34 | +template<typename T> | |
35 | +__global__ void gpu_field_assign(T* ptr, T val, unsigned int r0, unsigned int r1){ | |
36 | + | |
37 | + int iu = blockIdx.x * blockDim.x + threadIdx.x; | |
38 | + int iv = blockIdx.y * blockDim.y + threadIdx.y; | |
39 | + | |
40 | + //make sure that the thread indices are in-bounds | |
41 | + if(iu >= r0 || iv >= r1) return; | |
42 | + | |
43 | + //compute the index into the field | |
44 | + int i = iv*r0 + iu; | |
45 | + | |
46 | + //calculate and store the result | |
47 | + ptr[i] = val; | |
48 | +} | |
49 | + | |
50 | +template<typename T, unsigned int D> | |
51 | +class field{ | |
52 | + | |
53 | +protected: | |
54 | + | |
55 | + T* X[D]; //pointer to the field data | |
56 | + unsigned int R[2]; //field resolution | |
57 | + rts::rect<T> shape; //position and shape of the field slice | |
58 | + | |
59 | +public: | |
60 | + | |
61 | + //returns a list of file names given an input string with wild cards | |
62 | + std::vector<std::string> process_filename(std::string name){ | |
63 | + std::stringstream ss(name); | |
64 | + std::string item; | |
65 | + std::vector<std::string> elems; | |
66 | + while(std::getline(ss, item, '.')) //split the string at the '.' character (filename and extension) | |
67 | + { | |
68 | + elems.push_back(item); | |
69 | + } | |
70 | + | |
71 | + std::string prefix = elems[0]; //prefix contains the filename (with wildcard '?' characters) | |
72 | + std::string ext = elems[1]; //file extension (ex. .bmp, .png) | |
73 | + ext = std::string(".") + ext; //add a period back into the extension | |
74 | + | |
75 | + size_t i0 = prefix.find_first_of("?"); //find the positions of the first and last wildcard ('?'') | |
76 | + size_t i1 = prefix.find_last_of("?"); | |
77 | + | |
78 | + std::string postfix = prefix.substr(i1+1); | |
79 | + prefix = prefix.substr(0, i0); | |
80 | + | |
81 | + unsigned int digits = i1 - i0 + 1; //compute the number of wildcards | |
82 | + | |
83 | + std::vector<std::string> flist; //create a vector of file names | |
84 | + //fill the list | |
85 | + for(unsigned int d=0; d<D; d++){ | |
86 | + std::stringstream ss; //assemble the file name | |
87 | + ss<<prefix<<std::setfill('0')<<std::setw(digits)<<d<<postfix<<ext; | |
88 | + flist.push_back(ss.str()); | |
89 | + } | |
90 | + | |
91 | + return flist; | |
92 | + } | |
93 | + | |
94 | + void init(){ | |
95 | + for(unsigned int n=0; n<D; n++) | |
96 | + X[n] = NULL; | |
97 | + } | |
98 | + void destroy(){ | |
99 | + for(unsigned int n=0; n<D; n++) | |
100 | + if(X[n] != NULL) | |
101 | + HANDLE_ERROR(cudaFree(X[n])); | |
102 | + } | |
103 | + | |
104 | + | |
105 | +public: | |
106 | + //field constructor | |
107 | + field(){ | |
108 | + R[0] = R[1] = 0; | |
109 | + init(); | |
110 | + } | |
111 | + | |
112 | + field(unsigned int x, unsigned int y){ | |
113 | + //set the resolution | |
114 | + R[0] = x; | |
115 | + R[1] = y; | |
116 | + //allocate memory on the GPU | |
117 | + for(unsigned int n=0; n<D; n++){ | |
118 | + HANDLE_ERROR(cudaMalloc( (void**)&X[n], sizeof(T) * R[0] * R[1] )); | |
119 | + } | |
120 | + clear(); //zero the field | |
121 | + } | |
122 | + | |
123 | + ///copy constructor | |
124 | + field(const field &rhs){ | |
125 | + //first make a shallow copy | |
126 | + R[0] = rhs.R[0]; | |
127 | + R[1] = rhs.R[1]; | |
128 | + | |
129 | + for(unsigned int n=0; n<D; n++){ | |
130 | + //do we have to make a deep copy? | |
131 | + if(rhs.X[n] == NULL) | |
132 | + X[n] = NULL; //no | |
133 | + else{ | |
134 | + //allocate the necessary memory | |
135 | + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | |
136 | + | |
137 | + //copy the slice | |
138 | + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
139 | + } | |
140 | + } | |
141 | + } | |
142 | + | |
143 | + ~field(){ | |
144 | + destroy(); | |
145 | + } | |
146 | + | |
147 | + //assignment operator | |
148 | + field & operator= (const field & rhs){ | |
149 | + | |
150 | + //de-allocate any existing GPU memory | |
151 | + destroy(); | |
152 | + | |
153 | + //copy the slice resolution | |
154 | + R[0] = rhs.R[0]; | |
155 | + R[1] = rhs.R[1]; | |
156 | + | |
157 | + for(unsigned int n=0; n<D; n++) | |
158 | + { | |
159 | + //allocate the necessary memory | |
160 | + HANDLE_ERROR(cudaMalloc(&X[n], sizeof(T) * R[0] * R[1])); | |
161 | + //copy the slice | |
162 | + HANDLE_ERROR(cudaMemcpy(X[n], rhs.X[n], sizeof(T) * R[0] * R[1], cudaMemcpyDeviceToDevice)); | |
163 | + } | |
164 | + return *this; | |
165 | + } | |
166 | + | |
167 | + field & operator= (const T rhs){ | |
168 | + | |
169 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
170 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
171 | + | |
172 | + //create one thread for each detector pixel | |
173 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
174 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
175 | + | |
176 | + //assign the constant value to all positions and dimensions | |
177 | + for(int n=0; n<D; n++) | |
178 | + rts::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs, R[0], R[1]); | |
179 | + | |
180 | + return *this; | |
181 | + } | |
182 | + | |
183 | + //assignment of vector component | |
184 | + field & operator= (const vec<T, D> rhs){ | |
185 | + | |
186 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
187 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
188 | + | |
189 | + //create one thread for each detector pixel | |
190 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
191 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
192 | + | |
193 | + //assign the constant value to all positions and dimensions | |
194 | + for(unsigned int n=0; n<D; n++) | |
195 | + rts::gpu_field_assign <<<dimGrid, dimBlock>>> (X[n], rhs.v[n], R[0], R[1]); | |
196 | + | |
197 | + return *this; | |
198 | + | |
199 | + } | |
200 | + | |
201 | + //multiply two fields (element-wise multiplication) | |
202 | + field<T, D> operator* (const field & rhs){ | |
203 | + | |
204 | + int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size | |
205 | + int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); | |
206 | + | |
207 | + //create one thread for each detector pixel | |
208 | + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); | |
209 | + dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); | |
210 | + | |
211 | + //create a scalar field to store the result | |
212 | + field<T, D> result(R[0], R[1]); | |
213 | + | |
214 | + for(int n=0; n<D; n++) | |
215 | + rts::gpu_field_multiply <<<dimGrid, dimBlock>>> (result.X[n], X[n], rhs.X[n], R[0], R[1]); | |
216 | + | |
217 | + return result; | |
218 | + } | |
219 | + | |
220 | + T* ptr(unsigned int n = 0){ | |
221 | + if(n < D) | |
222 | + return X[n]; | |
223 | + else return NULL; | |
224 | + } | |
225 | + | |
226 | + //return the vector component at position (u, v) | |
227 | + vec<T, D> get(unsigned int u, unsigned int v){ | |
228 | + | |
229 | + vec<T, D> result; | |
230 | + for(unsigned int d=0; d<D; d++){ | |
231 | + HANDLE_ERROR(cudaMemcpy(&result[d], X[d] + v*R[0] + u, sizeof(T), cudaMemcpyDeviceToHost)); | |
232 | + } | |
233 | + | |
234 | + return result; | |
235 | + } | |
236 | + | |
237 | + //set all components of the field to zero | |
238 | + void clear(){ | |
239 | + for(unsigned int n=0; n<D; n++) | |
240 | + if(X[n] != NULL) | |
241 | + HANDLE_ERROR(cudaMemset(X[n], 0, sizeof(T) * R[0] * R[1])); | |
242 | + } | |
243 | + | |
244 | +}; | |
245 | + | |
246 | +} //end namespace rts | |
247 | +#endif | |
0 | 248 | \ No newline at end of file | ... | ... |
math/realfield.cuh
math/rect.h
... | ... | @@ -154,9 +154,9 @@ public: |
154 | 154 | { |
155 | 155 | std::stringstream ss; |
156 | 156 | vec<T, N> A = C - X * (T)0.5 - Y * (T)0.5; |
157 | - ss<<std::left<<"B="<<setfill('-')<<setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; | |
158 | - ss<<setfill(' ')<<setw(23)<<"|"<<"|"<<std::endl<<setw(23)<<"|"<<"|"<<std::endl; | |
159 | - ss<<std::left<<"A="<<setfill('-')<<setw(20)<<A<<">"<<"D="<<A + X; | |
157 | + ss<<std::left<<"B="<<std::setfill('-')<<std::setw(20)<<A + Y<<">"<<"C="<<A + Y + X<<std::endl; | |
158 | + ss<<std::setfill(' ')<<std::setw(23)<<"|"<<"|"<<std::endl<<std::setw(23)<<"|"<<"|"<<std::endl; | |
159 | + ss<<std::left<<"A="<<std::setfill('-')<<std::setw(20)<<A<<">"<<"D="<<A + X; | |
160 | 160 | |
161 | 161 | return ss.str(); |
162 | 162 | ... | ... |