Blame view

nfSumUf.cu 3.18 KB
3f56f1f9   dmayerich   initial commit
1
2
  #include "nearfield.h"

  #include <stdlib.h>

d6f53e68   dmayerich   rts organization
3
4
  #include "rts/cuda/error.h"

  #include "rts/cuda/timer.h"

3f56f1f9   dmayerich   initial commit
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
  

  __global__ void gpuScalarUsp(bsComplex* Ufx, bsComplex* Ufy, bsComplex* Ufz,

  							 bsComplex* Ux, bsComplex* Uy, bsComplex* Uz,

  							 bsPoint* ps, ptype* as, int ns, bsRect ABCD, int uR, int vR)

  {

  

  	//get the current coordinate in the plane slice

  	int iu = blockIdx.x * blockDim.x + threadIdx.x;

  	int iv = blockIdx.y * blockDim.y + threadIdx.y;

  

  	//make sure that the thread indices are in-bounds

  	if(iu >= uR || iv >= vR) return;

  

  	//compute the index (easier access to the scalar field array)

  	int i = iv*uR + iu;

  

  	//compute the parameters for u and v

  	ptype u = (ptype)iu / uR;

  	ptype v = (ptype)iv / vR;

  

  	//get the rtsPoint in world space and then the r vector

  	bsPoint p = ABCD(u, v);

  	bsVector r;

  	ptype d;

  

  	//if we are inside of a sphere, return

  	for(int is=0; is<ns; is++)

  	{

  		r = p - ps[is];

  		d = r.len();

  		if(d <= as[is])

  			return;

  	}

  

  	//otherwise add the focused field to the full field

  	if(Ufx != NULL)

  		Ux[i] += Ufx[i];

  	if(Ufy != NULL)

  		Uy[i] += Ufy[i];

  	if(Ufz != NULL)

  		Uz[i] += Ufz[i];

  }

  

  void nearfieldStruct::sumUf()

  {

  

  

  	//create arrays to store sphere positions and radii

  	int nSpheres = sVector.size();

  

  	//if the number of spheres is zero, just copy the incident field

  	if(nSpheres == 0)

  	{

  		if(U.x_hat != NULL)

  			HANDLE_ERROR(cudaMemcpy(U.x_hat, Uf.x_hat, sizeof(bsComplex) * U.R[0] * U.R[1], cudaMemcpyDeviceToDevice));

  		if(U.y_hat != NULL)

  			HANDLE_ERROR(cudaMemcpy(U.y_hat, Uf.y_hat, sizeof(bsComplex) * U.R[0] * U.R[1], cudaMemcpyDeviceToDevice));

  		if(U.z_hat != NULL)

  			HANDLE_ERROR(cudaMemcpy(U.z_hat, Uf.z_hat, sizeof(bsComplex) * U.R[0] * U.R[1], cudaMemcpyDeviceToDevice));

  		return;

  	}

  

  	//time the calculation of the focused field

  	//gpuStartTimer();

  

  	bsPoint* cpu_p = (bsPoint*)malloc(sizeof(bsPoint) * nSpheres);

  	ptype* cpu_a = (ptype*)malloc(sizeof(ptype) * nSpheres);

  

  	//copy the sphere positions and radii to the new arrays

  	for(int s=0; s<nSpheres; s++)

  	{

  		cpu_p[s] = sVector[s].p;

  		cpu_a[s] = sVector[s].a;

  	}

  

  	//copy the arrays to the gpu

  	bsPoint* gpu_p;

  	HANDLE_ERROR(cudaMalloc( (void**) &gpu_p, sizeof(bsPoint) * nSpheres));

  	HANDLE_ERROR(cudaMemcpy(gpu_p, cpu_p, sizeof(bsPoint) * nSpheres, cudaMemcpyHostToDevice));

  	ptype* gpu_a;

  	HANDLE_ERROR(cudaMalloc( (void**) &gpu_a, sizeof(ptype) * nSpheres));

  	HANDLE_ERROR(cudaMemcpy(gpu_a, cpu_a, sizeof(ptype) * nSpheres, cudaMemcpyHostToDevice));

  

  

  	//create one thread for each pixel of the field slice

  	dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);

  	dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);

  

  	//copy the focused field

  	gpuScalarUsp<<<dimGrid, dimBlock>>>(Uf.x_hat,

  										Uf.y_hat,

  										Uf.z_hat,

  										U.x_hat,

  										U.y_hat,

  										U.z_hat,

  										gpu_p,

  										gpu_a,

  										nSpheres,

  										pos,

  										U.R[0],

  										U.R[1]);

  

  

  
      //free sphere lists
      HANDLE_ERROR(cudaFree(gpu_p));
      HANDLE_ERROR(cudaFree(gpu_a));

  

  	//float t = gpuStopTimer();

  	//std::cout<<"Add Us Time: "<<t<<"ms"<<std::endl;
  	//std::cout<<focus<<std::endl;

  

  }