Authored by dmayerich
1 parent b6179de6

Showing 27 changed files with 1518 additions and 588 deletions
bessjy.cpp

 ... ... @@ -13,7 +13,9 @@ 13 13 // 14 14 #define _USE_MATH_DEFINES 15 15 #include 16 -#include "bessel.h" 16 +#include "bessel.h" 17 + 18 +#define PI 3.14159 17 19 18 20 double gamma(double x); 19 21 // ... ... @@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &nm,double *jn,double *yn, 426 428 0.2775764465332031, 427 429 -1.993531733751297, 428 430 2.724882731126854e1}; 429 - 431 + 430 432 int i,k,m; 431 433 nm = n; 432 434 if ((x < 0.0) || (n < 0)) return 1; ... ... @@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &vm,double *jv,double *yv, 702 704 } 703 705 vm = n + v0; 704 706 return 0; 707 +} 708 + 709 +int bessjyv_sph(int v, double z, double &vm, double* cjv, 710 + double* cyv, double* cjvp, double* cyvp) 711 +{ 712 + //first, compute the bessel functions of fractional order 713 + bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); 714 + 715 + //iterate through each and scale 716 + for(int n = 0; n<=v; n++) 717 + { 718 + 719 + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); 720 + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); 721 + 722 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0)); 723 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0)); 724 + } 725 + 726 + return 0; 727 + 705 728 } 706 - 729 + ... ...
cbessjy.cpp

 ... ... @@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex<double> z,double &vm,complex<double>*cjv, 724 724 //iterate through each and scale 725 725 for(int n = 0; n<=v; n++) 726 726 { 727 + 727 728 cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); 728 729 cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); 729 730 ... ...
colormap.h deleted
 1 -#ifndef RTS_COLORMAP_H 2 -#define RTS_COLORMAP_H 3 - 4 -#include 5 -#include 6 -#include 7 -#include "rts/cuda/error.h" 8 - 9 - 10 -#define BREWER_CTRL_PTS 11 11 - 12 -#ifdef __CUDACC__ 13 -texture cudaTexBrewer; 14 -static cudaArray* gpuBrewer; 15 -#endif 16 - 17 - 18 - 19 -namespace rts{ 20 - namespace colormap{ 21 - 22 -enum colormapType {cmBrewer, cmGrayscale}; 23 - 24 -static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size) 25 -{ 26 - //create an image object 27 - QImage image(x_size, y_size, QImage::Format_RGB32); 28 - 29 - int i; 30 - unsigned char r, g, b; 31 - unsigned int x, y; 32 - for(y=0; y 101 -__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) 102 -{ 103 - int i = blockIdx.x * blockDim.x + threadIdx.x; 104 - if(i >= N) return; 105 - 106 - //compute the normalized value on [minVal maxVal] 107 - float a = (gpuSource[i] - minVal) / (maxVal - minVal); 108 - 109 - //lookup the color 110 - float shift = 1.0/BREWER_CTRL_PTS; 111 - float4 color = tex1D(cudaTexBrewer, a+shift); 112 - 113 - gpuDest[i * 3 + 0] = 255 * color.x; 114 - gpuDest[i * 3 + 1] = 255 * color.y; 115 - gpuDest[i * 3 + 2] = 255 * color.z; 116 -} 117 - 118 -template 119 -__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) 120 -{ 121 - int i = blockIdx.x * blockDim.x + threadIdx.x; 122 - if(i >= N) return; 123 - 124 - //compute the normalized value on [minVal maxVal] 125 - float a = (gpuSource[i] - minVal) / (maxVal - minVal); 126 - 127 - gpuDest[i * 3 + 0] = 255 * a; 128 - gpuDest[i * 3 + 1] = 255 * a; 129 - gpuDest[i * 3 + 2] = 255 * a; 130 -} 131 - 132 -template 133 -static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128) 134 -{ 135 - //This function converts a scalar field on the GPU to a color 
image on the GPU 136 - int gridDim = (nVals + blockDim - 1)/blockDim; 137 - if(cm == cmGrayscale) 138 - applyGrayscale<<>>(gpuSource, gpuDest, nVals, minVal, maxVal); 139 - else if(cm == cmBrewer) 140 - { 141 - initBrewer(); 142 - applyBrewer<<>>(gpuSource, gpuDest, nVals, minVal, maxVal); 143 - destroyBrewer(); 144 - } 145 - 146 -} 147 - 148 -template 149 -static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale) 150 -{ 151 - //this function converts a scalar field on the GPU to a color image on the CPU 152 - 153 - //first create the color image on the GPU 154 - 155 - //allocate GPU memory for the color image 156 - unsigned char* gpuDest; 157 - HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 )); 158 - 159 - //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals)); 160 - 161 - //create the image on the gpu 162 - gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm); 163 - 164 - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3)); 165 - 166 - //copy the image from the GPU to the CPU 167 - HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost)); 168 - 169 - HANDLE_ERROR(cudaFree( gpuDest )); 170 - 171 -} 172 - 173 -template 174 -static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) 175 -{ 176 - //allocate a color buffer 177 - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); 178 - 179 - //do the mapping 180 - gpu2cpu(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); 181 - 182 - //copy the buffer to an image 183 - buffer2image(cpuBuffer, fileDest, x_size, y_size); 184 - 185 - free(cpuBuffer); 186 -} 187 - 188 -#endif 189 - 190 -template 191 -static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, 
colormapType cm = cmGrayscale) 192 -{ 193 - int i; 194 - float a; 195 - float range = valMax - valMin; 196 - for(i = 0; i 211 -static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) 212 -{ 213 - //allocate a color buffer 214 - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); 215 - 216 - //do the mapping 217 - cpu2cpu(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); 218 - 219 - //copy the buffer to an image 220 - buffer2image(cpuBuffer, fileDest, x_size, y_size); 221 - 222 - free(cpuBuffer); 223 - 224 -} 225 - 226 -}} //end namespace colormap and rts 227 - 228 -#endif 229 -
dataTypes.h

 ... ... @@ -24,6 +24,8 @@ typedef double ptype; 24 24 25 25 typedef ptype fieldPoint; 26 26 27 +extern bool verbose; 28 + 27 29 //hybrid GPU/CPU complex data typ 28 30 #include "rts/math/complex.h" 29 31 #include "rts/math/vector.h" ... ...
defaults.h

 ... ... @@ -15,14 +15,14 @@ 15 15 #define DEFAULT_FOCUS_X 0 16 16 #define DEFAULT_FOCUS_Y 0 17 17 #define DEFAULT_FOCUS_Z 0 18 -#define DEFAULT_INCIDENT_ORDER 100 18 +//#define DEFAULT_INCIDENT_ORDER 20 19 19 #define DEFAULT_STABILITY_PARM 1.4 20 20 21 21 //optics 22 -#define DEFAULT_CONDENSER_MIN 0.0 22 +#define DEFAULT_CONDENSER_MIN 0 23 23 #define DEFAULT_CONDENSER_MAX 1 24 24 25 -#define DEFAULT_OBJECTIVE_MIN 0.0 25 +#define DEFAULT_OBJECTIVE_MIN 0 26 26 #define DEFAULT_OBJECTIVE_MAX 1 27 27 28 28 //incident light direction ... ... @@ -36,17 +36,20 @@ 36 36 //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective 37 37 38 38 39 -#define DEFAULT_SLICE_MIN_X -5 40 -#define DEFAULT_SLICE_MIN_Y 0 41 -#define DEFAULT_SLICE_MIN_Z -5 39 +#define DEFAULT_PLANE_MIN_X -5 40 +#define DEFAULT_PLANE_MIN_Y 0 41 +#define DEFAULT_PLANE_MIN_Z -5 42 42 43 -#define DEFAULT_SLICE_MAX_X 5 44 -#define DEFAULT_SLICE_MAX_Y 0 45 -#define DEFAULT_SLICE_MAX_Z 5 43 +#define DEFAULT_PLANE_MAX_X 5 44 +#define DEFAULT_PLANE_MAX_Y 0 45 +#define DEFAULT_PLANE_MAX_Z 5 46 46 47 -#define DEFAULT_SLICE_NORM_X 0 48 -#define DEFAULT_SLICE_NORM_Y 1 49 -#define DEFAULT_SLICE_NORM_Z 0 47 +#define DEFAULT_PLANE_NORM_X 0 48 +#define DEFAULT_PLANE_NORM_Y 1 49 +#define DEFAULT_PLANE_NORM_Z 0 50 + 51 +#define DEFAULT_PLANE_SIZE 40 52 +#define DEFAULT_PLANE_POSITION 0 50 53 51 54 52 55 /* ... ... 
@@ -64,21 +67,23 @@ 64 67 */ 65 68 66 69 67 -#define DEFAULT_FIELD_ORDER 200 70 +#define DEFAULT_FIELD_ORDER 10 68 71 69 -#define DEFAULT_SAMPLES 200 72 +#define DEFAULT_SAMPLES 400 70 73 71 74 #define DEFAULT_SLICE_RES 256 72 75 76 +#define DEFAULT_SPHERE_THETA_R 1000 77 + 73 78 #define DEFAULT_PADDING 1 74 79 #define DEFAULT_SUPERSAMPLE 1 75 80 76 -#define DEFAULT_INTENSITY_FILE "testappend" 81 +#define DEFAULT_INTENSITY_FILE "out_i.bmp" 77 82 #define DEFAULT_TRANSMITTANCE_FILE "" 78 -#define DEFAULT_ABSORBANCE_FILE "out_a" 83 +#define DEFAULT_ABSORBANCE_FILE "out_a.bmp" 79 84 #define DEFAULT_NEAR_FILE "out_n.bmp" 80 85 #define DEFAULT_FAR_FILE "out_f.bmp" 81 -#define DEFAULT_EXTENDED_SOURCE "einstein_small.jpg" 86 +#define DEFAULT_EXTENDED_SOURCE "" 82 87 #define DEFAULT_FIELD_TYPE "magnitude" 83 88 #define DEFAULT_FORMAT fileoutStruct::formatImage 84 89 #define DEFAULT_COLORMAP "brewer" ... ...
fieldslice.cpp

 ... ... @@ -8,14 +8,16 @@ 8 8 using namespace std; 9 9 10 10 fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) 11 -{ 11 +{ 12 + x_hat = y_hat = z_hat = NULL; 13 + 12 14 //save the slice resolution 13 15 R[0] = x_size; 14 16 R[1] = x_size; 15 17 16 18 scalarField = true; 17 19 18 - //init_gpu(); 20 + init_gpu(); 19 21 20 22 21 23 } ... ... @@ -101,5 +103,5 @@ fieldslice::fieldslice() 101 103 102 104 fieldslice::~fieldslice() 103 105 { 104 - //kill_gpu(); 106 + kill_gpu(); 105 107 } ... ...
fieldslice.cu

fieldslice.h

 ... ... @@ -31,6 +31,9 @@ struct fieldslice 31 31 32 32 ~fieldslice(); 33 33 34 + //copy constructor 35 + fieldslice(const fieldslice& rhs); 36 + 34 37 //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); 35 38 36 39 scalarslice Mag(); ... ... @@ -47,6 +50,7 @@ struct fieldslice 47 50 48 51 //crop a region from the field 49 52 fieldslice crop(int u, int v, int su, int sv); 53 + fieldslice& operator=(const fieldslice& rhs); 50 54 51 55 void init_gpu(); 52 56 void kill_gpu(); ... ...
fileout.cu

 ... ... @@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope) 186 186 //save images of the fields in the microscope 187 187 188 188 //if the user specifies an extended source 189 - if(scope->focalPoints.size() > 1) 189 + if(scope->focalPoints.size() > 0) 190 190 { 191 191 //simulate the extended source and output the detector image 192 192 scope->SimulateExtendedSource(); 193 193 194 + //saveNearField(&scope->nf); 195 + saveFarField(scope); 196 + 197 + //save the detector images 198 + saveDetector(scope); 199 + 200 + //simulate scattering for the last point (so that you have a near field image) 201 + scope->SimulateScattering(); 202 + saveNearField(&scope->nf); 203 + 194 204 } 195 205 else 196 206 { ... ... @@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope) 203 213 //run the far-field simulation 204 214 scope->SimulateImaging(); 205 215 216 + //saveNearField(&scope->nf); 206 217 saveFarField(scope); 207 218 219 + //save the detector images 220 + saveDetector(scope); 221 + 208 222 } 209 223 210 - //save the detector images 211 - saveDetector(scope); 224 + 212 225 213 226 214 227 } ... ...
fileout.h

 ... ... @@ -5,7 +5,7 @@ 5 5 //#include "defaults.h" 6 6 #include "dataTypes.h" 7 7 8 -#include "colormap.h" 8 +#include "rts/graphics/colormap.h" 9 9 #include "fieldslice.h" 10 10 #include "nearfield.h" 11 11 #include "microscope.h" ... ... @@ -34,7 +34,7 @@ struct fileoutStruct{ 34 34 //image_source source; 35 35 36 36 //color map info 37 - rts::colormap::colormapType colormap; 37 + rts::colormapType colormap; 38 38 ptype colorMax; 39 39 40 40 void Save(microscopeStruct* scope); ... ...
main.cpp

 ... ... @@ -24,6 +24,7 @@ microscopeStruct* SCOPE; 24 24 #include "warnings.h" 25 25 26 26 fileoutStruct gFileOut; 27 +bool verbose = false; 27 28 using namespace std; 28 29 29 30 int cbessjyva(double v,complex z,double &vm,complex*cjv, ... ... @@ -31,32 +32,19 @@ int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv, 31 32 32 33 int main(int argc, char *argv[]) 33 34 { 34 - //test Envi loading and saving 35 - //EnviFile envi("testenvi", "w"); 36 - 37 - //float* data = (float*)malloc(sizeof(float) * 100 * 100); 38 - //envi.addBand(data, 100, 100, 100); 39 - 40 - //envi.close(); 41 - 42 - //return 0; 43 35 44 36 SCOPE = new microscopeStruct(); 45 37 46 - cout<nf.Uf.R[0]<init(); 54 42 55 - OutputOptions(); 56 - 57 43 gFileOut.Save(SCOPE); 58 44 59 - //NF->destroy(); 45 + if(verbose) 46 + OutputOptions(); 47 + 60 48 SCOPE->destroy(); 61 49 62 50 ... ...
microscope.cu

 ... ... @@ -4,7 +4,7 @@ 4 4 #include "rts/tools/progressbar.h" 5 5 #include "rts/cuda/timer.h" 6 6 #include "dataTypes.h" 7 -#include "colormap.h" 7 +#include "rts/graphics/colormap.h" 8 8 9 9 #include 10 10 ... ... @@ -112,8 +112,8 @@ void microscopeStruct::getFarField() 112 112 //Compute the Far Field image of the focal plane 113 113 114 114 //clear the memory from previous detector fields 115 - Ud.kill_gpu(); 116 - Ufd.kill_gpu(); 115 + //Ud.kill_gpu(); 116 + //Ufd.kill_gpu(); 117 117 118 118 //first crop the filtered near-field image of the source and scattered fields 119 119 Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); ... ... @@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource() 261 261 t += gpuStopTimer(); 262 262 263 263 rtsProgressBar((double)(i+1)/(double)npts * 100); 264 + //unsigned char c; 265 + //cin>>c; 264 266 } 265 - cout<
microscope.h

 ... ... @@ -63,6 +63,8 @@ struct microscopeStruct 63 63 scalarslice getTransmittance(); 64 64 scalarslice getIntensity(); 65 65 66 + string toStr(); 67 + 66 68 67 69 68 70 }; ... ...
montecarlo.cpp

 ... ... @@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) 35 35 ptype inPhi = asin(NAin); 36 36 ptype outPhi = asin(NAout); 37 37 38 - //cout<<"inPhi: "<
nearfield.cpp

 1 1 #include "nearfield.h" 2 +#include 3 +#include 4 + 5 +#ifdef _WIN32 6 +#define isnan(x) _isnan(x) 7 +#define isinf(x) (!_finite(x)) 8 +#endif 9 + 10 +int bessjyv_sph(int v, double z, double &vm, double* cjv, 11 + double* cyv, double* cjvp, double* cyvp); 2 12 3 13 nearfieldStruct::nearfieldStruct() 4 14 { 5 15 scalarSim = true; 6 16 planeWave = false; 17 + lut_us = true; 18 + lut_uf = false; 7 19 8 20 nWaves = 0; 9 21 } ... ... @@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr() 46 58 ss<<"Condenser NA: "< n = mVector[imat](lambda); 99 - //std::cout<<"Sphere refractive index: "<
nearfield.h

 ... ... @@ -31,6 +31,8 @@ struct nearfieldStruct 31 31 32 32 //slices for the focused field 33 33 fieldslice Uf; 34 + ptype d_min, d_max; 35 + 34 36 // and total field: Uf + sum(Us) 35 37 fieldslice U; 36 38 ... ... @@ -43,6 +45,14 @@ struct nearfieldStruct 43 45 //flag for a plane wave 44 46 bool planeWave; 45 47 48 + //flag for using a LUT 49 + bool lut_uf; 50 + bool lut_us; 51 + 52 + //timings 53 + float t_Uf; 54 + float t_Us; 55 + 46 56 47 57 48 58 //---------Scatterers------------ ... ... @@ -78,10 +88,17 @@ struct nearfieldStruct 78 88 void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); 79 89 80 90 //this function re-computes the focused field 91 + void calcUf(); 81 92 void scalarUf(); 93 + void scalarUfLut(); 94 + 95 + void calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR); 82 96 83 97 //compute the field scattered by all of the materials 98 + void calcUs(); 84 99 void scalarUs(); 100 + void scalarUpLut(); 101 + 85 102 86 103 //add the incident field to the sum of scattered fields 87 104 void sumUf(); ... ...
nfScalarUf.cu

 ... ... @@ -5,7 +5,7 @@ 5 5 #include "rts/cuda/error.h" 6 6 #include "rts/cuda/timer.h" 7 7 8 - 8 +//Incident field for a single plane wave 9 9 __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) 10 10 { 11 11 /*Compute the scalar focused field using Debye focusing ... ... @@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p 41 41 Uf[i] = exp(d) * A; 42 42 43 43 } 44 - 44 + 45 +//Incident field for a focused point source 45 46 __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) 46 47 { 47 48 /*Compute the scalar focused field using Debye focusing ... ... @@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt 151 152 } 152 153 153 154 sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); 154 - //sumUf += il * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); 155 155 156 156 il *= im; 157 157 } ... ... 
@@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt 162 162 163 163 void nearfieldStruct::scalarUf() 164 164 { 165 - //Compute the incident field via a scalar simulation 166 - //This method uses Debye focusing to approximate the field analytically 167 - 168 - //time the calculation of the focused field 169 - //gpuStartTimer(); 170 - 171 - //set the field slice to a scalar field 172 - //Uf.scalarField = true; 173 - 174 - //initialize the GPU arrays 175 - //Uf.init_gpu(); 165 + 166 + gpuStartTimer(); 176 167 177 168 //create one thread for each pixel of the field slice 178 169 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); 179 - dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); 170 + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); 180 171 181 172 //if we are computing a plane wave, call the gpuScalarUfp function 182 173 if(planeWave) ... ... @@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf() 191 182 ptype cosBeta = cos(asin(condenser[1])); 192 183 //compute the scalar Uf field (this will be in the x_hat channel of Uf) 193 184 gpuScalarUf<<>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); 194 - } 195 - 196 - //float t = gpuStopTimer(); 197 - //std::cout<<"Scalar Uf Time: "<
nfScalarUfLut.cu 0 → 100644