Commit 3669ed62a1bc1061f64432f9af857c22daa7a582

Authored by David Mayerich
1 parent 4252d827

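Fixed fused multiply-add: clerp and lerp in stim/optics/scalarbeam.h now call fmaf instead of fma, and the gpu2image call in stim/visualization/colormap.h now passes min(v_min, v_max) and max(v_min, v_max) in place of v_min and v_max.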

Showing 2 changed files with 5 additions and 3 deletions   Show diff stats
stim/optics/scalarbeam.h
... ... @@ -121,12 +121,12 @@ CUDA_CALLABLE void lut_lookup(T* lut_values, T* lut, T val, size_t N, T min_val,
121 121  
122 122 template <typename T>
123 123 CUDA_CALLABLE stim::complex<T> clerp(stim::complex<T> v0, stim::complex<T> v1, T t) {
124   - return stim::complex<T>( fma(t, v1.r, fma(-t, v0.r, v0.r)), fma(t, v1.i, fma(-t, v0.i, v0.i)) );
  124 + return stim::complex<T>( fmaf(t, v1.r, fmaf(-t, v0.r, v0.r)), fmaf(t, v1.i, fmaf(-t, v0.i, v0.i)) );
125 125 }
126 126  
127 127 template <typename T>
128 128 CUDA_CALLABLE T lerp(T v0, T v1, T t) {
129   - return fma(t, v1, fma(-t, v0, v0));
  129 + return fmaf(t, v1, fmaf(-t, v0, v0));
130 130 }
131 131  
132 132 #ifdef CUDA_FOUND
... ...
stim/visualization/colormap.h
... ... @@ -253,7 +253,9 @@ static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, u
253 253 HANDLE_ERROR( cudaMemcpy(&v_min, gpuSource + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU
254 254 HANDLE_ERROR( cudaMemcpy(&v_max, gpuSource + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
255 255  
256   - gpu2image<T>(gpuSource, fileDest, x_size, y_size, v_min, v_max, cm);
  256 +
  257 +
  258 + gpu2image<T>(gpuSource, fileDest, x_size, y_size, min(v_min, v_max), max(v_min, v_max), cm);
257 259 }
258 260  
259 261 #endif
... ...