Commit 3669ed62a1bc1061f64432f9af857c22daa7a582
1 parent: 4252d827
fixed fused multiply-add
Showing 2 changed files with 5 additions and 3 deletions
Show diff stats
stim/optics/scalarbeam.h
... | ... | @@ -121,12 +121,12 @@ CUDA_CALLABLE void lut_lookup(T* lut_values, T* lut, T val, size_t N, T min_val, |
121 | 121 | |
122 | 122 | template <typename T> |
123 | 123 | CUDA_CALLABLE stim::complex<T> clerp(stim::complex<T> v0, stim::complex<T> v1, T t) { |
124 | - return stim::complex<T>( fma(t, v1.r, fma(-t, v0.r, v0.r)), fma(t, v1.i, fma(-t, v0.i, v0.i)) ); | |
124 | + return stim::complex<T>( fmaf(t, v1.r, fmaf(-t, v0.r, v0.r)), fmaf(t, v1.i, fmaf(-t, v0.i, v0.i)) ); | |
125 | 125 | } |
126 | 126 | |
127 | 127 | template <typename T> |
128 | 128 | CUDA_CALLABLE T lerp(T v0, T v1, T t) { |
129 | - return fma(t, v1, fma(-t, v0, v0)); | |
129 | + return fmaf(t, v1, fmaf(-t, v0, v0)); | |
130 | 130 | } |
131 | 131 | |
132 | 132 | #ifdef CUDA_FOUND | ... | ... |
stim/visualization/colormap.h
... | ... | @@ -253,7 +253,9 @@ static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, u |
253 | 253 | HANDLE_ERROR( cudaMemcpy(&v_min, gpuSource + i_min, sizeof(T), cudaMemcpyDeviceToHost) ); //copy the min and max values from the device to the CPU |
254 | 254 | HANDLE_ERROR( cudaMemcpy(&v_max, gpuSource + i_max, sizeof(T), cudaMemcpyDeviceToHost) ); |
255 | 255 | |
256 | - gpu2image<T>(gpuSource, fileDest, x_size, y_size, v_min, v_max, cm); | |
256 | + | |
257 | + | |
258 | + gpu2image<T>(gpuSource, fileDest, x_size, y_size, min(v_min, v_max), max(v_min, v_max), cm); | |
257 | 259 | } |
258 | 260 | |
259 | 261 | #endif | ... | ... |