fixed fused multiply-add

David Mayerich
1 parent 4252d827
Showing 2 changed files with 5 additions and 3 deletions Show diff stats
stim/optics/scalarbeam.h
stim/visualization/colormap.h
@@ -121,12 +121,12 @@ CUDA_CALLABLE void lut_lookup(T* lut_values, T* lut, T val, size_t N, T min_val,
  
 // Linear interpolation between two complex values, applied independently to
 // the .r and .i members (presumably the real and imaginary parts - confirm
 // against stim::complex). Each component is evaluated as
 //     t * v1 + (v0 - t * v0)
 // using two nested fused multiply-adds.
 //   v0, v1 : endpoint values
 //   t      : interpolation parameter
 // NOTE(review): the added line calls fmaf, the single-precision variant, where
 // the removed line called fma. Inside a template on T this would narrow the
 // computation to float if T is ever double - confirm which instantiations exist.
 template <typename T>
 CUDA_CALLABLE stim::complex<T> clerp(stim::complex<T> v0, stim::complex<T> v1, T t) {
-    return stim::complex<T>( fma(t, v1.r, fma(-t, v0.r, v0.r)), fma(t, v1.i, fma(-t, v0.i, v0.i)) );
+    return stim::complex<T>( fmaf(t, v1.r, fmaf(-t, v0.r, v0.r)), fmaf(t, v1.i, fmaf(-t, v0.i, v0.i)) );
 }
  
 // Linear interpolation between two scalars, evaluated as
 //     t * v1 + (v0 - t * v0)
 // using two nested fused multiply-adds.
 //   v0, v1 : endpoint values
 //   t      : interpolation parameter
 // NOTE(review): the added line calls fmaf, the single-precision variant, where
 // the removed line called fma. Inside a template on T this would narrow the
 // computation to float if T is ever double - confirm which instantiations exist.
 template <typename T>
 CUDA_CALLABLE T lerp(T v0, T v1, T t) {
-    return fma(t, v1, fma(-t, v0, v0));
+    return fmaf(t, v1, fmaf(-t, v0, v0));
 }
  
 #ifdef CUDA_FOUND
@@ -253,7 +253,9 @@ static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, u
 	HANDLE_ERROR( cudaMemcpy(&v_min, gpuSource + i_min, sizeof(T), cudaMemcpyDeviceToHost) );		//copy the min and max values from the device to the CPU
 	HANDLE_ERROR( cudaMemcpy(&v_max, gpuSource + i_max, sizeof(T), cudaMemcpyDeviceToHost) );
  
-	gpu2image<T>(gpuSource, fileDest, x_size, y_size, v_min, v_max, cm);
+
+
+	gpu2image<T>(gpuSource, fileDest, x_size, y_size, min(v_min, v_max), max(v_min, v_max), cm);
 }
  
 #endif