// validate-complex.cu (4.94 KB)
#include <complex>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>
#include "rts/complex.h"

#include "compare.h"


// Validation kernel: stores the sum a + b of two rts::complex values in the
// single element that c points to. There is no thread indexing, so every
// launched thread writes that same element; this file launches it as <<<1, 1>>>.
template<typename T>
__global__ void add(rts::complex<T> a, rts::complex<T> b, rts::complex<T>* c)
{
	c[0] = a + b;
}

// Validation kernel: stores the complex-by-complex product a * b in the single
// element that c points to. There is no thread indexing, so every launched
// thread writes that same element; this file launches it as <<<1, 1>>>.
template<typename T>
__global__ void multiply(rts::complex<T> a, rts::complex<T> b, rts::complex<T>* c)
{
	c[0] = a * b;
}

// Validation kernel: stores the product of the complex value a and the scalar b
// (of the underlying type T) in the single element that c points to. There is
// no thread indexing; this file launches it as <<<1, 1>>>.
template<typename T>
__global__ void multiply(rts::complex<T> a, T b, rts::complex<T>* c)
{
	c[0] = a * b;
}

// Validation kernel: stores the quotient a / b of two rts::complex values in
// the single element that c points to. There is no thread indexing; this file
// launches it as <<<1, 1>>>.
template<typename T>
__global__ void divide(rts::complex<T> a, rts::complex<T> b, rts::complex<T>* c)
{
	c[0] = a / b;
}

// Validation kernel: stores rts::log(a) in the single element that c points to.
// There is no thread indexing; this file launches it as <<<1, 1>>>.
template<typename T>
__global__ void log(rts::complex<T> a, rts::complex<T>* c)
{
	c[0] = rts::log(a);
}

// Validation kernel: stores rts::sqrt(a) in the single element that c points to.
// There is no thread indexing; this file launches it as <<<1, 1>>>.
template<typename T>
__global__ void sqrt(rts::complex<T> a, rts::complex<T>* c)
{
	c[0] = rts::sqrt(a);
}

// Validation kernel: stores rts::exp(a) in the single element that c points to.
// There is no thread indexing; this file launches it as <<<1, 1>>>.
template<typename T>
__global__ void exp(rts::complex<T> a, rts::complex<T>* c)
{
	c[0] = rts::exp(a);
}

// Validation kernel: stores rts::pow(a, 2) in the single element that c points
// to. The exponent is fixed at 2 and converted to T before the call. There is
// no thread indexing; this file launches it as <<<1, 1>>>.
template<typename T>
__global__ void pow(rts::complex<T> a, rts::complex<T>* c)
{
	c[0] = rts::pow(a, static_cast<T>(2.0));
}

// Validation kernel: stores rts::sin(a) in the single element that c points to.
// There is no thread indexing; this file launches it as <<<1, 1>>>.
template<typename T>
__global__ void sin(rts::complex<T> a, rts::complex<T>* c)
{
	c[0] = rts::sin(a);
}

// Validation kernel: stores rts::cos(a) in the single element that c points to.
// There is no thread indexing; this file launches it as <<<1, 1>>>.
template<typename T>
__global__ void cos(rts::complex<T> a, rts::complex<T>* c)
{
	c[0] = rts::cos(a);
}

// Copies the single value a validation kernel stored in gpuResult back to the
// host and hands it, together with the std::complex reference, to compare().
//
// cudaGetLastError() is consulted first so that a failed launch is reported
// instead of comparing whatever the buffer held before. The blocking
// cudaMemcpy that follows also returns any error raised while the kernel ran.
//
//   stdResult  reference value computed on the host with std::complex
//   gpuResult  device buffer holding one rts::complex<T>
//   testName   label forwarded to compare() and used in the error message
//
// Returns true when the launch and the copy succeeded, false otherwise (the
// error is written to std::cerr and compare() is not called).
template <typename T>
static bool gpuFetchAndCompare(std::complex<T> stdResult, rts::complex<T>* gpuResult, const std::string& testName)
{
	rts::complex<T> rtsResult;

	cudaError_t status = cudaGetLastError();
	if(status == cudaSuccess)
		status = cudaMemcpy(&rtsResult, gpuResult, sizeof(rts::complex<T>), cudaMemcpyDeviceToHost);

	if(status != cudaSuccess)
	{
		std::cerr<<"CUDA error in "<<testName<<": "<<cudaGetErrorString(status)<<std::endl;
		return false;
	}

	compare(stdResult, rtsResult, testName);
	return true;
}

// Validates the rts::complex operators and math functions on the GPU against
// std::complex on the host, for the floating-point type T.
//
// For each of N iterations two random complex numbers with components in
// [-1, 1] are generated; every operation is evaluated once with std::complex
// on the host and once with rts::complex in a <<<1, 1>>> kernel, and the two
// results are passed to compare() under a label that includes the bit width
// of T.
//
// Every CUDA call is checked. On the first CUDA error the problem is reported
// on std::cerr, the remaining tests are skipped and the device buffer is freed.
template <typename T>
void gpuValidateOperators()
{
	//label suffix identifying the precision under test, e.g. " (32-bit)"
	int precision = sizeof(T) * 8;
	std::stringstream ss;
	ss<<" ("<<precision<<"-bit)";
	std::string bitString = ss.str();

	//one device-side element that every kernel writes its result into
	rts::complex<T>* gpuResult = 0;
	cudaError_t status = cudaMalloc((void**)&gpuResult, sizeof(rts::complex<T>));
	if(status != cudaSuccess)
	{
		std::cerr<<"CUDA error allocating the result buffer"<<bitString<<": "<<cudaGetErrorString(status)<<std::endl;
		return;
	}

	//validate complex binary functions
	T x0, x1, y0, y1;
	for(int i = 0; i<N; i++)
	{
		//generate a random complex number
		x0 = (double)rand()/(double)RAND_MAX * 2 - 1;
		y0 = (double)rand()/(double)RAND_MAX * 2 - 1;
		x1 = (double)rand()/(double)RAND_MAX * 2 - 1;
		y1 = (double)rand()/(double)RAND_MAX * 2 - 1;

		//create an STD and RTS instance of the complex class
		std::complex<T> stdComplex0(x0, y0);
		rts::complex<T> rtsComplex0(x0, y0);

		std::complex<T> stdComplex1(x1, y1);
		rts::complex<T> rtsComplex1(x1, y1);

		std::complex<T> stdResult;

		//test addition
		stdResult = stdComplex0 + stdComplex1;
		add<<<1, 1>>>(rtsComplex0, rtsComplex1, gpuResult);
		if(!gpuFetchAndCompare<T>(stdResult, gpuResult, std::string("Binary Addition") + bitString))
			break;

		//test multiplication
		stdResult = stdComplex0 * stdComplex1;
		multiply<<<1, 1>>>(rtsComplex0, rtsComplex1, gpuResult);
		if(!gpuFetchAndCompare<T>(stdResult, gpuResult, std::string("Binary Multiplication") + bitString))
			break;

		//test multiplication with constant
		stdResult = stdComplex0 * stdComplex1.real();
		multiply<<<1, 1>>>(rtsComplex0, rtsComplex1.r, gpuResult);
		if(!gpuFetchAndCompare<T>(stdResult, gpuResult, std::string("Multiplication with Real Value") + bitString))
			break;

		//test division
		stdResult = stdComplex0 / stdComplex1;
		divide<<<1, 1>>>(rtsComplex0, rtsComplex1, gpuResult);
		if(!gpuFetchAndCompare<T>(stdResult, gpuResult, std::string("Binary Division") + bitString))
			break;

		//test log()
		stdResult = log(stdComplex0);
		log<<<1, 1>>>(rtsComplex0, gpuResult);
		if(!gpuFetchAndCompare<T>(stdResult, gpuResult, std::string("log()") + bitString))
			break;

		//test exp()
		stdResult = exp(stdComplex0);
		exp<<<1, 1>>>(rtsComplex0, gpuResult);
		if(!gpuFetchAndCompare<T>(stdResult, gpuResult, std::string("exp()") + bitString))
			break;

		//test pow()
		stdResult = pow(stdComplex0, 2);
		pow<<<1, 1>>>(rtsComplex0, gpuResult);
		if(!gpuFetchAndCompare<T>(stdResult, gpuResult, std::string("pow()") + bitString))
			break;

		//test sqrt()
		stdResult = sqrt(stdComplex0);
		sqrt<<<1, 1>>>(rtsComplex0, gpuResult);
		if(!gpuFetchAndCompare<T>(stdResult, gpuResult, std::string("sqrt()") + bitString))
			break;

		//trigonometric functions: sin()
		stdResult = sin(stdComplex0);
		sin<<<1, 1>>>(rtsComplex0, gpuResult);
		if(!gpuFetchAndCompare<T>(stdResult, gpuResult, std::string("sin()") + bitString))
			break;

		//trigonometric functions: cos()
		stdResult = cos(stdComplex0);
		cos<<<1, 1>>>(rtsComplex0, gpuResult);
		if(!gpuFetchAndCompare<T>(stdResult, gpuResult, std::string("cos()") + bitString))
			break;
	}

	status = cudaFree(gpuResult);
	if(status != cudaSuccess)
		std::cerr<<"CUDA error freeing the result buffer"<<bitString<<": "<<cudaGetErrorString(status)<<std::endl;
}

// Runs the GPU validation of rts::complex against std::complex.
// Only the single-precision (float) instantiation is executed; the
// double-precision run is left commented out.
void gpuValidateComplex()
{
	//gpuValidateOperators<double>();
	gpuValidateOperators<float>();
}