//devices.h
#ifndef RTS_CUDA_DEVICES
#define RTS_CUDA_DEVICES

#include <cuda.h>
#include <cuda_runtime.h>

namespace stim{
	extern "C"
	int maxThreadsPerBlock(){
		int device;
		cudaGetDevice(&device);		//get the id of the current device
		cudaDeviceProp props;		//device property structure
		cudaGetDeviceProperties(&props, device);
		return props.maxThreadsPerBlock;
	}

	extern "C"
	size_t sharedMemPerBlock(){
		int device;
		cudaGetDevice(&device);		//get the id of the current device
		cudaDeviceProp props;		//device property structure
		cudaGetDeviceProperties(&props, device);
		return props.sharedMemPerBlock;
	}

	extern "C"
	size_t constMem(){
		int device;
		cudaGetDevice(&device);		//get the id of the current device
		cudaDeviceProp props;		//device property structure
		cudaGetDeviceProperties(&props, device);
		return props.totalConstMem;
	}

	//tests that a given device ID is valid and provides at least the specified compute capability
	bool testDevice(int d, int major, int minor){
		int nd;
		cudaGetDeviceCount(&nd);		//get the number of CUDA devices
		if(d < nd && d > 0)	{		//if the given ID has an associated device
			cudaDeviceProp props;
			cudaGetDeviceProperties(&props, d);	//get the device properties structure
			if(props.major >= major && props.minor >= minor)
					return true;
		}
		return false;
	}

	//tests each device ID in a list and returns the number of devices that fit the desired
	//	compute capability
	//	dlist		array of device IDs to test; a null pointer is treated as an empty list
	//	n_devices	number of entries in dlist
	//	major/minor	minimum required compute capability, passed through to testDevice
	//returns the number of entries in dlist for which testDevice returns true
	int testDevices(int* dlist, unsigned n_devices, int major, int minor){
		if(!dlist)					//nothing to test
			return 0;
		int valid = 0;
		//index with the same unsigned type as n_devices to avoid a signed/unsigned
		//	comparison and signed overflow for very large counts
		for(unsigned i = 0; i < n_devices; i++){
			if(testDevice(dlist[i], major, minor))
				valid++;
		}
		return valid;
	}
}	//end namespace stim

#endif