#ifndef RTS_CUDA_DEVICES
#define RTS_CUDA_DEVICES

// NOTE(review): the header name on the original #include line was lost.
// <cuda_runtime.h> declares every runtime-API symbol used below
// (cudaGetDevice, cudaGetDeviceCount, cudaGetDeviceProperties, cudaDeviceProp);
// confirm against the original file.
#include <cuda_runtime.h>

// NOTE(review): the public functions below are defined (not merely declared)
// in this header without `inline`, so including it from more than one
// translation unit yields duplicate symbols at link time. Linkage is left
// unchanged here so existing callers keep working.

namespace stim{

	/// Fills `props` with the properties of the device that is current for
	/// the calling host thread.
	/// @return true if both runtime queries succeeded; false otherwise, in
	///         which case `props` must not be read.
	static inline bool currentDeviceProps(cudaDeviceProp& props){
		int device = 0;
		if(cudaGetDevice(&device) != cudaSuccess)			//get the id of the current device
			return false;
		return cudaGetDeviceProperties(&props, device) == cudaSuccess;
	}

	/// Returns the maximum number of threads per block supported by the
	/// current device, or 0 if the device could not be queried.
	extern "C"
	int maxThreadsPerBlock(){
		cudaDeviceProp props;						//device property structure
		if(!currentDeviceProps(props)) return 0;
		return props.maxThreadsPerBlock;
	}

	/// Returns the shared memory available per block (in bytes) on the
	/// current device, or 0 if the device could not be queried.
	extern "C"
	size_t sharedMemPerBlock(){
		cudaDeviceProp props;						//device property structure
		if(!currentDeviceProps(props)) return 0;
		return props.sharedMemPerBlock;
	}

	/// Returns the total constant memory (in bytes) on the current device,
	/// or 0 if the device could not be queried.
	extern "C"
	size_t constMem(){
		cudaDeviceProp props;						//device property structure
		if(!currentDeviceProps(props)) return 0;
		return props.totalConstMem;
	}

	/// Tests that a given device ID is valid and provides at least the
	/// specified compute capability.
	/// @param d     device ID to test (valid IDs are 0 .. deviceCount-1)
	/// @param major required compute-capability major version
	/// @param minor required compute-capability minor version
	/// @return true if device `d` exists and its compute capability is
	///         >= major.minor; false otherwise or if any query fails.
	bool testDevice(int d, int major, int minor){
		int nd = 0;
		if(cudaGetDeviceCount(&nd) != cudaSuccess)			//get the number of CUDA devices
			return false;

		if(d < 0 || d >= nd)						//device IDs start at 0
			return false;

		cudaDeviceProp props;
		if(cudaGetDeviceProperties(&props, d) != cudaSuccess)		//get the device properties structure
			return false;

		//compare versions lexicographically: a newer major version satisfies
		//	the requirement regardless of its minor version
		if(props.major != major)
			return props.major > major;
		return props.minor >= minor;
	}

	/// Tests each device ID in a list and returns the number of devices that
	/// fit the desired compute capability.
	/// @param dlist     array of `n_devices` device IDs (may be NULL, in
	///                  which case 0 is returned)
	/// @param n_devices number of entries in `dlist`
	/// @param major     required compute-capability major version
	/// @param minor     required compute-capability minor version
	/// @return number of entries in `dlist` for which testDevice() is true
	int testDevices(int* dlist, unsigned n_devices, int major, int minor){
		if(!dlist) return 0;

		int valid = 0;
		for(unsigned d = 0; d < n_devices; d++){			//unsigned index matches n_devices
			if(testDevice(dlist[d], major, minor))
				valid++;
		}
		return valid;
	}

}	//end namespace stim

#endif