threads.h
649 Bytes
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stim/cuda/cudatools/callable.h>
#ifndef CUDA_THREADS_H
#define CUDA_THREADS_H
#define MAX_GRID 65535
__device__ unsigned int ThreadIndex1D()
{
return blockIdx.y * gridDim.x * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
}
dim3 GenGrid1D(unsigned int N, unsigned int blocksize = 128)
{
dim3 dimgrid;
dimgrid.x = (N + blocksize - 1)/blocksize;
dimgrid.y = 1;
dimgrid.z = 1;
if(dimgrid.x > MAX_GRID)
{
dimgrid.y = (dimgrid.x + MAX_GRID - 1) / MAX_GRID;
dimgrid.x = MAX_GRID;
}
return dimgrid;
}
#endif