Blame view

stim/cuda/cudatools/threads.h 649 Bytes
5cc0976c   David Mayerich   added separable c...
1
  #include "cuda_runtime.h"
7006df5f   David Mayerich   reformat of direc...
2
  #include "device_launch_parameters.h"
5cc0976c   David Mayerich   added separable c...
3
  #include <stim/cuda/cudatools/callable.h>
7006df5f   David Mayerich   reformat of direc...
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
  
  #ifndef CUDA_THREADS_H
  #define CUDA_THREADS_H
  
  // Largest grid x-dimension (in blocks) that GenGrid1D will produce; when more
  // blocks are needed, GenGrid1D pins x to this value and grows the y-dimension.
  // NOTE(review): 65535 presumably mirrors the CUDA per-dimension grid limit of
  // older devices -- confirm against the targeted compute capabilities.
  #define MAX_GRID        65535
  
  // Returns a flattened, zero-based index for the calling thread.
  //
  // Blocks are numbered row-major over the (x, y) grid dimensions
  // (blockIdx.y * gridDim.x + blockIdx.x) and each block contributes
  // blockDim.x consecutive indices. Only threadIdx.x, blockIdx.x and
  // blockIdx.y take part: threadIdx.y/z and blockIdx.z are ignored.
  //
  // The arithmetic is done in unsigned int, so the result wraps if the launch
  // contains more threads than that type can represent. The launch may also
  // hold more threads than data elements, so callers should compare the result
  // against their element count before using it as an array index.
  //
  // Declared inline because this definition lives in a header: without it,
  // every translation unit that includes the header emits its own copy, which
  // collides when device code is linked (relocatable device code builds).
  inline __device__ unsigned int ThreadIndex1D()
  {
      // linear block number within the 2D grid
      const unsigned int block = blockIdx.y * gridDim.x + blockIdx.x;

      return block * blockDim.x + threadIdx.x;
  }
  
  // Builds grid dimensions that provide at least N threads when each block has
  // `blocksize` threads.
  //
  //   N          number of threads (work items) required
  //   blocksize  threads per block (defaults to 128)
  //
  // Returns a dim3 with z == 1. While the required block count fits in
  // MAX_GRID the grid is one-dimensional (y == 1). Otherwise x is pinned to
  // MAX_GRID and y is the number of MAX_GRID-wide rows needed, so x * y may
  // exceed the required block count; kernels must bounds-check their index.
  //
  // N == 0 yields x == 0 (no blocks). blocksize == 0 is treated the same way
  // instead of dividing by zero.
  // NOTE(review): y is not clamped; with very small block sizes and N close to
  // the unsigned int maximum it can exceed MAX_GRID -- confirm callers never
  // request that combination.
  //
  // Declared inline because this definition lives in a header; a non-inline
  // definition produces duplicate-symbol link errors as soon as two
  // translation units include it.
  inline dim3 GenGrid1D(unsigned int N, unsigned int blocksize = 128)
  {
      dim3 dimgrid;
      dimgrid.x = 0;
      dimgrid.y = 1;
      dimgrid.z = 1;

      // No valid block count exists for empty blocks; report an empty grid.
      if(blocksize == 0)
          return dimgrid;

      // Ceiling division written as quotient + remainder test so that it cannot
      // wrap around the way (N + blocksize - 1) does for N near UINT_MAX.
      const unsigned int blocks = N / blocksize + ((N % blocksize != 0) ? 1u : 0u);

      const unsigned int limit = MAX_GRID;
      if(blocks > limit)
      {
          // Same overflow-free ceiling division for the number of rows.
          dimgrid.y = blocks / limit + ((blocks % limit != 0) ? 1u : 0u);
          dimgrid.x = limit;
      }
      else
      {
          dimgrid.x = blocks;
      }

      return dimgrid;
  }
  
  
  #endif