Commit 9e5edd36627f01110c78cf4f07135a741c945813

Authored by David Mayerich
2 parents db331d8a 0954c632

Merge branch 'jiabing' into 'master'

Jiabing

See merge request !39
python/network.pyc 0 โ†’ 100644
No preview for this file type
python/network_dpy.py 0 โ†’ 100644
  1 +# -*- coding: utf-8 -*-
  2 +"""
  3 +Created on Sat Jan 19 2018
  4 +
  5 +@author: Jiabing
  6 +"""
  7 +
  8 +import struct
  9 +import numpy as np
  10 +import scipy as sp
  11 +import networkx as nx
  12 +import matplotlib.pyplot as plt
  13 +import math
  14 +
  15 +
  16 +
  17 +alkjasfdljkhfsadlkjhfsad
  18 +class Point:
  19 + def __init__(self, x, y, z, radius):
  20 + self.x = x
  21 + self.y = y
  22 + self.z = z
  23 + self.r = radius
  24 +
  25 +
  26 +class Fiber:
  27 + def __init__(self, fiber_idx,point_idx):
  28 + self.fidx = fiber_idx
  29 + self.pidx = point_idx
  30 +
  31 +
  32 +
  33 +class Node:
  34 + def __init__(self, point_idx, fidx):
  35 + self.pidx= point_idx
  36 + self.fidx = fidx
  37 +
  38 +
  39 +#class NWT:
  40 +
  41 +#class network(point, Fiber, Node):
... ...
stim/iVote/ivote2.cuh
... ... @@ -152,13 +152,17 @@ namespace stim {
152 152  
153 153  
154 154 template<typename T>
155   - void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, bool invert = false, T t = 0, int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) {
  155 + void cpu_ivote2(T* cpuI, unsigned int rmax, size_t x, size_t y, float &gpu_time, bool invert = false, T t = 0, int iter = 8, T phi = 15.0f * (float)stim::PI / 180, int conn = 8, bool debug = false) {
156 156 size_t bytes = x*y * sizeof(T);
157 157 T* gpuI; //allocate space on the gpu to save the input image
  158 +
  159 + gpuTimer_start();
158 160 HANDLE_ERROR(cudaMalloc(&gpuI, bytes));
159 161 HANDLE_ERROR(cudaMemcpy(gpuI, cpuI, bytes, cudaMemcpyHostToDevice)); //copy the image to the gpu
160 162 stim::gpu_ivote2<T>(gpuI, rmax, x, y, invert, t, iter, phi, conn, debug); //call the gpu version of the ivote
161 163 HANDLE_ERROR(cudaMemcpy(cpuI, gpuI, bytes, cudaMemcpyDeviceToHost)); //copy the output to the cpu
  164 +
  165 + gpu_time = gpuTimer_end();
162 166 }
163 167 }
164 168 #endif
165 169 \ No newline at end of file
... ...
stim/iVote/ivote2/local_max.cuh
... ... @@ -10,7 +10,7 @@ namespace stim{
10 10  
11 11 // this kernel calculates the local maximum for finding the cell centers
12 12 template<typename T>
13   - __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, int x, int y){
  13 + __global__ void cuda_local_max(T* gpuCenters, T* gpuVote, int conn, int x, int y){
14 14  
15 15 // calculate the 2D coordinates for this current thread.
16 16 int xi = blockIdx.x * blockDim.x + threadIdx.x;
... ... @@ -20,16 +20,16 @@ namespace stim{
20 20 return;
21 21  
22 22 // convert 2D coordinates to 1D
23   - int i = yi * x + xi;
  23 + int i = yi * x + xi;
24 24  
25 25 gpuCenters[i] = 0; //initialize the value at this location to zero
26 26  
27 27 T val = gpuVote[i];
28 28  
29   - for(int xl = xi - conn; xl < xi + conn; xl++){
30   - for(int yl = yi - conn; yl < yi + conn; yl++){
  29 + for(unsigned int xl = xi - conn; xl < xi + conn; xl++){
  30 + for(unsigned int yl = yi - conn; yl < yi + conn; yl++){
31 31 if(xl >= 0 && xl < x && yl >= 0 && yl < y){
32   - int il = yl * x + xl;
  32 + unsigned int il = yl * x + xl;
33 33 if(gpuVote[il] > val){
34 34 return;
35 35 }
... ... @@ -47,7 +47,7 @@ namespace stim{
47 47 }
48 48  
49 49 template<typename T>
50   - void gpu_local_max(T* gpuCenters, T* gpuVote, unsigned int conn, size_t x, size_t y){
  50 + void gpu_local_max(T* gpuCenters, T* gpuVote, unsigned int conn, unsigned int x, unsigned int y){
51 51  
52 52 unsigned int max_threads = stim::maxThreadsPerBlock();
53 53 /*dim3 threads(max_threads, 1);
... ...
stim/image/image.h
... ... @@ -275,10 +275,10 @@ public:
275 275 int channels = cvImage.channels();
276 276 allocate(cols, rows, channels); //allocate space for the image
277 277 size_t img_bytes = bytes();
278   - unsigned char* cv_ptr = (unsigned char*)cvImage.data;
279   - //if (C() == 1) //if this is a single-color image, just copy the data
280   - // type_copy<T, T>(cv_ptr, img, size());
281   - memcpy(img, cv_ptr, bytes());
  278 + unsigned char* cv_ptr = (unsigned char*)cvImage.data;
  279 + if (C() == 1) //if this is a single-color image, just copy the data
  280 + type_copy<unsigned char, T>(cv_ptr, img, size());
  281 + //memcpy(img, cv_ptr, bytes());
282 282 if(C() == 3) //if this is a 3-color image, OpenCV uses BGR interleaving
283 283 from_opencv(cv_ptr, X(), Y());
284 284 #else
... ...
stim/math/filters/median2.cuh 0 โ†’ 100644
  1 +#ifndef STIM_CUDA_MEDIAN2_H
  2 +#define STIM_CUDA_MEDIAN2_H
  3 +
  4 +#include <iostream>
  5 +#include <cuda.h>
  6 +#include <cmath>
  7 +#include <algorithm>
  8 +
  9 +#ifdef USING_CUDA
  10 +#include "cuda_runtime.h"
  11 +#include "device_launch_parameters.h"
  12 +#include <stim/cuda/cudatools.h>
  13 +#endif
  14 +
  15 +
  16 +//#include <thrust/sort.h>
  17 +//#include <thrust/execution_policy.h>
  18 +//#include <thrust/binary_search.h>
  19 +//#include <thrust/device_ptr.h>
  20 +
  21 +template <typename T>
  22 +__device__ void cuswap ( T& a, T& b ){
  23 + T c(a);
  24 + a=b;
  25 + b=c;
  26 +}
  27 +
  28 +namespace stim{
  29 + namespace cuda{
  30 +
  31 + template<typename T>
  32 + __global__ void cuda_median2(T* in, T* out, T* kernel, size_t X, size_t Y, size_t K){
  33 +
  34 + int xi = blockIdx.x * blockDim.x + threadIdx.x;
  35 + int yi = blockIdx.y * blockDim.y + threadIdx.y;
  36 +
  37 + size_t Xout = X - K + 1;
  38 + size_t Yout = Y - K + 1;
  39 + int i = yi * Xout + xi;
  40 +
  41 + //return if (i,j) is outside the matrix
  42 + if(xi >= Xout || yi >= Yout) return;
  43 +
  44 + //scan the image, copy data from input to 2D window
  45 + size_t kxi, kyi;
  46 + for(kyi = 0; kyi < K; kyi++){
  47 + for(kxi = 0; kxi < K; kxi++){
  48 + kernel[i * K * K + kyi * K + kxi] = in[(yi + kyi)* X + xi + kxi];
  49 + }
  50 + }
  51 +
  52 + //calculate kernel radius 4
  53 + int r = (K*K)/2;
  54 +
  55 + //sort the smallest half pixel values inside the window, calculate the middle one
  56 + size_t Ki = i * K * K;
  57 +
  58 + //sort the smallest half pixel values inside the window, calculate the middle one
  59 + for (int p = 0; p < r+1; p++){
  60 + for(int kk = p + 1; kk < K*K; kk++){
  61 + if (kernel[Ki + kk] < kernel[Ki + p]){
  62 + cuswap<T>(kernel[Ki + kk], kernel[Ki + p]);
  63 + }
  64 + }
  65 + }
  66 +
  67 + //copy the middle pixel value inside the window to output
  68 + out[i] = kernel[Ki + r];
  69 +
  70 + }
  71 +
  72 +
  73 + template<typename T>
  74 + void gpu_median2(T* gpu_in, T* gpu_out, T* gpu_kernel, size_t X, size_t Y, size_t K){
  75 +
  76 + //get the maximum number of threads per block for the CUDA device
  77 + int threads_total = stim::maxThreadsPerBlock();
  78 +
  79 + //set threads in each block
  80 + dim3 threads(sqrt(threads_total), sqrt(threads_total));
  81 +
  82 + //calculate the number of blocks
  83 + dim3 blocks(( (X - K + 1) / threads.x) + 1, ((Y - K + 1) / threads.y) + 1);
  84 +
  85 + //call the kernel to perform median filter function
  86 + cuda_median2 <<< blocks, threads >>>( gpu_in, gpu_out, gpu_kernel, X, Y, K);
  87 +
  88 + }
  89 +
  90 + template<typename T>
  91 + void cpu_median2(T* cpu_in, T* cpu_out, size_t X, size_t Y, size_t K){
  92 +
  93 + #ifndef USING_CUDA
  94 +
  95 + //output image width and height
  96 + size_t X_out = X + K -1;
  97 + size_t Y_out = Y + K -1;
  98 +
  99 + float* window = (float*)malloc(K * k *sizeof(float));
  100 +
  101 + for(int i = 0; i< Y; i++){
  102 + for(int j =0; j< X; j++){
  103 +
  104 + //Pick up window elements
  105 + int k = 0;
  106 + for (int m=0; m< kernel_width; m++){
  107 + for(int n = 0; n < kernel_width; n++){
  108 + window[k] = in_image.at<float>(m+i, n+j);
  109 + k++;
  110 + }
  111 + }
  112 +
  113 + //calculate kernel radius 4
  114 + r_ker = K * K/2;
  115 + //Order elements (only half of them)
  116 + for(int p = 0; p<r_ker+1; p++){
  117 + //Find position of minimum element
  118 + for (int l = p + 1; l < size_kernel; l++){
  119 +
  120 + if(window[l] < window[p]){
  121 + float t = window[p];
  122 + window[p] = window[l];
  123 + window[l] = t; }
  124 + }
  125 + }
  126 +
  127 + //Get result - the middle element
  128 + cpu_out[i * X_out + j] = window[r_ker];
  129 + }
  130 + }
  131 + #else
  132 + //calculate input and out image pixels & calculate kernel size
  133 + size_t N_in = X * Y; //number of pixels in the input image
  134 + size_t N_out = (X - K + 1) * (Y - K + 1); //number of pixels in the output image
  135 + //size_t kernel_size = kernel_width*kernel_width; //total number of pixels in the kernel
  136 +
  137 + //allocate memory on the GPU for the array
  138 + T* gpu_in; //allocate device memory for the input image
  139 + HANDLE_ERROR( cudaMalloc( &gpu_in, N_in * sizeof(T) ) );
  140 + T* gpu_kernel; //allocate device memory for the kernel
  141 + HANDLE_ERROR( cudaMalloc( &gpu_kernel, K * K * N_out * sizeof(T) ) );
  142 + T* gpu_out; //allocate device memory for the output image
  143 + HANDLE_ERROR( cudaMalloc( &gpu_out, N_out * sizeof(T) ) );
  144 +
  145 + //copy the array to the GPU
  146 + HANDLE_ERROR( cudaMemcpy( gpu_in, cpu_in, N_in * sizeof(T), cudaMemcpyHostToDevice) );
  147 +
  148 + //call the GPU version of this function
  149 + gpu_median2<T>(gpu_in, gpu_out, gpu_kernel, X, Y, K);
  150 +
  151 + //copy the array back to the CPU
  152 + HANDLE_ERROR( cudaMemcpy( cpu_out, gpu_out, N_out * sizeof(T), cudaMemcpyDeviceToHost) );
  153 +
  154 + //free allocated memory
  155 + cudaFree(gpu_in);
  156 + cudaFree(gpu_kernel);
  157 + cudaFree(gpu_out);
  158 +
  159 + }
  160 +
  161 + }
  162 + #endif
  163 +}
  164 +
  165 +
  166 +#endif
... ...