/***************************************************************************
 *cr
 *cr            (C) Copyright 1995-2019 The Board of Trustees of the
 *cr                        University of Illinois
 *cr                         All Rights Reserved
 *cr
 ***************************************************************************/

/***************************************************************************
 * RCS INFORMATION:
 *
 *      $RCSfile: GaussianBlur.C,v $
 *      $Author: johns $        $Locker:  $             $State: Exp $
 *      $Revision: 1.27 $      $Date: 2020/10/29 03:16:18 $
 *
 ***************************************************************************
 * DESCRIPTION:
 *   Perform Gaussian blur filtering on 3-D volumes.  Primarily for use in
 *   scale-space filtering for 3-D image segmentation of Cryo-EM density maps.
 ***************************************************************************/

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <climits>
#include "GaussianBlur.h"
#include "CUDAGaussianBlur.h"
#include "ProfileHooks.h"

#define PI_F (3.14159265359f)

template <typename IMAGE_T>
GaussianBlur<IMAGE_T>::GaussianBlur(IMAGE_T* img, int w, int h, int d, bool use_cuda) {
  host_image_needs_update = 0;
  height = h;
  width = w;
  depth = d;
  image_d = NULL;
  image = NULL;
  scratch = NULL;
  heightWidth = long(height) * long(width);
  nVoxels = heightWidth * long(depth);
  cuda = use_cuda;
#ifndef VMDCUDA
  cuda = false;
#endif

  image = new IMAGE_T[nVoxels];
#ifdef VMDCUDA
  if (cuda) {
    image_d = (IMAGE_T*) alloc_cuda_array(nVoxels * sizeof(IMAGE_T));
    scratch_d = (IMAGE_T*) alloc_cuda_array(nVoxels * sizeof(IMAGE_T));
    if (image_d == NULL || scratch_d == NULL) {
      cuda = false;
      free_cuda_array(image_d);
      free_cuda_array(scratch_d);
    } else {
      copy_array_to_gpu(image_d, img, nVoxels * sizeof(IMAGE_T));
      copy_array_to_gpu(scratch_d, img, nVoxels * sizeof(IMAGE_T));
    }
  }
  if (!cuda)
#endif
  {
    scratch = new IMAGE_T[nVoxels];
    memcpy(image, img, nVoxels * sizeof(IMAGE_T));
  }
}


template <typename IMAGE_T>
GaussianBlur<IMAGE_T>::~GaussianBlur() {
  delete [] image;
#ifdef VMDCUDA
  if (cuda) {
    free_cuda_array(image_d);
    free_cuda_array(scratch_d);
  } else
#endif
  delete [] scratch;
}


template <typename IMAGE_T>
IMAGE_T* GaussianBlur<IMAGE_T>::get_image() {
#ifdef VMDCUDA
  if (host_image_needs_update) {
    copy_array_from_gpu(image, image_d, nVoxels * sizeof(IMAGE_T));
    host_image_needs_update = 0;
  }
#endif

  return image;
}


template <typename IMAGE_T>
IMAGE_T* GaussianBlur<IMAGE_T>::get_image_d() {
  return image_d;
}
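
// Kernel sizing note: getKernelSizeForSigma() below picks a kernel width of
// roughly 6*sigma, i.e. about +/-3 sigma to either side of the center tap,
// which covers ~99.7% of the Gaussian's mass; the width is then forced to be
// odd so the kernel is symmetric about a single center sample.  For example,
// sigma = 1.5f gives ceilf(9.0f) = 9, already odd, so a 9-tap kernel is used.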

// This is an arbitrary "max" size for the CPU, but it is the hard maximum
// for the GPU.  Additionally, we probably never want to run a blur with a
// kernel_size this large anyway, as it is likely larger than the image.
// I think it makes sense to cap sigma to a certain large value.  That would
// have to be done before this function is called, though.
#define MAX_KERNEL_SIZE 4096

template <typename IMAGE_T>
int GaussianBlur<IMAGE_T>::getKernelSizeForSigma(float sigma) {
  int size = int(ceilf(sigma * 6));
  if (size % 2 == 0) {
    ++size;
  }
  if (size < 3) {
    size = 3;
  }
  if (size >= MAX_KERNEL_SIZE) {
    size = MAX_KERNEL_SIZE;
  }
  return size;
}


template <typename IMAGE_T>
void GaussianBlur<IMAGE_T>::fillGaussianBlurKernel3D(float sigma, int size, float* kernel) {
  float sigma2 = sigma * sigma;
  int middle = size / 2;
  for (int z = 0; z < size; z++) {
    for (int y = 0; y < size; y++) {
      for (int x = 0; x < size; x++) {
        const int idx = z * size * size + y * size + x;
        float x_dist = float(middle - x);
        float y_dist = float(middle - y);
        float z_dist = float(middle - z);
        float distance2 = x_dist * x_dist + y_dist * y_dist + z_dist * z_dist;
        float s = 1.0f / (sigma * sqrtf(2.0f * PI_F)) * expf(-distance2 / (2.0f * sigma2));
        kernel[idx] = s;
      }
    }
  }
}


template <typename IMAGE_T>
void GaussianBlur<IMAGE_T>::fillGaussianBlurKernel1D(float sigma, int size, float* kernel) {
  float sigma2 = sigma * sigma;
  int middle = size / 2;
  for (int i = 0; i < size; ++i) {
    float distance = float(middle - i);
    float distance2 = distance * distance;
    float s = 1.0f / (sigma * sqrtf(2.0f * PI_F)) * expf(-distance2 / (2.0f * sigma2));
    kernel[i] = s;
  }
}


#define SWAPT(a, b) {\
  IMAGE_T* t = a;\
  a = b;\
  b = t;\
}
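
// Separability note: a 3-D Gaussian factors into the product of three 1-D
// Gaussians,
//   G(x,y,z) = g(x) * g(y) * g(z),  g(t) = expf(-t^2 / (2*sigma^2)) / (sigma * sqrtf(2*PI)),
// so both blur_cuda() and blur_cpu() below apply three 1-D convolution passes
// (along x, then y, then z) instead of a single full 3-D convolution,
// reducing the per-voxel cost from O(size^3) kernel taps to O(3*size).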

template <typename IMAGE_T>
void GaussianBlur<IMAGE_T>::blur(float sigma) {
#ifdef VMDCUDA
  if (cuda) {
    blur_cuda(sigma);
  } else
#endif
  blur_cpu(sigma);
}


#if defined(VMDCUDA)
template <typename IMAGE_T>
void GaussianBlur<IMAGE_T>::blur_cuda(float sigma) {
  PROFILE_PUSH_RANGE("GaussianBlur<IMAGE_T>::blur_cuda()", 7);

  if (true) { // TODO: temp hack to test 3D gaussian kernel
    int size = getKernelSizeForSigma(sigma);
    float *kernel = new float[size];
    fillGaussianBlurKernel1D(sigma, size, kernel);
    set_gaussian_1D_kernel_cuda(kernel, size);

    gaussian1D_x_cuda(image_d, scratch_d, size, width, height, depth);
    SWAPT(image_d, scratch_d);

    gaussian1D_y_cuda(image_d, scratch_d, size, width, height, depth);
    SWAPT(image_d, scratch_d);

    gaussian1D_z_cuda(image_d, scratch_d, size, width, height, depth);
    SWAPT(image_d, scratch_d);

    // don't copy back to the host until we are absolutely forced to...
    host_image_needs_update = 1;

    delete [] kernel;
  } else {
    // unreached while the temporary hack above is in place; builds the
    // 3-D kernel but does not yet apply it
    int size = getKernelSizeForSigma(sigma);
    float *kernel = new float[size * size * size];
    fillGaussianBlurKernel3D(sigma, size, kernel);
    delete [] kernel;
  }

  PROFILE_POP_RANGE();
}
#endif

#if 1
template <typename T>
static inline void assign_float_to_dest(float val, T* dest) {
  // truncation is not great for quantized integer representations,
  // but it is at least portable...
  dest[0] = T(val);
}
#else
template <typename T>
static inline void assign_float_to_dest(float val, T* dest) {
  dest[0] = roundf(val);
}

// this case causes problems with older compilers
template <> inline void assign_float_to_dest<float>(float val, float* dest) {
  dest[0] = val;
}
#endif
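
// Boundary handling note: in each 1-D pass below, kernel taps that would
// fall outside the volume are clamped to the nearest in-bounds voxel
// (border replication).  That is what the first and third inner loops do:
// they accumulate the edge voxel's value with the kernel weights that hang
// off the edge, while the middle loop handles the fully in-bounds taps.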

template <typename IMAGE_T>
void GaussianBlur<IMAGE_T>::blur_cpu(float sigma) {
  int size = getKernelSizeForSigma(sigma);
  float *kernel = new float[size];
  fillGaussianBlurKernel1D(sigma, size, kernel);
  const int offset = size >> 1;
  SWAPT(scratch, image);

  /* X kernel */
#ifdef USE_OMP
#pragma omp parallel for schedule(static)
#endif
  for (int z = 0; z < depth; ++z) {
    for (int y = 0; y < height; ++y) {
      for (int x = 0; x < width; ++x) {
        const long idx = z*heightWidth + y*long(width) + x;
        const int new_offset_neg = x - offset >= 0 ? -offset : -x;
        const int new_offset_pos = x + offset < width ? offset : width - x - 1;
        float value = 0.0f;
        int i;
        for (i = -offset; i < new_offset_neg; ++i) {
          value += scratch[idx + new_offset_neg] * kernel[i+offset];
        }

        for (; i <= new_offset_pos; ++i) {
          value += scratch[idx + i] * kernel[i+offset];
        }

        for (; i <= offset; ++i) {
          value += scratch[idx + new_offset_pos] * kernel[i+offset];
        }

        assign_float_to_dest(value, &image[idx]);
      }
    }
  }
  SWAPT(scratch, image);

  /* Y kernel */
#ifdef USE_OMP
#pragma omp parallel for schedule(static)
#endif
  for (int z = 0; z < depth; ++z) {
    for (int y = 0; y < height; ++y) {
      const int new_offset_neg = y - offset >= 0 ? -offset : -y;
      const int new_offset_pos = y + offset < height ? offset : height - y - 1;
      for (int x = 0; x < width; ++x) {
        const long idx = z*heightWidth + y*long(width) + x;
        float value = 0.0f;
        int i;

        for (i = -offset; i < new_offset_neg; ++i) {
          value += scratch[idx + new_offset_neg*width] * kernel[i+offset];
        }

        for (; i <= new_offset_pos; ++i) {
          value += scratch[idx + i*width] * kernel[i+offset];
        }

        for (; i <= offset; ++i) {
          value += scratch[idx + new_offset_pos*width] * kernel[i+offset];
        }

        assign_float_to_dest(value, &image[idx]);
      }
    }
  }
  SWAPT(scratch, image);

  /* Z kernel */
#ifdef USE_OMP
#pragma omp parallel for schedule(static)
#endif
  for (int z = 0; z < depth; ++z) {
    const int new_offset_neg = z - offset >= 0 ? -offset : -z;
    const int new_offset_pos = z + offset < depth ? offset : depth - z - 1;
    for (int y = 0; y < height; ++y) {
      for (int x = 0; x < width; ++x) {
        const long idx = z*heightWidth + y*long(width) + x;
        float value = 0.0f;
        int i;

        for (i = -offset; i < new_offset_neg; ++i) {
          value += scratch[idx + new_offset_neg*heightWidth] * kernel[i+offset];
        }

        for (; i <= new_offset_pos; ++i) {
          value += scratch[idx + i*heightWidth] * kernel[i+offset];
        }

        for (; i <= offset; ++i) {
          value += scratch[idx + new_offset_pos*heightWidth] * kernel[i+offset];
        }

        assign_float_to_dest(value, &image[idx]);
      }
    }
  }

  delete [] kernel;
}

template class GaussianBlur<float>;
template class GaussianBlur<unsigned short>;
template class GaussianBlur<unsigned char>;
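
// Minimal usage sketch (illustrative only; the volume pointer, dimensions,
// and sigma below are placeholders, not values taken from this file):
//
//   float *voxels = ...;                           // w*h*d density values
//   GaussianBlur<float> gb(voxels, w, h, d, true); // request CUDA, falls back to CPU
//   gb.blur(2.0f);                                 // one blur pass with sigma = 2.0
//   float *smoothed = gb.get_image();              // copied back from the GPU on demand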