/***************************************************************************
 *cr
 *cr            (C) Copyright 1995-2019 The Board of Trustees of the
 *cr                        University of Illinois
 *cr                         All Rights Reserved
 *cr
 ***************************************************************************/

/***************************************************************************
 * RCS INFORMATION:
 *
 *      $RCSfile: OptiXShaders.h,v $
 *      $Author: johns $        $Locker:  $             $State: Exp $
 *      $Revision: 1.41 $       $Date: 2020/02/26 03:51:31 $
 *
 ***************************************************************************/
#ifndef OPTIXSHADERS
#define OPTIXSHADERS
#include <optixu/optixu_vector_types.h>

// Compile-time flag for collection and reporting of ray statistics
// (disabled: change the "#if 0" to enable it)
#if 0
#define ORT_RAYSTATS 1
#endif

// Compile-time flag to enable the use of RTX hardware ray tracing
// acceleration APIs in OptiX
#if OPTIX_VERSION >= 60000
#define ORT_USERTXAPIS 1
#endif

// When compiling with OptiX 3.8 or greater, we use the new
// progressive rendering APIs rather than our previous hand-coded
// progressive renderer.
#if (defined(VMDOPTIX_VCA) || (OPTIX_VERSION >= 3080)) // && !defined(VMDUSEOPENHMD)
#define VMDOPTIX_PROGRESSIVEAPI 1
#endif

// The leading "1 ||" makes this condition unconditionally true, so
// VMDOPTIX_LIGHTUSEROBJS is always defined regardless of the
// progressive API setting.
#if 1 || defined(VMDOPTIX_PROGRESSIVEAPI)
#define VMDOPTIX_LIGHTUSEROBJS 1
#endif

#if defined(VMDOPTIX_LIGHTUSEROBJS)
#include "Scene.h" // for DISP_LIGHTS macro
#endif

//
// "*" operators
//

// Scale the x/y/z components of a signed byte 4-vector by a float,
// producing a float3.  The w component of the input is not used.
inline __host__ __device__ float3 operator*(char4 a, float b) {
  const float sx = b * a.x;
  const float sy = b * a.y;
  const float sz = b * a.z;
  return make_float3(sx, sy, sz);
}

// Scale the x/y/z components of an unsigned byte 4-vector by a float,
// producing a float3.  The w component of the input is not used.
inline __host__ __device__ float3 operator*(uchar4 a, float b) {
  const float sx = b * a.x;
  const float sy = b * a.y;
  const float sz = b * a.z;
  return make_float3(sx, sy, sz);
}

#if defined(__cplusplus)
typedef optix::float3 float3;
#endif


// XXX OptiX 4.0 and later versions have a significant performance impact
//     on VMD startup if we use 256-way combinatorial shader specialization.
//     Shader template specialization had very little impact on
//     OptiX versions 3.[789].x previously.  The new LLVM based compiler
//     back-end used in recent versions of OptiX has much more overhead
//     when processing large numbers of shaders in single PTX files.
//     If we want to retain the template specialization approach,
//     we will have to generate shader code and store it in many separate
//     PTX files to mitigate overheads in back-end compiler infrastructure.
#if OPTIX_VERSION < 40000
// this macro enables or disables the use of an array of
// template-specialized shaders for every combination of
// scene-wide and material-specific shader features.
#define ORT_USE_TEMPLATE_SHADERS 1
#endif


// Enable reversed traversal of any-hit rays for shadows/AO.
// This optimization yields a 20% performance gain in many cases.
// #define USE_REVERSE_SHADOW_RAYS 1

// Use reverse rays by default rather than only when enabled interactively
// #define USE_REVERSE_SHADOW_RAYS_DEFAULT 1

// Shadow mode selector; the numeric values are assigned explicitly.
// NOTE(review): RT_SHADOWS_ON_REVERSE presumably selects the reversed
// shadow ray traversal controlled by USE_REVERSE_SHADOW_RAYS -- confirm
// against the renderer code that consumes this enum.
enum RtShadowMode {
  RT_SHADOWS_OFF        = 0,
  RT_SHADOWS_ON         = 1,
  RT_SHADOWS_ON_REVERSE = 2
};


//
// Lighting data structures
//
#if defined(VMDOPTIX_LIGHTUSEROBJS)
// Light count followed by a fixed array of DISP_LIGHTS+1 directions
typedef struct {
  int num_lights;
  float3 dirs[DISP_LIGHTS+1];
} DirectionalLightList;

// Light count followed by a fixed array of DISP_LIGHTS+1 positions
typedef struct {
  int num_lights;
  float3 posns[DISP_LIGHTS+1];
} PositionalLightList;
#endif

// Single directional light record
typedef struct {
  float3 dir;
  int padding; // pad to next power of two
} DirectionalLight;

// Single positional light record
typedef struct {
  float3 pos;
  int padding; // pad to next power of two
} PositionalLight;


//
// Cylinders
//

// Cylinder without per-primitive color; ends in an explicit pad float
// XXX memory layout is likely suboptimal
typedef struct {
  float3 start;
  float radius;
  float3 axis;
  float pad;
} vmd_cylinder;

// Cylinder carrying its own color
// XXX memory layout is likely suboptimal
typedef struct {
  float3 start;
  float radius;
  float3 axis;
  float3 color;
} vmd_cylinder_color;


//
// Rings (annular or otherwise)
//

// Ring with inner/outer radii and its own color
// XXX memory layout is likely suboptimal, but is a multiple of float4
typedef struct {
  float3 center;
  float3 norm;
  float inrad, outrad;
  float3 color;
  float pad;
} vmd_ring_color;


//
// Spheres
//

// Sphere without per-primitive color
typedef struct {
  float3 center;
  float radius;
} vmd_sphere;

// Sphere carrying its own color; ends in an explicit pad float
// XXX memory layout is likely suboptimal
typedef struct {
  float3 center;
  float radius;
  float3 color;
  float pad;
} vmd_sphere_color;


//
// Triangle meshes of various kinds
//

// Triangle with float3 vertices, normals, and colors
// XXX memory layout is definitely suboptimal
typedef struct {
  float3 v0, v1, v2;   // vertices
  float3 n0, n1, n2;   // per-vertex normals
  float3 c0, c1, c2;   // per-vertex colors
} vmd_tricolor;

// Triangle with uchar4 colors, char4 normals, and float3 vertices
typedef struct {
  uchar4 c0, c1, c2;   // per-vertex colors
  char4 n0, n1, n2;    // per-vertex normals
  float3 v0, v1, v2;   // vertices
} vmd_trimesh_c4u_n3b_v3f;

// Triangle with float3 normals and float3 vertices
typedef struct {
  float3 n0, n1, n2;   // per-vertex normals
  float3 v0, v1, v2;   // vertices
} vmd_trimesh_n3f_v3f;

// Triangle with char4 normals and float3 vertices
typedef struct {
  char4 n0, n1, n2;    // per-vertex normals
  float3 v0, v1, v2;   // vertices
} vmd_trimesh_n3b_v3f;

// Triangle with float3 vertices only
typedef struct {
  float3 v0, v1, v2;   // vertices
} vmd_trimesh_v3f;



//
// Methods for packing normals into a 4-byte quantity, such as a
// [u]int or [u]char4, and similar.  See JCGT article by Cigolle et al.,
// "A Survey of Efficient Representations for Independent Unit Vectors",
// J. Computer Graphics Techniques 3(2), 2014.
// http://jcgt.org/published/0003/02/01/
//
#if defined(ORT_USERTXAPIS)
#include <optixu/optixu_math_namespace.h> // for make_xxx() fctns

#if 1

//
// oct32: 32-bit octahedral normal encoding using [su]norm16x2 quantization
// Meyer et al., "On Floating Point Normal Vectors", In Proc. 21st
// Eurographics Conference on Rendering.
//   http://dx.doi.org/10.1111/j.1467-8659.2010.01737.x
// Others:
//   https://twitter.com/Stubbesaurus/status/937994790553227264
//   https://knarkowicz.wordpress.com/2014/04/16/octahedron-normal-vector-encoding
//

// Convert a 2-D octahedral coordinate pair (each component in [-1,1])
// back into a 3-D direction.  The returned vector is NOT renormalized
// here; callers that need unit length must normalize it themselves.
static __host__ __device__ __inline__ float3 OctDecode(float2 projected) {
  float3 n = make_float3(projected.x,
                         projected.y,
                         1.0f - (fabsf(projected.x) + fabsf(projected.y)));
  if (n.z < 0.0f) {
    // negative z: undo the fold that OctEncode() applied to this half.
    // n.x must be saved first since the n.y update needs its old value.
    float oldX = n.x;
    n.x = copysignf(1.0f - fabsf(n.y), oldX);
    n.y = copysignf(1.0f - fabsf(oldX), n.y);
  }

  return n;
}

//
// XXX TODO: implement a high-precision OctPEncode() variant, based on
//           floored snorms and an error minimization scheme using a
//           comparison of internally decoded values for least error
//

// Project a 3-D direction onto 2-D octahedral coordinates in [-1,1].
// The input is scaled by the reciprocal of its L1 norm, so it does not
// have to be unit length, but it must not be the zero vector (the
// reciprocal would be infinite).
static __host__ __device__ __inline__ float2 OctEncode(float3 n) {
  const float invL1Norm = 1.0f / (fabsf(n.x) + fabsf(n.y) + fabsf(n.z));
  float2 projected;
  if (n.z < 0.0f) {
    // negative z: swap/mirror the magnitudes and restore the x/y signs
    projected = 1.0f - make_float2(fabsf(n.y), fabsf(n.x)) * invL1Norm;
    projected.x = copysignf(projected.x, n.x);
    projected.y = copysignf(projected.y, n.y);
  } else {
    projected = make_float2(n.x, n.y) * invL1Norm;
  }

  return projected;
}


// Quantize two floats in [-1,1] into a pair of 16-bit unsigned
// normalized integers, x in the low half-word and y in the high one.
// Uses the standard unorm conversion i = round(clamp(r,0,1) * (2^16 - 1)):
//  - the clamp keeps slightly out-of-range input from spilling into the
//    neighboring 16-bit field, and avoids converting a negative float
//    to an unsigned integer; fmaxf() also maps a NaN component to 0.
//  - rounding to nearest halves the worst-case quantization error and
//    removes the bias toward -1 that plain truncation introduces.
static __host__ __device__ __inline__ uint convfloat2uint32(float2 f2) {
  // remap [-1,1] -> [0,1] and clamp
  const float ux = fminf(fmaxf(f2.x * 0.5f + 0.5f, 0.0f), 1.0f);
  const float uy = fminf(fmaxf(f2.y * 0.5f + 0.5f, 0.0f), 1.0f);

  // largest intermediate is 65535.5f, which still truncates to 65535
  const uint qx = (uint) (ux * 65535.0f + 0.5f);
  const uint qy = (uint) (uy * 65535.0f + 0.5f);

  return qx | (qy << 16);
}

// Inverse of convfloat2uint32(): expand the low and high 16-bit fields
// into floats in [0,1], then remap the pair to [-1,1].
static __host__ __device__ __inline__ float2 convuint32float2(uint packed) {
  float2 f2;
  f2.x = (float)((packed      ) & 0x0000ffff) / 65535;
  f2.y = (float)((packed >> 16) & 0x0000ffff) / 65535;
  return f2 * 2.0f - 1.0f;
}


// Pack a normal into 32 bits: octahedral projection + 16x2 quantization
static __host__ __device__ __inline__ uint packNormal(const float3& normal) {
  float2 octf2 = OctEncode(normal);
  return convfloat2uint32(octf2);
}

// Unpack a normal produced by packNormal().  The result comes straight
// from OctDecode() and is therefore not renormalized.
static __host__ __device__ __inline__ float3 unpackNormal(uint packed) {
  float2 octf2 = convuint32float2(packed);
  return OctDecode(octf2);
}

#elif 1

// NOTE: this branch is never compiled while the first "#if 1" of this
//       conditional chain is active.

//
// unorm10x3: unsigned 10-bit-per-component scalar unit real representation
// Not quite as good as 'snorm' representations
// This is largely equivalent to OpenGL's UNSIGNED_INT_2_10_10_10_REV
// Described in the GLSL 4.20 specification, J. Kessenich 2011
//   i=round(clamp(r,0,1) * (2^b - 1))
//   r=i/(2^b - 1)
//
static __host__ __device__ __inline__ uint packNormal(const float3& normal) {
  const float3 N = normal * 0.5f + 0.5f;
  const uint packed = ((uint) (N.x * 1023)) |
                      ((uint) (N.y * 1023) << 10) |
                      ((uint) (N.z * 1023) << 20);
  return packed;
}

static __host__ __device__ __inline__ float3 unpackNormal(uint packed) {
  float3 N;
  N.x = (float)(packed & 0x000003ff) / 1023;
  N.y = (float)(((packed >> 10) & 0x000003ff)) / 1023;
  N.z = (float)(((packed >> 20) & 0x000003ff)) / 1023;
  return N * 2.0f - 1.0f;
}

#elif 0

//
// snorm10x3: signed 10-bit-per-component scalar unit real representation
// Better representation than unorm.
// Supported by most fixed-function graphics hardware.
// https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_snorm.txt
//   i=round(clamp(r,-1,1) * (2^(b-1) - 1))
//   r=clamp(i/(2^(b-1) - 1), -1, 1)
//

#elif 1

// NOTE: this branch is never compiled while the first "#if 1" of this
//       conditional chain is active.

// OpenGL GLbyte signed quantization scheme
//   i = r * (2^b - 1) - 0.5;
//   r = (2i + 1)/(2^b - 1)
static __host__ __device__ __inline__ uint packNormal(const float3& normal) {
  // conversion to GLbyte format, Table 2.6, p. 44 of OpenGL spec 1.2.1
  const float3 N = normal * 127.5f - 0.5f;
  const char4 packed = make_char4(N.x, N.y, N.z, 0);
  return *((uint *) &packed);
}

static __host__ __device__ __inline__ float3 unpackNormal(uint packed) {
  char4 c4norm = *((char4 *) &packed);

  // conversion from GLbyte format, Table 2.6, p. 44 of OpenGL spec 1.2.1
  // float = (2c+1)/(2^8-1)
  const float ci2f = 1.0f / 255.0f;
  const float cn2f = 1.0f / 127.5f;
  float3 N = c4norm * cn2f + ci2f;

  return N;
}

#endif   // normal packing scheme selection
#endif   // ORT_USERTXAPIS


#endif   // OPTIXSHADERS