Main Page Namespace List Class Hierarchy Alphabetical List Compound List File List Namespace Members Compound Members File Members Related Pages
OptiXShaders.h

Go to the documentation of this file.
00001 /***************************************************************************
00002 *cr
00003 *cr (C) Copyright 1995-2019 The Board of Trustees of the
00004 *cr University of Illinois
00005 *cr All Rights Reserved
00006 *cr
00007 ***************************************************************************/
00008 
00009 /***************************************************************************
00010 * RCS INFORMATION:
00011 *
00012 * $RCSfile: OptiXShaders.h,v $
00013 * $Author: johns $ $Locker: $ $State: Exp $
00014 * $Revision: 1.41 $ $Date: 2020/02/26 03:51:31 $
00015 *
00016 ***************************************************************************/
00079 #ifndef OPTIXSHADERS
00080 #define OPTIXSHADERS
00081 #include <optixu/optixu_vector_types.h>
00082 
00083 // Compile-time flag for collection and reporting of ray statistics
00084 #if 0
00085 #define ORT_RAYSTATS 1
00086 #endif
00087 
00088 // Compile-time flag to enable the use of RTX hardware ray tracing 
00089 // acceleration APIs in OptiX
00090 #if OPTIX_VERSION >= 60000
00091 #define ORT_USERTXAPIS 1
00092 #endif
00093 
00094 // When compiling with OptiX 3.8 or greater, we use the new
00095 // progressive rendering APIs rather than our previous hand-coded
00096 // progressive renderer.
00097 #if (defined(VMDOPTIX_VCA) || (OPTIX_VERSION >= 3080)) // && !defined(VMDUSEOPENHMD)
00098 #define VMDOPTIX_PROGRESSIVEAPI 1
00099 #endif
00100 
00101 #if 1 || defined(VMDOPTIX_PROGRESSIVEAPI)
00102 #define VMDOPTIX_LIGHTUSEROBJS 1
00103 #endif
00104 
00105 #if defined(VMDOPTIX_LIGHTUSEROBJS)
00106 #include "Scene.h" // for DISP_LIGHTS macro
00107 #endif
00108 
00109 // "*" operator
00110 inline __host__ __device__ float3 operator*(char4 a, float b) {
00111 return make_float3(b * a.x, b * a.y, b * a.z);
00112 }
00113 
00114 inline __host__ __device__ float3 operator*(uchar4 a, float b) {
00115 return make_float3(b * a.x, b * a.y, b * a.z);
00116 }
00117 
00118 #if defined(__cplusplus)
00119 typedef optix::float3 float3;
00120 #endif
00121 
00122 
00123 // XXX OptiX 4.0 and later versions have a significant performance impact
00124 // on VMD startup if we use 256-way combinatorial shader specialization.
00125 // Shader template specialization had very little impact on
00126 // OptiX versions 3.[789].x previously. The new LLVM based compiler 
00127 // back-end used in recent versions of OptiX has much more overhead
00128 // when processing large numbers of shaders in single PTX files. 
00129 // If we want to retain the template specialization approach, 
00130 // we will have to generate shader code and store it in many separate 
00131 // PTX files to mitigate overheads in back-end compiler infrastructure.
00132 #if OPTIX_VERSION < 40000
00133 // this macro enables or disables the use of an array of
00134 // template-specialized shaders for every combination of
00135 // scene-wide and material-specific shader features.
00136 #define ORT_USE_TEMPLATE_SHADERS 1
00137 #endif
00138 
00139 
00140 // Enable reversed traversal of any-hit rays for shadows/AO.
00141 // This optimization yields a 20% performance gain in many cases.
00142 // #define USE_REVERSE_SHADOW_RAYS 1
00143 
00144 // Use reverse rays by default rather than only when enabled interactively
00145 // #define USE_REVERSE_SHADOW_RAYS_DEFAULT 1
// Shadow rendering modes; the numeric values are assigned explicitly.
enum RtShadowMode {
  RT_SHADOWS_OFF        = 0, // shadows disabled
  RT_SHADOWS_ON         = 1, // shadows enabled
  RT_SHADOWS_ON_REVERSE = 2  // shadows enabled; presumably selects the
                             // reversed shadow ray traversal associated
                             // with USE_REVERSE_SHADOW_RAYS -- confirm
};
00150 
00151 
00152 //
00153 // Lighting data structures
00154 //
00155 #if defined(VMDOPTIX_LIGHTUSEROBJS)
00156 typedef struct {
00157 int num_lights;
00158 float3 dirs[DISP_LIGHTS+1]; 
00159 } DirectionalLightList;
00160 
00161 typedef struct {
00162 int num_lights;
00163 float3 posns[DISP_LIGHTS+1]; 
00164 } PositionalLightList;
00165 #endif
00166 
00167 typedef struct {
00168 float3 dir;
00169 int padding; // pad to next power of two
00170 } DirectionalLight;
00171 
00172 typedef struct {
00173 float3 pos;
00174 int padding; // pad to next power of two
00175 } PositionalLight;
00176 
00177 
00178 //
00179 // Cylinders
00180 //
00181 
00182 // XXX memory layout is likely suboptimal
00183 typedef struct {
00184 float3 start;
00185 float radius;
00186 float3 axis;
00187 float pad;
00188 } vmd_cylinder;
00189 
00190 // XXX memory layout is likely suboptimal
00191 typedef struct {
00192 float3 start;
00193 float radius;
00194 float3 axis;
00195 float3 color;
00196 } vmd_cylinder_color;
00197 
00198 //
00199 // Rings (annular or otherwise)
00200 //
00201 
00202 // XXX memory layout is likely suboptimal, but is a multiple of float4
00203 typedef struct {
00204 float3 center;
00205 float3 norm;
00206 float inrad;
00207 float outrad;
00208 float3 color;
00209 float pad;
00210 } vmd_ring_color;
00211 
00212 
00213 //
00214 // Spheres
00215 //
00216 
00217 typedef struct {
00218 float3 center;
00219 float radius;
00220 } vmd_sphere;
00221 
00222 // XXX memory layout is likely suboptimal
00223 typedef struct {
00224 float3 center;
00225 float radius;
00226 float3 color;
00227 float pad;
00228 } vmd_sphere_color;
00229 
00230 
00231 //
00232 // Triangle meshes of various kinds
00233 //
00234 
00235 // XXX memory layout is definitely suboptimal
00236 typedef struct {
00237 float3 v0;
00238 float3 v1;
00239 float3 v2;
00240 float3 n0;
00241 float3 n1;
00242 float3 n2;
00243 float3 c0;
00244 float3 c1;
00245 float3 c2;
00246 } vmd_tricolor;
00247 
00248 typedef struct {
00249 uchar4 c0;
00250 uchar4 c1;
00251 uchar4 c2;
00252 char4 n0;
00253 char4 n1;
00254 char4 n2;
00255 float3 v0;
00256 float3 v1;
00257 float3 v2;
00258 } vmd_trimesh_c4u_n3b_v3f;
00259 
00260 typedef struct {
00261 float3 n0;
00262 float3 n1;
00263 float3 n2;
00264 float3 v0;
00265 float3 v1;
00266 float3 v2;
00267 } vmd_trimesh_n3f_v3f;
00268 
00269 typedef struct {
00270 char4 n0;
00271 char4 n1;
00272 char4 n2;
00273 float3 v0;
00274 float3 v1;
00275 float3 v2;
00276 } vmd_trimesh_n3b_v3f;
00277 
00278 typedef struct {
00279 float3 v0;
00280 float3 v1;
00281 float3 v2;
00282 } vmd_trimesh_v3f;
00283 
00284 
00285 
00286 //
00287 // Methods for packing normals into a 4-byte quantity, such as a 
00288 // [u]int or [u]char4, and similar. See JCGT article by Cigolle et al.,
00289 // "A Survey of Efficient Representations for Independent Unit Vectors",
00290 // J. Computer Graphics Techniques 3(2), 2014.
00291 // http://jcgt.org/published/0003/02/01/
00292 //
00293 #if defined(ORT_USERTXAPIS)
00294 #include <optixu/optixu_math_namespace.h> // for make_xxx() fctns
00295 
00296 #if 1
00297 
00298 //
00299 // oct32: 32-bit octahedral normal encoding using [su]norm16x2 quantization
00300 // Meyer et al., "On Floating Point Normal Vectors", In Proc. 21st
00301 // Eurographics Conference on Rendering.
00302 // http://dx.doi.org/10.1111/j.1467-8659.2010.01737.x
00303 // Others:
00304 // https://twitter.com/Stubbesaurus/status/937994790553227264
00305 // https://knarkowicz.wordpress.com/2014/04/16/octahedron-normal-vector-encoding
00306 //
00307 static __host__ __device__ __inline__ float3 OctDecode(float2 projected) {
00308 float3 n = make_float3(projected.x, 
00309 projected.y, 
00310 1.0f - (fabsf(projected.x) + fabsf(projected.y)));
00311 if (n.z < 0.0f) {
00312 float oldX = n.x;
00313 n.x = copysignf(1.0f - fabsf(n.y), oldX);
00314 n.y = copysignf(1.0f - fabsf(oldX), n.y);
00315 }
00316 
00317 return n;
00318 }
00319 
00320 //
00321 // XXX TODO: implement a high-precision OctPEncode() variant, based on 
00322 // floored snorms and an error minimization scheme using a 
00323 // comparison of internally decoded values for least error
00324 //
00325 
00326 static __host__ __device__ __inline__ float2 OctEncode(float3 n) {
00327 const float invL1Norm = 1.0f / (fabsf(n.x) + fabsf(n.y) + fabsf(n.z));
00328 float2 projected;
00329 if (n.z < 0.0f) {
00330 projected = 1.0f - make_float2(fabsf(n.y), fabsf(n.x)) * invL1Norm;
00331 projected.x = copysignf(projected.x, n.x);
00332 projected.y = copysignf(projected.y, n.y);
00333 } else {
00334 projected = make_float2(n.x, n.y) * invL1Norm;
00335 }
00336 
00337 return projected;
00338 }
00339 
00340 
00341 static __host__ __device__ __inline__ uint convfloat2uint32(float2 f2) {
00342 f2 = f2 * 0.5f + 0.5f;
00343 uint packed;
00344 packed = ((uint) (f2.x * 65535)) | ((uint) (f2.y * 65535) << 16);
00345 return packed;
00346 }
00347 
00348 static __host__ __device__ __inline__ float2 convuint32float2(uint packed) {
00349 float2 f2;
00350 f2.x = (float)((packed ) & 0x0000ffff) / 65535;
00351 f2.y = (float)((packed >> 16) & 0x0000ffff) / 65535;
00352 return f2 * 2.0f - 1.0f;
00353 }
00354 
00355 
00356 static __host__ __device__ __inline__ uint packNormal(const float3& normal) {
00357 float2 octf2 = OctEncode(normal);
00358 return convfloat2uint32(octf2);
00359 }
00360 
00361 static __host__ __device__ __inline__ float3 unpackNormal(uint packed) {
00362 float2 octf2 = convuint32float2(packed);
00363 return OctDecode(octf2);
00364 }
00365 
00366 #elif 1
00367 
00368 // 
00369 // unorm10x3: unsigned 10-bit-per-component scalar unit real representation
00370 // Not quite as good as 'snorm' representations
00371 // This is largely equivalent to OpenGL's UNSIGNED_INT_2_10_10_10_REV 
00372 // Described in the GLSL 4.20 specification, J. Kessenich 2011
00373 // i=round(clamp(r,0,1) * (2^b - 1))
00374 // r=i/(2^b - 1)
00375 //
00376 static __host__ __device__ __inline__ uint packNormal(const float3& normal) {
00377 const float3 N = normal * 0.5f + 0.5f;
00378 const uint packed = ((uint) (N.x * 1023)) |
00379 ((uint) (N.y * 1023) << 10) |
00380 ((uint) (N.z * 1023) << 20);
00381 return packed;
00382 }
00383 
00384 static __host__ __device__ __inline__ float3 unpackNormal(uint packed) {
00385 float3 N;
00386 N.x = (float)(packed & 0x000003ff) / 1023;
00387 N.y = (float)(((packed >> 10) & 0x000003ff)) / 1023;
00388 N.z = (float)(((packed >> 20) & 0x000003ff)) / 1023;
00389 return N * 2.0f - 1.0f;
00390 }
00391 
00392 #elif 0
00393 
00394 // 
00395 // snorm10x3: signed 10-bit-per-component scalar unit real representation
00396 // Better representation than unorm. 
00397 // Supported by most fixed-function graphics hardware.
00398 // https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_snorm.txt
00399 // i=round(clamp(r,-1,1) * (2^(b-1) - 1))
00400 // r=clamp(i/(2^(b-1) - 1), -1, 1)
00401 //
00402 
00403 #elif 1
00404 
00405 // OpenGL GLbyte signed quantization scheme
00406 // i = (r * (2^b - 1) - 1) / 2
00407 // r = (2i + 1)/(2^b - 1)
00408 static __host__ __device__ __inline__ uint packNormal(const float3& normal) {
00409 // conversion to GLbyte format, Table 2.6, p. 44 of OpenGL spec 1.2.1
00410 const float3 N = normal * 127.5f - 0.5f;
00411 const char4 packed = make_char4(N.x, N.y, N.z, 0);
00412 return *((uint *) &packed);
00413 }
00414 
00415 static __host__ __device__ __inline__ float3 unpackNormal(uint packed) {
00416 char4 c4norm = *((char4 *) &packed);
00417 
00418 // conversion from GLbyte format, Table 2.6, p. 44 of OpenGL spec 1.2.1
00419 // float = (2c+1)/(2^8-1)
00420 const float ci2f = 1.0f / 255.0f;
00421 const float cn2f = 1.0f / 127.5f;
00422 float3 N = c4norm * cn2f + ci2f;
00423 
00424 return N;
00425 }
00426 
00427 #endif
00428 #endif
00429 
00430 
00431 #endif
00432