Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a1761c7

Browse files
authored
batch: suppress warning for SIMD loop vectorization failure with clang (#2032)
batch: relax SIMD loop for certain operations. When building with a newer version of clang, I found a few more SIMD pragma loops that fail to vectorize. Since this seems to depend on the compiler version, and I don't want to disable the attempted vectorization, I thought a better strategy was to just suppress the warning when vectorization fails in those specific places. Also add a "latest dependencies with clang" CI test; that's how I stumbled across these in the first place. Signed-off-by: Larry Gritz <lg@larrygritz.com>
1 parent 1cfaf77 commit a1761c7

File tree

6 files changed

+83
-11
lines changed

6 files changed

+83
-11
lines changed

‎.github/workflows/ci.yml‎

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -414,8 +414,27 @@ jobs:
414414
- desc: latest releases gcc14/C++17 llvm18 oiio-rel exr3.4 py3.12 avx2 batch-b16avx512
415415
nametag: linux-latest-releases
416416
runner: ubuntu-24.04
417-
cc_compiler: gcc-13
418-
cxx_compiler: g++-13
417+
cc_compiler: gcc-14
418+
cxx_compiler: g++-14
419+
cxx_std: 17
420+
fmt_ver: 12.1.0
421+
opencolorio_ver: v2.5.0
422+
openexr_ver: v3.4.2
423+
openimageio_ver: release
424+
pybind11_ver: v3.0.1
425+
python_ver: "3.12"
426+
llvm_action_ver: "18.1.7"
427+
simd: avx2,f16c
428+
batched: b8_AVX2,b8_AVX512,b16_AVX512
429+
setenvs: export LIBTIFF_VERSION=v4.7.1
430+
PTEX_VERSION=v2.4.3
431+
PUGIXML_VERSION=v1.15
432+
FREETYPE_VERSION=VER-2-14-3
433+
- desc: latest releases clang18/C++17 llvm18 oiio-rel exr3.4 py3.12 avx2 batch-b16avx512
434+
nametag: linux-latest-releases-clang
435+
runner: ubuntu-24.04
436+
cc_compiler: clang
437+
cxx_compiler: clang++
419438
cxx_std: 17
420439
fmt_ver: 11.1.4
421440
opencolorio_ver: v2.4.2

‎src/include/OSL/platform.h‎

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,21 @@
200200
#endif
201201

202202
// Compiler-specific pragmas
203+
//
204+
// - OSL_PRAGMA_WARNING_PUSH/POP pushes/pops warning options (for all
205+
// compilers).
206+
// - OSL_PRAGMA_VISIBILITY_PUSH/POP pushes/pops symbol visibility options (for
207+
// all compilers that support it).
208+
// - OSL_GCC_PRAGMA makes a pragma for all gcc-like compilers, but does nothing
209+
// for MSVS.
210+
// - OSL_GCC_ONLY_PRAGMA makes a pragma for real gcc only.
211+
// - OSL_CLANG_PRAGMA makes a pragma for all clang-based compilers (including
212+
// Apple clang and Intel LLVM).
213+
// - OSL_NONINTEL_CLANG_PRAGMA makes a pragma for regular clang and Apple
214+
// clang, but not Intel clang.
215+
// - OSL_INTEL_CLASSIC_PRAGMA makes a pragma for icc only.
216+
// - OSL_INTEL_LLVM_PRAGMA makes a pragma for icx only.
217+
// - OSL_MSVS_PRAGMA makes a pragma for MSVS only.
203218
#if defined(__GNUC__) /* gcc, clang, icc */
204219
# define OSL_PRAGMA_WARNING_PUSH OSL_PRAGMA(GCC diagnostic push)
205220
# define OSL_PRAGMA_WARNING_POP OSL_PRAGMA(GCC diagnostic pop)
@@ -223,6 +238,11 @@
223238
# else
224239
# define OSL_INTEL_LLVM_PRAGMA(UnQuotedPragma)
225240
# endif
241+
# if defined(__clang__) && !defined(__INTEL_COMPILER) && !defined(__INTEL_LLVM_COMPILER)
242+
# define OSL_NONINTEL_CLANG_PRAGMA(UnQuotedPragma) OSL_PRAGMA(UnQuotedPragma)
243+
# else
244+
# define OSL_NONINTEL_CLANG_PRAGMA(UnQuotedPragma)
245+
# endif
226246
# define OSL_MSVS_PRAGMA(UnQuotedPragma)
227247
#elif defined(_MSC_VER)
228248
# define OSL_PRAGMA_WARNING_PUSH __pragma(warning(push))
@@ -232,6 +252,7 @@
232252
# define OSL_GCC_PRAGMA(UnQuotedPragma)
233253
# define OSL_GCC_ONLY_PRAGMA(UnQuotedPragma)
234254
# define OSL_CLANG_PRAGMA(UnQuotedPragma)
255+
# define OSL_NONINTEL_CLANG_PRAGMA(UnQuotedPragma)
235256
# define OSL_INTEL_CLASSIC_PRAGMA(UnQuotedPragma)
236257
# define OSL_INTEL_LLVM_PRAGMA(UnQuotedPragma)
237258
# define OSL_MSVS_PRAGMA(UnQuotedPragma) OSL_PRAGMA(UnQuotedPragma)
@@ -243,6 +264,7 @@
243264
# define OSL_GCC_PRAGMA(UnQuotedPragma)
244265
# define OSL_GCC_ONLY_PRAGMA(UnQuotedPragma)
245266
# define OSL_CLANG_PRAGMA(UnQuotedPragma)
267+
# define OSL_NONINTEL_CLANG_PRAGMA(UnQuotedPragma)
246268
# define OSL_INTEL_CLASSIC_PRAGMA(UnQuotedPragma)
247269
# define OSL_INTEL_LLVM_PRAGMA(UnQuotedPragma)
248270
# define OSL_MSVS_PRAGMA(UnQuotedPragma)
@@ -291,6 +313,7 @@
291313
#define OSL_OMP_SIMD_LOOP(...) OSL_OMP_PRAGMA(omp simd __VA_ARGS__)
292314

293315
#if (OSL_GNUC_VERSION || OSL_INTEL_CLASSIC_COMPILER_VERSION || OSL_INTEL_LLVM_COMPILER_VERSION)
316+
// GCC, icc, icx: Use a simd loop for sure
294317
# define OSL_OMP_COMPLEX_SIMD_LOOP(...) OSL_OMP_SIMD_LOOP(__VA_ARGS__)
295318
#else
296319
// Ignore requests to vectorize complex/nested SIMD loops for certain

‎src/liboslexec/wide/wide_opalgebraic.cpp‎

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ calculatenormal(const Dual2<Vec3>& tmpP, bool flipHandedness)
128128

129129

130130

131+
OSL_PRAGMA_WARNING_PUSH
132+
OSL_NONINTEL_CLANG_PRAGMA(GCC diagnostic ignored "-Wpass-failed")
133+
131134
OSL_BATCHOP void
132135
__OSL_OP2(length, Wf, Wv)(void* r_, void* V_)
133136
{
@@ -136,7 +139,7 @@ __OSL_OP2(length, Wf, Wv)(void* r_, void* V_)
136139
Wide<const Vec3> wV(V_);
137140
Wide<float> wr(r_);
138141

139-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
142+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
140143
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
141144
Vec3 V = wV[lane];
142145
float r = sfm::length(V);
@@ -155,7 +158,7 @@ __OSL_MASKED_OP2(length, Wf, Wv)(void* r_, void* V_, unsigned int mask_value)
155158
Wide<const Vec3> wV(V_);
156159
Masked<float> wr(r_, Mask(mask_value));
157160

158-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
161+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
159162
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
160163
Vec3 V = wV[lane];
161164
if (wr.mask()[lane]) {
@@ -166,6 +169,8 @@ __OSL_MASKED_OP2(length, Wf, Wv)(void* r_, void* V_, unsigned int mask_value)
166169
}
167170
}
168171

172+
OSL_PRAGMA_WARNING_POP
173+
169174

170175

171176
OSL_BATCHOP void
@@ -208,6 +213,9 @@ __OSL_MASKED_OP2(length, Wdf, Wdv)(void* r_, void* V_, unsigned int mask_value)
208213

209214

210215

216+
OSL_PRAGMA_WARNING_PUSH
217+
OSL_NONINTEL_CLANG_PRAGMA(GCC diagnostic ignored "-Wpass-failed")
218+
211219
OSL_BATCHOP void
212220
__OSL_OP2(area, Wf, Wdv)(void* r_, void* DP_)
213221
{
@@ -217,7 +225,7 @@ __OSL_OP2(area, Wf, Wdv)(void* r_, void* DP_)
217225

218226
Wide<float> wr(r_);
219227

220-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
228+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
221229
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
222230
Dual2<Vec3> DP = wDP[lane];
223231

@@ -240,7 +248,7 @@ __OSL_MASKED_OP2(area, Wf, Wdv)(void* r_, void* DP_, unsigned int mask_value)
240248

241249
Masked<float> wr(r_, Mask(mask_value));
242250

243-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
251+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
244252
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
245253
Dual2<Vec3> DP = wDP[lane];
246254
if (wr.mask()[lane]) {
@@ -253,6 +261,8 @@ __OSL_MASKED_OP2(area, Wf, Wdv)(void* r_, void* DP_, unsigned int mask_value)
253261
}
254262
}
255263

264+
OSL_PRAGMA_WARNING_POP
265+
256266

257267

258268
OSL_BATCHOP void
@@ -447,6 +457,9 @@ __OSL_MASKED_OP3(distance, Wdf, Wdv, Wdv)(void* r_, void* a_, void* b_,
447457

448458

449459

460+
OSL_PRAGMA_WARNING_PUSH
461+
OSL_NONINTEL_CLANG_PRAGMA(GCC diagnostic ignored "-Wpass-failed")
462+
450463
OSL_BATCHOP void
451464
__OSL_OP2(normalize, Wv, Wv)(void* r_, void* V_)
452465
{
@@ -455,7 +468,7 @@ __OSL_OP2(normalize, Wv, Wv)(void* r_, void* V_)
455468
Wide<const Vec3> wV(V_);
456469
Wide<Vec3> wr(r_);
457470

458-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
471+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
459472
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
460473
Vec3 V = wV[lane];
461474
Vec3 N = sfm::normalize(V);
@@ -473,7 +486,7 @@ __OSL_MASKED_OP2(normalize, Wv, Wv)(void* r_, void* V_, unsigned int mask_value)
473486
Wide<const Vec3> wV(V_);
474487
Masked<Vec3> wr(r_, Mask(mask_value));
475488

476-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
489+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
477490
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
478491
Vec3 V = wV[lane];
479492
if (wr.mask()[lane]) {
@@ -484,6 +497,7 @@ __OSL_MASKED_OP2(normalize, Wv, Wv)(void* r_, void* V_, unsigned int mask_value)
484497
}
485498
}
486499

500+
OSL_PRAGMA_WARNING_POP
487501

488502

489503
OSL_BATCHOP void

‎src/liboslexec/wide/wide_opcolor.cpp‎

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ __OSL_OP(blackbody_vf)(void* bsg_, void* out, float temp)
5656

5757

5858

59+
OSL_PRAGMA_WARNING_PUSH
60+
OSL_NONINTEL_CLANG_PRAGMA(GCC diagnostic ignored "-Wpass-failed")
61+
5962
OSL_BATCHOP void
6063
__OSL_MASKED_OP2(blackbody, Wv, Wf)(void* bsg_, void* wout_, void* wtemp_,
6164
unsigned int mask_value)
@@ -68,7 +71,7 @@ __OSL_MASKED_OP2(blackbody, Wv, Wf)(void* bsg_, void* wout_, void* wtemp_,
6871
Block<int> computeRequiredBlock;
6972
Wide<int> wcomputeRequired(computeRequiredBlock);
7073

71-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
74+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
7275
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
7376
float temperature = wL[lane];
7477
bool canNotLookup = !cs.can_lookup_blackbody(temperature);
@@ -105,6 +108,8 @@ __OSL_MASKED_OP2(blackbody, Wv, Wf)(void* bsg_, void* wout_, void* wtemp_,
105108
}
106109
}
107110

111+
OSL_PRAGMA_WARNING_POP
112+
108113

109114

110115
OSL_BATCHOP void

‎src/liboslexec/wide/wide_opspline.cpp‎

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,9 @@ splineinverse_search(const MatrixT& M, R_T& result, X_T& xval, KArrayT knots,
362362

363363
namespace { // unnamed
364364

365+
OSL_PRAGMA_WARNING_PUSH
366+
OSL_NONINTEL_CLANG_PRAGMA(GCC diagnostic ignored "-Wpass-failed")
367+
365368
template<bool IsBasisUConstantT, int BasisStepT, typename MatrixT,
366369
typename RAccessorT, typename XAccessorT, typename KAccessorT>
367370
OSL_FORCEINLINE void
@@ -376,7 +379,7 @@ spline_evaluate_loop_over_wide(const MatrixT& M, RAccessorT wR, XAccessorT wX,
376379

377380
OSL_FORCEINLINE_BLOCK
378381
{
379-
OSL_OMP_PRAGMA(omp simd simdlen(vec_width))
382+
OSL_OMP_SIMD_LOOP(simdlen(vec_width))
380383
for (int lane = 0; lane < vec_width; ++lane) {
381384
X_Type x = wX[lane];
382385
auto knots = wK[lane];
@@ -547,6 +550,8 @@ splineinverse_evaluate_wide(RAccessorT wR, ustring spline_basis, XAccessorT wX,
547550
impl_by_basis[basis_type](wR, wX, wK, knot_count);
548551
}
549552

553+
OSL_PRAGMA_WARNING_POP
554+
550555
} // namespace
551556

552557

‎src/liboslexec/wide/wide_opstring.cpp‎

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ __OSL_MASKED_OP2(strlen, Wi, Ws)(void* wr_, void* ws_, unsigned int mask_value)
9090
}
9191

9292

93+
94+
OSL_PRAGMA_WARNING_PUSH
95+
OSL_NONINTEL_CLANG_PRAGMA(GCC diagnostic ignored "-Wpass-failed")
96+
9397
OSL_BATCHOP void
9498
__OSL_MASKED_OP2(hash, Wi, Ws)(void* wr_, void* ws_, unsigned int mask_value)
9599
{
@@ -98,7 +102,7 @@ __OSL_MASKED_OP2(hash, Wi, Ws)(void* wr_, void* ws_, unsigned int mask_value)
98102

99103
OSL_FORCEINLINE_BLOCK
100104
{
101-
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
105+
OSL_OMP_SIMD_LOOP(simdlen(__OSL_WIDTH))
102106
for (int lane = 0; lane < __OSL_WIDTH; ++lane) {
103107
ustring s = wS[lane];
104108
if (wR.mask()[lane]) {
@@ -108,6 +112,8 @@ __OSL_MASKED_OP2(hash, Wi, Ws)(void* wr_, void* ws_, unsigned int mask_value)
108112
}
109113
}
110114

115+
OSL_PRAGMA_WARNING_POP
116+
111117

112118

113119
OSL_BATCHOP void

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /