Super User's BSD Cross Reference: /OpenBSD/usr.sbin/nsd/simdzone/src/westmere/time.h

1 /*
2 * ip4.h -- SSE 4.1 parser for time stamps
3 * https://lemire.me/blog/2023/07/01/parsing-time-stamps-faster-with-simd-instructions/
4 *
5 * Copyright (c) 2023. Daniel Lemire
6 *
7 * SPDX-License-Identifier: BSD-3-Clause
8 *
9 */
10#ifndef SSE_TIME_H
11#define SSE_TIME_H
12
13 static const int mdays_minus_one[] = {30, 27, 30, 29, 30, 29, 30, 30, 29, 30, 29, 30};
14
15 static const int mdays_cumulative[] = {0, 31, 59, 90, 120, 151, 181,
16 212, 243, 273, 304, 334, 365};
17
18 /*
19 The 32-bit timestamp spans from year 1970 to 2106.
20 Therefore, the only special case for leap years is 2100.
21 We use that to produce fast functions.
22*/
23 static inline uint32_t is_leap_year(uint32_t year) {
24 return (year % 4 == 0) & (year != 2100);
25}
26
27 static inline uint32_t leap_days(uint32_t year) {
28 --year;
29 return (year/4 - 1970/4) - (year >= 2100);
30}
31
32 static bool sse_parse_time(const char *date_string, uint32_t *time_in_second) {
33 // We load the block of digits. We subtract 0x30 (the code point value of the
34 // character '0'), and all bytes values should be between 0 and 9,
35 // inclusively. We know that some character must be smaller that 9, for
36 // example, we cannot have more than 59 seconds and never 60 seconds, in the
37 // time stamp string. So one character must be between 0 and 5. Similarly, we
38 // start the hours at 00 and end at 23, so one character must be between 0
39 // and 2. We do a saturating subtraction of the maximum: the result of such a
40 // subtraction should be zero if the value is no larger. We then use a special
41 // instruction to multiply one byte by 10, and sum it up with the next byte,
42 // getting a 16-bit value. We then repeat the same approach as before,
43 // checking that the result is not too large.
44 //
45 // We compute the month the good old ways, as an integer in [0,11], we
46 // check for overflows later.
47 uint64_t mo = (uint64_t)((date_string[4]-0x30)*10 + (date_string[5]-0x30) - 1);
48 __m128i v = _mm_loadu_si128((const __m128i *)date_string);
49 // loaded YYYYMMDDHHmmSS.....
50 v = _mm_xor_si128(v, _mm_set1_epi8(0x30));
51 // W can use _mm_sub_epi8 or _mm_xor_si128 for the subtraction above.
52 // subtracting by 0x30 (or '0'), turns all values into a byte value between 0
53 // and 9 if the initial input was made of digits.
54 __m128i limit =
55 _mm_setr_epi8(9, 9, 9, 9, 1, 9, 3, 9, 2, 9, 5, 9, 5, 9, -1, -1);
56 // credit @aqrit
57 // overflows are still possible, if hours are in the range 24 to 29
58 // of if days are in the range 32 to 39
59 // or if months are in the range 12 to 19.
60 __m128i abide_by_limits = _mm_subs_epu8(v, limit); // must be all zero
61
62#if defined __SUNPRO_C
63 __m128i byteflip = _mm_setr_epi64((__m64){0x0607040502030001ULL},
64 (__m64){0x0e0f0c0d0a0b0809ULL});
65#else
66 __m128i byteflip = _mm_setr_epi64((__m64)0x0607040502030001ULL,
67 (__m64)0x0e0f0c0d0a0b0809ULL);
68#endif
69
70 __m128i little_endian = _mm_shuffle_epi8(v, byteflip);
71 __m128i limit16 = _mm_setr_epi16(0x0909, 0x0909, 0x0102, 0x0301, 0x0203,
72 0x0509, 0x0509, -1);
73 __m128i abide_by_limits16 =
74 _mm_subs_epu16(little_endian, limit16); // must be all zero
75
76 __m128i combined_limits =
77 _mm_or_si128(abide_by_limits16, abide_by_limits); // must be all zero
78
79 if (!_mm_test_all_zeros(combined_limits, combined_limits)) {
80 return false;
81 }
82 // 0x000000SS0mmm0HHH`00DD00MM00YY00YY
83 //////////////////////////////////////////////////////
84 // pmaddubsw has a high latency (e.g., 5 cycles) and is
85 // likely a performance bottleneck.
86 /////////////////////////////////////////////////////
87 const __m128i weights = _mm_setr_epi8(
88 // Y Y Y Y m m d d H H M M S S - -
89 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 0, 0);
90 v = _mm_maddubs_epi16(v, weights);
91
92 uint64_t hi = (uint64_t)_mm_extract_epi64(v, 1);
93 uint64_t seconds = (hi * 0x0384000F00004000) >> 46;
94 uint64_t lo = (uint64_t)_mm_extract_epi64(v, 0);
95 uint64_t yr = (lo * 0x64000100000000) >> 48;
96
97 // We compute the day (starting at zero). We implicitly
98 // check for overflows later.
99 uint64_t dy = (uint64_t)_mm_extract_epi8(v, 6) - 1;
100
101 bool is_leap_yr = (bool)is_leap_year((uint32_t)yr);
102 if(yr < 1970 || mo > 11) { return false; } // unlikely branch
103 if (dy > (uint64_t)mdays_minus_one[mo]) { // unlikely branch
104 if (mo == 1 && is_leap_yr) {
105 if (dy != 29 - 1) {
106 return false;
107 }
108 } else {
109 return false;
110 }
111 }
112 uint64_t days = 365 * (yr - 1970) + (uint64_t)leap_days((uint32_t)yr);
113
114 days += (uint64_t)mdays_cumulative[mo];
115 days += is_leap_yr & (mo > 1);
116
117 days += dy;
118 uint64_t time_in_second64 = seconds + days * 60 * 60 * 24;
119 *time_in_second = (uint32_t)time_in_second64;
120 return true;
121}
122
123 nonnull_all
124 static really_inline int32_t parse_time(
125 parser_t *parser,
126 const type_info_t *type,
127 const rdata_info_t *field,
128 rdata_t *rdata,
129 const token_t *token)
130{
131 uint32_t time;
132
133 if (unlikely(token->length != 14))
134 return parse_int32(parser, type, field, rdata, token);
135 if (!sse_parse_time(token->data, &time))
136 SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
137
138 time = htobe32(time);
139 memcpy(rdata->octets, &time, sizeof(time));
140 rdata->octets += sizeof(time);
141 return 0;
142}
143
144#endif // TIME_H
145 

AltStyle によって変換されたページ (->オリジナル) /