1 /* 2 * ip4.h -- SSE 4.1 parser for time stamps 3 * https://lemire.me/blog/2023/07/01/parsing-time-stamps-faster-with-simd-instructions/ 4 * 5 * Copyright (c) 2023. Daniel Lemire 6 * 7 * SPDX-License-Identifier: BSD-3-Clause 8 * 9 */ 10#ifndef SSE_TIME_H 11#define SSE_TIME_H 12 13 static const int mdays_minus_one[] = {30, 27, 30, 29, 30, 29, 30, 30, 29, 30, 29, 30}; 14 15 static const int mdays_cumulative[] = {0, 31, 59, 90, 120, 151, 181, 16 212, 243, 273, 304, 334, 365}; 17 18 /* 19 The 32-bit timestamp spans from year 1970 to 2106. 20 Therefore, the only special case for leap years is 2100. 21 We use that to produce fast functions. 22*/ 23 static inline uint32_t is_leap_year(uint32_t year) { 24 return (year % 4 == 0) & (year != 2100); 25} 26 27 static inline uint32_t leap_days(uint32_t year) { 28 --year; 29 return (year/4 - 1970/4) - (year >= 2100); 30} 31 32 static bool sse_parse_time(const char *date_string, uint32_t *time_in_second) { 33 // We load the block of digits. We subtract 0x30 (the code point value of the 34 // character '0'), and all bytes values should be between 0 and 9, 35 // inclusively. We know that some character must be smaller that 9, for 36 // example, we cannot have more than 59 seconds and never 60 seconds, in the 37 // time stamp string. So one character must be between 0 and 5. Similarly, we 38 // start the hours at 00 and end at 23, so one character must be between 0 39 // and 2. We do a saturating subtraction of the maximum: the result of such a 40 // subtraction should be zero if the value is no larger. We then use a special 41 // instruction to multiply one byte by 10, and sum it up with the next byte, 42 // getting a 16-bit value. We then repeat the same approach as before, 43 // checking that the result is not too large. 44 // 45 // We compute the month the good old ways, as an integer in [0,11], we 46 // check for overflows later. 47 uint64_t mo = (uint64_t)((date_string[4]-0x30)*10 + (date_string[5]-0x30) - 1); 48 __m128i v = _mm_loadu_si128((const __m128i *)date_string); 49 // loaded YYYYMMDDHHmmSS..... 50 v = _mm_xor_si128(v, _mm_set1_epi8(0x30)); 51 // W can use _mm_sub_epi8 or _mm_xor_si128 for the subtraction above. 52 // subtracting by 0x30 (or '0'), turns all values into a byte value between 0 53 // and 9 if the initial input was made of digits. 54 __m128i limit = 55 _mm_setr_epi8(9, 9, 9, 9, 1, 9, 3, 9, 2, 9, 5, 9, 5, 9, -1, -1); 56 // credit @aqrit 57 // overflows are still possible, if hours are in the range 24 to 29 58 // of if days are in the range 32 to 39 59 // or if months are in the range 12 to 19. 60 __m128i abide_by_limits = _mm_subs_epu8(v, limit); // must be all zero 61 62#if defined __SUNPRO_C 63 __m128i byteflip = _mm_setr_epi64((__m64){0x0607040502030001ULL}, 64 (__m64){0x0e0f0c0d0a0b0809ULL}); 65#else 66 __m128i byteflip = _mm_setr_epi64((__m64)0x0607040502030001ULL, 67 (__m64)0x0e0f0c0d0a0b0809ULL); 68#endif 69 70 __m128i little_endian = _mm_shuffle_epi8(v, byteflip); 71 __m128i limit16 = _mm_setr_epi16(0x0909, 0x0909, 0x0102, 0x0301, 0x0203, 72 0x0509, 0x0509, -1); 73 __m128i abide_by_limits16 = 74 _mm_subs_epu16(little_endian, limit16); // must be all zero 75 76 __m128i combined_limits = 77 _mm_or_si128(abide_by_limits16, abide_by_limits); // must be all zero 78 79 if (!_mm_test_all_zeros(combined_limits, combined_limits)) { 80 return false; 81 } 82 // 0x000000SS0mmm0HHH`00DD00MM00YY00YY 83 ////////////////////////////////////////////////////// 84 // pmaddubsw has a high latency (e.g., 5 cycles) and is 85 // likely a performance bottleneck. 86 ///////////////////////////////////////////////////// 87 const __m128i weights = _mm_setr_epi8( 88 // Y Y Y Y m m d d H H M M S S - - 89 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 0, 0); 90 v = _mm_maddubs_epi16(v, weights); 91 92 uint64_t hi = (uint64_t)_mm_extract_epi64(v, 1); 93 uint64_t seconds = (hi * 0x0384000F00004000) >> 46; 94 uint64_t lo = (uint64_t)_mm_extract_epi64(v, 0); 95 uint64_t yr = (lo * 0x64000100000000) >> 48; 96 97 // We compute the day (starting at zero). We implicitly 98 // check for overflows later. 99 uint64_t dy = (uint64_t)_mm_extract_epi8(v, 6) - 1; 100 101 bool is_leap_yr = (bool)is_leap_year((uint32_t)yr); 102 if(yr < 1970 || mo > 11) { return false; } // unlikely branch 103 if (dy > (uint64_t)mdays_minus_one[mo]) { // unlikely branch 104 if (mo == 1 && is_leap_yr) { 105 if (dy != 29 - 1) { 106 return false; 107 } 108 } else { 109 return false; 110 } 111 } 112 uint64_t days = 365 * (yr - 1970) + (uint64_t)leap_days((uint32_t)yr); 113 114 days += (uint64_t)mdays_cumulative[mo]; 115 days += is_leap_yr & (mo > 1); 116 117 days += dy; 118 uint64_t time_in_second64 = seconds + days * 60 * 60 * 24; 119 *time_in_second = (uint32_t)time_in_second64; 120 return true; 121} 122 123 nonnull_all 124 static really_inline int32_t parse_time( 125 parser_t *parser, 126 const type_info_t *type, 127 const rdata_info_t *field, 128 rdata_t *rdata, 129 const token_t *token) 130{ 131 uint32_t time; 132 133 if (unlikely(token->length != 14)) 134 return parse_int32(parser, type, field, rdata, token); 135 if (!sse_parse_time(token->data, &time)) 136 SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); 137 138 time = htobe32(time); 139 memcpy(rdata->octets, &time, sizeof(time)); 140 rdata->octets += sizeof(time); 141 return 0; 142} 143 144#endif // TIME_H 145