1//===-- StringRef.cpp - Lightweight String References ---------------------===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===----------------------------------------------------------------------===//
20// strncasecmp() is not available on non-POSIX systems, so define an
21// alternative function here.
24 unsigned char LHC =
toLower(LC);
25 unsigned char RHC =
toLower(RC);
27 return LHC < RHC ? -1 : 1;
33 size_t Min = std::min(
size(), RHS.size());
36 if (
size() == RHS.size())
38 return size() < RHS.size() ? -1 : 1;
42 return size() >= Prefix.size() &&
56/// compare_numeric - Compare strings, handle embedded numbers.
58 for (
size_t I = 0, E = std::min(
size(), RHS.size());
I != E; ++
I) {
59 // Check for sequences of digits.
61 // The longer sequence of numbers is considered larger.
62 // This doesn't really handle prefixed zeros well.
64 for (J =
I + 1; J != E + 1; ++J) {
66 bool rd = J < RHS.size() &&
isDigit(RHS.data()[J]);
72 // The two number sequences have the same length (J-I), just memcmp them.
73 if (
int Res = compareMemory(
data() +
I, RHS.data() +
I, J -
I))
74 return Res < 0 ? -1 : 1;
75 // Identical number sequences, continue search after the numbers.
79 if (
data()[
I] != RHS.data()[
I])
80 return (
unsigned char)
data()[
I] < (
unsigned char)RHS.data()[
I] ? -1 : 1;
82 if (
size() == RHS.size())
84 return size() < RHS.size() ? -1 : 1;
87// Compute the edit distance between the two given strings.
89 bool AllowReplacements,
90 unsigned MaxEditDistance)
const {
93 AllowReplacements, MaxEditDistance);
97 StringRef Other,
bool AllowReplacements,
unsigned MaxEditDistance)
const {
103//===----------------------------------------------------------------------===//
105//===----------------------------------------------------------------------===//
117//===----------------------------------------------------------------------===//
119//===----------------------------------------------------------------------===//
122/// find - Search for the first string \arg Str in the string.
124/// \return - The index of the first occurrence of \arg Str, or npos if not
130 const char *Start =
data() + From;
133 const char *Needle = Str.data();
134 size_t N = Str.size();
140 const char *
Ptr = (
const char *)::memchr(Start, Needle[0],
Size);
144 const char *Stop = Start + (
Size -
N + 1);
147 // Provide a fast path for newline finding (CRLF case) in InclusionRewriter.
148 // Not the most optimized strategy, but getting memcmp inlined should be
151 if (std::memcmp(Start, Needle, 2) == 0)
152 return Start -
data();
154 }
while (Start < Stop);
158 // For short haystacks or unsupported needles fall back to the naive algorithm
161 if (std::memcmp(Start, Needle,
N) == 0)
162 return Start -
data();
164 }
while (Start < Stop);
168 // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
170 std::memset(BadCharSkip,
N, 256);
171 for (
unsigned i = 0; i !=
N-1; ++i)
172 BadCharSkip[(
uint8_t)Str[i]] =
N-1-i;
177 if (std::memcmp(Start, Needle,
N - 1) == 0)
178 return Start -
data();
180 // Otherwise skip the appropriate number of bytes.
181 Start += BadCharSkip[
Last];
182 }
while (Start < Stop);
189 while (This.size() >= Str.size()) {
190 if (This.starts_with_insensitive(Str))
192 This = This.drop_front();
199 From = std::min(From,
size());
209/// rfind - Search for the last string \arg Str in the string.
211/// \return - The index of the last occurrence of \arg Str, or npos if not
214 return std::string_view(*this).rfind(Str);
218 size_t N = Str.size();
221 for (
size_t i =
size() -
N + 1, e = 0; i != e;) {
229/// find_first_of - Find the first character in the string that is in \arg
230/// Chars, or npos if not found.
232/// Note: O(size() + Chars.size())
235 std::bitset<1 << CHAR_BIT> CharBits;
237 CharBits.set((
unsigned char)
C);
240 if (CharBits.test((
unsigned char)
data()[i]))
245/// find_first_not_of - Find the first character in the string that is not
246/// \arg C or npos if not found.
248 return std::string_view(*this).find_first_not_of(
C, From);
251/// find_first_not_of - Find the first character in the string that is not
252/// in the string \arg Chars, or npos if not found.
254/// Note: O(size() + Chars.size())
257 std::bitset<1 << CHAR_BIT> CharBits;
259 CharBits.set((
unsigned char)
C);
262 if (!CharBits.test((
unsigned char)
data()[i]))
267/// find_last_of - Find the last character in the string that is in \arg C,
268/// or npos if not found.
270/// Note: O(size() + Chars.size())
273 std::bitset<1 << CHAR_BIT> CharBits;
275 CharBits.set((
unsigned char)
C);
277 for (
size_type i = std::min(From,
size()) - 1, e = -1; i != e; --i)
278 if (CharBits.test((
unsigned char)
data()[i]))
283/// find_last_not_of - Find the last character in the string that is not
284/// \arg C, or npos if not found.
286 for (
size_type i = std::min(From,
size()) - 1, e = -1; i != e; --i)
292/// find_last_not_of - Find the last character in the string that is not in
293/// \arg Chars, or npos if not found.
295/// Note: O(size() + Chars.size())
298 std::bitset<1 << CHAR_BIT> CharBits;
300 CharBits.set((
unsigned char)
C);
302 for (
size_type i = std::min(From,
size()) - 1, e = -1; i != e; --i)
303 if (!CharBits.test((
unsigned char)
data()[i]))
310 bool KeepEmpty)
const {
313 // Count down from MaxSplit. When MaxSplit is -1, this will just split
314 // "forever". This doesn't support splitting more than 2^31 times
315 // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
316 // but that seems unlikely to be useful.
317 while (MaxSplit-- != 0) {
318 size_t Idx = S.
find(Separator);
323 if (KeepEmpty || Idx > 0)
324 A.push_back(S.
slice(0, Idx));
331 if (KeepEmpty || !S.
empty())
336 int MaxSplit,
bool KeepEmpty)
const {
339 // Count down from MaxSplit. When MaxSplit is -1, this will just split
340 // "forever". This doesn't support splitting more than 2^31 times
341 // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
342 // but that seems unlikely to be useful.
343 while (MaxSplit-- != 0) {
344 size_t Idx = S.
find(Separator);
349 if (KeepEmpty || Idx > 0)
350 A.push_back(S.
slice(0, Idx));
357 if (KeepEmpty || !S.
empty())
361//===----------------------------------------------------------------------===//
363//===----------------------------------------------------------------------===//
365/// count - Return the number of non-overlapped occurrences of \arg Str in
370 size_t N = Str.size();
371 // TODO: For an empty `Str` we return 0 for legacy reasons. Consider changing
372 // this to `Length + 1` which is more in-line with the function
376 while ((Pos =
find(Str, Pos)) !=
npos) {
387 if (Str.consume_front_insensitive(
"0x"))
390 if (Str.consume_front_insensitive(
"0b"))
393 if (Str.consume_front(
"0o"))
396 if (Str[0] ==
'0' && Str.size() > 1 &&
isDigit(Str[1])) {
405 unsigned long long &Result) {
406 // Autosense radix if not specified.
410 // Empty strings (after the radix autosense) are invalid.
411 if (Str.empty())
return true;
413 // Parse all the bytes of the string given this radix. Watch for overflow.
416 while (!Str2.
empty()) {
418 if (Str2[0] >=
'0' && Str2[0] <=
'9')
419 CharVal = Str2[0] -
'0';
420 else if (Str2[0] >=
'a' && Str2[0] <=
'z')
421 CharVal = Str2[0] -
'a' + 10;
422 else if (Str2[0] >=
'A' && Str2[0] <=
'Z')
423 CharVal = Str2[0] -
'A' + 10;
427 // If the parsed value is larger than the integer radix, we cannot
428 // consume any more characters.
429 if (CharVal >= Radix)
432 // Add in this character.
433 unsigned long long PrevResult = Result;
434 Result = Result * Radix + CharVal;
436 // Check for overflow by shifting back and seeing if bits were lost.
437 if (Result / Radix < PrevResult)
443 // We consider the operation a failure if no characters were consumed
445 if (Str.size() == Str2.
size())
454 unsigned long long ULLVal;
456 // Handle positive strings first.
457 if (!Str.starts_with(
"-")) {
459 // Check for value so large it overflows a signed value.
460 (
long long)ULLVal < 0)
466 // Get the positive part of the value.
469 // Reject values so large they'd overflow as negative signed, but allow
470 // "-0". This negates the unsigned so that the negative isn't undefined
471 // on signed overflow.
472 (
long long)-ULLVal > 0)
480/// GetAsUnsignedInteger - Workhorse method that converts a integer character
481/// sequence of radix up to 36 to an unsigned long long value.
483 unsigned long long &Result) {
487 // For getAsUnsignedInteger, we require the whole string to be consumed or
488 // else we consider it a failure.
497 // For getAsSignedInteger, we require the whole string to be consumed or else
498 // we consider it a failure.
505 // Autosense radix if not specified.
509 assert(Radix > 1 && Radix <= 36);
511 // Empty strings (after the radix autosense) are invalid.
512 if (Str.empty())
return true;
514 // Skip leading zeroes. This can be a significant improvement if
515 // it means we don't need > 64 bits.
516 Str = Str.ltrim(
'0');
518 // If it was nothing but zeroes....
520 Result =
APInt(64, 0);
525 // (Over-)estimate the required number of bits.
526 unsigned Log2Radix = 0;
527 while ((1U << Log2Radix) < Radix) Log2Radix++;
528 bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
530 unsigned BitWidth = Log2Radix * Str.size();
531 if (
BitWidth < Result.getBitWidth())
532 BitWidth = Result.getBitWidth();
// don't shrink the result
533 else if (
BitWidth > Result.getBitWidth())
536 APInt RadixAP, CharAP;
// unused unless !IsPowerOf2Radix
537 if (!IsPowerOf2Radix) {
538 // These must have the same bit-width as Result.
543 // Parse all the bytes of the string given this radix.
545 while (!Str.empty()) {
547 if (Str[0] >=
'0' && Str[0] <=
'9')
548 CharVal = Str[0]-
'0';
549 else if (Str[0] >=
'a' && Str[0] <=
'z')
550 CharVal = Str[0]-
'a'+10;
551 else if (Str[0] >=
'A' && Str[0] <=
'Z')
552 CharVal = Str[0]-
'A'+10;
556 // If the parsed value is larger than the integer radix, the string is
558 if (CharVal >= Radix)
561 // Add in this character.
562 if (IsPowerOf2Radix) {
563 Result <<= Log2Radix;
574 // We consider the operation a failure if no characters were consumed
576 if (
size() == Str.size())
585 if (Str.consumeInteger(Radix, Result))
588 // For getAsInteger, we require the whole string to be consumed or else we
589 // consider it a failure.
605 Result =
F.convertToDouble();
609// Implementation of StringRef hashing.
614 "Cannot hash the empty key!");
616 "Cannot hash the tombstone key!");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_UNLIKELY(EXPR)
static int ascii_strncasecmp(StringRef LHS, StringRef RHS)
static constexpr roundingMode rmNearestTiesToEven
opStatus
IEEE-754R 7: Default exception handling.
Class for arbitrary precision integers.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
LLVM_ABI size_t find_last_not_of(char C, size_t From=npos) const
Find the last character in the string that is not C, or npos if not found.
static constexpr size_t npos
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
LLVM_ABI bool getAsDouble(double &Result, bool AllowInexact=true) const
Parse the current string as an IEEE double-precision floating point value.
size_t find_if(function_ref< bool(char)> F, size_t From=0) const
Search for the first character satisfying the predicate F.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
constexpr bool empty() const
empty - Check if the string is empty.
LLVM_ABI bool starts_with_insensitive(StringRef Prefix) const
Check if this string starts with the given Prefix, ignoring case.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
LLVM_ABI std::string upper() const
Convert the given ASCII string to uppercase.
LLVM_ABI unsigned edit_distance(StringRef Other, bool AllowReplacements=true, unsigned MaxEditDistance=0) const
Determine the edit distance between this string and another string.
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
constexpr size_t size() const
size - Get the string size.
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
LLVM_ABI int compare_numeric(StringRef RHS) const
compare_numeric - Compare two strings, treating sequences of digits as numbers.
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
StringRef()=default
Construct an empty string ref.
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
StringRef take_back(size_t N=1) const
Return a StringRef equal to 'this' but with only the last N elements remaining.
StringRef take_front(size_t N=1) const
Return a StringRef equal to 'this' but with only the first N elements remaining.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
LLVM_ABI size_t rfind_insensitive(char C, size_t From=npos) const
Search for the last character C in the string, ignoring case.
LLVM_ABI std::string lower() const
LLVM_ABI size_t find_insensitive(char C, size_t From=0) const
Search for the first character C in the string, ignoring case.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
LLVM_ABI bool ends_with_insensitive(StringRef Suffix) const
Check if this string ends with the given Suffix, ignoring case.
LLVM_ABI size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
LLVM_ABI int compare_insensitive(StringRef RHS) const
Compare two strings, ignoring case.
LLVM_ABI unsigned edit_distance_insensitive(StringRef Other, bool AllowReplacements=true, unsigned MaxEditDistance=0) const
An opaque object representing a hash code.
This file defines a Levenshtein distance function that works for any two sequences,...
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
char toLower(char x)
Returns the corresponding lowercase character if x is uppercase.
LLVM_ABI bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result)
hash_code hash_value(const FixedPointSemantics &Val)
LLVM_ABI unsigned getAutoSenseRadix(StringRef &Str)
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
mapped_iterator< ItTy, FuncTy > map_iterator(ItTy I, FuncTy F)
LLVM_ABI bool consumeUnsignedInteger(StringRef &Str, unsigned Radix, unsigned long long &Result)
unsigned ComputeEditDistance(ArrayRef< T > FromArray, ArrayRef< T > ToArray, bool AllowReplacements=true, unsigned MaxEditDistance=0)
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
FunctionAddr VTableAddr Count
char toUpper(char x)
Returns the corresponding uppercase character if x is lowercase.
LLVM_ABI bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result)
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
LLVM_ABI bool getAsUnsignedInteger(StringRef Str, unsigned Radix, unsigned long long &Result)
Helper functions for StringRef::getAsInteger.
unsigned ComputeMappedEditDistance(ArrayRef< T > FromArray, ArrayRef< T > ToArray, Functor Map, bool AllowReplacements=true, unsigned MaxEditDistance=0)
Determine the edit distance between two sequences.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
An information struct used to provide DenseMap with the various necessary components for a given valu...