xref: /freebsd-src/contrib/llvm-project/llvm/lib/Support/StringRef.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===-- StringRef.cpp - Lightweight String References ---------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
100b57cec5SDimitry Andric #include "llvm/ADT/APFloat.h"
110b57cec5SDimitry Andric #include "llvm/ADT/APInt.h"
120b57cec5SDimitry Andric #include "llvm/ADT/Hashing.h"
130b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h"
140b57cec5SDimitry Andric #include "llvm/ADT/edit_distance.h"
15480093f4SDimitry Andric #include "llvm/Support/Error.h"
160b57cec5SDimitry Andric #include <bitset>
170b57cec5SDimitry Andric 
180b57cec5SDimitry Andric using namespace llvm;
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric // MSVC emits references to this into the translation units which reference it.
210b57cec5SDimitry Andric #ifndef _MSC_VER
225ffd83dbSDimitry Andric constexpr size_t StringRef::npos;
230b57cec5SDimitry Andric #endif
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric // strncasecmp() is not available on non-POSIX systems, so define an
260b57cec5SDimitry Andric // alternative function here.
270b57cec5SDimitry Andric static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
280b57cec5SDimitry Andric   for (size_t I = 0; I < Length; ++I) {
290b57cec5SDimitry Andric     unsigned char LHC = toLower(LHS[I]);
300b57cec5SDimitry Andric     unsigned char RHC = toLower(RHS[I]);
310b57cec5SDimitry Andric     if (LHC != RHC)
320b57cec5SDimitry Andric       return LHC < RHC ? -1 : 1;
330b57cec5SDimitry Andric   }
340b57cec5SDimitry Andric   return 0;
350b57cec5SDimitry Andric }
360b57cec5SDimitry Andric 
37fe6060f1SDimitry Andric int StringRef::compare_insensitive(StringRef RHS) const {
380b57cec5SDimitry Andric   if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
390b57cec5SDimitry Andric     return Res;
400b57cec5SDimitry Andric   if (Length == RHS.Length)
410b57cec5SDimitry Andric     return 0;
420b57cec5SDimitry Andric   return Length < RHS.Length ? -1 : 1;
430b57cec5SDimitry Andric }
440b57cec5SDimitry Andric 
45bdd1243dSDimitry Andric bool StringRef::starts_with_insensitive(StringRef Prefix) const {
460b57cec5SDimitry Andric   return Length >= Prefix.Length &&
470b57cec5SDimitry Andric       ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
480b57cec5SDimitry Andric }
490b57cec5SDimitry Andric 
50bdd1243dSDimitry Andric bool StringRef::ends_with_insensitive(StringRef Suffix) const {
510b57cec5SDimitry Andric   return Length >= Suffix.Length &&
520b57cec5SDimitry Andric       ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
530b57cec5SDimitry Andric }
540b57cec5SDimitry Andric 
55fe6060f1SDimitry Andric size_t StringRef::find_insensitive(char C, size_t From) const {
560b57cec5SDimitry Andric   char L = toLower(C);
570b57cec5SDimitry Andric   return find_if([L](char D) { return toLower(D) == L; }, From);
580b57cec5SDimitry Andric }
590b57cec5SDimitry Andric 
600b57cec5SDimitry Andric /// compare_numeric - Compare strings, handle embedded numbers.
610b57cec5SDimitry Andric int StringRef::compare_numeric(StringRef RHS) const {
620b57cec5SDimitry Andric   for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
630b57cec5SDimitry Andric     // Check for sequences of digits.
640b57cec5SDimitry Andric     if (isDigit(Data[I]) && isDigit(RHS.Data[I])) {
650b57cec5SDimitry Andric       // The longer sequence of numbers is considered larger.
660b57cec5SDimitry Andric       // This doesn't really handle prefixed zeros well.
670b57cec5SDimitry Andric       size_t J;
680b57cec5SDimitry Andric       for (J = I + 1; J != E + 1; ++J) {
690b57cec5SDimitry Andric         bool ld = J < Length && isDigit(Data[J]);
700b57cec5SDimitry Andric         bool rd = J < RHS.Length && isDigit(RHS.Data[J]);
710b57cec5SDimitry Andric         if (ld != rd)
720b57cec5SDimitry Andric           return rd ? -1 : 1;
730b57cec5SDimitry Andric         if (!rd)
740b57cec5SDimitry Andric           break;
750b57cec5SDimitry Andric       }
760b57cec5SDimitry Andric       // The two number sequences have the same length (J-I), just memcmp them.
770b57cec5SDimitry Andric       if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
780b57cec5SDimitry Andric         return Res < 0 ? -1 : 1;
790b57cec5SDimitry Andric       // Identical number sequences, continue search after the numbers.
800b57cec5SDimitry Andric       I = J - 1;
810b57cec5SDimitry Andric       continue;
820b57cec5SDimitry Andric     }
830b57cec5SDimitry Andric     if (Data[I] != RHS.Data[I])
840b57cec5SDimitry Andric       return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
850b57cec5SDimitry Andric   }
860b57cec5SDimitry Andric   if (Length == RHS.Length)
870b57cec5SDimitry Andric     return 0;
880b57cec5SDimitry Andric   return Length < RHS.Length ? -1 : 1;
890b57cec5SDimitry Andric }
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric // Compute the edit distance between the two given strings.
920b57cec5SDimitry Andric unsigned StringRef::edit_distance(llvm::StringRef Other,
930b57cec5SDimitry Andric                                   bool AllowReplacements,
940b57cec5SDimitry Andric                                   unsigned MaxEditDistance) const {
95bdd1243dSDimitry Andric   return llvm::ComputeEditDistance(ArrayRef(data(), size()),
96bdd1243dSDimitry Andric                                    ArrayRef(Other.data(), Other.size()),
970b57cec5SDimitry Andric                                    AllowReplacements, MaxEditDistance);
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric 
10081ad6265SDimitry Andric unsigned llvm::StringRef::edit_distance_insensitive(
10181ad6265SDimitry Andric     StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const {
10281ad6265SDimitry Andric   return llvm::ComputeMappedEditDistance(
103bdd1243dSDimitry Andric       ArrayRef(data(), size()), ArrayRef(Other.data(), Other.size()),
10481ad6265SDimitry Andric       llvm::toLower, AllowReplacements, MaxEditDistance);
10581ad6265SDimitry Andric }
10681ad6265SDimitry Andric 
1070b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1080b57cec5SDimitry Andric // String Operations
1090b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1100b57cec5SDimitry Andric 
1110b57cec5SDimitry Andric std::string StringRef::lower() const {
1125ffd83dbSDimitry Andric   return std::string(map_iterator(begin(), toLower),
1135ffd83dbSDimitry Andric                      map_iterator(end(), toLower));
1140b57cec5SDimitry Andric }
1150b57cec5SDimitry Andric 
1160b57cec5SDimitry Andric std::string StringRef::upper() const {
1175ffd83dbSDimitry Andric   return std::string(map_iterator(begin(), toUpper),
1185ffd83dbSDimitry Andric                      map_iterator(end(), toUpper));
1190b57cec5SDimitry Andric }
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1220b57cec5SDimitry Andric // String Searching
1230b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric 
1260b57cec5SDimitry Andric /// find - Search for the first string \arg Str in the string.
1270b57cec5SDimitry Andric ///
1280b57cec5SDimitry Andric /// \return - The index of the first occurrence of \arg Str, or npos if not
1290b57cec5SDimitry Andric /// found.
1300b57cec5SDimitry Andric size_t StringRef::find(StringRef Str, size_t From) const {
1310b57cec5SDimitry Andric   if (From > Length)
1320b57cec5SDimitry Andric     return npos;
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric   const char *Start = Data + From;
1350b57cec5SDimitry Andric   size_t Size = Length - From;
1360b57cec5SDimitry Andric 
1370b57cec5SDimitry Andric   const char *Needle = Str.data();
1380b57cec5SDimitry Andric   size_t N = Str.size();
1390b57cec5SDimitry Andric   if (N == 0)
1400b57cec5SDimitry Andric     return From;
1410b57cec5SDimitry Andric   if (Size < N)
1420b57cec5SDimitry Andric     return npos;
1430b57cec5SDimitry Andric   if (N == 1) {
1440b57cec5SDimitry Andric     const char *Ptr = (const char *)::memchr(Start, Needle[0], Size);
1450b57cec5SDimitry Andric     return Ptr == nullptr ? npos : Ptr - Data;
1460b57cec5SDimitry Andric   }
1470b57cec5SDimitry Andric 
1480b57cec5SDimitry Andric   const char *Stop = Start + (Size - N + 1);
1490b57cec5SDimitry Andric 
150bdd1243dSDimitry Andric   if (N == 2) {
151bdd1243dSDimitry Andric     // Provide a fast path for newline finding (CRLF case) in InclusionRewriter.
152bdd1243dSDimitry Andric     // Not the most optimized strategy, but getting memcmp inlined should be
153bdd1243dSDimitry Andric     // good enough.
154bdd1243dSDimitry Andric     do {
155bdd1243dSDimitry Andric       if (std::memcmp(Start, Needle, 2) == 0)
156bdd1243dSDimitry Andric         return Start - Data;
157bdd1243dSDimitry Andric       ++Start;
158bdd1243dSDimitry Andric     } while (Start < Stop);
159bdd1243dSDimitry Andric     return npos;
160bdd1243dSDimitry Andric   }
161bdd1243dSDimitry Andric 
1620b57cec5SDimitry Andric   // For short haystacks or unsupported needles fall back to the naive algorithm
1630b57cec5SDimitry Andric   if (Size < 16 || N > 255) {
1640b57cec5SDimitry Andric     do {
1650b57cec5SDimitry Andric       if (std::memcmp(Start, Needle, N) == 0)
1660b57cec5SDimitry Andric         return Start - Data;
1670b57cec5SDimitry Andric       ++Start;
1680b57cec5SDimitry Andric     } while (Start < Stop);
1690b57cec5SDimitry Andric     return npos;
1700b57cec5SDimitry Andric   }
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
1730b57cec5SDimitry Andric   uint8_t BadCharSkip[256];
1740b57cec5SDimitry Andric   std::memset(BadCharSkip, N, 256);
1750b57cec5SDimitry Andric   for (unsigned i = 0; i != N-1; ++i)
1760b57cec5SDimitry Andric     BadCharSkip[(uint8_t)Str[i]] = N-1-i;
1770b57cec5SDimitry Andric 
1780b57cec5SDimitry Andric   do {
1790b57cec5SDimitry Andric     uint8_t Last = Start[N - 1];
1800b57cec5SDimitry Andric     if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1]))
1810b57cec5SDimitry Andric       if (std::memcmp(Start, Needle, N - 1) == 0)
1820b57cec5SDimitry Andric         return Start - Data;
1830b57cec5SDimitry Andric 
1840b57cec5SDimitry Andric     // Otherwise skip the appropriate number of bytes.
1850b57cec5SDimitry Andric     Start += BadCharSkip[Last];
1860b57cec5SDimitry Andric   } while (Start < Stop);
1870b57cec5SDimitry Andric 
1880b57cec5SDimitry Andric   return npos;
1890b57cec5SDimitry Andric }
1900b57cec5SDimitry Andric 
191fe6060f1SDimitry Andric size_t StringRef::find_insensitive(StringRef Str, size_t From) const {
1920b57cec5SDimitry Andric   StringRef This = substr(From);
1930b57cec5SDimitry Andric   while (This.size() >= Str.size()) {
19406c3fb27SDimitry Andric     if (This.starts_with_insensitive(Str))
1950b57cec5SDimitry Andric       return From;
1960b57cec5SDimitry Andric     This = This.drop_front();
1970b57cec5SDimitry Andric     ++From;
1980b57cec5SDimitry Andric   }
1990b57cec5SDimitry Andric   return npos;
2000b57cec5SDimitry Andric }
2010b57cec5SDimitry Andric 
202fe6060f1SDimitry Andric size_t StringRef::rfind_insensitive(char C, size_t From) const {
2030b57cec5SDimitry Andric   From = std::min(From, Length);
2040b57cec5SDimitry Andric   size_t i = From;
2050b57cec5SDimitry Andric   while (i != 0) {
2060b57cec5SDimitry Andric     --i;
2070b57cec5SDimitry Andric     if (toLower(Data[i]) == toLower(C))
2080b57cec5SDimitry Andric       return i;
2090b57cec5SDimitry Andric   }
2100b57cec5SDimitry Andric   return npos;
2110b57cec5SDimitry Andric }
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric /// rfind - Search for the last string \arg Str in the string.
2140b57cec5SDimitry Andric ///
2150b57cec5SDimitry Andric /// \return - The index of the last occurrence of \arg Str, or npos if not
2160b57cec5SDimitry Andric /// found.
2170b57cec5SDimitry Andric size_t StringRef::rfind(StringRef Str) const {
218bdd1243dSDimitry Andric   return std::string_view(*this).rfind(Str);
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric 
221fe6060f1SDimitry Andric size_t StringRef::rfind_insensitive(StringRef Str) const {
2220b57cec5SDimitry Andric   size_t N = Str.size();
2230b57cec5SDimitry Andric   if (N > Length)
2240b57cec5SDimitry Andric     return npos;
2250b57cec5SDimitry Andric   for (size_t i = Length - N + 1, e = 0; i != e;) {
2260b57cec5SDimitry Andric     --i;
227fe6060f1SDimitry Andric     if (substr(i, N).equals_insensitive(Str))
2280b57cec5SDimitry Andric       return i;
2290b57cec5SDimitry Andric   }
2300b57cec5SDimitry Andric   return npos;
2310b57cec5SDimitry Andric }
2320b57cec5SDimitry Andric 
2330b57cec5SDimitry Andric /// find_first_of - Find the first character in the string that is in \arg
2340b57cec5SDimitry Andric /// Chars, or npos if not found.
2350b57cec5SDimitry Andric ///
2360b57cec5SDimitry Andric /// Note: O(size() + Chars.size())
2370b57cec5SDimitry Andric StringRef::size_type StringRef::find_first_of(StringRef Chars,
2380b57cec5SDimitry Andric                                               size_t From) const {
2390b57cec5SDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
2404824e7fdSDimitry Andric   for (char C : Chars)
2414824e7fdSDimitry Andric     CharBits.set((unsigned char)C);
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
2440b57cec5SDimitry Andric     if (CharBits.test((unsigned char)Data[i]))
2450b57cec5SDimitry Andric       return i;
2460b57cec5SDimitry Andric   return npos;
2470b57cec5SDimitry Andric }
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric /// find_first_not_of - Find the first character in the string that is not
2500b57cec5SDimitry Andric /// \arg C or npos if not found.
2510b57cec5SDimitry Andric StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
252bdd1243dSDimitry Andric   return std::string_view(*this).find_first_not_of(C, From);
2530b57cec5SDimitry Andric }
2540b57cec5SDimitry Andric 
2550b57cec5SDimitry Andric /// find_first_not_of - Find the first character in the string that is not
2560b57cec5SDimitry Andric /// in the string \arg Chars, or npos if not found.
2570b57cec5SDimitry Andric ///
2580b57cec5SDimitry Andric /// Note: O(size() + Chars.size())
2590b57cec5SDimitry Andric StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
2600b57cec5SDimitry Andric                                                   size_t From) const {
2610b57cec5SDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
2624824e7fdSDimitry Andric   for (char C : Chars)
2634824e7fdSDimitry Andric     CharBits.set((unsigned char)C);
2640b57cec5SDimitry Andric 
2650b57cec5SDimitry Andric   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
2660b57cec5SDimitry Andric     if (!CharBits.test((unsigned char)Data[i]))
2670b57cec5SDimitry Andric       return i;
2680b57cec5SDimitry Andric   return npos;
2690b57cec5SDimitry Andric }
2700b57cec5SDimitry Andric 
2710b57cec5SDimitry Andric /// find_last_of - Find the last character in the string that is in \arg C,
2720b57cec5SDimitry Andric /// or npos if not found.
2730b57cec5SDimitry Andric ///
2740b57cec5SDimitry Andric /// Note: O(size() + Chars.size())
2750b57cec5SDimitry Andric StringRef::size_type StringRef::find_last_of(StringRef Chars,
2760b57cec5SDimitry Andric                                              size_t From) const {
2770b57cec5SDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
2784824e7fdSDimitry Andric   for (char C : Chars)
2794824e7fdSDimitry Andric     CharBits.set((unsigned char)C);
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
2820b57cec5SDimitry Andric     if (CharBits.test((unsigned char)Data[i]))
2830b57cec5SDimitry Andric       return i;
2840b57cec5SDimitry Andric   return npos;
2850b57cec5SDimitry Andric }
2860b57cec5SDimitry Andric 
2870b57cec5SDimitry Andric /// find_last_not_of - Find the last character in the string that is not
2880b57cec5SDimitry Andric /// \arg C, or npos if not found.
2890b57cec5SDimitry Andric StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
2900b57cec5SDimitry Andric   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
2910b57cec5SDimitry Andric     if (Data[i] != C)
2920b57cec5SDimitry Andric       return i;
2930b57cec5SDimitry Andric   return npos;
2940b57cec5SDimitry Andric }
2950b57cec5SDimitry Andric 
2960b57cec5SDimitry Andric /// find_last_not_of - Find the last character in the string that is not in
2970b57cec5SDimitry Andric /// \arg Chars, or npos if not found.
2980b57cec5SDimitry Andric ///
2990b57cec5SDimitry Andric /// Note: O(size() + Chars.size())
3000b57cec5SDimitry Andric StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
3010b57cec5SDimitry Andric                                                  size_t From) const {
3020b57cec5SDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
3034824e7fdSDimitry Andric   for (char C : Chars)
3044824e7fdSDimitry Andric     CharBits.set((unsigned char)C);
3050b57cec5SDimitry Andric 
3060b57cec5SDimitry Andric   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
3070b57cec5SDimitry Andric     if (!CharBits.test((unsigned char)Data[i]))
3080b57cec5SDimitry Andric       return i;
3090b57cec5SDimitry Andric   return npos;
3100b57cec5SDimitry Andric }
3110b57cec5SDimitry Andric 
3120b57cec5SDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A,
3130b57cec5SDimitry Andric                       StringRef Separator, int MaxSplit,
3140b57cec5SDimitry Andric                       bool KeepEmpty) const {
3150b57cec5SDimitry Andric   StringRef S = *this;
3160b57cec5SDimitry Andric 
3170b57cec5SDimitry Andric   // Count down from MaxSplit. When MaxSplit is -1, this will just split
3180b57cec5SDimitry Andric   // "forever". This doesn't support splitting more than 2^31 times
3190b57cec5SDimitry Andric   // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
3200b57cec5SDimitry Andric   // but that seems unlikely to be useful.
3210b57cec5SDimitry Andric   while (MaxSplit-- != 0) {
3220b57cec5SDimitry Andric     size_t Idx = S.find(Separator);
3230b57cec5SDimitry Andric     if (Idx == npos)
3240b57cec5SDimitry Andric       break;
3250b57cec5SDimitry Andric 
3260b57cec5SDimitry Andric     // Push this split.
3270b57cec5SDimitry Andric     if (KeepEmpty || Idx > 0)
3280b57cec5SDimitry Andric       A.push_back(S.slice(0, Idx));
3290b57cec5SDimitry Andric 
3300b57cec5SDimitry Andric     // Jump forward.
3310b57cec5SDimitry Andric     S = S.slice(Idx + Separator.size(), npos);
3320b57cec5SDimitry Andric   }
3330b57cec5SDimitry Andric 
3340b57cec5SDimitry Andric   // Push the tail.
3350b57cec5SDimitry Andric   if (KeepEmpty || !S.empty())
3360b57cec5SDimitry Andric     A.push_back(S);
3370b57cec5SDimitry Andric }
3380b57cec5SDimitry Andric 
3390b57cec5SDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
3400b57cec5SDimitry Andric                       int MaxSplit, bool KeepEmpty) const {
3410b57cec5SDimitry Andric   StringRef S = *this;
3420b57cec5SDimitry Andric 
3430b57cec5SDimitry Andric   // Count down from MaxSplit. When MaxSplit is -1, this will just split
3440b57cec5SDimitry Andric   // "forever". This doesn't support splitting more than 2^31 times
3450b57cec5SDimitry Andric   // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
3460b57cec5SDimitry Andric   // but that seems unlikely to be useful.
3470b57cec5SDimitry Andric   while (MaxSplit-- != 0) {
3480b57cec5SDimitry Andric     size_t Idx = S.find(Separator);
3490b57cec5SDimitry Andric     if (Idx == npos)
3500b57cec5SDimitry Andric       break;
3510b57cec5SDimitry Andric 
3520b57cec5SDimitry Andric     // Push this split.
3530b57cec5SDimitry Andric     if (KeepEmpty || Idx > 0)
3540b57cec5SDimitry Andric       A.push_back(S.slice(0, Idx));
3550b57cec5SDimitry Andric 
3560b57cec5SDimitry Andric     // Jump forward.
3570b57cec5SDimitry Andric     S = S.slice(Idx + 1, npos);
3580b57cec5SDimitry Andric   }
3590b57cec5SDimitry Andric 
3600b57cec5SDimitry Andric   // Push the tail.
3610b57cec5SDimitry Andric   if (KeepEmpty || !S.empty())
3620b57cec5SDimitry Andric     A.push_back(S);
3630b57cec5SDimitry Andric }
3640b57cec5SDimitry Andric 
3650b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
3660b57cec5SDimitry Andric // Helpful Algorithms
3670b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
3680b57cec5SDimitry Andric 
3690b57cec5SDimitry Andric /// count - Return the number of non-overlapped occurrences of \arg Str in
3700b57cec5SDimitry Andric /// the string.
3710b57cec5SDimitry Andric size_t StringRef::count(StringRef Str) const {
3720b57cec5SDimitry Andric   size_t Count = 0;
373bdd1243dSDimitry Andric   size_t Pos = 0;
3740b57cec5SDimitry Andric   size_t N = Str.size();
375bdd1243dSDimitry Andric   // TODO: For an empty `Str` we return 0 for legacy reasons. Consider changing
376bdd1243dSDimitry Andric   //       this to `Length + 1` which is more in-line with the function
377bdd1243dSDimitry Andric   //       description.
378bdd1243dSDimitry Andric   if (!N)
3790b57cec5SDimitry Andric     return 0;
380bdd1243dSDimitry Andric   while ((Pos = find(Str, Pos)) != npos) {
3810b57cec5SDimitry Andric     ++Count;
382bdd1243dSDimitry Andric     Pos += N;
383480093f4SDimitry Andric   }
3840b57cec5SDimitry Andric   return Count;
3850b57cec5SDimitry Andric }
3860b57cec5SDimitry Andric 
3870b57cec5SDimitry Andric static unsigned GetAutoSenseRadix(StringRef &Str) {
3880b57cec5SDimitry Andric   if (Str.empty())
3890b57cec5SDimitry Andric     return 10;
3900b57cec5SDimitry Andric 
3917a6dacacSDimitry Andric   if (Str.consume_front_insensitive("0x"))
3920b57cec5SDimitry Andric     return 16;
3930b57cec5SDimitry Andric 
3947a6dacacSDimitry Andric   if (Str.consume_front_insensitive("0b"))
3950b57cec5SDimitry Andric     return 2;
3960b57cec5SDimitry Andric 
3977a6dacacSDimitry Andric   if (Str.consume_front("0o"))
3980b57cec5SDimitry Andric     return 8;
3990b57cec5SDimitry Andric 
4000b57cec5SDimitry Andric   if (Str[0] == '0' && Str.size() > 1 && isDigit(Str[1])) {
4010b57cec5SDimitry Andric     Str = Str.substr(1);
4020b57cec5SDimitry Andric     return 8;
4030b57cec5SDimitry Andric   }
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric   return 10;
4060b57cec5SDimitry Andric }
4070b57cec5SDimitry Andric 
4080b57cec5SDimitry Andric bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
4090b57cec5SDimitry Andric                                   unsigned long long &Result) {
4100b57cec5SDimitry Andric   // Autosense radix if not specified.
4110b57cec5SDimitry Andric   if (Radix == 0)
4120b57cec5SDimitry Andric     Radix = GetAutoSenseRadix(Str);
4130b57cec5SDimitry Andric 
4140b57cec5SDimitry Andric   // Empty strings (after the radix autosense) are invalid.
4150b57cec5SDimitry Andric   if (Str.empty()) return true;
4160b57cec5SDimitry Andric 
4170b57cec5SDimitry Andric   // Parse all the bytes of the string given this radix.  Watch for overflow.
4180b57cec5SDimitry Andric   StringRef Str2 = Str;
4190b57cec5SDimitry Andric   Result = 0;
4200b57cec5SDimitry Andric   while (!Str2.empty()) {
4210b57cec5SDimitry Andric     unsigned CharVal;
4220b57cec5SDimitry Andric     if (Str2[0] >= '0' && Str2[0] <= '9')
4230b57cec5SDimitry Andric       CharVal = Str2[0] - '0';
4240b57cec5SDimitry Andric     else if (Str2[0] >= 'a' && Str2[0] <= 'z')
4250b57cec5SDimitry Andric       CharVal = Str2[0] - 'a' + 10;
4260b57cec5SDimitry Andric     else if (Str2[0] >= 'A' && Str2[0] <= 'Z')
4270b57cec5SDimitry Andric       CharVal = Str2[0] - 'A' + 10;
4280b57cec5SDimitry Andric     else
4290b57cec5SDimitry Andric       break;
4300b57cec5SDimitry Andric 
4310b57cec5SDimitry Andric     // If the parsed value is larger than the integer radix, we cannot
4320b57cec5SDimitry Andric     // consume any more characters.
4330b57cec5SDimitry Andric     if (CharVal >= Radix)
4340b57cec5SDimitry Andric       break;
4350b57cec5SDimitry Andric 
4360b57cec5SDimitry Andric     // Add in this character.
4370b57cec5SDimitry Andric     unsigned long long PrevResult = Result;
4380b57cec5SDimitry Andric     Result = Result * Radix + CharVal;
4390b57cec5SDimitry Andric 
4400b57cec5SDimitry Andric     // Check for overflow by shifting back and seeing if bits were lost.
4410b57cec5SDimitry Andric     if (Result / Radix < PrevResult)
4420b57cec5SDimitry Andric       return true;
4430b57cec5SDimitry Andric 
4440b57cec5SDimitry Andric     Str2 = Str2.substr(1);
4450b57cec5SDimitry Andric   }
4460b57cec5SDimitry Andric 
4470b57cec5SDimitry Andric   // We consider the operation a failure if no characters were consumed
4480b57cec5SDimitry Andric   // successfully.
4490b57cec5SDimitry Andric   if (Str.size() == Str2.size())
4500b57cec5SDimitry Andric     return true;
4510b57cec5SDimitry Andric 
4520b57cec5SDimitry Andric   Str = Str2;
4530b57cec5SDimitry Andric   return false;
4540b57cec5SDimitry Andric }
4550b57cec5SDimitry Andric 
4560b57cec5SDimitry Andric bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix,
4570b57cec5SDimitry Andric                                 long long &Result) {
4580b57cec5SDimitry Andric   unsigned long long ULLVal;
4590b57cec5SDimitry Andric 
4600b57cec5SDimitry Andric   // Handle positive strings first.
461*0fca6ea1SDimitry Andric   if (!Str.starts_with("-")) {
4620b57cec5SDimitry Andric     if (consumeUnsignedInteger(Str, Radix, ULLVal) ||
4630b57cec5SDimitry Andric         // Check for value so large it overflows a signed value.
4640b57cec5SDimitry Andric         (long long)ULLVal < 0)
4650b57cec5SDimitry Andric       return true;
4660b57cec5SDimitry Andric     Result = ULLVal;
4670b57cec5SDimitry Andric     return false;
4680b57cec5SDimitry Andric   }
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric   // Get the positive part of the value.
4710b57cec5SDimitry Andric   StringRef Str2 = Str.drop_front(1);
4720b57cec5SDimitry Andric   if (consumeUnsignedInteger(Str2, Radix, ULLVal) ||
4730b57cec5SDimitry Andric       // Reject values so large they'd overflow as negative signed, but allow
4740b57cec5SDimitry Andric       // "-0".  This negates the unsigned so that the negative isn't undefined
4750b57cec5SDimitry Andric       // on signed overflow.
4760b57cec5SDimitry Andric       (long long)-ULLVal > 0)
4770b57cec5SDimitry Andric     return true;
4780b57cec5SDimitry Andric 
4790b57cec5SDimitry Andric   Str = Str2;
4800b57cec5SDimitry Andric   Result = -ULLVal;
4810b57cec5SDimitry Andric   return false;
4820b57cec5SDimitry Andric }
4830b57cec5SDimitry Andric 
4840b57cec5SDimitry Andric /// GetAsUnsignedInteger - Workhorse method that converts a integer character
4850b57cec5SDimitry Andric /// sequence of radix up to 36 to an unsigned long long value.
4860b57cec5SDimitry Andric bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
4870b57cec5SDimitry Andric                                 unsigned long long &Result) {
4880b57cec5SDimitry Andric   if (consumeUnsignedInteger(Str, Radix, Result))
4890b57cec5SDimitry Andric     return true;
4900b57cec5SDimitry Andric 
4910b57cec5SDimitry Andric   // For getAsUnsignedInteger, we require the whole string to be consumed or
4920b57cec5SDimitry Andric   // else we consider it a failure.
4930b57cec5SDimitry Andric   return !Str.empty();
4940b57cec5SDimitry Andric }
4950b57cec5SDimitry Andric 
4960b57cec5SDimitry Andric bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
4970b57cec5SDimitry Andric                               long long &Result) {
4980b57cec5SDimitry Andric   if (consumeSignedInteger(Str, Radix, Result))
4990b57cec5SDimitry Andric     return true;
5000b57cec5SDimitry Andric 
5010b57cec5SDimitry Andric   // For getAsSignedInteger, we require the whole string to be consumed or else
5020b57cec5SDimitry Andric   // we consider it a failure.
5030b57cec5SDimitry Andric   return !Str.empty();
5040b57cec5SDimitry Andric }
5050b57cec5SDimitry Andric 
50606c3fb27SDimitry Andric bool StringRef::consumeInteger(unsigned Radix, APInt &Result) {
5070b57cec5SDimitry Andric   StringRef Str = *this;
5080b57cec5SDimitry Andric 
5090b57cec5SDimitry Andric   // Autosense radix if not specified.
5100b57cec5SDimitry Andric   if (Radix == 0)
5110b57cec5SDimitry Andric     Radix = GetAutoSenseRadix(Str);
5120b57cec5SDimitry Andric 
5130b57cec5SDimitry Andric   assert(Radix > 1 && Radix <= 36);
5140b57cec5SDimitry Andric 
5150b57cec5SDimitry Andric   // Empty strings (after the radix autosense) are invalid.
5160b57cec5SDimitry Andric   if (Str.empty()) return true;
5170b57cec5SDimitry Andric 
5180b57cec5SDimitry Andric   // Skip leading zeroes.  This can be a significant improvement if
5190b57cec5SDimitry Andric   // it means we don't need > 64 bits.
5207a6dacacSDimitry Andric   Str = Str.ltrim('0');
5210b57cec5SDimitry Andric 
5220b57cec5SDimitry Andric   // If it was nothing but zeroes....
5230b57cec5SDimitry Andric   if (Str.empty()) {
5240b57cec5SDimitry Andric     Result = APInt(64, 0);
52506c3fb27SDimitry Andric     *this = Str;
5260b57cec5SDimitry Andric     return false;
5270b57cec5SDimitry Andric   }
5280b57cec5SDimitry Andric 
5290b57cec5SDimitry Andric   // (Over-)estimate the required number of bits.
5300b57cec5SDimitry Andric   unsigned Log2Radix = 0;
5310b57cec5SDimitry Andric   while ((1U << Log2Radix) < Radix) Log2Radix++;
5320b57cec5SDimitry Andric   bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
5330b57cec5SDimitry Andric 
5340b57cec5SDimitry Andric   unsigned BitWidth = Log2Radix * Str.size();
5350b57cec5SDimitry Andric   if (BitWidth < Result.getBitWidth())
5360b57cec5SDimitry Andric     BitWidth = Result.getBitWidth(); // don't shrink the result
5370b57cec5SDimitry Andric   else if (BitWidth > Result.getBitWidth())
5380b57cec5SDimitry Andric     Result = Result.zext(BitWidth);
5390b57cec5SDimitry Andric 
5400b57cec5SDimitry Andric   APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
5410b57cec5SDimitry Andric   if (!IsPowerOf2Radix) {
5420b57cec5SDimitry Andric     // These must have the same bit-width as Result.
5430b57cec5SDimitry Andric     RadixAP = APInt(BitWidth, Radix);
5440b57cec5SDimitry Andric     CharAP = APInt(BitWidth, 0);
5450b57cec5SDimitry Andric   }
5460b57cec5SDimitry Andric 
5470b57cec5SDimitry Andric   // Parse all the bytes of the string given this radix.
5480b57cec5SDimitry Andric   Result = 0;
5490b57cec5SDimitry Andric   while (!Str.empty()) {
5500b57cec5SDimitry Andric     unsigned CharVal;
5510b57cec5SDimitry Andric     if (Str[0] >= '0' && Str[0] <= '9')
5520b57cec5SDimitry Andric       CharVal = Str[0]-'0';
5530b57cec5SDimitry Andric     else if (Str[0] >= 'a' && Str[0] <= 'z')
5540b57cec5SDimitry Andric       CharVal = Str[0]-'a'+10;
5550b57cec5SDimitry Andric     else if (Str[0] >= 'A' && Str[0] <= 'Z')
5560b57cec5SDimitry Andric       CharVal = Str[0]-'A'+10;
5570b57cec5SDimitry Andric     else
55806c3fb27SDimitry Andric       break;
5590b57cec5SDimitry Andric 
5600b57cec5SDimitry Andric     // If the parsed value is larger than the integer radix, the string is
5610b57cec5SDimitry Andric     // invalid.
5620b57cec5SDimitry Andric     if (CharVal >= Radix)
56306c3fb27SDimitry Andric       break;
5640b57cec5SDimitry Andric 
5650b57cec5SDimitry Andric     // Add in this character.
5660b57cec5SDimitry Andric     if (IsPowerOf2Radix) {
5670b57cec5SDimitry Andric       Result <<= Log2Radix;
5680b57cec5SDimitry Andric       Result |= CharVal;
5690b57cec5SDimitry Andric     } else {
5700b57cec5SDimitry Andric       Result *= RadixAP;
5710b57cec5SDimitry Andric       CharAP = CharVal;
5720b57cec5SDimitry Andric       Result += CharAP;
5730b57cec5SDimitry Andric     }
5740b57cec5SDimitry Andric 
5750b57cec5SDimitry Andric     Str = Str.substr(1);
5760b57cec5SDimitry Andric   }
5770b57cec5SDimitry Andric 
57806c3fb27SDimitry Andric   // We consider the operation a failure if no characters were consumed
57906c3fb27SDimitry Andric   // successfully.
58006c3fb27SDimitry Andric   if (size() == Str.size())
58106c3fb27SDimitry Andric     return true;
58206c3fb27SDimitry Andric 
58306c3fb27SDimitry Andric   *this = Str;
5840b57cec5SDimitry Andric   return false;
5850b57cec5SDimitry Andric }
5860b57cec5SDimitry Andric 
58706c3fb27SDimitry Andric bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
58806c3fb27SDimitry Andric   StringRef Str = *this;
58906c3fb27SDimitry Andric   if (Str.consumeInteger(Radix, Result))
59006c3fb27SDimitry Andric     return true;
59106c3fb27SDimitry Andric 
59206c3fb27SDimitry Andric   // For getAsInteger, we require the whole string to be consumed or else we
59306c3fb27SDimitry Andric   // consider it a failure.
59406c3fb27SDimitry Andric   return !Str.empty();
59506c3fb27SDimitry Andric }
59606c3fb27SDimitry Andric 
5970b57cec5SDimitry Andric bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
5980b57cec5SDimitry Andric   APFloat F(0.0);
599480093f4SDimitry Andric   auto StatusOrErr = F.convertFromString(*this, APFloat::rmNearestTiesToEven);
600480093f4SDimitry Andric   if (errorToBool(StatusOrErr.takeError()))
601480093f4SDimitry Andric     return true;
602480093f4SDimitry Andric 
603480093f4SDimitry Andric   APFloat::opStatus Status = *StatusOrErr;
6040b57cec5SDimitry Andric   if (Status != APFloat::opOK) {
6050b57cec5SDimitry Andric     if (!AllowInexact || !(Status & APFloat::opInexact))
6060b57cec5SDimitry Andric       return true;
6070b57cec5SDimitry Andric   }
6080b57cec5SDimitry Andric 
6090b57cec5SDimitry Andric   Result = F.convertToDouble();
6100b57cec5SDimitry Andric   return false;
6110b57cec5SDimitry Andric }
6120b57cec5SDimitry Andric 
6130b57cec5SDimitry Andric // Implementation of StringRef hashing.
6140b57cec5SDimitry Andric hash_code llvm::hash_value(StringRef S) {
6150b57cec5SDimitry Andric   return hash_combine_range(S.begin(), S.end());
6160b57cec5SDimitry Andric }
61704eeddc0SDimitry Andric 
61804eeddc0SDimitry Andric unsigned DenseMapInfo<StringRef, void>::getHashValue(StringRef Val) {
61904eeddc0SDimitry Andric   assert(Val.data() != getEmptyKey().data() &&
62004eeddc0SDimitry Andric          "Cannot hash the empty key!");
62104eeddc0SDimitry Andric   assert(Val.data() != getTombstoneKey().data() &&
62204eeddc0SDimitry Andric          "Cannot hash the tombstone key!");
62304eeddc0SDimitry Andric   return (unsigned)(hash_value(Val));
62404eeddc0SDimitry Andric }
625