10b57cec5SDimitry Andric //===-- StringRef.cpp - Lightweight String References ---------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 100b57cec5SDimitry Andric #include "llvm/ADT/APFloat.h" 110b57cec5SDimitry Andric #include "llvm/ADT/APInt.h" 120b57cec5SDimitry Andric #include "llvm/ADT/Hashing.h" 130b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h" 140b57cec5SDimitry Andric #include "llvm/ADT/edit_distance.h" 15480093f4SDimitry Andric #include "llvm/Support/Error.h" 160b57cec5SDimitry Andric #include <bitset> 170b57cec5SDimitry Andric 180b57cec5SDimitry Andric using namespace llvm; 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric // MSVC emits references to this into the translation units which reference it. 210b57cec5SDimitry Andric #ifndef _MSC_VER 225ffd83dbSDimitry Andric constexpr size_t StringRef::npos; 230b57cec5SDimitry Andric #endif 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric // strncasecmp() is not available on non-POSIX systems, so define an 260b57cec5SDimitry Andric // alternative function here. 270b57cec5SDimitry Andric static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) { 280b57cec5SDimitry Andric for (size_t I = 0; I < Length; ++I) { 290b57cec5SDimitry Andric unsigned char LHC = toLower(LHS[I]); 300b57cec5SDimitry Andric unsigned char RHC = toLower(RHS[I]); 310b57cec5SDimitry Andric if (LHC != RHC) 320b57cec5SDimitry Andric return LHC < RHC ? -1 : 1; 330b57cec5SDimitry Andric } 340b57cec5SDimitry Andric return 0; 350b57cec5SDimitry Andric } 360b57cec5SDimitry Andric 37fe6060f1SDimitry Andric int StringRef::compare_insensitive(StringRef RHS) const { 380b57cec5SDimitry Andric if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length))) 390b57cec5SDimitry Andric return Res; 400b57cec5SDimitry Andric if (Length == RHS.Length) 410b57cec5SDimitry Andric return 0; 420b57cec5SDimitry Andric return Length < RHS.Length ? -1 : 1; 430b57cec5SDimitry Andric } 440b57cec5SDimitry Andric 45bdd1243dSDimitry Andric bool StringRef::starts_with_insensitive(StringRef Prefix) const { 460b57cec5SDimitry Andric return Length >= Prefix.Length && 470b57cec5SDimitry Andric ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0; 480b57cec5SDimitry Andric } 490b57cec5SDimitry Andric 50bdd1243dSDimitry Andric bool StringRef::ends_with_insensitive(StringRef Suffix) const { 510b57cec5SDimitry Andric return Length >= Suffix.Length && 520b57cec5SDimitry Andric ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric 55fe6060f1SDimitry Andric size_t StringRef::find_insensitive(char C, size_t From) const { 560b57cec5SDimitry Andric char L = toLower(C); 570b57cec5SDimitry Andric return find_if([L](char D) { return toLower(D) == L; }, From); 580b57cec5SDimitry Andric } 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric /// compare_numeric - Compare strings, handle embedded numbers. 610b57cec5SDimitry Andric int StringRef::compare_numeric(StringRef RHS) const { 620b57cec5SDimitry Andric for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) { 630b57cec5SDimitry Andric // Check for sequences of digits. 640b57cec5SDimitry Andric if (isDigit(Data[I]) && isDigit(RHS.Data[I])) { 650b57cec5SDimitry Andric // The longer sequence of numbers is considered larger. 660b57cec5SDimitry Andric // This doesn't really handle prefixed zeros well. 670b57cec5SDimitry Andric size_t J; 680b57cec5SDimitry Andric for (J = I + 1; J != E + 1; ++J) { 690b57cec5SDimitry Andric bool ld = J < Length && isDigit(Data[J]); 700b57cec5SDimitry Andric bool rd = J < RHS.Length && isDigit(RHS.Data[J]); 710b57cec5SDimitry Andric if (ld != rd) 720b57cec5SDimitry Andric return rd ? -1 : 1; 730b57cec5SDimitry Andric if (!rd) 740b57cec5SDimitry Andric break; 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric // The two number sequences have the same length (J-I), just memcmp them. 770b57cec5SDimitry Andric if (int Res = compareMemory(Data + I, RHS.Data + I, J - I)) 780b57cec5SDimitry Andric return Res < 0 ? -1 : 1; 790b57cec5SDimitry Andric // Identical number sequences, continue search after the numbers. 800b57cec5SDimitry Andric I = J - 1; 810b57cec5SDimitry Andric continue; 820b57cec5SDimitry Andric } 830b57cec5SDimitry Andric if (Data[I] != RHS.Data[I]) 840b57cec5SDimitry Andric return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1; 850b57cec5SDimitry Andric } 860b57cec5SDimitry Andric if (Length == RHS.Length) 870b57cec5SDimitry Andric return 0; 880b57cec5SDimitry Andric return Length < RHS.Length ? -1 : 1; 890b57cec5SDimitry Andric } 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric // Compute the edit distance between the two given strings. 920b57cec5SDimitry Andric unsigned StringRef::edit_distance(llvm::StringRef Other, 930b57cec5SDimitry Andric bool AllowReplacements, 940b57cec5SDimitry Andric unsigned MaxEditDistance) const { 95bdd1243dSDimitry Andric return llvm::ComputeEditDistance(ArrayRef(data(), size()), 96bdd1243dSDimitry Andric ArrayRef(Other.data(), Other.size()), 970b57cec5SDimitry Andric AllowReplacements, MaxEditDistance); 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 10081ad6265SDimitry Andric unsigned llvm::StringRef::edit_distance_insensitive( 10181ad6265SDimitry Andric StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const { 10281ad6265SDimitry Andric return llvm::ComputeMappedEditDistance( 103bdd1243dSDimitry Andric ArrayRef(data(), size()), ArrayRef(Other.data(), Other.size()), 10481ad6265SDimitry Andric llvm::toLower, AllowReplacements, MaxEditDistance); 10581ad6265SDimitry Andric } 10681ad6265SDimitry Andric 1070b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 1080b57cec5SDimitry Andric // String Operations 1090b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric std::string StringRef::lower() const { 1125ffd83dbSDimitry Andric return std::string(map_iterator(begin(), toLower), 1135ffd83dbSDimitry Andric map_iterator(end(), toLower)); 1140b57cec5SDimitry Andric } 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric std::string StringRef::upper() const { 1175ffd83dbSDimitry Andric return std::string(map_iterator(begin(), toUpper), 1185ffd83dbSDimitry Andric map_iterator(end(), toUpper)); 1190b57cec5SDimitry Andric } 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 1220b57cec5SDimitry Andric // String Searching 1230b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric /// find - Search for the first string \arg Str in the string. 1270b57cec5SDimitry Andric /// 1280b57cec5SDimitry Andric /// \return - The index of the first occurrence of \arg Str, or npos if not 1290b57cec5SDimitry Andric /// found. 1300b57cec5SDimitry Andric size_t StringRef::find(StringRef Str, size_t From) const { 1310b57cec5SDimitry Andric if (From > Length) 1320b57cec5SDimitry Andric return npos; 1330b57cec5SDimitry Andric 1340b57cec5SDimitry Andric const char *Start = Data + From; 1350b57cec5SDimitry Andric size_t Size = Length - From; 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric const char *Needle = Str.data(); 1380b57cec5SDimitry Andric size_t N = Str.size(); 1390b57cec5SDimitry Andric if (N == 0) 1400b57cec5SDimitry Andric return From; 1410b57cec5SDimitry Andric if (Size < N) 1420b57cec5SDimitry Andric return npos; 1430b57cec5SDimitry Andric if (N == 1) { 1440b57cec5SDimitry Andric const char *Ptr = (const char *)::memchr(Start, Needle[0], Size); 1450b57cec5SDimitry Andric return Ptr == nullptr ? npos : Ptr - Data; 1460b57cec5SDimitry Andric } 1470b57cec5SDimitry Andric 1480b57cec5SDimitry Andric const char *Stop = Start + (Size - N + 1); 1490b57cec5SDimitry Andric 150bdd1243dSDimitry Andric if (N == 2) { 151bdd1243dSDimitry Andric // Provide a fast path for newline finding (CRLF case) in InclusionRewriter. 152bdd1243dSDimitry Andric // Not the most optimized strategy, but getting memcmp inlined should be 153bdd1243dSDimitry Andric // good enough. 154bdd1243dSDimitry Andric do { 155bdd1243dSDimitry Andric if (std::memcmp(Start, Needle, 2) == 0) 156bdd1243dSDimitry Andric return Start - Data; 157bdd1243dSDimitry Andric ++Start; 158bdd1243dSDimitry Andric } while (Start < Stop); 159bdd1243dSDimitry Andric return npos; 160bdd1243dSDimitry Andric } 161bdd1243dSDimitry Andric 1620b57cec5SDimitry Andric // For short haystacks or unsupported needles fall back to the naive algorithm 1630b57cec5SDimitry Andric if (Size < 16 || N > 255) { 1640b57cec5SDimitry Andric do { 1650b57cec5SDimitry Andric if (std::memcmp(Start, Needle, N) == 0) 1660b57cec5SDimitry Andric return Start - Data; 1670b57cec5SDimitry Andric ++Start; 1680b57cec5SDimitry Andric } while (Start < Stop); 1690b57cec5SDimitry Andric return npos; 1700b57cec5SDimitry Andric } 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric // Build the bad char heuristic table, with uint8_t to reduce cache thrashing. 1730b57cec5SDimitry Andric uint8_t BadCharSkip[256]; 1740b57cec5SDimitry Andric std::memset(BadCharSkip, N, 256); 1750b57cec5SDimitry Andric for (unsigned i = 0; i != N-1; ++i) 1760b57cec5SDimitry Andric BadCharSkip[(uint8_t)Str[i]] = N-1-i; 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric do { 1790b57cec5SDimitry Andric uint8_t Last = Start[N - 1]; 1800b57cec5SDimitry Andric if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1])) 1810b57cec5SDimitry Andric if (std::memcmp(Start, Needle, N - 1) == 0) 1820b57cec5SDimitry Andric return Start - Data; 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric // Otherwise skip the appropriate number of bytes. 1850b57cec5SDimitry Andric Start += BadCharSkip[Last]; 1860b57cec5SDimitry Andric } while (Start < Stop); 1870b57cec5SDimitry Andric 1880b57cec5SDimitry Andric return npos; 1890b57cec5SDimitry Andric } 1900b57cec5SDimitry Andric 191fe6060f1SDimitry Andric size_t StringRef::find_insensitive(StringRef Str, size_t From) const { 1920b57cec5SDimitry Andric StringRef This = substr(From); 1930b57cec5SDimitry Andric while (This.size() >= Str.size()) { 19406c3fb27SDimitry Andric if (This.starts_with_insensitive(Str)) 1950b57cec5SDimitry Andric return From; 1960b57cec5SDimitry Andric This = This.drop_front(); 1970b57cec5SDimitry Andric ++From; 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric return npos; 2000b57cec5SDimitry Andric } 2010b57cec5SDimitry Andric 202fe6060f1SDimitry Andric size_t StringRef::rfind_insensitive(char C, size_t From) const { 2030b57cec5SDimitry Andric From = std::min(From, Length); 2040b57cec5SDimitry Andric size_t i = From; 2050b57cec5SDimitry Andric while (i != 0) { 2060b57cec5SDimitry Andric --i; 2070b57cec5SDimitry Andric if (toLower(Data[i]) == toLower(C)) 2080b57cec5SDimitry Andric return i; 2090b57cec5SDimitry Andric } 2100b57cec5SDimitry Andric return npos; 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric /// rfind - Search for the last string \arg Str in the string. 2140b57cec5SDimitry Andric /// 2150b57cec5SDimitry Andric /// \return - The index of the last occurrence of \arg Str, or npos if not 2160b57cec5SDimitry Andric /// found. 2170b57cec5SDimitry Andric size_t StringRef::rfind(StringRef Str) const { 218bdd1243dSDimitry Andric return std::string_view(*this).rfind(Str); 2190b57cec5SDimitry Andric } 2200b57cec5SDimitry Andric 221fe6060f1SDimitry Andric size_t StringRef::rfind_insensitive(StringRef Str) const { 2220b57cec5SDimitry Andric size_t N = Str.size(); 2230b57cec5SDimitry Andric if (N > Length) 2240b57cec5SDimitry Andric return npos; 2250b57cec5SDimitry Andric for (size_t i = Length - N + 1, e = 0; i != e;) { 2260b57cec5SDimitry Andric --i; 227fe6060f1SDimitry Andric if (substr(i, N).equals_insensitive(Str)) 2280b57cec5SDimitry Andric return i; 2290b57cec5SDimitry Andric } 2300b57cec5SDimitry Andric return npos; 2310b57cec5SDimitry Andric } 2320b57cec5SDimitry Andric 2330b57cec5SDimitry Andric /// find_first_of - Find the first character in the string that is in \arg 2340b57cec5SDimitry Andric /// Chars, or npos if not found. 2350b57cec5SDimitry Andric /// 2360b57cec5SDimitry Andric /// Note: O(size() + Chars.size()) 2370b57cec5SDimitry Andric StringRef::size_type StringRef::find_first_of(StringRef Chars, 2380b57cec5SDimitry Andric size_t From) const { 2390b57cec5SDimitry Andric std::bitset<1 << CHAR_BIT> CharBits; 2404824e7fdSDimitry Andric for (char C : Chars) 2414824e7fdSDimitry Andric CharBits.set((unsigned char)C); 2420b57cec5SDimitry Andric 2430b57cec5SDimitry Andric for (size_type i = std::min(From, Length), e = Length; i != e; ++i) 2440b57cec5SDimitry Andric if (CharBits.test((unsigned char)Data[i])) 2450b57cec5SDimitry Andric return i; 2460b57cec5SDimitry Andric return npos; 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric /// find_first_not_of - Find the first character in the string that is not 2500b57cec5SDimitry Andric /// \arg C or npos if not found. 2510b57cec5SDimitry Andric StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const { 252bdd1243dSDimitry Andric return std::string_view(*this).find_first_not_of(C, From); 2530b57cec5SDimitry Andric } 2540b57cec5SDimitry Andric 2550b57cec5SDimitry Andric /// find_first_not_of - Find the first character in the string that is not 2560b57cec5SDimitry Andric /// in the string \arg Chars, or npos if not found. 2570b57cec5SDimitry Andric /// 2580b57cec5SDimitry Andric /// Note: O(size() + Chars.size()) 2590b57cec5SDimitry Andric StringRef::size_type StringRef::find_first_not_of(StringRef Chars, 2600b57cec5SDimitry Andric size_t From) const { 2610b57cec5SDimitry Andric std::bitset<1 << CHAR_BIT> CharBits; 2624824e7fdSDimitry Andric for (char C : Chars) 2634824e7fdSDimitry Andric CharBits.set((unsigned char)C); 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric for (size_type i = std::min(From, Length), e = Length; i != e; ++i) 2660b57cec5SDimitry Andric if (!CharBits.test((unsigned char)Data[i])) 2670b57cec5SDimitry Andric return i; 2680b57cec5SDimitry Andric return npos; 2690b57cec5SDimitry Andric } 2700b57cec5SDimitry Andric 2710b57cec5SDimitry Andric /// find_last_of - Find the last character in the string that is in \arg C, 2720b57cec5SDimitry Andric /// or npos if not found. 2730b57cec5SDimitry Andric /// 2740b57cec5SDimitry Andric /// Note: O(size() + Chars.size()) 2750b57cec5SDimitry Andric StringRef::size_type StringRef::find_last_of(StringRef Chars, 2760b57cec5SDimitry Andric size_t From) const { 2770b57cec5SDimitry Andric std::bitset<1 << CHAR_BIT> CharBits; 2784824e7fdSDimitry Andric for (char C : Chars) 2794824e7fdSDimitry Andric CharBits.set((unsigned char)C); 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) 2820b57cec5SDimitry Andric if (CharBits.test((unsigned char)Data[i])) 2830b57cec5SDimitry Andric return i; 2840b57cec5SDimitry Andric return npos; 2850b57cec5SDimitry Andric } 2860b57cec5SDimitry Andric 2870b57cec5SDimitry Andric /// find_last_not_of - Find the last character in the string that is not 2880b57cec5SDimitry Andric /// \arg C, or npos if not found. 2890b57cec5SDimitry Andric StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const { 2900b57cec5SDimitry Andric for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) 2910b57cec5SDimitry Andric if (Data[i] != C) 2920b57cec5SDimitry Andric return i; 2930b57cec5SDimitry Andric return npos; 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric /// find_last_not_of - Find the last character in the string that is not in 2970b57cec5SDimitry Andric /// \arg Chars, or npos if not found. 2980b57cec5SDimitry Andric /// 2990b57cec5SDimitry Andric /// Note: O(size() + Chars.size()) 3000b57cec5SDimitry Andric StringRef::size_type StringRef::find_last_not_of(StringRef Chars, 3010b57cec5SDimitry Andric size_t From) const { 3020b57cec5SDimitry Andric std::bitset<1 << CHAR_BIT> CharBits; 3034824e7fdSDimitry Andric for (char C : Chars) 3044824e7fdSDimitry Andric CharBits.set((unsigned char)C); 3050b57cec5SDimitry Andric 3060b57cec5SDimitry Andric for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) 3070b57cec5SDimitry Andric if (!CharBits.test((unsigned char)Data[i])) 3080b57cec5SDimitry Andric return i; 3090b57cec5SDimitry Andric return npos; 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A, 3130b57cec5SDimitry Andric StringRef Separator, int MaxSplit, 3140b57cec5SDimitry Andric bool KeepEmpty) const { 3150b57cec5SDimitry Andric StringRef S = *this; 3160b57cec5SDimitry Andric 3170b57cec5SDimitry Andric // Count down from MaxSplit. When MaxSplit is -1, this will just split 3180b57cec5SDimitry Andric // "forever". This doesn't support splitting more than 2^31 times 3190b57cec5SDimitry Andric // intentionally; if we ever want that we can make MaxSplit a 64-bit integer 3200b57cec5SDimitry Andric // but that seems unlikely to be useful. 3210b57cec5SDimitry Andric while (MaxSplit-- != 0) { 3220b57cec5SDimitry Andric size_t Idx = S.find(Separator); 3230b57cec5SDimitry Andric if (Idx == npos) 3240b57cec5SDimitry Andric break; 3250b57cec5SDimitry Andric 3260b57cec5SDimitry Andric // Push this split. 3270b57cec5SDimitry Andric if (KeepEmpty || Idx > 0) 3280b57cec5SDimitry Andric A.push_back(S.slice(0, Idx)); 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric // Jump forward. 3310b57cec5SDimitry Andric S = S.slice(Idx + Separator.size(), npos); 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric // Push the tail. 3350b57cec5SDimitry Andric if (KeepEmpty || !S.empty()) 3360b57cec5SDimitry Andric A.push_back(S); 3370b57cec5SDimitry Andric } 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator, 3400b57cec5SDimitry Andric int MaxSplit, bool KeepEmpty) const { 3410b57cec5SDimitry Andric StringRef S = *this; 3420b57cec5SDimitry Andric 3430b57cec5SDimitry Andric // Count down from MaxSplit. When MaxSplit is -1, this will just split 3440b57cec5SDimitry Andric // "forever". This doesn't support splitting more than 2^31 times 3450b57cec5SDimitry Andric // intentionally; if we ever want that we can make MaxSplit a 64-bit integer 3460b57cec5SDimitry Andric // but that seems unlikely to be useful. 3470b57cec5SDimitry Andric while (MaxSplit-- != 0) { 3480b57cec5SDimitry Andric size_t Idx = S.find(Separator); 3490b57cec5SDimitry Andric if (Idx == npos) 3500b57cec5SDimitry Andric break; 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric // Push this split. 3530b57cec5SDimitry Andric if (KeepEmpty || Idx > 0) 3540b57cec5SDimitry Andric A.push_back(S.slice(0, Idx)); 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric // Jump forward. 3570b57cec5SDimitry Andric S = S.slice(Idx + 1, npos); 3580b57cec5SDimitry Andric } 3590b57cec5SDimitry Andric 3600b57cec5SDimitry Andric // Push the tail. 3610b57cec5SDimitry Andric if (KeepEmpty || !S.empty()) 3620b57cec5SDimitry Andric A.push_back(S); 3630b57cec5SDimitry Andric } 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 3660b57cec5SDimitry Andric // Helpful Algorithms 3670b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric /// count - Return the number of non-overlapped occurrences of \arg Str in 3700b57cec5SDimitry Andric /// the string. 3710b57cec5SDimitry Andric size_t StringRef::count(StringRef Str) const { 3720b57cec5SDimitry Andric size_t Count = 0; 373bdd1243dSDimitry Andric size_t Pos = 0; 3740b57cec5SDimitry Andric size_t N = Str.size(); 375bdd1243dSDimitry Andric // TODO: For an empty `Str` we return 0 for legacy reasons. Consider changing 376bdd1243dSDimitry Andric // this to `Length + 1` which is more in-line with the function 377bdd1243dSDimitry Andric // description. 378bdd1243dSDimitry Andric if (!N) 3790b57cec5SDimitry Andric return 0; 380bdd1243dSDimitry Andric while ((Pos = find(Str, Pos)) != npos) { 3810b57cec5SDimitry Andric ++Count; 382bdd1243dSDimitry Andric Pos += N; 383480093f4SDimitry Andric } 3840b57cec5SDimitry Andric return Count; 3850b57cec5SDimitry Andric } 3860b57cec5SDimitry Andric 3870b57cec5SDimitry Andric static unsigned GetAutoSenseRadix(StringRef &Str) { 3880b57cec5SDimitry Andric if (Str.empty()) 3890b57cec5SDimitry Andric return 10; 3900b57cec5SDimitry Andric 3917a6dacacSDimitry Andric if (Str.consume_front_insensitive("0x")) 3920b57cec5SDimitry Andric return 16; 3930b57cec5SDimitry Andric 3947a6dacacSDimitry Andric if (Str.consume_front_insensitive("0b")) 3950b57cec5SDimitry Andric return 2; 3960b57cec5SDimitry Andric 3977a6dacacSDimitry Andric if (Str.consume_front("0o")) 3980b57cec5SDimitry Andric return 8; 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric if (Str[0] == '0' && Str.size() > 1 && isDigit(Str[1])) { 4010b57cec5SDimitry Andric Str = Str.substr(1); 4020b57cec5SDimitry Andric return 8; 4030b57cec5SDimitry Andric } 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric return 10; 4060b57cec5SDimitry Andric } 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix, 4090b57cec5SDimitry Andric unsigned long long &Result) { 4100b57cec5SDimitry Andric // Autosense radix if not specified. 4110b57cec5SDimitry Andric if (Radix == 0) 4120b57cec5SDimitry Andric Radix = GetAutoSenseRadix(Str); 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric // Empty strings (after the radix autosense) are invalid. 4150b57cec5SDimitry Andric if (Str.empty()) return true; 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric // Parse all the bytes of the string given this radix. Watch for overflow. 4180b57cec5SDimitry Andric StringRef Str2 = Str; 4190b57cec5SDimitry Andric Result = 0; 4200b57cec5SDimitry Andric while (!Str2.empty()) { 4210b57cec5SDimitry Andric unsigned CharVal; 4220b57cec5SDimitry Andric if (Str2[0] >= '0' && Str2[0] <= '9') 4230b57cec5SDimitry Andric CharVal = Str2[0] - '0'; 4240b57cec5SDimitry Andric else if (Str2[0] >= 'a' && Str2[0] <= 'z') 4250b57cec5SDimitry Andric CharVal = Str2[0] - 'a' + 10; 4260b57cec5SDimitry Andric else if (Str2[0] >= 'A' && Str2[0] <= 'Z') 4270b57cec5SDimitry Andric CharVal = Str2[0] - 'A' + 10; 4280b57cec5SDimitry Andric else 4290b57cec5SDimitry Andric break; 4300b57cec5SDimitry Andric 4310b57cec5SDimitry Andric // If the parsed value is larger than the integer radix, we cannot 4320b57cec5SDimitry Andric // consume any more characters. 4330b57cec5SDimitry Andric if (CharVal >= Radix) 4340b57cec5SDimitry Andric break; 4350b57cec5SDimitry Andric 4360b57cec5SDimitry Andric // Add in this character. 4370b57cec5SDimitry Andric unsigned long long PrevResult = Result; 4380b57cec5SDimitry Andric Result = Result * Radix + CharVal; 4390b57cec5SDimitry Andric 4400b57cec5SDimitry Andric // Check for overflow by shifting back and seeing if bits were lost. 4410b57cec5SDimitry Andric if (Result / Radix < PrevResult) 4420b57cec5SDimitry Andric return true; 4430b57cec5SDimitry Andric 4440b57cec5SDimitry Andric Str2 = Str2.substr(1); 4450b57cec5SDimitry Andric } 4460b57cec5SDimitry Andric 4470b57cec5SDimitry Andric // We consider the operation a failure if no characters were consumed 4480b57cec5SDimitry Andric // successfully. 4490b57cec5SDimitry Andric if (Str.size() == Str2.size()) 4500b57cec5SDimitry Andric return true; 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric Str = Str2; 4530b57cec5SDimitry Andric return false; 4540b57cec5SDimitry Andric } 4550b57cec5SDimitry Andric 4560b57cec5SDimitry Andric bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix, 4570b57cec5SDimitry Andric long long &Result) { 4580b57cec5SDimitry Andric unsigned long long ULLVal; 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric // Handle positive strings first. 461*0fca6ea1SDimitry Andric if (!Str.starts_with("-")) { 4620b57cec5SDimitry Andric if (consumeUnsignedInteger(Str, Radix, ULLVal) || 4630b57cec5SDimitry Andric // Check for value so large it overflows a signed value. 4640b57cec5SDimitry Andric (long long)ULLVal < 0) 4650b57cec5SDimitry Andric return true; 4660b57cec5SDimitry Andric Result = ULLVal; 4670b57cec5SDimitry Andric return false; 4680b57cec5SDimitry Andric } 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric // Get the positive part of the value. 4710b57cec5SDimitry Andric StringRef Str2 = Str.drop_front(1); 4720b57cec5SDimitry Andric if (consumeUnsignedInteger(Str2, Radix, ULLVal) || 4730b57cec5SDimitry Andric // Reject values so large they'd overflow as negative signed, but allow 4740b57cec5SDimitry Andric // "-0". This negates the unsigned so that the negative isn't undefined 4750b57cec5SDimitry Andric // on signed overflow. 4760b57cec5SDimitry Andric (long long)-ULLVal > 0) 4770b57cec5SDimitry Andric return true; 4780b57cec5SDimitry Andric 4790b57cec5SDimitry Andric Str = Str2; 4800b57cec5SDimitry Andric Result = -ULLVal; 4810b57cec5SDimitry Andric return false; 4820b57cec5SDimitry Andric } 4830b57cec5SDimitry Andric 4840b57cec5SDimitry Andric /// GetAsUnsignedInteger - Workhorse method that converts a integer character 4850b57cec5SDimitry Andric /// sequence of radix up to 36 to an unsigned long long value. 4860b57cec5SDimitry Andric bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix, 4870b57cec5SDimitry Andric unsigned long long &Result) { 4880b57cec5SDimitry Andric if (consumeUnsignedInteger(Str, Radix, Result)) 4890b57cec5SDimitry Andric return true; 4900b57cec5SDimitry Andric 4910b57cec5SDimitry Andric // For getAsUnsignedInteger, we require the whole string to be consumed or 4920b57cec5SDimitry Andric // else we consider it a failure. 4930b57cec5SDimitry Andric return !Str.empty(); 4940b57cec5SDimitry Andric } 4950b57cec5SDimitry Andric 4960b57cec5SDimitry Andric bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix, 4970b57cec5SDimitry Andric long long &Result) { 4980b57cec5SDimitry Andric if (consumeSignedInteger(Str, Radix, Result)) 4990b57cec5SDimitry Andric return true; 5000b57cec5SDimitry Andric 5010b57cec5SDimitry Andric // For getAsSignedInteger, we require the whole string to be consumed or else 5020b57cec5SDimitry Andric // we consider it a failure. 5030b57cec5SDimitry Andric return !Str.empty(); 5040b57cec5SDimitry Andric } 5050b57cec5SDimitry Andric 50606c3fb27SDimitry Andric bool StringRef::consumeInteger(unsigned Radix, APInt &Result) { 5070b57cec5SDimitry Andric StringRef Str = *this; 5080b57cec5SDimitry Andric 5090b57cec5SDimitry Andric // Autosense radix if not specified. 5100b57cec5SDimitry Andric if (Radix == 0) 5110b57cec5SDimitry Andric Radix = GetAutoSenseRadix(Str); 5120b57cec5SDimitry Andric 5130b57cec5SDimitry Andric assert(Radix > 1 && Radix <= 36); 5140b57cec5SDimitry Andric 5150b57cec5SDimitry Andric // Empty strings (after the radix autosense) are invalid. 5160b57cec5SDimitry Andric if (Str.empty()) return true; 5170b57cec5SDimitry Andric 5180b57cec5SDimitry Andric // Skip leading zeroes. This can be a significant improvement if 5190b57cec5SDimitry Andric // it means we don't need > 64 bits. 5207a6dacacSDimitry Andric Str = Str.ltrim('0'); 5210b57cec5SDimitry Andric 5220b57cec5SDimitry Andric // If it was nothing but zeroes.... 5230b57cec5SDimitry Andric if (Str.empty()) { 5240b57cec5SDimitry Andric Result = APInt(64, 0); 52506c3fb27SDimitry Andric *this = Str; 5260b57cec5SDimitry Andric return false; 5270b57cec5SDimitry Andric } 5280b57cec5SDimitry Andric 5290b57cec5SDimitry Andric // (Over-)estimate the required number of bits. 5300b57cec5SDimitry Andric unsigned Log2Radix = 0; 5310b57cec5SDimitry Andric while ((1U << Log2Radix) < Radix) Log2Radix++; 5320b57cec5SDimitry Andric bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix); 5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric unsigned BitWidth = Log2Radix * Str.size(); 5350b57cec5SDimitry Andric if (BitWidth < Result.getBitWidth()) 5360b57cec5SDimitry Andric BitWidth = Result.getBitWidth(); // don't shrink the result 5370b57cec5SDimitry Andric else if (BitWidth > Result.getBitWidth()) 5380b57cec5SDimitry Andric Result = Result.zext(BitWidth); 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix 5410b57cec5SDimitry Andric if (!IsPowerOf2Radix) { 5420b57cec5SDimitry Andric // These must have the same bit-width as Result. 5430b57cec5SDimitry Andric RadixAP = APInt(BitWidth, Radix); 5440b57cec5SDimitry Andric CharAP = APInt(BitWidth, 0); 5450b57cec5SDimitry Andric } 5460b57cec5SDimitry Andric 5470b57cec5SDimitry Andric // Parse all the bytes of the string given this radix. 5480b57cec5SDimitry Andric Result = 0; 5490b57cec5SDimitry Andric while (!Str.empty()) { 5500b57cec5SDimitry Andric unsigned CharVal; 5510b57cec5SDimitry Andric if (Str[0] >= '0' && Str[0] <= '9') 5520b57cec5SDimitry Andric CharVal = Str[0]-'0'; 5530b57cec5SDimitry Andric else if (Str[0] >= 'a' && Str[0] <= 'z') 5540b57cec5SDimitry Andric CharVal = Str[0]-'a'+10; 5550b57cec5SDimitry Andric else if (Str[0] >= 'A' && Str[0] <= 'Z') 5560b57cec5SDimitry Andric CharVal = Str[0]-'A'+10; 5570b57cec5SDimitry Andric else 55806c3fb27SDimitry Andric break; 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric // If the parsed value is larger than the integer radix, the string is 5610b57cec5SDimitry Andric // invalid. 5620b57cec5SDimitry Andric if (CharVal >= Radix) 56306c3fb27SDimitry Andric break; 5640b57cec5SDimitry Andric 5650b57cec5SDimitry Andric // Add in this character. 5660b57cec5SDimitry Andric if (IsPowerOf2Radix) { 5670b57cec5SDimitry Andric Result <<= Log2Radix; 5680b57cec5SDimitry Andric Result |= CharVal; 5690b57cec5SDimitry Andric } else { 5700b57cec5SDimitry Andric Result *= RadixAP; 5710b57cec5SDimitry Andric CharAP = CharVal; 5720b57cec5SDimitry Andric Result += CharAP; 5730b57cec5SDimitry Andric } 5740b57cec5SDimitry Andric 5750b57cec5SDimitry Andric Str = Str.substr(1); 5760b57cec5SDimitry Andric } 5770b57cec5SDimitry Andric 57806c3fb27SDimitry Andric // We consider the operation a failure if no characters were consumed 57906c3fb27SDimitry Andric // successfully. 58006c3fb27SDimitry Andric if (size() == Str.size()) 58106c3fb27SDimitry Andric return true; 58206c3fb27SDimitry Andric 58306c3fb27SDimitry Andric *this = Str; 5840b57cec5SDimitry Andric return false; 5850b57cec5SDimitry Andric } 5860b57cec5SDimitry Andric 58706c3fb27SDimitry Andric bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { 58806c3fb27SDimitry Andric StringRef Str = *this; 58906c3fb27SDimitry Andric if (Str.consumeInteger(Radix, Result)) 59006c3fb27SDimitry Andric return true; 59106c3fb27SDimitry Andric 59206c3fb27SDimitry Andric // For getAsInteger, we require the whole string to be consumed or else we 59306c3fb27SDimitry Andric // consider it a failure. 59406c3fb27SDimitry Andric return !Str.empty(); 59506c3fb27SDimitry Andric } 59606c3fb27SDimitry Andric 5970b57cec5SDimitry Andric bool StringRef::getAsDouble(double &Result, bool AllowInexact) const { 5980b57cec5SDimitry Andric APFloat F(0.0); 599480093f4SDimitry Andric auto StatusOrErr = F.convertFromString(*this, APFloat::rmNearestTiesToEven); 600480093f4SDimitry Andric if (errorToBool(StatusOrErr.takeError())) 601480093f4SDimitry Andric return true; 602480093f4SDimitry Andric 603480093f4SDimitry Andric APFloat::opStatus Status = *StatusOrErr; 6040b57cec5SDimitry Andric if (Status != APFloat::opOK) { 6050b57cec5SDimitry Andric if (!AllowInexact || !(Status & APFloat::opInexact)) 6060b57cec5SDimitry Andric return true; 6070b57cec5SDimitry Andric } 6080b57cec5SDimitry Andric 6090b57cec5SDimitry Andric Result = F.convertToDouble(); 6100b57cec5SDimitry Andric return false; 6110b57cec5SDimitry Andric } 6120b57cec5SDimitry Andric 6130b57cec5SDimitry Andric // Implementation of StringRef hashing. 6140b57cec5SDimitry Andric hash_code llvm::hash_value(StringRef S) { 6150b57cec5SDimitry Andric return hash_combine_range(S.begin(), S.end()); 6160b57cec5SDimitry Andric } 61704eeddc0SDimitry Andric 61804eeddc0SDimitry Andric unsigned DenseMapInfo<StringRef, void>::getHashValue(StringRef Val) { 61904eeddc0SDimitry Andric assert(Val.data() != getEmptyKey().data() && 62004eeddc0SDimitry Andric "Cannot hash the empty key!"); 62104eeddc0SDimitry Andric assert(Val.data() != getTombstoneKey().data() && 62204eeddc0SDimitry Andric "Cannot hash the tombstone key!"); 62304eeddc0SDimitry Andric return (unsigned)(hash_value(Val)); 62404eeddc0SDimitry Andric } 625