10b57cec5SDimitry Andric //===-- StringExtras.cpp - Implement the StringExtras header --------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements the StringExtras.h header
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h"
140b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
150b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
165ffd83dbSDimitry Andric #include <cctype>
175ffd83dbSDimitry Andric
180b57cec5SDimitry Andric using namespace llvm;
190b57cec5SDimitry Andric
200b57cec5SDimitry Andric /// StrInStrNoCase - Portable version of strcasestr. Locates the first
210b57cec5SDimitry Andric /// occurrence of string 's1' in string 's2', ignoring case. Returns
220b57cec5SDimitry Andric /// the offset of s2 in s1 or npos if s2 cannot be found.
StrInStrNoCase(StringRef s1,StringRef s2)230b57cec5SDimitry Andric StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) {
240b57cec5SDimitry Andric size_t N = s2.size(), M = s1.size();
250b57cec5SDimitry Andric if (N > M)
260b57cec5SDimitry Andric return StringRef::npos;
270b57cec5SDimitry Andric for (size_t i = 0, e = M - N + 1; i != e; ++i)
28fe6060f1SDimitry Andric if (s1.substr(i, N).equals_insensitive(s2))
290b57cec5SDimitry Andric return i;
300b57cec5SDimitry Andric return StringRef::npos;
310b57cec5SDimitry Andric }
320b57cec5SDimitry Andric
330b57cec5SDimitry Andric /// getToken - This function extracts one token from source, ignoring any
340b57cec5SDimitry Andric /// leading characters that appear in the Delimiters string, and ending the
350b57cec5SDimitry Andric /// token at any of the characters that appear in the Delimiters string. If
360b57cec5SDimitry Andric /// there are no tokens in the source string, an empty string is returned.
370b57cec5SDimitry Andric /// The function returns a pair containing the extracted token and the
380b57cec5SDimitry Andric /// remaining tail string.
getToken(StringRef Source,StringRef Delimiters)390b57cec5SDimitry Andric std::pair<StringRef, StringRef> llvm::getToken(StringRef Source,
400b57cec5SDimitry Andric StringRef Delimiters) {
410b57cec5SDimitry Andric // Figure out where the token starts.
420b57cec5SDimitry Andric StringRef::size_type Start = Source.find_first_not_of(Delimiters);
430b57cec5SDimitry Andric
440b57cec5SDimitry Andric // Find the next occurrence of the delimiter.
450b57cec5SDimitry Andric StringRef::size_type End = Source.find_first_of(Delimiters, Start);
460b57cec5SDimitry Andric
470b57cec5SDimitry Andric return std::make_pair(Source.slice(Start, End), Source.substr(End));
480b57cec5SDimitry Andric }
490b57cec5SDimitry Andric
500b57cec5SDimitry Andric /// SplitString - Split up the specified string according to the specified
510b57cec5SDimitry Andric /// delimiters, appending the result fragments to the output list.
SplitString(StringRef Source,SmallVectorImpl<StringRef> & OutFragments,StringRef Delimiters)520b57cec5SDimitry Andric void llvm::SplitString(StringRef Source,
530b57cec5SDimitry Andric SmallVectorImpl<StringRef> &OutFragments,
540b57cec5SDimitry Andric StringRef Delimiters) {
550b57cec5SDimitry Andric std::pair<StringRef, StringRef> S = getToken(Source, Delimiters);
560b57cec5SDimitry Andric while (!S.first.empty()) {
570b57cec5SDimitry Andric OutFragments.push_back(S.first);
580b57cec5SDimitry Andric S = getToken(S.second, Delimiters);
590b57cec5SDimitry Andric }
600b57cec5SDimitry Andric }
610b57cec5SDimitry Andric
printEscapedString(StringRef Name,raw_ostream & Out)620b57cec5SDimitry Andric void llvm::printEscapedString(StringRef Name, raw_ostream &Out) {
634824e7fdSDimitry Andric for (unsigned char C : Name) {
648bcb0991SDimitry Andric if (C == '\\')
658bcb0991SDimitry Andric Out << '\\' << C;
668bcb0991SDimitry Andric else if (isPrint(C) && C != '"')
670b57cec5SDimitry Andric Out << C;
680b57cec5SDimitry Andric else
690b57cec5SDimitry Andric Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
700b57cec5SDimitry Andric }
710b57cec5SDimitry Andric }
720b57cec5SDimitry Andric
printHTMLEscaped(StringRef String,raw_ostream & Out)730b57cec5SDimitry Andric void llvm::printHTMLEscaped(StringRef String, raw_ostream &Out) {
740b57cec5SDimitry Andric for (char C : String) {
750b57cec5SDimitry Andric if (C == '&')
760b57cec5SDimitry Andric Out << "&";
770b57cec5SDimitry Andric else if (C == '<')
780b57cec5SDimitry Andric Out << "<";
790b57cec5SDimitry Andric else if (C == '>')
800b57cec5SDimitry Andric Out << ">";
810b57cec5SDimitry Andric else if (C == '\"')
820b57cec5SDimitry Andric Out << """;
830b57cec5SDimitry Andric else if (C == '\'')
840b57cec5SDimitry Andric Out << "'";
850b57cec5SDimitry Andric else
860b57cec5SDimitry Andric Out << C;
870b57cec5SDimitry Andric }
880b57cec5SDimitry Andric }
890b57cec5SDimitry Andric
printLowerCase(StringRef String,raw_ostream & Out)900b57cec5SDimitry Andric void llvm::printLowerCase(StringRef String, raw_ostream &Out) {
910b57cec5SDimitry Andric for (const char C : String)
920b57cec5SDimitry Andric Out << toLower(C);
930b57cec5SDimitry Andric }
945ffd83dbSDimitry Andric
convertToSnakeFromCamelCase(StringRef input)955ffd83dbSDimitry Andric std::string llvm::convertToSnakeFromCamelCase(StringRef input) {
965ffd83dbSDimitry Andric if (input.empty())
975ffd83dbSDimitry Andric return "";
985ffd83dbSDimitry Andric
995ffd83dbSDimitry Andric std::string snakeCase;
1005ffd83dbSDimitry Andric snakeCase.reserve(input.size());
101*5f757f3fSDimitry Andric auto check = [&input](size_t j, function_ref<bool(int)> predicate) {
102*5f757f3fSDimitry Andric return j < input.size() && predicate(input[j]);
103*5f757f3fSDimitry Andric };
104*5f757f3fSDimitry Andric for (size_t i = 0; i < input.size(); ++i) {
105*5f757f3fSDimitry Andric snakeCase.push_back(tolower(input[i]));
106*5f757f3fSDimitry Andric // Handles "runs" of capitals, such as in OPName -> op_name.
107*5f757f3fSDimitry Andric if (check(i, isupper) && check(i + 1, isupper) && check(i + 2, islower))
1085ffd83dbSDimitry Andric snakeCase.push_back('_');
109*5f757f3fSDimitry Andric if ((check(i, islower) || check(i, isdigit)) && check(i + 1, isupper))
110*5f757f3fSDimitry Andric snakeCase.push_back('_');
1115ffd83dbSDimitry Andric }
1125ffd83dbSDimitry Andric return snakeCase;
1135ffd83dbSDimitry Andric }
1145ffd83dbSDimitry Andric
convertToCamelFromSnakeCase(StringRef input,bool capitalizeFirst)1155ffd83dbSDimitry Andric std::string llvm::convertToCamelFromSnakeCase(StringRef input,
1165ffd83dbSDimitry Andric bool capitalizeFirst) {
1175ffd83dbSDimitry Andric if (input.empty())
1185ffd83dbSDimitry Andric return "";
1195ffd83dbSDimitry Andric
1205ffd83dbSDimitry Andric std::string output;
1215ffd83dbSDimitry Andric output.reserve(input.size());
1225ffd83dbSDimitry Andric
1235ffd83dbSDimitry Andric // Push the first character, capatilizing if necessary.
1245ffd83dbSDimitry Andric if (capitalizeFirst && std::islower(input.front()))
1255ffd83dbSDimitry Andric output.push_back(llvm::toUpper(input.front()));
1265ffd83dbSDimitry Andric else
1275ffd83dbSDimitry Andric output.push_back(input.front());
1285ffd83dbSDimitry Andric
1295ffd83dbSDimitry Andric // Walk the input converting any `*_[a-z]` snake case into `*[A-Z]` camelCase.
1305ffd83dbSDimitry Andric for (size_t pos = 1, e = input.size(); pos < e; ++pos) {
1315ffd83dbSDimitry Andric if (input[pos] == '_' && pos != (e - 1) && std::islower(input[pos + 1]))
1325ffd83dbSDimitry Andric output.push_back(llvm::toUpper(input[++pos]));
1335ffd83dbSDimitry Andric else
1345ffd83dbSDimitry Andric output.push_back(input[pos]);
1355ffd83dbSDimitry Andric }
1365ffd83dbSDimitry Andric return output;
1375ffd83dbSDimitry Andric }
138