1bdd1243dSDimitry Andric //===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===// 2bdd1243dSDimitry Andric // 3bdd1243dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4bdd1243dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5bdd1243dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6bdd1243dSDimitry Andric // 7bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 8bdd1243dSDimitry Andric /// 9bdd1243dSDimitry Andric /// \file 10bdd1243dSDimitry Andric /// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer 11bdd1243dSDimitry Andric /// literal separators. 12bdd1243dSDimitry Andric /// 13bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 14bdd1243dSDimitry Andric 15bdd1243dSDimitry Andric #include "IntegerLiteralSeparatorFixer.h" 16bdd1243dSDimitry Andric 17bdd1243dSDimitry Andric namespace clang { 18bdd1243dSDimitry Andric namespace format { 19bdd1243dSDimitry Andric 20bdd1243dSDimitry Andric enum class Base { Binary, Decimal, Hex, Other }; 21bdd1243dSDimitry Andric 22bdd1243dSDimitry Andric static Base getBase(const StringRef IntegerLiteral) { 23bdd1243dSDimitry Andric assert(IntegerLiteral.size() > 1); 24bdd1243dSDimitry Andric 25bdd1243dSDimitry Andric if (IntegerLiteral[0] > '0') { 26bdd1243dSDimitry Andric assert(IntegerLiteral[0] <= '9'); 27bdd1243dSDimitry Andric return Base::Decimal; 28bdd1243dSDimitry Andric } 29bdd1243dSDimitry Andric 30bdd1243dSDimitry Andric assert(IntegerLiteral[0] == '0'); 31bdd1243dSDimitry Andric 32bdd1243dSDimitry Andric switch (IntegerLiteral[1]) { 33bdd1243dSDimitry Andric case 'b': 34bdd1243dSDimitry Andric case 'B': 35bdd1243dSDimitry Andric return Base::Binary; 36bdd1243dSDimitry Andric case 'x': 37bdd1243dSDimitry Andric case 'X': 38bdd1243dSDimitry Andric return Base::Hex; 39bdd1243dSDimitry Andric default: 40bdd1243dSDimitry Andric return Base::Other; 41bdd1243dSDimitry Andric } 42bdd1243dSDimitry Andric } 43bdd1243dSDimitry Andric 44bdd1243dSDimitry Andric std::pair<tooling::Replacements, unsigned> 45bdd1243dSDimitry Andric IntegerLiteralSeparatorFixer::process(const Environment &Env, 46bdd1243dSDimitry Andric const FormatStyle &Style) { 47bdd1243dSDimitry Andric switch (Style.Language) { 48bdd1243dSDimitry Andric case FormatStyle::LK_Cpp: 49bdd1243dSDimitry Andric case FormatStyle::LK_ObjC: 50bdd1243dSDimitry Andric Separator = '\''; 51bdd1243dSDimitry Andric break; 52bdd1243dSDimitry Andric case FormatStyle::LK_CSharp: 53bdd1243dSDimitry Andric case FormatStyle::LK_Java: 54bdd1243dSDimitry Andric case FormatStyle::LK_JavaScript: 55bdd1243dSDimitry Andric Separator = '_'; 56bdd1243dSDimitry Andric break; 57bdd1243dSDimitry Andric default: 58bdd1243dSDimitry Andric return {}; 59bdd1243dSDimitry Andric } 60bdd1243dSDimitry Andric 61bdd1243dSDimitry Andric const auto &Option = Style.IntegerLiteralSeparator; 62bdd1243dSDimitry Andric const auto Binary = Option.Binary; 63bdd1243dSDimitry Andric const auto Decimal = Option.Decimal; 64bdd1243dSDimitry Andric const auto Hex = Option.Hex; 65bdd1243dSDimitry Andric const bool SkipBinary = Binary == 0; 66bdd1243dSDimitry Andric const bool SkipDecimal = Decimal == 0; 67bdd1243dSDimitry Andric const bool SkipHex = Hex == 0; 68bdd1243dSDimitry Andric 69bdd1243dSDimitry Andric if (SkipBinary && SkipDecimal && SkipHex) 70bdd1243dSDimitry Andric return {}; 71bdd1243dSDimitry Andric 721ac55f4cSDimitry Andric const auto BinaryMinDigits = 731ac55f4cSDimitry Andric std::max((int)Option.BinaryMinDigits, Binary + 1); 741ac55f4cSDimitry Andric const auto DecimalMinDigits = 751ac55f4cSDimitry Andric std::max((int)Option.DecimalMinDigits, Decimal + 1); 761ac55f4cSDimitry Andric const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1); 771ac55f4cSDimitry Andric 78bdd1243dSDimitry Andric const auto &SourceMgr = Env.getSourceManager(); 79bdd1243dSDimitry Andric AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); 80bdd1243dSDimitry Andric 81bdd1243dSDimitry Andric const auto ID = Env.getFileID(); 82bdd1243dSDimitry Andric const auto LangOpts = getFormattingLangOpts(Style); 83bdd1243dSDimitry Andric Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts); 84bdd1243dSDimitry Andric Lex.SetCommentRetentionState(true); 85bdd1243dSDimitry Andric 86bdd1243dSDimitry Andric Token Tok; 87bdd1243dSDimitry Andric tooling::Replacements Result; 88bdd1243dSDimitry Andric 89bdd1243dSDimitry Andric for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) { 90bdd1243dSDimitry Andric auto Length = Tok.getLength(); 91bdd1243dSDimitry Andric if (Length < 2) 92bdd1243dSDimitry Andric continue; 93bdd1243dSDimitry Andric auto Location = Tok.getLocation(); 94bdd1243dSDimitry Andric auto Text = StringRef(SourceMgr.getCharacterData(Location), Length); 95bdd1243dSDimitry Andric if (Tok.is(tok::comment)) { 96bdd1243dSDimitry Andric if (Text == "// clang-format off" || Text == "/* clang-format off */") 97bdd1243dSDimitry Andric Skip = true; 98bdd1243dSDimitry Andric else if (Text == "// clang-format on" || Text == "/* clang-format on */") 99bdd1243dSDimitry Andric Skip = false; 100bdd1243dSDimitry Andric continue; 101bdd1243dSDimitry Andric } 102bdd1243dSDimitry Andric if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' || 103bdd1243dSDimitry Andric !AffectedRangeMgr.affectsCharSourceRange( 104bdd1243dSDimitry Andric CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) { 105bdd1243dSDimitry Andric continue; 106bdd1243dSDimitry Andric } 107bdd1243dSDimitry Andric const auto B = getBase(Text); 108bdd1243dSDimitry Andric const bool IsBase2 = B == Base::Binary; 109bdd1243dSDimitry Andric const bool IsBase10 = B == Base::Decimal; 110bdd1243dSDimitry Andric const bool IsBase16 = B == Base::Hex; 111bdd1243dSDimitry Andric if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) || 112bdd1243dSDimitry Andric (IsBase16 && SkipHex) || B == Base::Other) { 113bdd1243dSDimitry Andric continue; 114bdd1243dSDimitry Andric } 1151ac55f4cSDimitry Andric if (Style.isCpp()) { 116*2efbaac7SDimitry Andric // Hex alpha digits a-f/A-F must be at the end of the string literal. 117*2efbaac7SDimitry Andric StringRef Suffixes = "_himnsuyd"; 118*2efbaac7SDimitry Andric if (const auto Pos = 119*2efbaac7SDimitry Andric Text.find_first_of(IsBase16 ? Suffixes.drop_back() : Suffixes); 120*2efbaac7SDimitry Andric Pos != StringRef::npos) { 1211ac55f4cSDimitry Andric Text = Text.substr(0, Pos); 1221ac55f4cSDimitry Andric Length = Pos; 1231ac55f4cSDimitry Andric } 1241ac55f4cSDimitry Andric } 125bdd1243dSDimitry Andric if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) || 126bdd1243dSDimitry Andric (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) { 127bdd1243dSDimitry Andric continue; 128bdd1243dSDimitry Andric } 129bdd1243dSDimitry Andric const auto Start = Text[0] == '0' ? 2 : 0; 1301ac55f4cSDimitry Andric auto End = Text.find_first_of("uUlLzZn", Start); 131bdd1243dSDimitry Andric if (End == StringRef::npos) 132bdd1243dSDimitry Andric End = Length; 133bdd1243dSDimitry Andric if (Start > 0 || End < Length) { 134bdd1243dSDimitry Andric Length = End - Start; 135bdd1243dSDimitry Andric Text = Text.substr(Start, Length); 136bdd1243dSDimitry Andric } 137bdd1243dSDimitry Andric auto DigitsPerGroup = Decimal; 1381ac55f4cSDimitry Andric auto MinDigits = DecimalMinDigits; 1391ac55f4cSDimitry Andric if (IsBase2) { 140bdd1243dSDimitry Andric DigitsPerGroup = Binary; 1411ac55f4cSDimitry Andric MinDigits = BinaryMinDigits; 1421ac55f4cSDimitry Andric } else if (IsBase16) { 143bdd1243dSDimitry Andric DigitsPerGroup = Hex; 1441ac55f4cSDimitry Andric MinDigits = HexMinDigits; 1451ac55f4cSDimitry Andric } 1461ac55f4cSDimitry Andric const auto SeparatorCount = Text.count(Separator); 1471ac55f4cSDimitry Andric const int DigitCount = Length - SeparatorCount; 1481ac55f4cSDimitry Andric const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits; 1491ac55f4cSDimitry Andric if (RemoveSeparator && SeparatorCount == 0) 150bdd1243dSDimitry Andric continue; 1511ac55f4cSDimitry Andric if (!RemoveSeparator && SeparatorCount > 0 && 1521ac55f4cSDimitry Andric checkSeparator(Text, DigitsPerGroup)) { 1531ac55f4cSDimitry Andric continue; 1541ac55f4cSDimitry Andric } 1551ac55f4cSDimitry Andric const auto &Formatted = 1561ac55f4cSDimitry Andric format(Text, DigitsPerGroup, DigitCount, RemoveSeparator); 1571ac55f4cSDimitry Andric assert(Formatted != Text); 158bdd1243dSDimitry Andric if (Start > 0) 159bdd1243dSDimitry Andric Location = Location.getLocWithOffset(Start); 1601ac55f4cSDimitry Andric cantFail(Result.add( 1611ac55f4cSDimitry Andric tooling::Replacement(SourceMgr, Location, Length, Formatted))); 162bdd1243dSDimitry Andric } 163bdd1243dSDimitry Andric 164bdd1243dSDimitry Andric return {Result, 0}; 165bdd1243dSDimitry Andric } 166bdd1243dSDimitry Andric 167bdd1243dSDimitry Andric bool IntegerLiteralSeparatorFixer::checkSeparator( 168bdd1243dSDimitry Andric const StringRef IntegerLiteral, int DigitsPerGroup) const { 169bdd1243dSDimitry Andric assert(DigitsPerGroup > 0); 170bdd1243dSDimitry Andric 171bdd1243dSDimitry Andric int I = 0; 172bdd1243dSDimitry Andric for (auto C : llvm::reverse(IntegerLiteral)) { 173bdd1243dSDimitry Andric if (C == Separator) { 174bdd1243dSDimitry Andric if (I < DigitsPerGroup) 175bdd1243dSDimitry Andric return false; 176bdd1243dSDimitry Andric I = 0; 177bdd1243dSDimitry Andric } else { 178bdd1243dSDimitry Andric if (I == DigitsPerGroup) 179bdd1243dSDimitry Andric return false; 1801ac55f4cSDimitry Andric ++I; 181bdd1243dSDimitry Andric } 182bdd1243dSDimitry Andric } 183bdd1243dSDimitry Andric 184bdd1243dSDimitry Andric return true; 185bdd1243dSDimitry Andric } 186bdd1243dSDimitry Andric 187bdd1243dSDimitry Andric std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral, 1881ac55f4cSDimitry Andric int DigitsPerGroup, 1891ac55f4cSDimitry Andric int DigitCount, 1901ac55f4cSDimitry Andric bool RemoveSeparator) const { 191bdd1243dSDimitry Andric assert(DigitsPerGroup != 0); 192bdd1243dSDimitry Andric 193bdd1243dSDimitry Andric std::string Formatted; 194bdd1243dSDimitry Andric 1951ac55f4cSDimitry Andric if (RemoveSeparator) { 196bdd1243dSDimitry Andric for (auto C : IntegerLiteral) 197bdd1243dSDimitry Andric if (C != Separator) 198bdd1243dSDimitry Andric Formatted.push_back(C); 199bdd1243dSDimitry Andric return Formatted; 200bdd1243dSDimitry Andric } 201bdd1243dSDimitry Andric 202bdd1243dSDimitry Andric int Remainder = DigitCount % DigitsPerGroup; 203bdd1243dSDimitry Andric 204bdd1243dSDimitry Andric int I = 0; 205bdd1243dSDimitry Andric for (auto C : IntegerLiteral) { 206bdd1243dSDimitry Andric if (C == Separator) 207bdd1243dSDimitry Andric continue; 208bdd1243dSDimitry Andric if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) { 209bdd1243dSDimitry Andric Formatted.push_back(Separator); 210bdd1243dSDimitry Andric I = 0; 211bdd1243dSDimitry Andric Remainder = 0; 212bdd1243dSDimitry Andric } 213bdd1243dSDimitry Andric Formatted.push_back(C); 214bdd1243dSDimitry Andric ++I; 215bdd1243dSDimitry Andric } 216bdd1243dSDimitry Andric 217bdd1243dSDimitry Andric return Formatted; 218bdd1243dSDimitry Andric } 219bdd1243dSDimitry Andric 220bdd1243dSDimitry Andric } // namespace format 221bdd1243dSDimitry Andric } // namespace clang 222