1*bdd1243dSDimitry Andric //===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===// 2*bdd1243dSDimitry Andric // 3*bdd1243dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*bdd1243dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*bdd1243dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*bdd1243dSDimitry Andric // 7*bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 8*bdd1243dSDimitry Andric /// 9*bdd1243dSDimitry Andric /// \file 10*bdd1243dSDimitry Andric /// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer 11*bdd1243dSDimitry Andric /// literal separators. 12*bdd1243dSDimitry Andric /// 13*bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 14*bdd1243dSDimitry Andric 15*bdd1243dSDimitry Andric #include "IntegerLiteralSeparatorFixer.h" 16*bdd1243dSDimitry Andric 17*bdd1243dSDimitry Andric namespace clang { 18*bdd1243dSDimitry Andric namespace format { 19*bdd1243dSDimitry Andric 20*bdd1243dSDimitry Andric enum class Base { Binary, Decimal, Hex, Other }; 21*bdd1243dSDimitry Andric 22*bdd1243dSDimitry Andric static Base getBase(const StringRef IntegerLiteral) { 23*bdd1243dSDimitry Andric assert(IntegerLiteral.size() > 1); 24*bdd1243dSDimitry Andric 25*bdd1243dSDimitry Andric if (IntegerLiteral[0] > '0') { 26*bdd1243dSDimitry Andric assert(IntegerLiteral[0] <= '9'); 27*bdd1243dSDimitry Andric return Base::Decimal; 28*bdd1243dSDimitry Andric } 29*bdd1243dSDimitry Andric 30*bdd1243dSDimitry Andric assert(IntegerLiteral[0] == '0'); 31*bdd1243dSDimitry Andric 32*bdd1243dSDimitry Andric switch (IntegerLiteral[1]) { 33*bdd1243dSDimitry Andric case 'b': 34*bdd1243dSDimitry Andric case 'B': 35*bdd1243dSDimitry Andric return Base::Binary; 36*bdd1243dSDimitry Andric case 'x': 37*bdd1243dSDimitry Andric case 'X': 38*bdd1243dSDimitry Andric return Base::Hex; 39*bdd1243dSDimitry Andric default: 40*bdd1243dSDimitry Andric return Base::Other; 41*bdd1243dSDimitry Andric } 42*bdd1243dSDimitry Andric } 43*bdd1243dSDimitry Andric 44*bdd1243dSDimitry Andric std::pair<tooling::Replacements, unsigned> 45*bdd1243dSDimitry Andric IntegerLiteralSeparatorFixer::process(const Environment &Env, 46*bdd1243dSDimitry Andric const FormatStyle &Style) { 47*bdd1243dSDimitry Andric switch (Style.Language) { 48*bdd1243dSDimitry Andric case FormatStyle::LK_Cpp: 49*bdd1243dSDimitry Andric case FormatStyle::LK_ObjC: 50*bdd1243dSDimitry Andric Separator = '\''; 51*bdd1243dSDimitry Andric break; 52*bdd1243dSDimitry Andric case FormatStyle::LK_CSharp: 53*bdd1243dSDimitry Andric case FormatStyle::LK_Java: 54*bdd1243dSDimitry Andric case FormatStyle::LK_JavaScript: 55*bdd1243dSDimitry Andric Separator = '_'; 56*bdd1243dSDimitry Andric break; 57*bdd1243dSDimitry Andric default: 58*bdd1243dSDimitry Andric return {}; 59*bdd1243dSDimitry Andric } 60*bdd1243dSDimitry Andric 61*bdd1243dSDimitry Andric const auto &Option = Style.IntegerLiteralSeparator; 62*bdd1243dSDimitry Andric const auto Binary = Option.Binary; 63*bdd1243dSDimitry Andric const auto Decimal = Option.Decimal; 64*bdd1243dSDimitry Andric const auto Hex = Option.Hex; 65*bdd1243dSDimitry Andric const bool SkipBinary = Binary == 0; 66*bdd1243dSDimitry Andric const bool SkipDecimal = Decimal == 0; 67*bdd1243dSDimitry Andric const bool SkipHex = Hex == 0; 68*bdd1243dSDimitry Andric 69*bdd1243dSDimitry Andric if (SkipBinary && SkipDecimal && SkipHex) 70*bdd1243dSDimitry Andric return {}; 71*bdd1243dSDimitry Andric 72*bdd1243dSDimitry Andric const auto &SourceMgr = Env.getSourceManager(); 73*bdd1243dSDimitry Andric AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); 74*bdd1243dSDimitry Andric 75*bdd1243dSDimitry Andric const auto ID = Env.getFileID(); 76*bdd1243dSDimitry Andric const auto LangOpts = getFormattingLangOpts(Style); 77*bdd1243dSDimitry Andric Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts); 78*bdd1243dSDimitry Andric Lex.SetCommentRetentionState(true); 79*bdd1243dSDimitry Andric 80*bdd1243dSDimitry Andric Token Tok; 81*bdd1243dSDimitry Andric tooling::Replacements Result; 82*bdd1243dSDimitry Andric 83*bdd1243dSDimitry Andric for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) { 84*bdd1243dSDimitry Andric auto Length = Tok.getLength(); 85*bdd1243dSDimitry Andric if (Length < 2) 86*bdd1243dSDimitry Andric continue; 87*bdd1243dSDimitry Andric auto Location = Tok.getLocation(); 88*bdd1243dSDimitry Andric auto Text = StringRef(SourceMgr.getCharacterData(Location), Length); 89*bdd1243dSDimitry Andric if (Tok.is(tok::comment)) { 90*bdd1243dSDimitry Andric if (Text == "// clang-format off" || Text == "/* clang-format off */") 91*bdd1243dSDimitry Andric Skip = true; 92*bdd1243dSDimitry Andric else if (Text == "// clang-format on" || Text == "/* clang-format on */") 93*bdd1243dSDimitry Andric Skip = false; 94*bdd1243dSDimitry Andric continue; 95*bdd1243dSDimitry Andric } 96*bdd1243dSDimitry Andric if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' || 97*bdd1243dSDimitry Andric !AffectedRangeMgr.affectsCharSourceRange( 98*bdd1243dSDimitry Andric CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) { 99*bdd1243dSDimitry Andric continue; 100*bdd1243dSDimitry Andric } 101*bdd1243dSDimitry Andric const auto B = getBase(Text); 102*bdd1243dSDimitry Andric const bool IsBase2 = B == Base::Binary; 103*bdd1243dSDimitry Andric const bool IsBase10 = B == Base::Decimal; 104*bdd1243dSDimitry Andric const bool IsBase16 = B == Base::Hex; 105*bdd1243dSDimitry Andric if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) || 106*bdd1243dSDimitry Andric (IsBase16 && SkipHex) || B == Base::Other) { 107*bdd1243dSDimitry Andric continue; 108*bdd1243dSDimitry Andric } 109*bdd1243dSDimitry Andric if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) || 110*bdd1243dSDimitry Andric (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) { 111*bdd1243dSDimitry Andric continue; 112*bdd1243dSDimitry Andric } 113*bdd1243dSDimitry Andric if (((IsBase2 && Binary < 0) || (IsBase10 && Decimal < 0) || 114*bdd1243dSDimitry Andric (IsBase16 && Hex < 0)) && 115*bdd1243dSDimitry Andric Text.find(Separator) == StringRef::npos) { 116*bdd1243dSDimitry Andric continue; 117*bdd1243dSDimitry Andric } 118*bdd1243dSDimitry Andric const auto Start = Text[0] == '0' ? 2 : 0; 119*bdd1243dSDimitry Andric auto End = Text.find_first_of("uUlLzZn"); 120*bdd1243dSDimitry Andric if (End == StringRef::npos) 121*bdd1243dSDimitry Andric End = Length; 122*bdd1243dSDimitry Andric if (Start > 0 || End < Length) { 123*bdd1243dSDimitry Andric Length = End - Start; 124*bdd1243dSDimitry Andric Text = Text.substr(Start, Length); 125*bdd1243dSDimitry Andric } 126*bdd1243dSDimitry Andric auto DigitsPerGroup = Decimal; 127*bdd1243dSDimitry Andric if (IsBase2) 128*bdd1243dSDimitry Andric DigitsPerGroup = Binary; 129*bdd1243dSDimitry Andric else if (IsBase16) 130*bdd1243dSDimitry Andric DigitsPerGroup = Hex; 131*bdd1243dSDimitry Andric if (DigitsPerGroup > 0 && checkSeparator(Text, DigitsPerGroup)) 132*bdd1243dSDimitry Andric continue; 133*bdd1243dSDimitry Andric if (Start > 0) 134*bdd1243dSDimitry Andric Location = Location.getLocWithOffset(Start); 135*bdd1243dSDimitry Andric cantFail(Result.add(tooling::Replacement(SourceMgr, Location, Length, 136*bdd1243dSDimitry Andric format(Text, DigitsPerGroup)))); 137*bdd1243dSDimitry Andric } 138*bdd1243dSDimitry Andric 139*bdd1243dSDimitry Andric return {Result, 0}; 140*bdd1243dSDimitry Andric } 141*bdd1243dSDimitry Andric 142*bdd1243dSDimitry Andric bool IntegerLiteralSeparatorFixer::checkSeparator( 143*bdd1243dSDimitry Andric const StringRef IntegerLiteral, int DigitsPerGroup) const { 144*bdd1243dSDimitry Andric assert(DigitsPerGroup > 0); 145*bdd1243dSDimitry Andric 146*bdd1243dSDimitry Andric int I = 0; 147*bdd1243dSDimitry Andric for (auto C : llvm::reverse(IntegerLiteral)) { 148*bdd1243dSDimitry Andric if (C == Separator) { 149*bdd1243dSDimitry Andric if (I < DigitsPerGroup) 150*bdd1243dSDimitry Andric return false; 151*bdd1243dSDimitry Andric I = 0; 152*bdd1243dSDimitry Andric } else { 153*bdd1243dSDimitry Andric ++I; 154*bdd1243dSDimitry Andric if (I == DigitsPerGroup) 155*bdd1243dSDimitry Andric return false; 156*bdd1243dSDimitry Andric } 157*bdd1243dSDimitry Andric } 158*bdd1243dSDimitry Andric 159*bdd1243dSDimitry Andric return true; 160*bdd1243dSDimitry Andric } 161*bdd1243dSDimitry Andric 162*bdd1243dSDimitry Andric std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral, 163*bdd1243dSDimitry Andric int DigitsPerGroup) const { 164*bdd1243dSDimitry Andric assert(DigitsPerGroup != 0); 165*bdd1243dSDimitry Andric 166*bdd1243dSDimitry Andric std::string Formatted; 167*bdd1243dSDimitry Andric 168*bdd1243dSDimitry Andric if (DigitsPerGroup < 0) { 169*bdd1243dSDimitry Andric for (auto C : IntegerLiteral) 170*bdd1243dSDimitry Andric if (C != Separator) 171*bdd1243dSDimitry Andric Formatted.push_back(C); 172*bdd1243dSDimitry Andric return Formatted; 173*bdd1243dSDimitry Andric } 174*bdd1243dSDimitry Andric 175*bdd1243dSDimitry Andric int DigitCount = 0; 176*bdd1243dSDimitry Andric for (auto C : IntegerLiteral) 177*bdd1243dSDimitry Andric if (C != Separator) 178*bdd1243dSDimitry Andric ++DigitCount; 179*bdd1243dSDimitry Andric 180*bdd1243dSDimitry Andric int Remainder = DigitCount % DigitsPerGroup; 181*bdd1243dSDimitry Andric 182*bdd1243dSDimitry Andric int I = 0; 183*bdd1243dSDimitry Andric for (auto C : IntegerLiteral) { 184*bdd1243dSDimitry Andric if (C == Separator) 185*bdd1243dSDimitry Andric continue; 186*bdd1243dSDimitry Andric if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) { 187*bdd1243dSDimitry Andric Formatted.push_back(Separator); 188*bdd1243dSDimitry Andric I = 0; 189*bdd1243dSDimitry Andric Remainder = 0; 190*bdd1243dSDimitry Andric } 191*bdd1243dSDimitry Andric Formatted.push_back(C); 192*bdd1243dSDimitry Andric ++I; 193*bdd1243dSDimitry Andric } 194*bdd1243dSDimitry Andric 195*bdd1243dSDimitry Andric return Formatted; 196*bdd1243dSDimitry Andric } 197*bdd1243dSDimitry Andric 198*bdd1243dSDimitry Andric } // namespace format 199*bdd1243dSDimitry Andric } // namespace clang 200