xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements IntegerLiteralSeparatorFixer, which fixes the digit
/// separators of integer literals in C++, Objective-C, C#, Java, and
/// JavaScript code.
///
//===----------------------------------------------------------------------===//
14bdd1243dSDimitry Andric 
15bdd1243dSDimitry Andric #include "IntegerLiteralSeparatorFixer.h"
16bdd1243dSDimitry Andric 
17bdd1243dSDimitry Andric namespace clang {
18bdd1243dSDimitry Andric namespace format {
19bdd1243dSDimitry Andric 
/// Numeric base of an integer literal, as classified by its prefix.
enum class Base {
  Binary,  ///< Literal starts with "0b" or "0B".
  Decimal, ///< Literal starts with a non-zero decimal digit.
  Hex,     ///< Literal starts with "0x" or "0X".
  Other    ///< Any other literal starting with '0'; never reformatted.
};
21bdd1243dSDimitry Andric 
getBase(const StringRef IntegerLiteral)22bdd1243dSDimitry Andric static Base getBase(const StringRef IntegerLiteral) {
23bdd1243dSDimitry Andric   assert(IntegerLiteral.size() > 1);
24bdd1243dSDimitry Andric 
25bdd1243dSDimitry Andric   if (IntegerLiteral[0] > '0') {
26bdd1243dSDimitry Andric     assert(IntegerLiteral[0] <= '9');
27bdd1243dSDimitry Andric     return Base::Decimal;
28bdd1243dSDimitry Andric   }
29bdd1243dSDimitry Andric 
30bdd1243dSDimitry Andric   assert(IntegerLiteral[0] == '0');
31bdd1243dSDimitry Andric 
32bdd1243dSDimitry Andric   switch (IntegerLiteral[1]) {
33bdd1243dSDimitry Andric   case 'b':
34bdd1243dSDimitry Andric   case 'B':
35bdd1243dSDimitry Andric     return Base::Binary;
36bdd1243dSDimitry Andric   case 'x':
37bdd1243dSDimitry Andric   case 'X':
38bdd1243dSDimitry Andric     return Base::Hex;
39bdd1243dSDimitry Andric   default:
40bdd1243dSDimitry Andric     return Base::Other;
41bdd1243dSDimitry Andric   }
42bdd1243dSDimitry Andric }
43bdd1243dSDimitry Andric 
44bdd1243dSDimitry Andric std::pair<tooling::Replacements, unsigned>
process(const Environment & Env,const FormatStyle & Style)45bdd1243dSDimitry Andric IntegerLiteralSeparatorFixer::process(const Environment &Env,
46bdd1243dSDimitry Andric                                       const FormatStyle &Style) {
47bdd1243dSDimitry Andric   switch (Style.Language) {
48bdd1243dSDimitry Andric   case FormatStyle::LK_Cpp:
49bdd1243dSDimitry Andric   case FormatStyle::LK_ObjC:
50bdd1243dSDimitry Andric     Separator = '\'';
51bdd1243dSDimitry Andric     break;
52bdd1243dSDimitry Andric   case FormatStyle::LK_CSharp:
53bdd1243dSDimitry Andric   case FormatStyle::LK_Java:
54bdd1243dSDimitry Andric   case FormatStyle::LK_JavaScript:
55bdd1243dSDimitry Andric     Separator = '_';
56bdd1243dSDimitry Andric     break;
57bdd1243dSDimitry Andric   default:
58bdd1243dSDimitry Andric     return {};
59bdd1243dSDimitry Andric   }
60bdd1243dSDimitry Andric 
61bdd1243dSDimitry Andric   const auto &Option = Style.IntegerLiteralSeparator;
62bdd1243dSDimitry Andric   const auto Binary = Option.Binary;
63bdd1243dSDimitry Andric   const auto Decimal = Option.Decimal;
64bdd1243dSDimitry Andric   const auto Hex = Option.Hex;
65bdd1243dSDimitry Andric   const bool SkipBinary = Binary == 0;
66bdd1243dSDimitry Andric   const bool SkipDecimal = Decimal == 0;
67bdd1243dSDimitry Andric   const bool SkipHex = Hex == 0;
68bdd1243dSDimitry Andric 
69bdd1243dSDimitry Andric   if (SkipBinary && SkipDecimal && SkipHex)
70bdd1243dSDimitry Andric     return {};
71bdd1243dSDimitry Andric 
721ac55f4cSDimitry Andric   const auto BinaryMinDigits =
731ac55f4cSDimitry Andric       std::max((int)Option.BinaryMinDigits, Binary + 1);
741ac55f4cSDimitry Andric   const auto DecimalMinDigits =
751ac55f4cSDimitry Andric       std::max((int)Option.DecimalMinDigits, Decimal + 1);
761ac55f4cSDimitry Andric   const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1);
771ac55f4cSDimitry Andric 
78bdd1243dSDimitry Andric   const auto &SourceMgr = Env.getSourceManager();
79bdd1243dSDimitry Andric   AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
80bdd1243dSDimitry Andric 
81bdd1243dSDimitry Andric   const auto ID = Env.getFileID();
82bdd1243dSDimitry Andric   const auto LangOpts = getFormattingLangOpts(Style);
83bdd1243dSDimitry Andric   Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
84bdd1243dSDimitry Andric   Lex.SetCommentRetentionState(true);
85bdd1243dSDimitry Andric 
86bdd1243dSDimitry Andric   Token Tok;
87bdd1243dSDimitry Andric   tooling::Replacements Result;
88bdd1243dSDimitry Andric 
89bdd1243dSDimitry Andric   for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
90bdd1243dSDimitry Andric     auto Length = Tok.getLength();
91bdd1243dSDimitry Andric     if (Length < 2)
92bdd1243dSDimitry Andric       continue;
93bdd1243dSDimitry Andric     auto Location = Tok.getLocation();
94bdd1243dSDimitry Andric     auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
95bdd1243dSDimitry Andric     if (Tok.is(tok::comment)) {
96*06c3fb27SDimitry Andric       if (isClangFormatOff(Text))
97bdd1243dSDimitry Andric         Skip = true;
98*06c3fb27SDimitry Andric       else if (isClangFormatOn(Text))
99bdd1243dSDimitry Andric         Skip = false;
100bdd1243dSDimitry Andric       continue;
101bdd1243dSDimitry Andric     }
102bdd1243dSDimitry Andric     if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
103bdd1243dSDimitry Andric         !AffectedRangeMgr.affectsCharSourceRange(
104bdd1243dSDimitry Andric             CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
105bdd1243dSDimitry Andric       continue;
106bdd1243dSDimitry Andric     }
107bdd1243dSDimitry Andric     const auto B = getBase(Text);
108bdd1243dSDimitry Andric     const bool IsBase2 = B == Base::Binary;
109bdd1243dSDimitry Andric     const bool IsBase10 = B == Base::Decimal;
110bdd1243dSDimitry Andric     const bool IsBase16 = B == Base::Hex;
111bdd1243dSDimitry Andric     if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
112bdd1243dSDimitry Andric         (IsBase16 && SkipHex) || B == Base::Other) {
113bdd1243dSDimitry Andric       continue;
114bdd1243dSDimitry Andric     }
1151ac55f4cSDimitry Andric     if (Style.isCpp()) {
1162efbaac7SDimitry Andric       // Hex alpha digits a-f/A-F must be at the end of the string literal.
1172efbaac7SDimitry Andric       StringRef Suffixes = "_himnsuyd";
1182efbaac7SDimitry Andric       if (const auto Pos =
1192efbaac7SDimitry Andric               Text.find_first_of(IsBase16 ? Suffixes.drop_back() : Suffixes);
1202efbaac7SDimitry Andric           Pos != StringRef::npos) {
1211ac55f4cSDimitry Andric         Text = Text.substr(0, Pos);
1221ac55f4cSDimitry Andric         Length = Pos;
1231ac55f4cSDimitry Andric       }
1241ac55f4cSDimitry Andric     }
125bdd1243dSDimitry Andric     if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
126bdd1243dSDimitry Andric         (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
127bdd1243dSDimitry Andric       continue;
128bdd1243dSDimitry Andric     }
129bdd1243dSDimitry Andric     const auto Start = Text[0] == '0' ? 2 : 0;
1301ac55f4cSDimitry Andric     auto End = Text.find_first_of("uUlLzZn", Start);
131bdd1243dSDimitry Andric     if (End == StringRef::npos)
132bdd1243dSDimitry Andric       End = Length;
133bdd1243dSDimitry Andric     if (Start > 0 || End < Length) {
134bdd1243dSDimitry Andric       Length = End - Start;
135bdd1243dSDimitry Andric       Text = Text.substr(Start, Length);
136bdd1243dSDimitry Andric     }
137bdd1243dSDimitry Andric     auto DigitsPerGroup = Decimal;
1381ac55f4cSDimitry Andric     auto MinDigits = DecimalMinDigits;
1391ac55f4cSDimitry Andric     if (IsBase2) {
140bdd1243dSDimitry Andric       DigitsPerGroup = Binary;
1411ac55f4cSDimitry Andric       MinDigits = BinaryMinDigits;
1421ac55f4cSDimitry Andric     } else if (IsBase16) {
143bdd1243dSDimitry Andric       DigitsPerGroup = Hex;
1441ac55f4cSDimitry Andric       MinDigits = HexMinDigits;
1451ac55f4cSDimitry Andric     }
1461ac55f4cSDimitry Andric     const auto SeparatorCount = Text.count(Separator);
1471ac55f4cSDimitry Andric     const int DigitCount = Length - SeparatorCount;
1481ac55f4cSDimitry Andric     const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits;
1491ac55f4cSDimitry Andric     if (RemoveSeparator && SeparatorCount == 0)
150bdd1243dSDimitry Andric       continue;
1511ac55f4cSDimitry Andric     if (!RemoveSeparator && SeparatorCount > 0 &&
1521ac55f4cSDimitry Andric         checkSeparator(Text, DigitsPerGroup)) {
1531ac55f4cSDimitry Andric       continue;
1541ac55f4cSDimitry Andric     }
1551ac55f4cSDimitry Andric     const auto &Formatted =
1561ac55f4cSDimitry Andric         format(Text, DigitsPerGroup, DigitCount, RemoveSeparator);
1571ac55f4cSDimitry Andric     assert(Formatted != Text);
158bdd1243dSDimitry Andric     if (Start > 0)
159bdd1243dSDimitry Andric       Location = Location.getLocWithOffset(Start);
1601ac55f4cSDimitry Andric     cantFail(Result.add(
1611ac55f4cSDimitry Andric         tooling::Replacement(SourceMgr, Location, Length, Formatted)));
162bdd1243dSDimitry Andric   }
163bdd1243dSDimitry Andric 
164bdd1243dSDimitry Andric   return {Result, 0};
165bdd1243dSDimitry Andric }
166bdd1243dSDimitry Andric 
checkSeparator(const StringRef IntegerLiteral,int DigitsPerGroup) const167bdd1243dSDimitry Andric bool IntegerLiteralSeparatorFixer::checkSeparator(
168bdd1243dSDimitry Andric     const StringRef IntegerLiteral, int DigitsPerGroup) const {
169bdd1243dSDimitry Andric   assert(DigitsPerGroup > 0);
170bdd1243dSDimitry Andric 
171bdd1243dSDimitry Andric   int I = 0;
172bdd1243dSDimitry Andric   for (auto C : llvm::reverse(IntegerLiteral)) {
173bdd1243dSDimitry Andric     if (C == Separator) {
174bdd1243dSDimitry Andric       if (I < DigitsPerGroup)
175bdd1243dSDimitry Andric         return false;
176bdd1243dSDimitry Andric       I = 0;
177bdd1243dSDimitry Andric     } else {
178bdd1243dSDimitry Andric       if (I == DigitsPerGroup)
179bdd1243dSDimitry Andric         return false;
1801ac55f4cSDimitry Andric       ++I;
181bdd1243dSDimitry Andric     }
182bdd1243dSDimitry Andric   }
183bdd1243dSDimitry Andric 
184bdd1243dSDimitry Andric   return true;
185bdd1243dSDimitry Andric }
186bdd1243dSDimitry Andric 
format(const StringRef IntegerLiteral,int DigitsPerGroup,int DigitCount,bool RemoveSeparator) const187bdd1243dSDimitry Andric std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
1881ac55f4cSDimitry Andric                                                  int DigitsPerGroup,
1891ac55f4cSDimitry Andric                                                  int DigitCount,
1901ac55f4cSDimitry Andric                                                  bool RemoveSeparator) const {
191bdd1243dSDimitry Andric   assert(DigitsPerGroup != 0);
192bdd1243dSDimitry Andric 
193bdd1243dSDimitry Andric   std::string Formatted;
194bdd1243dSDimitry Andric 
1951ac55f4cSDimitry Andric   if (RemoveSeparator) {
196bdd1243dSDimitry Andric     for (auto C : IntegerLiteral)
197bdd1243dSDimitry Andric       if (C != Separator)
198bdd1243dSDimitry Andric         Formatted.push_back(C);
199bdd1243dSDimitry Andric     return Formatted;
200bdd1243dSDimitry Andric   }
201bdd1243dSDimitry Andric 
202bdd1243dSDimitry Andric   int Remainder = DigitCount % DigitsPerGroup;
203bdd1243dSDimitry Andric 
204bdd1243dSDimitry Andric   int I = 0;
205bdd1243dSDimitry Andric   for (auto C : IntegerLiteral) {
206bdd1243dSDimitry Andric     if (C == Separator)
207bdd1243dSDimitry Andric       continue;
208bdd1243dSDimitry Andric     if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
209bdd1243dSDimitry Andric       Formatted.push_back(Separator);
210bdd1243dSDimitry Andric       I = 0;
211bdd1243dSDimitry Andric       Remainder = 0;
212bdd1243dSDimitry Andric     }
213bdd1243dSDimitry Andric     Formatted.push_back(C);
214bdd1243dSDimitry Andric     ++I;
215bdd1243dSDimitry Andric   }
216bdd1243dSDimitry Andric 
217bdd1243dSDimitry Andric   return Formatted;
218bdd1243dSDimitry Andric }
219bdd1243dSDimitry Andric 
220bdd1243dSDimitry Andric } // namespace format
221bdd1243dSDimitry Andric } // namespace clang
222