xref: /openbsd-src/gnu/llvm/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp (revision 12c855180aad702bbcca06e0398d774beeafb155)
//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
/// literal separators.
///
//===----------------------------------------------------------------------===//
14*12c85518Srobert 
15*12c85518Srobert #include "IntegerLiteralSeparatorFixer.h"
16*12c85518Srobert 
17*12c85518Srobert namespace clang {
18*12c85518Srobert namespace format {
19*12c85518Srobert 
namespace {

/// Numeric base of an integer literal, as classified by getBase() from the
/// literal's leading characters. Kept in an anonymous namespace because it is
/// only used inside this translation unit.
enum class Base {
  Binary,  ///< Literal starts with "0b" or "0B".
  Decimal, ///< Literal starts with a non-zero digit.
  Hex,     ///< Literal starts with "0x" or "0X".
  Other,   ///< Any other literal starting with '0'; never reformatted.
};

} // namespace
21*12c85518Srobert 
getBase(const StringRef IntegerLiteral)22*12c85518Srobert static Base getBase(const StringRef IntegerLiteral) {
23*12c85518Srobert   assert(IntegerLiteral.size() > 1);
24*12c85518Srobert 
25*12c85518Srobert   if (IntegerLiteral[0] > '0') {
26*12c85518Srobert     assert(IntegerLiteral[0] <= '9');
27*12c85518Srobert     return Base::Decimal;
28*12c85518Srobert   }
29*12c85518Srobert 
30*12c85518Srobert   assert(IntegerLiteral[0] == '0');
31*12c85518Srobert 
32*12c85518Srobert   switch (IntegerLiteral[1]) {
33*12c85518Srobert   case 'b':
34*12c85518Srobert   case 'B':
35*12c85518Srobert     return Base::Binary;
36*12c85518Srobert   case 'x':
37*12c85518Srobert   case 'X':
38*12c85518Srobert     return Base::Hex;
39*12c85518Srobert   default:
40*12c85518Srobert     return Base::Other;
41*12c85518Srobert   }
42*12c85518Srobert }
43*12c85518Srobert 
44*12c85518Srobert std::pair<tooling::Replacements, unsigned>
process(const Environment & Env,const FormatStyle & Style)45*12c85518Srobert IntegerLiteralSeparatorFixer::process(const Environment &Env,
46*12c85518Srobert                                       const FormatStyle &Style) {
47*12c85518Srobert   switch (Style.Language) {
48*12c85518Srobert   case FormatStyle::LK_Cpp:
49*12c85518Srobert   case FormatStyle::LK_ObjC:
50*12c85518Srobert     Separator = '\'';
51*12c85518Srobert     break;
52*12c85518Srobert   case FormatStyle::LK_CSharp:
53*12c85518Srobert   case FormatStyle::LK_Java:
54*12c85518Srobert   case FormatStyle::LK_JavaScript:
55*12c85518Srobert     Separator = '_';
56*12c85518Srobert     break;
57*12c85518Srobert   default:
58*12c85518Srobert     return {};
59*12c85518Srobert   }
60*12c85518Srobert 
61*12c85518Srobert   const auto &Option = Style.IntegerLiteralSeparator;
62*12c85518Srobert   const auto Binary = Option.Binary;
63*12c85518Srobert   const auto Decimal = Option.Decimal;
64*12c85518Srobert   const auto Hex = Option.Hex;
65*12c85518Srobert   const bool SkipBinary = Binary == 0;
66*12c85518Srobert   const bool SkipDecimal = Decimal == 0;
67*12c85518Srobert   const bool SkipHex = Hex == 0;
68*12c85518Srobert 
69*12c85518Srobert   if (SkipBinary && SkipDecimal && SkipHex)
70*12c85518Srobert     return {};
71*12c85518Srobert 
72*12c85518Srobert   const auto BinaryMinDigits =
73*12c85518Srobert       std::max((int)Option.BinaryMinDigits, Binary + 1);
74*12c85518Srobert   const auto DecimalMinDigits =
75*12c85518Srobert       std::max((int)Option.DecimalMinDigits, Decimal + 1);
76*12c85518Srobert   const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1);
77*12c85518Srobert 
78*12c85518Srobert   const auto &SourceMgr = Env.getSourceManager();
79*12c85518Srobert   AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
80*12c85518Srobert 
81*12c85518Srobert   const auto ID = Env.getFileID();
82*12c85518Srobert   const auto LangOpts = getFormattingLangOpts(Style);
83*12c85518Srobert   Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
84*12c85518Srobert   Lex.SetCommentRetentionState(true);
85*12c85518Srobert 
86*12c85518Srobert   Token Tok;
87*12c85518Srobert   tooling::Replacements Result;
88*12c85518Srobert 
89*12c85518Srobert   for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
90*12c85518Srobert     auto Length = Tok.getLength();
91*12c85518Srobert     if (Length < 2)
92*12c85518Srobert       continue;
93*12c85518Srobert     auto Location = Tok.getLocation();
94*12c85518Srobert     auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
95*12c85518Srobert     if (Tok.is(tok::comment)) {
96*12c85518Srobert       if (Text == "// clang-format off" || Text == "/* clang-format off */")
97*12c85518Srobert         Skip = true;
98*12c85518Srobert       else if (Text == "// clang-format on" || Text == "/* clang-format on */")
99*12c85518Srobert         Skip = false;
100*12c85518Srobert       continue;
101*12c85518Srobert     }
102*12c85518Srobert     if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
103*12c85518Srobert         !AffectedRangeMgr.affectsCharSourceRange(
104*12c85518Srobert             CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
105*12c85518Srobert       continue;
106*12c85518Srobert     }
107*12c85518Srobert     const auto B = getBase(Text);
108*12c85518Srobert     const bool IsBase2 = B == Base::Binary;
109*12c85518Srobert     const bool IsBase10 = B == Base::Decimal;
110*12c85518Srobert     const bool IsBase16 = B == Base::Hex;
111*12c85518Srobert     if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
112*12c85518Srobert         (IsBase16 && SkipHex) || B == Base::Other) {
113*12c85518Srobert       continue;
114*12c85518Srobert     }
115*12c85518Srobert     if (Style.isCpp()) {
116*12c85518Srobert       // Hex alpha digits a-f/A-F must be at the end of the string literal.
117*12c85518Srobert       StringRef Suffixes = "_himnsuyd";
118*12c85518Srobert       if (const auto Pos =
119*12c85518Srobert               Text.find_first_of(IsBase16 ? Suffixes.drop_back() : Suffixes);
120*12c85518Srobert           Pos != StringRef::npos) {
121*12c85518Srobert         Text = Text.substr(0, Pos);
122*12c85518Srobert         Length = Pos;
123*12c85518Srobert       }
124*12c85518Srobert     }
125*12c85518Srobert     if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
126*12c85518Srobert         (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
127*12c85518Srobert       continue;
128*12c85518Srobert     }
129*12c85518Srobert     const auto Start = Text[0] == '0' ? 2 : 0;
130*12c85518Srobert     auto End = Text.find_first_of("uUlLzZn", Start);
131*12c85518Srobert     if (End == StringRef::npos)
132*12c85518Srobert       End = Length;
133*12c85518Srobert     if (Start > 0 || End < Length) {
134*12c85518Srobert       Length = End - Start;
135*12c85518Srobert       Text = Text.substr(Start, Length);
136*12c85518Srobert     }
137*12c85518Srobert     auto DigitsPerGroup = Decimal;
138*12c85518Srobert     auto MinDigits = DecimalMinDigits;
139*12c85518Srobert     if (IsBase2) {
140*12c85518Srobert       DigitsPerGroup = Binary;
141*12c85518Srobert       MinDigits = BinaryMinDigits;
142*12c85518Srobert     } else if (IsBase16) {
143*12c85518Srobert       DigitsPerGroup = Hex;
144*12c85518Srobert       MinDigits = HexMinDigits;
145*12c85518Srobert     }
146*12c85518Srobert     const auto SeparatorCount = Text.count(Separator);
147*12c85518Srobert     const int DigitCount = Length - SeparatorCount;
148*12c85518Srobert     const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits;
149*12c85518Srobert     if (RemoveSeparator && SeparatorCount == 0)
150*12c85518Srobert       continue;
151*12c85518Srobert     if (!RemoveSeparator && SeparatorCount > 0 &&
152*12c85518Srobert         checkSeparator(Text, DigitsPerGroup)) {
153*12c85518Srobert       continue;
154*12c85518Srobert     }
155*12c85518Srobert     const auto &Formatted =
156*12c85518Srobert         format(Text, DigitsPerGroup, DigitCount, RemoveSeparator);
157*12c85518Srobert     assert(Formatted != Text);
158*12c85518Srobert     if (Start > 0)
159*12c85518Srobert       Location = Location.getLocWithOffset(Start);
160*12c85518Srobert     cantFail(Result.add(
161*12c85518Srobert         tooling::Replacement(SourceMgr, Location, Length, Formatted)));
162*12c85518Srobert   }
163*12c85518Srobert 
164*12c85518Srobert   return {Result, 0};
165*12c85518Srobert }
166*12c85518Srobert 
checkSeparator(const StringRef IntegerLiteral,int DigitsPerGroup) const167*12c85518Srobert bool IntegerLiteralSeparatorFixer::checkSeparator(
168*12c85518Srobert     const StringRef IntegerLiteral, int DigitsPerGroup) const {
169*12c85518Srobert   assert(DigitsPerGroup > 0);
170*12c85518Srobert 
171*12c85518Srobert   int I = 0;
172*12c85518Srobert   for (auto C : llvm::reverse(IntegerLiteral)) {
173*12c85518Srobert     if (C == Separator) {
174*12c85518Srobert       if (I < DigitsPerGroup)
175*12c85518Srobert         return false;
176*12c85518Srobert       I = 0;
177*12c85518Srobert     } else {
178*12c85518Srobert       if (I == DigitsPerGroup)
179*12c85518Srobert         return false;
180*12c85518Srobert       ++I;
181*12c85518Srobert     }
182*12c85518Srobert   }
183*12c85518Srobert 
184*12c85518Srobert   return true;
185*12c85518Srobert }
186*12c85518Srobert 
format(const StringRef IntegerLiteral,int DigitsPerGroup,int DigitCount,bool RemoveSeparator) const187*12c85518Srobert std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
188*12c85518Srobert                                                  int DigitsPerGroup,
189*12c85518Srobert                                                  int DigitCount,
190*12c85518Srobert                                                  bool RemoveSeparator) const {
191*12c85518Srobert   assert(DigitsPerGroup != 0);
192*12c85518Srobert 
193*12c85518Srobert   std::string Formatted;
194*12c85518Srobert 
195*12c85518Srobert   if (RemoveSeparator) {
196*12c85518Srobert     for (auto C : IntegerLiteral)
197*12c85518Srobert       if (C != Separator)
198*12c85518Srobert         Formatted.push_back(C);
199*12c85518Srobert     return Formatted;
200*12c85518Srobert   }
201*12c85518Srobert 
202*12c85518Srobert   int Remainder = DigitCount % DigitsPerGroup;
203*12c85518Srobert 
204*12c85518Srobert   int I = 0;
205*12c85518Srobert   for (auto C : IntegerLiteral) {
206*12c85518Srobert     if (C == Separator)
207*12c85518Srobert       continue;
208*12c85518Srobert     if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
209*12c85518Srobert       Formatted.push_back(Separator);
210*12c85518Srobert       I = 0;
211*12c85518Srobert       Remainder = 0;
212*12c85518Srobert     }
213*12c85518Srobert     Formatted.push_back(C);
214*12c85518Srobert     ++I;
215*12c85518Srobert   }
216*12c85518Srobert 
217*12c85518Srobert   return Formatted;
218*12c85518Srobert }
219*12c85518Srobert 
220*12c85518Srobert } // namespace format
221*12c85518Srobert } // namespace clang
222