xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1*bdd1243dSDimitry Andric //===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
2*bdd1243dSDimitry Andric //
3*bdd1243dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*bdd1243dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*bdd1243dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*bdd1243dSDimitry Andric //
7*bdd1243dSDimitry Andric //===----------------------------------------------------------------------===//
8*bdd1243dSDimitry Andric ///
9*bdd1243dSDimitry Andric /// \file
10*bdd1243dSDimitry Andric /// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
11*bdd1243dSDimitry Andric /// literal separators.
12*bdd1243dSDimitry Andric ///
13*bdd1243dSDimitry Andric //===----------------------------------------------------------------------===//
14*bdd1243dSDimitry Andric 
15*bdd1243dSDimitry Andric #include "IntegerLiteralSeparatorFixer.h"
16*bdd1243dSDimitry Andric 
17*bdd1243dSDimitry Andric namespace clang {
18*bdd1243dSDimitry Andric namespace format {
19*bdd1243dSDimitry Andric 
/// Radix category of a numeric literal, as determined by getBase().
enum class Base {
  Binary,  ///< Literal starts with "0b" or "0B".
  Decimal, ///< Literal starts with a nonzero decimal digit.
  Hex,     ///< Literal starts with "0x" or "0X".
  Other    ///< Leading '0' followed by anything else (e.g. octal); not handled.
};
21*bdd1243dSDimitry Andric 
22*bdd1243dSDimitry Andric static Base getBase(const StringRef IntegerLiteral) {
23*bdd1243dSDimitry Andric   assert(IntegerLiteral.size() > 1);
24*bdd1243dSDimitry Andric 
25*bdd1243dSDimitry Andric   if (IntegerLiteral[0] > '0') {
26*bdd1243dSDimitry Andric     assert(IntegerLiteral[0] <= '9');
27*bdd1243dSDimitry Andric     return Base::Decimal;
28*bdd1243dSDimitry Andric   }
29*bdd1243dSDimitry Andric 
30*bdd1243dSDimitry Andric   assert(IntegerLiteral[0] == '0');
31*bdd1243dSDimitry Andric 
32*bdd1243dSDimitry Andric   switch (IntegerLiteral[1]) {
33*bdd1243dSDimitry Andric   case 'b':
34*bdd1243dSDimitry Andric   case 'B':
35*bdd1243dSDimitry Andric     return Base::Binary;
36*bdd1243dSDimitry Andric   case 'x':
37*bdd1243dSDimitry Andric   case 'X':
38*bdd1243dSDimitry Andric     return Base::Hex;
39*bdd1243dSDimitry Andric   default:
40*bdd1243dSDimitry Andric     return Base::Other;
41*bdd1243dSDimitry Andric   }
42*bdd1243dSDimitry Andric }
43*bdd1243dSDimitry Andric 
44*bdd1243dSDimitry Andric std::pair<tooling::Replacements, unsigned>
45*bdd1243dSDimitry Andric IntegerLiteralSeparatorFixer::process(const Environment &Env,
46*bdd1243dSDimitry Andric                                       const FormatStyle &Style) {
47*bdd1243dSDimitry Andric   switch (Style.Language) {
48*bdd1243dSDimitry Andric   case FormatStyle::LK_Cpp:
49*bdd1243dSDimitry Andric   case FormatStyle::LK_ObjC:
50*bdd1243dSDimitry Andric     Separator = '\'';
51*bdd1243dSDimitry Andric     break;
52*bdd1243dSDimitry Andric   case FormatStyle::LK_CSharp:
53*bdd1243dSDimitry Andric   case FormatStyle::LK_Java:
54*bdd1243dSDimitry Andric   case FormatStyle::LK_JavaScript:
55*bdd1243dSDimitry Andric     Separator = '_';
56*bdd1243dSDimitry Andric     break;
57*bdd1243dSDimitry Andric   default:
58*bdd1243dSDimitry Andric     return {};
59*bdd1243dSDimitry Andric   }
60*bdd1243dSDimitry Andric 
61*bdd1243dSDimitry Andric   const auto &Option = Style.IntegerLiteralSeparator;
62*bdd1243dSDimitry Andric   const auto Binary = Option.Binary;
63*bdd1243dSDimitry Andric   const auto Decimal = Option.Decimal;
64*bdd1243dSDimitry Andric   const auto Hex = Option.Hex;
65*bdd1243dSDimitry Andric   const bool SkipBinary = Binary == 0;
66*bdd1243dSDimitry Andric   const bool SkipDecimal = Decimal == 0;
67*bdd1243dSDimitry Andric   const bool SkipHex = Hex == 0;
68*bdd1243dSDimitry Andric 
69*bdd1243dSDimitry Andric   if (SkipBinary && SkipDecimal && SkipHex)
70*bdd1243dSDimitry Andric     return {};
71*bdd1243dSDimitry Andric 
72*bdd1243dSDimitry Andric   const auto &SourceMgr = Env.getSourceManager();
73*bdd1243dSDimitry Andric   AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
74*bdd1243dSDimitry Andric 
75*bdd1243dSDimitry Andric   const auto ID = Env.getFileID();
76*bdd1243dSDimitry Andric   const auto LangOpts = getFormattingLangOpts(Style);
77*bdd1243dSDimitry Andric   Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
78*bdd1243dSDimitry Andric   Lex.SetCommentRetentionState(true);
79*bdd1243dSDimitry Andric 
80*bdd1243dSDimitry Andric   Token Tok;
81*bdd1243dSDimitry Andric   tooling::Replacements Result;
82*bdd1243dSDimitry Andric 
83*bdd1243dSDimitry Andric   for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
84*bdd1243dSDimitry Andric     auto Length = Tok.getLength();
85*bdd1243dSDimitry Andric     if (Length < 2)
86*bdd1243dSDimitry Andric       continue;
87*bdd1243dSDimitry Andric     auto Location = Tok.getLocation();
88*bdd1243dSDimitry Andric     auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
89*bdd1243dSDimitry Andric     if (Tok.is(tok::comment)) {
90*bdd1243dSDimitry Andric       if (Text == "// clang-format off" || Text == "/* clang-format off */")
91*bdd1243dSDimitry Andric         Skip = true;
92*bdd1243dSDimitry Andric       else if (Text == "// clang-format on" || Text == "/* clang-format on */")
93*bdd1243dSDimitry Andric         Skip = false;
94*bdd1243dSDimitry Andric       continue;
95*bdd1243dSDimitry Andric     }
96*bdd1243dSDimitry Andric     if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
97*bdd1243dSDimitry Andric         !AffectedRangeMgr.affectsCharSourceRange(
98*bdd1243dSDimitry Andric             CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
99*bdd1243dSDimitry Andric       continue;
100*bdd1243dSDimitry Andric     }
101*bdd1243dSDimitry Andric     const auto B = getBase(Text);
102*bdd1243dSDimitry Andric     const bool IsBase2 = B == Base::Binary;
103*bdd1243dSDimitry Andric     const bool IsBase10 = B == Base::Decimal;
104*bdd1243dSDimitry Andric     const bool IsBase16 = B == Base::Hex;
105*bdd1243dSDimitry Andric     if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
106*bdd1243dSDimitry Andric         (IsBase16 && SkipHex) || B == Base::Other) {
107*bdd1243dSDimitry Andric       continue;
108*bdd1243dSDimitry Andric     }
109*bdd1243dSDimitry Andric     if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
110*bdd1243dSDimitry Andric         (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
111*bdd1243dSDimitry Andric       continue;
112*bdd1243dSDimitry Andric     }
113*bdd1243dSDimitry Andric     if (((IsBase2 && Binary < 0) || (IsBase10 && Decimal < 0) ||
114*bdd1243dSDimitry Andric          (IsBase16 && Hex < 0)) &&
115*bdd1243dSDimitry Andric         Text.find(Separator) == StringRef::npos) {
116*bdd1243dSDimitry Andric       continue;
117*bdd1243dSDimitry Andric     }
118*bdd1243dSDimitry Andric     const auto Start = Text[0] == '0' ? 2 : 0;
119*bdd1243dSDimitry Andric     auto End = Text.find_first_of("uUlLzZn");
120*bdd1243dSDimitry Andric     if (End == StringRef::npos)
121*bdd1243dSDimitry Andric       End = Length;
122*bdd1243dSDimitry Andric     if (Start > 0 || End < Length) {
123*bdd1243dSDimitry Andric       Length = End - Start;
124*bdd1243dSDimitry Andric       Text = Text.substr(Start, Length);
125*bdd1243dSDimitry Andric     }
126*bdd1243dSDimitry Andric     auto DigitsPerGroup = Decimal;
127*bdd1243dSDimitry Andric     if (IsBase2)
128*bdd1243dSDimitry Andric       DigitsPerGroup = Binary;
129*bdd1243dSDimitry Andric     else if (IsBase16)
130*bdd1243dSDimitry Andric       DigitsPerGroup = Hex;
131*bdd1243dSDimitry Andric     if (DigitsPerGroup > 0 && checkSeparator(Text, DigitsPerGroup))
132*bdd1243dSDimitry Andric       continue;
133*bdd1243dSDimitry Andric     if (Start > 0)
134*bdd1243dSDimitry Andric       Location = Location.getLocWithOffset(Start);
135*bdd1243dSDimitry Andric     cantFail(Result.add(tooling::Replacement(SourceMgr, Location, Length,
136*bdd1243dSDimitry Andric                                              format(Text, DigitsPerGroup))));
137*bdd1243dSDimitry Andric   }
138*bdd1243dSDimitry Andric 
139*bdd1243dSDimitry Andric   return {Result, 0};
140*bdd1243dSDimitry Andric }
141*bdd1243dSDimitry Andric 
142*bdd1243dSDimitry Andric bool IntegerLiteralSeparatorFixer::checkSeparator(
143*bdd1243dSDimitry Andric     const StringRef IntegerLiteral, int DigitsPerGroup) const {
144*bdd1243dSDimitry Andric   assert(DigitsPerGroup > 0);
145*bdd1243dSDimitry Andric 
146*bdd1243dSDimitry Andric   int I = 0;
147*bdd1243dSDimitry Andric   for (auto C : llvm::reverse(IntegerLiteral)) {
148*bdd1243dSDimitry Andric     if (C == Separator) {
149*bdd1243dSDimitry Andric       if (I < DigitsPerGroup)
150*bdd1243dSDimitry Andric         return false;
151*bdd1243dSDimitry Andric       I = 0;
152*bdd1243dSDimitry Andric     } else {
153*bdd1243dSDimitry Andric       ++I;
154*bdd1243dSDimitry Andric       if (I == DigitsPerGroup)
155*bdd1243dSDimitry Andric         return false;
156*bdd1243dSDimitry Andric     }
157*bdd1243dSDimitry Andric   }
158*bdd1243dSDimitry Andric 
159*bdd1243dSDimitry Andric   return true;
160*bdd1243dSDimitry Andric }
161*bdd1243dSDimitry Andric 
162*bdd1243dSDimitry Andric std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
163*bdd1243dSDimitry Andric                                                  int DigitsPerGroup) const {
164*bdd1243dSDimitry Andric   assert(DigitsPerGroup != 0);
165*bdd1243dSDimitry Andric 
166*bdd1243dSDimitry Andric   std::string Formatted;
167*bdd1243dSDimitry Andric 
168*bdd1243dSDimitry Andric   if (DigitsPerGroup < 0) {
169*bdd1243dSDimitry Andric     for (auto C : IntegerLiteral)
170*bdd1243dSDimitry Andric       if (C != Separator)
171*bdd1243dSDimitry Andric         Formatted.push_back(C);
172*bdd1243dSDimitry Andric     return Formatted;
173*bdd1243dSDimitry Andric   }
174*bdd1243dSDimitry Andric 
175*bdd1243dSDimitry Andric   int DigitCount = 0;
176*bdd1243dSDimitry Andric   for (auto C : IntegerLiteral)
177*bdd1243dSDimitry Andric     if (C != Separator)
178*bdd1243dSDimitry Andric       ++DigitCount;
179*bdd1243dSDimitry Andric 
180*bdd1243dSDimitry Andric   int Remainder = DigitCount % DigitsPerGroup;
181*bdd1243dSDimitry Andric 
182*bdd1243dSDimitry Andric   int I = 0;
183*bdd1243dSDimitry Andric   for (auto C : IntegerLiteral) {
184*bdd1243dSDimitry Andric     if (C == Separator)
185*bdd1243dSDimitry Andric       continue;
186*bdd1243dSDimitry Andric     if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
187*bdd1243dSDimitry Andric       Formatted.push_back(Separator);
188*bdd1243dSDimitry Andric       I = 0;
189*bdd1243dSDimitry Andric       Remainder = 0;
190*bdd1243dSDimitry Andric     }
191*bdd1243dSDimitry Andric     Formatted.push_back(C);
192*bdd1243dSDimitry Andric     ++I;
193*bdd1243dSDimitry Andric   }
194*bdd1243dSDimitry Andric 
195*bdd1243dSDimitry Andric   return Formatted;
196*bdd1243dSDimitry Andric }
197*bdd1243dSDimitry Andric 
198*bdd1243dSDimitry Andric } // namespace format
199*bdd1243dSDimitry Andric } // namespace clang
200