//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
/// literal separators.
///
//===----------------------------------------------------------------------===//
14bdd1243dSDimitry Andric
15bdd1243dSDimitry Andric #include "IntegerLiteralSeparatorFixer.h"
16bdd1243dSDimitry Andric
17bdd1243dSDimitry Andric namespace clang {
18bdd1243dSDimitry Andric namespace format {
19bdd1243dSDimitry Andric
/// Numeric base of an integer literal as determined from its leading
/// characters. `Other` covers every literal that starts with '0' but is not
/// followed by a binary ('b'/'B') or hexadecimal ('x'/'X') prefix character.
enum class Base { Binary, Decimal, Hex, Other };
21bdd1243dSDimitry Andric
getBase(const StringRef IntegerLiteral)22bdd1243dSDimitry Andric static Base getBase(const StringRef IntegerLiteral) {
23bdd1243dSDimitry Andric assert(IntegerLiteral.size() > 1);
24bdd1243dSDimitry Andric
25bdd1243dSDimitry Andric if (IntegerLiteral[0] > '0') {
26bdd1243dSDimitry Andric assert(IntegerLiteral[0] <= '9');
27bdd1243dSDimitry Andric return Base::Decimal;
28bdd1243dSDimitry Andric }
29bdd1243dSDimitry Andric
30bdd1243dSDimitry Andric assert(IntegerLiteral[0] == '0');
31bdd1243dSDimitry Andric
32bdd1243dSDimitry Andric switch (IntegerLiteral[1]) {
33bdd1243dSDimitry Andric case 'b':
34bdd1243dSDimitry Andric case 'B':
35bdd1243dSDimitry Andric return Base::Binary;
36bdd1243dSDimitry Andric case 'x':
37bdd1243dSDimitry Andric case 'X':
38bdd1243dSDimitry Andric return Base::Hex;
39bdd1243dSDimitry Andric default:
40bdd1243dSDimitry Andric return Base::Other;
41bdd1243dSDimitry Andric }
42bdd1243dSDimitry Andric }
43bdd1243dSDimitry Andric
44bdd1243dSDimitry Andric std::pair<tooling::Replacements, unsigned>
process(const Environment & Env,const FormatStyle & Style)45bdd1243dSDimitry Andric IntegerLiteralSeparatorFixer::process(const Environment &Env,
46bdd1243dSDimitry Andric const FormatStyle &Style) {
47bdd1243dSDimitry Andric switch (Style.Language) {
48bdd1243dSDimitry Andric case FormatStyle::LK_Cpp:
49bdd1243dSDimitry Andric case FormatStyle::LK_ObjC:
50bdd1243dSDimitry Andric Separator = '\'';
51bdd1243dSDimitry Andric break;
52bdd1243dSDimitry Andric case FormatStyle::LK_CSharp:
53bdd1243dSDimitry Andric case FormatStyle::LK_Java:
54bdd1243dSDimitry Andric case FormatStyle::LK_JavaScript:
55bdd1243dSDimitry Andric Separator = '_';
56bdd1243dSDimitry Andric break;
57bdd1243dSDimitry Andric default:
58bdd1243dSDimitry Andric return {};
59bdd1243dSDimitry Andric }
60bdd1243dSDimitry Andric
61bdd1243dSDimitry Andric const auto &Option = Style.IntegerLiteralSeparator;
62bdd1243dSDimitry Andric const auto Binary = Option.Binary;
63bdd1243dSDimitry Andric const auto Decimal = Option.Decimal;
64bdd1243dSDimitry Andric const auto Hex = Option.Hex;
65bdd1243dSDimitry Andric const bool SkipBinary = Binary == 0;
66bdd1243dSDimitry Andric const bool SkipDecimal = Decimal == 0;
67bdd1243dSDimitry Andric const bool SkipHex = Hex == 0;
68bdd1243dSDimitry Andric
69bdd1243dSDimitry Andric if (SkipBinary && SkipDecimal && SkipHex)
70bdd1243dSDimitry Andric return {};
71bdd1243dSDimitry Andric
721ac55f4cSDimitry Andric const auto BinaryMinDigits =
731ac55f4cSDimitry Andric std::max((int)Option.BinaryMinDigits, Binary + 1);
741ac55f4cSDimitry Andric const auto DecimalMinDigits =
751ac55f4cSDimitry Andric std::max((int)Option.DecimalMinDigits, Decimal + 1);
761ac55f4cSDimitry Andric const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1);
771ac55f4cSDimitry Andric
78bdd1243dSDimitry Andric const auto &SourceMgr = Env.getSourceManager();
79bdd1243dSDimitry Andric AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
80bdd1243dSDimitry Andric
81bdd1243dSDimitry Andric const auto ID = Env.getFileID();
82bdd1243dSDimitry Andric const auto LangOpts = getFormattingLangOpts(Style);
83bdd1243dSDimitry Andric Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
84bdd1243dSDimitry Andric Lex.SetCommentRetentionState(true);
85bdd1243dSDimitry Andric
86bdd1243dSDimitry Andric Token Tok;
87bdd1243dSDimitry Andric tooling::Replacements Result;
88bdd1243dSDimitry Andric
89bdd1243dSDimitry Andric for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
90bdd1243dSDimitry Andric auto Length = Tok.getLength();
91bdd1243dSDimitry Andric if (Length < 2)
92bdd1243dSDimitry Andric continue;
93bdd1243dSDimitry Andric auto Location = Tok.getLocation();
94bdd1243dSDimitry Andric auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
95bdd1243dSDimitry Andric if (Tok.is(tok::comment)) {
96*06c3fb27SDimitry Andric if (isClangFormatOff(Text))
97bdd1243dSDimitry Andric Skip = true;
98*06c3fb27SDimitry Andric else if (isClangFormatOn(Text))
99bdd1243dSDimitry Andric Skip = false;
100bdd1243dSDimitry Andric continue;
101bdd1243dSDimitry Andric }
102bdd1243dSDimitry Andric if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
103bdd1243dSDimitry Andric !AffectedRangeMgr.affectsCharSourceRange(
104bdd1243dSDimitry Andric CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
105bdd1243dSDimitry Andric continue;
106bdd1243dSDimitry Andric }
107bdd1243dSDimitry Andric const auto B = getBase(Text);
108bdd1243dSDimitry Andric const bool IsBase2 = B == Base::Binary;
109bdd1243dSDimitry Andric const bool IsBase10 = B == Base::Decimal;
110bdd1243dSDimitry Andric const bool IsBase16 = B == Base::Hex;
111bdd1243dSDimitry Andric if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
112bdd1243dSDimitry Andric (IsBase16 && SkipHex) || B == Base::Other) {
113bdd1243dSDimitry Andric continue;
114bdd1243dSDimitry Andric }
1151ac55f4cSDimitry Andric if (Style.isCpp()) {
1162efbaac7SDimitry Andric // Hex alpha digits a-f/A-F must be at the end of the string literal.
1172efbaac7SDimitry Andric StringRef Suffixes = "_himnsuyd";
1182efbaac7SDimitry Andric if (const auto Pos =
1192efbaac7SDimitry Andric Text.find_first_of(IsBase16 ? Suffixes.drop_back() : Suffixes);
1202efbaac7SDimitry Andric Pos != StringRef::npos) {
1211ac55f4cSDimitry Andric Text = Text.substr(0, Pos);
1221ac55f4cSDimitry Andric Length = Pos;
1231ac55f4cSDimitry Andric }
1241ac55f4cSDimitry Andric }
125bdd1243dSDimitry Andric if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
126bdd1243dSDimitry Andric (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
127bdd1243dSDimitry Andric continue;
128bdd1243dSDimitry Andric }
129bdd1243dSDimitry Andric const auto Start = Text[0] == '0' ? 2 : 0;
1301ac55f4cSDimitry Andric auto End = Text.find_first_of("uUlLzZn", Start);
131bdd1243dSDimitry Andric if (End == StringRef::npos)
132bdd1243dSDimitry Andric End = Length;
133bdd1243dSDimitry Andric if (Start > 0 || End < Length) {
134bdd1243dSDimitry Andric Length = End - Start;
135bdd1243dSDimitry Andric Text = Text.substr(Start, Length);
136bdd1243dSDimitry Andric }
137bdd1243dSDimitry Andric auto DigitsPerGroup = Decimal;
1381ac55f4cSDimitry Andric auto MinDigits = DecimalMinDigits;
1391ac55f4cSDimitry Andric if (IsBase2) {
140bdd1243dSDimitry Andric DigitsPerGroup = Binary;
1411ac55f4cSDimitry Andric MinDigits = BinaryMinDigits;
1421ac55f4cSDimitry Andric } else if (IsBase16) {
143bdd1243dSDimitry Andric DigitsPerGroup = Hex;
1441ac55f4cSDimitry Andric MinDigits = HexMinDigits;
1451ac55f4cSDimitry Andric }
1461ac55f4cSDimitry Andric const auto SeparatorCount = Text.count(Separator);
1471ac55f4cSDimitry Andric const int DigitCount = Length - SeparatorCount;
1481ac55f4cSDimitry Andric const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits;
1491ac55f4cSDimitry Andric if (RemoveSeparator && SeparatorCount == 0)
150bdd1243dSDimitry Andric continue;
1511ac55f4cSDimitry Andric if (!RemoveSeparator && SeparatorCount > 0 &&
1521ac55f4cSDimitry Andric checkSeparator(Text, DigitsPerGroup)) {
1531ac55f4cSDimitry Andric continue;
1541ac55f4cSDimitry Andric }
1551ac55f4cSDimitry Andric const auto &Formatted =
1561ac55f4cSDimitry Andric format(Text, DigitsPerGroup, DigitCount, RemoveSeparator);
1571ac55f4cSDimitry Andric assert(Formatted != Text);
158bdd1243dSDimitry Andric if (Start > 0)
159bdd1243dSDimitry Andric Location = Location.getLocWithOffset(Start);
1601ac55f4cSDimitry Andric cantFail(Result.add(
1611ac55f4cSDimitry Andric tooling::Replacement(SourceMgr, Location, Length, Formatted)));
162bdd1243dSDimitry Andric }
163bdd1243dSDimitry Andric
164bdd1243dSDimitry Andric return {Result, 0};
165bdd1243dSDimitry Andric }
166bdd1243dSDimitry Andric
checkSeparator(const StringRef IntegerLiteral,int DigitsPerGroup) const167bdd1243dSDimitry Andric bool IntegerLiteralSeparatorFixer::checkSeparator(
168bdd1243dSDimitry Andric const StringRef IntegerLiteral, int DigitsPerGroup) const {
169bdd1243dSDimitry Andric assert(DigitsPerGroup > 0);
170bdd1243dSDimitry Andric
171bdd1243dSDimitry Andric int I = 0;
172bdd1243dSDimitry Andric for (auto C : llvm::reverse(IntegerLiteral)) {
173bdd1243dSDimitry Andric if (C == Separator) {
174bdd1243dSDimitry Andric if (I < DigitsPerGroup)
175bdd1243dSDimitry Andric return false;
176bdd1243dSDimitry Andric I = 0;
177bdd1243dSDimitry Andric } else {
178bdd1243dSDimitry Andric if (I == DigitsPerGroup)
179bdd1243dSDimitry Andric return false;
1801ac55f4cSDimitry Andric ++I;
181bdd1243dSDimitry Andric }
182bdd1243dSDimitry Andric }
183bdd1243dSDimitry Andric
184bdd1243dSDimitry Andric return true;
185bdd1243dSDimitry Andric }
186bdd1243dSDimitry Andric
format(const StringRef IntegerLiteral,int DigitsPerGroup,int DigitCount,bool RemoveSeparator) const187bdd1243dSDimitry Andric std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
1881ac55f4cSDimitry Andric int DigitsPerGroup,
1891ac55f4cSDimitry Andric int DigitCount,
1901ac55f4cSDimitry Andric bool RemoveSeparator) const {
191bdd1243dSDimitry Andric assert(DigitsPerGroup != 0);
192bdd1243dSDimitry Andric
193bdd1243dSDimitry Andric std::string Formatted;
194bdd1243dSDimitry Andric
1951ac55f4cSDimitry Andric if (RemoveSeparator) {
196bdd1243dSDimitry Andric for (auto C : IntegerLiteral)
197bdd1243dSDimitry Andric if (C != Separator)
198bdd1243dSDimitry Andric Formatted.push_back(C);
199bdd1243dSDimitry Andric return Formatted;
200bdd1243dSDimitry Andric }
201bdd1243dSDimitry Andric
202bdd1243dSDimitry Andric int Remainder = DigitCount % DigitsPerGroup;
203bdd1243dSDimitry Andric
204bdd1243dSDimitry Andric int I = 0;
205bdd1243dSDimitry Andric for (auto C : IntegerLiteral) {
206bdd1243dSDimitry Andric if (C == Separator)
207bdd1243dSDimitry Andric continue;
208bdd1243dSDimitry Andric if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
209bdd1243dSDimitry Andric Formatted.push_back(Separator);
210bdd1243dSDimitry Andric I = 0;
211bdd1243dSDimitry Andric Remainder = 0;
212bdd1243dSDimitry Andric }
213bdd1243dSDimitry Andric Formatted.push_back(C);
214bdd1243dSDimitry Andric ++I;
215bdd1243dSDimitry Andric }
216bdd1243dSDimitry Andric
217bdd1243dSDimitry Andric return Formatted;
218bdd1243dSDimitry Andric }
219bdd1243dSDimitry Andric
220bdd1243dSDimitry Andric } // namespace format
221bdd1243dSDimitry Andric } // namespace clang
222