//===- Token.cpp - MLIR Token Implementation ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Token class for the MLIR textual form.
//
//===----------------------------------------------------------------------===//
12c60b897dSRiver Riddle
13c60b897dSRiver Riddle #include "Token.h"
14*285a229fSMehdi Amini #include "mlir/Support/LLVM.h"
15c60b897dSRiver Riddle #include "llvm/ADT/StringExtras.h"
16*285a229fSMehdi Amini #include "llvm/Support/ErrorHandling.h"
17*285a229fSMehdi Amini #include <cassert>
18*285a229fSMehdi Amini #include <cstdint>
19a1fe1f5fSKazu Hirata #include <optional>
20*285a229fSMehdi Amini #include <string>
21c60b897dSRiver Riddle
22c60b897dSRiver Riddle using namespace mlir;
23c60b897dSRiver Riddle
getLoc() const24c60b897dSRiver Riddle SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); }
25c60b897dSRiver Riddle
getEndLoc() const26c60b897dSRiver Riddle SMLoc Token::getEndLoc() const {
27c60b897dSRiver Riddle return SMLoc::getFromPointer(spelling.data() + spelling.size());
28c60b897dSRiver Riddle }
29c60b897dSRiver Riddle
getLocRange() const30c60b897dSRiver Riddle SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); }
31c60b897dSRiver Riddle
32c60b897dSRiver Riddle /// For an integer token, return its value as an unsigned. If it doesn't fit,
3370c73d1bSKazu Hirata /// return std::nullopt.
getUnsignedIntegerValue() const340a81ace0SKazu Hirata std::optional<unsigned> Token::getUnsignedIntegerValue() const {
35c60b897dSRiver Riddle bool isHex = spelling.size() > 1 && spelling[1] == 'x';
36c60b897dSRiver Riddle
37c60b897dSRiver Riddle unsigned result = 0;
38c60b897dSRiver Riddle if (spelling.getAsInteger(isHex ? 0 : 10, result))
391a36588eSKazu Hirata return std::nullopt;
40c60b897dSRiver Riddle return result;
41c60b897dSRiver Riddle }
42c60b897dSRiver Riddle
43c60b897dSRiver Riddle /// For an integer token, return its value as a uint64_t. If it doesn't fit,
4470c73d1bSKazu Hirata /// return std::nullopt.
getUInt64IntegerValue(StringRef spelling)450a81ace0SKazu Hirata std::optional<uint64_t> Token::getUInt64IntegerValue(StringRef spelling) {
46c60b897dSRiver Riddle bool isHex = spelling.size() > 1 && spelling[1] == 'x';
47c60b897dSRiver Riddle
48c60b897dSRiver Riddle uint64_t result = 0;
49c60b897dSRiver Riddle if (spelling.getAsInteger(isHex ? 0 : 10, result))
501a36588eSKazu Hirata return std::nullopt;
51c60b897dSRiver Riddle return result;
52c60b897dSRiver Riddle }
53c60b897dSRiver Riddle
5470c73d1bSKazu Hirata /// For a floatliteral, return its value as a double. Return std::nullopt if the
5570c73d1bSKazu Hirata /// value underflows or overflows.
getFloatingPointValue() const560a81ace0SKazu Hirata std::optional<double> Token::getFloatingPointValue() const {
57c60b897dSRiver Riddle double result = 0;
58c60b897dSRiver Riddle if (spelling.getAsDouble(result))
591a36588eSKazu Hirata return std::nullopt;
60c60b897dSRiver Riddle return result;
61c60b897dSRiver Riddle }
62c60b897dSRiver Riddle
63c60b897dSRiver Riddle /// For an inttype token, return its bitwidth.
getIntTypeBitwidth() const640a81ace0SKazu Hirata std::optional<unsigned> Token::getIntTypeBitwidth() const {
65c60b897dSRiver Riddle assert(getKind() == inttype);
66c60b897dSRiver Riddle unsigned bitwidthStart = (spelling[0] == 'i' ? 1 : 2);
67c60b897dSRiver Riddle unsigned result = 0;
68c60b897dSRiver Riddle if (spelling.drop_front(bitwidthStart).getAsInteger(10, result))
691a36588eSKazu Hirata return std::nullopt;
70c60b897dSRiver Riddle return result;
71c60b897dSRiver Riddle }
72c60b897dSRiver Riddle
getIntTypeSignedness() const730a81ace0SKazu Hirata std::optional<bool> Token::getIntTypeSignedness() const {
74c60b897dSRiver Riddle assert(getKind() == inttype);
75c60b897dSRiver Riddle if (spelling[0] == 'i')
761a36588eSKazu Hirata return std::nullopt;
77c60b897dSRiver Riddle if (spelling[0] == 's')
78c60b897dSRiver Riddle return true;
79c60b897dSRiver Riddle assert(spelling[0] == 'u');
80c60b897dSRiver Riddle return false;
81c60b897dSRiver Riddle }
82c60b897dSRiver Riddle
83c60b897dSRiver Riddle /// Given a token containing a string literal, return its value, including
84c60b897dSRiver Riddle /// removing the quote characters and unescaping the contents of the string. The
85c60b897dSRiver Riddle /// lexer has already verified that this token is valid.
getStringValue() const86c60b897dSRiver Riddle std::string Token::getStringValue() const {
87c60b897dSRiver Riddle assert(getKind() == string || getKind() == code_complete ||
88c60b897dSRiver Riddle (getKind() == at_identifier && getSpelling()[1] == '"'));
89c60b897dSRiver Riddle // Start by dropping the quotes.
90c60b897dSRiver Riddle StringRef bytes = getSpelling().drop_front();
91c60b897dSRiver Riddle if (getKind() != Token::code_complete) {
92c60b897dSRiver Riddle bytes = bytes.drop_back();
93c60b897dSRiver Riddle if (getKind() == at_identifier)
94c60b897dSRiver Riddle bytes = bytes.drop_front();
95c60b897dSRiver Riddle }
96c60b897dSRiver Riddle
97c60b897dSRiver Riddle std::string result;
98c60b897dSRiver Riddle result.reserve(bytes.size());
99c60b897dSRiver Riddle for (unsigned i = 0, e = bytes.size(); i != e;) {
100c60b897dSRiver Riddle auto c = bytes[i++];
101c60b897dSRiver Riddle if (c != '\\') {
102c60b897dSRiver Riddle result.push_back(c);
103c60b897dSRiver Riddle continue;
104c60b897dSRiver Riddle }
105c60b897dSRiver Riddle
106c60b897dSRiver Riddle assert(i + 1 <= e && "invalid string should be caught by lexer");
107c60b897dSRiver Riddle auto c1 = bytes[i++];
108c60b897dSRiver Riddle switch (c1) {
109c60b897dSRiver Riddle case '"':
110c60b897dSRiver Riddle case '\\':
111c60b897dSRiver Riddle result.push_back(c1);
112c60b897dSRiver Riddle continue;
113c60b897dSRiver Riddle case 'n':
114c60b897dSRiver Riddle result.push_back('\n');
115c60b897dSRiver Riddle continue;
116c60b897dSRiver Riddle case 't':
117c60b897dSRiver Riddle result.push_back('\t');
118c60b897dSRiver Riddle continue;
119c60b897dSRiver Riddle default:
120c60b897dSRiver Riddle break;
121c60b897dSRiver Riddle }
122c60b897dSRiver Riddle
123c60b897dSRiver Riddle assert(i + 1 <= e && "invalid string should be caught by lexer");
124c60b897dSRiver Riddle auto c2 = bytes[i++];
125c60b897dSRiver Riddle
126c60b897dSRiver Riddle assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
127c60b897dSRiver Riddle result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
128c60b897dSRiver Riddle }
129c60b897dSRiver Riddle
130c60b897dSRiver Riddle return result;
131c60b897dSRiver Riddle }
132c60b897dSRiver Riddle
133192d9dd7SKazu Hirata /// Given a token containing a hex string literal, return its value or
134192d9dd7SKazu Hirata /// std::nullopt if the token does not contain a valid hex string.
getHexStringValue() const1350a81ace0SKazu Hirata std::optional<std::string> Token::getHexStringValue() const {
136c60b897dSRiver Riddle assert(getKind() == string);
137c60b897dSRiver Riddle
138c60b897dSRiver Riddle // Get the internal string data, without the quotes.
139c60b897dSRiver Riddle StringRef bytes = getSpelling().drop_front().drop_back();
140c60b897dSRiver Riddle
141c60b897dSRiver Riddle // Try to extract the binary data from the hex string. We expect the hex
142c60b897dSRiver Riddle // string to start with `0x` and have an even number of hex nibbles (nibbles
143c60b897dSRiver Riddle // should come in pairs).
144c60b897dSRiver Riddle std::string hex;
145c60b897dSRiver Riddle if (!bytes.consume_front("0x") || (bytes.size() & 1) ||
146c60b897dSRiver Riddle !llvm::tryGetFromHex(bytes, hex))
1471a36588eSKazu Hirata return std::nullopt;
148c60b897dSRiver Riddle return hex;
149c60b897dSRiver Riddle }
150c60b897dSRiver Riddle
151c60b897dSRiver Riddle /// Given a token containing a symbol reference, return the unescaped string
152c60b897dSRiver Riddle /// value.
getSymbolReference() const153c60b897dSRiver Riddle std::string Token::getSymbolReference() const {
154c60b897dSRiver Riddle assert(is(Token::at_identifier) && "expected valid @-identifier");
155c60b897dSRiver Riddle StringRef nameStr = getSpelling().drop_front();
156c60b897dSRiver Riddle
157c60b897dSRiver Riddle // Check to see if the reference is a string literal, or a bare identifier.
158c60b897dSRiver Riddle if (nameStr.front() == '"')
159c60b897dSRiver Riddle return getStringValue();
160c60b897dSRiver Riddle return std::string(nameStr);
161c60b897dSRiver Riddle }
162c60b897dSRiver Riddle
163c60b897dSRiver Riddle /// Given a hash_identifier token like #123, try to parse the number out of
16415ae9964SKazu Hirata /// the identifier, returning std::nullopt if it is a named identifier like #x
16515ae9964SKazu Hirata /// or if the integer doesn't fit.
getHashIdentifierNumber() const1660a81ace0SKazu Hirata std::optional<unsigned> Token::getHashIdentifierNumber() const {
167c60b897dSRiver Riddle assert(getKind() == hash_identifier);
168c60b897dSRiver Riddle unsigned result = 0;
169c60b897dSRiver Riddle if (spelling.drop_front().getAsInteger(10, result))
1701a36588eSKazu Hirata return std::nullopt;
171c60b897dSRiver Riddle return result;
172c60b897dSRiver Riddle }
173c60b897dSRiver Riddle
174c60b897dSRiver Riddle /// Given a punctuation or keyword token kind, return the spelling of the
175c60b897dSRiver Riddle /// token as a string. Warning: This will abort on markers, identifiers and
176c60b897dSRiver Riddle /// literal tokens since they have no fixed spelling.
getTokenSpelling(Kind kind)177c60b897dSRiver Riddle StringRef Token::getTokenSpelling(Kind kind) {
178c60b897dSRiver Riddle switch (kind) {
179c60b897dSRiver Riddle default:
180c60b897dSRiver Riddle llvm_unreachable("This token kind has no fixed spelling");
181c60b897dSRiver Riddle #define TOK_PUNCTUATION(NAME, SPELLING) \
182c60b897dSRiver Riddle case NAME: \
183c60b897dSRiver Riddle return SPELLING;
184c60b897dSRiver Riddle #define TOK_KEYWORD(SPELLING) \
185c60b897dSRiver Riddle case kw_##SPELLING: \
186c60b897dSRiver Riddle return #SPELLING;
187c60b897dSRiver Riddle #include "TokenKinds.def"
188c60b897dSRiver Riddle }
189c60b897dSRiver Riddle }
190c60b897dSRiver Riddle
191c60b897dSRiver Riddle /// Return true if this is one of the keyword token kinds (e.g. kw_if).
isKeyword() const192c60b897dSRiver Riddle bool Token::isKeyword() const {
193c60b897dSRiver Riddle switch (kind) {
194c60b897dSRiver Riddle default:
195c60b897dSRiver Riddle return false;
196c60b897dSRiver Riddle #define TOK_KEYWORD(SPELLING) \
197c60b897dSRiver Riddle case kw_##SPELLING: \
198c60b897dSRiver Riddle return true;
199c60b897dSRiver Riddle #include "TokenKinds.def"
200c60b897dSRiver Riddle }
201c60b897dSRiver Riddle }
202c60b897dSRiver Riddle
isCodeCompletionFor(Kind kind) const203c60b897dSRiver Riddle bool Token::isCodeCompletionFor(Kind kind) const {
204c60b897dSRiver Riddle if (!isCodeCompletion() || spelling.empty())
205c60b897dSRiver Riddle return false;
206c60b897dSRiver Riddle switch (kind) {
207c60b897dSRiver Riddle case Kind::string:
208c60b897dSRiver Riddle return spelling[0] == '"';
209c60b897dSRiver Riddle case Kind::hash_identifier:
210c60b897dSRiver Riddle return spelling[0] == '#';
211c60b897dSRiver Riddle case Kind::percent_identifier:
212c60b897dSRiver Riddle return spelling[0] == '%';
213c60b897dSRiver Riddle case Kind::caret_identifier:
214c60b897dSRiver Riddle return spelling[0] == '^';
215c60b897dSRiver Riddle case Kind::exclamation_identifier:
216c60b897dSRiver Riddle return spelling[0] == '!';
217c60b897dSRiver Riddle default:
218c60b897dSRiver Riddle return false;
219c60b897dSRiver Riddle }
220c60b897dSRiver Riddle }
221