xref: /llvm-project/lld/ELF/ScriptLexer.cpp (revision 483516fd83f000fd6b2ac1cde943f5639f72b9e9)
1794366a2SRui Ueyama //===- ScriptLexer.cpp ----------------------------------------------------===//
2794366a2SRui Ueyama //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6794366a2SRui Ueyama //
7794366a2SRui Ueyama //===----------------------------------------------------------------------===//
8794366a2SRui Ueyama //
94c82b4f6SRui Ueyama // This file defines a lexer for the linker script.
104c82b4f6SRui Ueyama //
114c82b4f6SRui Ueyama // The linker script's grammar is not complex but ambiguous due to the
124c82b4f6SRui Ueyama // lack of the formal specification of the language. What we are trying to
134c82b4f6SRui Ueyama // do in this and other files in LLD is to make a "reasonable" linker
144c82b4f6SRui Ueyama // script processor.
154c82b4f6SRui Ueyama //
164c82b4f6SRui Ueyama // Among simplicity, compatibility and efficiency, we put the most
174c82b4f6SRui Ueyama // emphasis on simplicity when we wrote this lexer. Compatibility with the
184c82b4f6SRui Ueyama // GNU linkers is important, but we did not try to clone every tiny corner
194c82b4f6SRui Ueyama // case of their lexers, as even ld.bfd and ld.gold are subtly different
204c82b4f6SRui Ueyama // in various corner cases. We do not care much about efficiency because
214c82b4f6SRui Ueyama // the time spent in parsing linker scripts is usually negligible.
224c82b4f6SRui Ueyama //
23731a66aeSRui Ueyama // Overall, this lexer works fine for most linker scripts. There might
24731a66aeSRui Ueyama // be room for improving compatibility, but that's probably not at the
25731a66aeSRui Ueyama // top of our todo list.
26794366a2SRui Ueyama //
27794366a2SRui Ueyama //===----------------------------------------------------------------------===//
28794366a2SRui Ueyama 
29794366a2SRui Ueyama #include "ScriptLexer.h"
30a7e8bddfSFangrui Song #include "Config.h"
31b8a59c8aSBob Haarman #include "lld/Common/ErrorHandler.h"
32794366a2SRui Ueyama #include "llvm/ADT/Twine.h"
3327bb7990SFangrui Song #include "llvm/Support/ErrorHandling.h"
34a7e8bddfSFangrui Song #include "llvm/Support/FileSystem.h"
35a7e8bddfSFangrui Song #include "llvm/Support/Path.h"
3627bb7990SFangrui Song #include <algorithm>
37794366a2SRui Ueyama 
38794366a2SRui Ueyama using namespace llvm;
3907837b8fSFangrui Song using namespace lld;
4007837b8fSFangrui Song using namespace lld::elf;
41794366a2SRui Ueyama 
42cf57a670SFangrui Song ScriptLexer::Buffer::Buffer(Ctx &ctx, MemoryBufferRef mb)
43a7e8bddfSFangrui Song     : s(mb.getBuffer()), filename(mb.getBufferIdentifier()),
44a7e8bddfSFangrui Song       begin(mb.getBufferStart()) {
45cf57a670SFangrui Song   if (ctx.arg.sysroot == "")
46a7e8bddfSFangrui Song     return;
47a7e8bddfSFangrui Song   StringRef path = filename;
48a7e8bddfSFangrui Song   for (; !path.empty(); path = sys::path::parent_path(path)) {
49cf57a670SFangrui Song     if (!sys::fs::equivalent(ctx.arg.sysroot, path))
50a7e8bddfSFangrui Song       continue;
51a7e8bddfSFangrui Song     isUnderSysroot = true;
52a7e8bddfSFangrui Song     return;
53a7e8bddfSFangrui Song   }
54a7e8bddfSFangrui Song }
55a7e8bddfSFangrui Song 
56cf57a670SFangrui Song ScriptLexer::ScriptLexer(Ctx &ctx, MemoryBufferRef mb)
57e24457a3SFangrui Song     : ctx(ctx), curBuf(ctx, mb), mbs(1, mb) {
588f72b0cbSFangrui Song   activeFilenames.insert(mb.getBufferIdentifier());
598f72b0cbSFangrui Song }
601978c21dSFangrui Song 
61794366a2SRui Ueyama // Returns a whole line containing the current token.
62794366a2SRui Ueyama StringRef ScriptLexer::getLine() {
633837f427SRui Ueyama   StringRef s = getCurrentMB().getBuffer();
64794366a2SRui Ueyama 
651978c21dSFangrui Song   size_t pos = s.rfind('\n', prevTok.data() - s.data());
663837f427SRui Ueyama   if (pos != StringRef::npos)
673837f427SRui Ueyama     s = s.substr(pos + 1);
683837f427SRui Ueyama   return s.substr(0, s.find_first_of("\r\n"));
69794366a2SRui Ueyama }
70794366a2SRui Ueyama 
71794366a2SRui Ueyama // Returns 0-based column number of the current token.
72794366a2SRui Ueyama size_t ScriptLexer::getColumnNumber() {
731978c21dSFangrui Song   return prevTok.data() - getLine().data();
74794366a2SRui Ueyama }
75794366a2SRui Ueyama 
76794366a2SRui Ueyama std::string ScriptLexer::getCurrentLocation() {
77adcd0268SBenjamin Kramer   std::string filename = std::string(getCurrentMB().getBufferIdentifier());
789328c20cSFangrui Song   return (filename + ":" + Twine(prevTokLine)).str();
79794366a2SRui Ueyama }
80794366a2SRui Ueyama 
81794366a2SRui Ueyama // We don't want to record cascading errors. Keep only the first one.
823837f427SRui Ueyama void ScriptLexer::setError(const Twine &msg) {
83ed6c106eSFangrui Song   if (errCount(ctx))
84794366a2SRui Ueyama     return;
85794366a2SRui Ueyama 
863837f427SRui Ueyama   std::string s = (getCurrentLocation() + ": " + msg).str();
871978c21dSFangrui Song   if (prevTok.size())
883837f427SRui Ueyama     s += "\n>>> " + getLine().str() + "\n>>> " +
89de2d1066SGeorge Rimar          std::string(getColumnNumber(), ' ') + "^";
9009c2c5e1SFangrui Song   ErrAlways(ctx) << s;
91794366a2SRui Ueyama }
92794366a2SRui Ueyama 
931978c21dSFangrui Song void ScriptLexer::lex() {
94794366a2SRui Ueyama   for (;;) {
951978c21dSFangrui Song     StringRef &s = curBuf.s;
963837f427SRui Ueyama     s = skipSpace(s);
971978c21dSFangrui Song     if (s.empty()) {
981978c21dSFangrui Song       // If this buffer is from an INCLUDE command, switch to the "return
991978c21dSFangrui Song       // value"; otherwise, mark EOF.
1001978c21dSFangrui Song       if (buffers.empty()) {
1011978c21dSFangrui Song         eof = true;
1021978c21dSFangrui Song         return;
1031978c21dSFangrui Song       }
1048f72b0cbSFangrui Song       activeFilenames.erase(curBuf.filename);
1051978c21dSFangrui Song       curBuf = buffers.pop_back_val();
1061978c21dSFangrui Song       continue;
1071978c21dSFangrui Song     }
1081978c21dSFangrui Song     curTokState = inExpr;
109794366a2SRui Ueyama 
110794366a2SRui Ueyama     // Quoted token. Note that double-quote characters are parts of a token
111794366a2SRui Ueyama     // because, in a glob match context, only unquoted tokens are interpreted
112794366a2SRui Ueyama     // as glob patterns. Double-quoted tokens are literal patterns in that
113794366a2SRui Ueyama     // context.
1148d85c96eSFangrui Song     if (s.starts_with("\"")) {
1153837f427SRui Ueyama       size_t e = s.find("\"", 1);
1163837f427SRui Ueyama       if (e == StringRef::npos) {
1171978c21dSFangrui Song         size_t lineno =
1181978c21dSFangrui Song             StringRef(curBuf.begin, s.data() - curBuf.begin).count('\n');
119*483516fdSFangrui Song         ErrAlways(ctx) << curBuf.filename << ":" << (lineno + 1)
12009c2c5e1SFangrui Song                        << ": unclosed quote";
121794366a2SRui Ueyama         return;
122794366a2SRui Ueyama       }
123794366a2SRui Ueyama 
1241978c21dSFangrui Song       curTok = s.take_front(e + 1);
1253837f427SRui Ueyama       s = s.substr(e + 1);
1261978c21dSFangrui Song       return;
127794366a2SRui Ueyama     }
128794366a2SRui Ueyama 
1290a0effddSFangrui Song     // Some operators form separate tokens.
1308d85c96eSFangrui Song     if (s.starts_with("<<=") || s.starts_with(">>=")) {
1311978c21dSFangrui Song       curTok = s.substr(0, 3);
1320a0effddSFangrui Song       s = s.substr(3);
1331978c21dSFangrui Song       return;
1340a0effddSFangrui Song     }
1351978c21dSFangrui Song     if (s.size() > 1 && (s[1] == '=' && strchr("+-*/!&^|", s[0]))) {
1361978c21dSFangrui Song       curTok = s.substr(0, 2);
1373837f427SRui Ueyama       s = s.substr(2);
1381978c21dSFangrui Song       return;
139c67d6b2dSRui Ueyama     }
140c67d6b2dSRui Ueyama 
1411978c21dSFangrui Song     // Unquoted token. The non-expression token is more relaxed than tokens in
1421978c21dSFangrui Song     // C-like languages, so that you can write "file-name.cpp" as one bare
1431978c21dSFangrui Song     // token.
1441978c21dSFangrui Song     size_t pos;
1451978c21dSFangrui Song     if (inExpr) {
1461978c21dSFangrui Song       pos = s.find_first_not_of(
1471978c21dSFangrui Song           "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
1481978c21dSFangrui Song           "0123456789_.$");
1491978c21dSFangrui Song       if (pos == 0 && s.size() >= 2 &&
1501978c21dSFangrui Song           ((s[0] == s[1] && strchr("<>&|", s[0])) ||
1511978c21dSFangrui Song            is_contained({"==", "!=", "<=", ">=", "<<", ">>"}, s.substr(0, 2))))
1521978c21dSFangrui Song         pos = 2;
1531978c21dSFangrui Song     } else {
1541978c21dSFangrui Song       pos = s.find_first_not_of(
155794366a2SRui Ueyama           "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
156c67d6b2dSRui Ueyama           "0123456789_.$/\\~=+[]*?-!^:");
157794366a2SRui Ueyama     }
158794366a2SRui Ueyama 
1591978c21dSFangrui Song     if (pos == 0)
1601978c21dSFangrui Song       pos = 1;
1611978c21dSFangrui Song     curTok = s.substr(0, pos);
1621978c21dSFangrui Song     s = s.substr(pos);
1631978c21dSFangrui Song     break;
1641978c21dSFangrui Song   }
165794366a2SRui Ueyama }
166794366a2SRui Ueyama 
167794366a2SRui Ueyama // Skip leading whitespace characters or comments.
1683837f427SRui Ueyama StringRef ScriptLexer::skipSpace(StringRef s) {
169794366a2SRui Ueyama   for (;;) {
1708d85c96eSFangrui Song     if (s.starts_with("/*")) {
1713837f427SRui Ueyama       size_t e = s.find("*/", 2);
1723837f427SRui Ueyama       if (e == StringRef::npos) {
173ae4279bdSGeorgii Rymar         setError("unclosed comment in a linker script");
174794366a2SRui Ueyama         return "";
175794366a2SRui Ueyama       }
1769328c20cSFangrui Song       curBuf.lineNumber += s.substr(0, e).count('\n');
1773837f427SRui Ueyama       s = s.substr(e + 2);
178794366a2SRui Ueyama       continue;
179794366a2SRui Ueyama     }
1808d85c96eSFangrui Song     if (s.starts_with("#")) {
1813837f427SRui Ueyama       size_t e = s.find('\n', 1);
1823837f427SRui Ueyama       if (e == StringRef::npos)
1833837f427SRui Ueyama         e = s.size() - 1;
1849328c20cSFangrui Song       else
1859328c20cSFangrui Song         ++curBuf.lineNumber;
1863837f427SRui Ueyama       s = s.substr(e + 1);
187794366a2SRui Ueyama       continue;
188794366a2SRui Ueyama     }
1899328c20cSFangrui Song     StringRef saved = s;
1903837f427SRui Ueyama     s = s.ltrim();
1919328c20cSFangrui Song     auto len = saved.size() - s.size();
1929328c20cSFangrui Song     if (len == 0)
1933837f427SRui Ueyama       return s;
1949328c20cSFangrui Song     curBuf.lineNumber += saved.substr(0, len).count('\n');
195794366a2SRui Ueyama   }
196794366a2SRui Ueyama }
197794366a2SRui Ueyama 
1981978c21dSFangrui Song // Used to determine whether to stop parsing. Treat errors like EOF.
199ed6c106eSFangrui Song bool ScriptLexer::atEOF() { return eof || errCount(ctx); }
200731a66aeSRui Ueyama 
201794366a2SRui Ueyama StringRef ScriptLexer::next() {
2021978c21dSFangrui Song   prevTok = peek();
2039328c20cSFangrui Song   // `prevTokLine` is not updated for EOF so that the line number in `setError`
2049328c20cSFangrui Song   // will be more useful.
2059328c20cSFangrui Song   if (prevTok.size())
2069328c20cSFangrui Song     prevTokLine = curBuf.lineNumber;
2071978c21dSFangrui Song   return std::exchange(curTok, StringRef(curBuf.s.data(), 0));
208794366a2SRui Ueyama }
209794366a2SRui Ueyama 
210f5fce486SRui Ueyama StringRef ScriptLexer::peek() {
2111978c21dSFangrui Song   // curTok is invalid if curTokState and inExpr mismatch.
2121978c21dSFangrui Song   if (curTok.size() && curTokState != inExpr) {
2131978c21dSFangrui Song     curBuf.s = StringRef(curTok.data(), curBuf.s.end() - curTok.data());
2141978c21dSFangrui Song     curTok = {};
2151978c21dSFangrui Song   }
2161978c21dSFangrui Song   if (curTok.empty())
2171978c21dSFangrui Song     lex();
2181978c21dSFangrui Song   return curTok;
219794366a2SRui Ueyama }
220794366a2SRui Ueyama 
2213837f427SRui Ueyama bool ScriptLexer::consume(StringRef tok) {
2221978c21dSFangrui Song   if (peek() != tok)
223794366a2SRui Ueyama     return false;
2241978c21dSFangrui Song   next();
2251978c21dSFangrui Song   return true;
226794366a2SRui Ueyama }
227794366a2SRui Ueyama 
228794366a2SRui Ueyama void ScriptLexer::skip() { (void)next(); }
229794366a2SRui Ueyama 
2303837f427SRui Ueyama void ScriptLexer::expect(StringRef expect) {
231ed6c106eSFangrui Song   if (errCount(ctx))
232794366a2SRui Ueyama     return;
2333837f427SRui Ueyama   StringRef tok = next();
2341978c21dSFangrui Song   if (tok != expect) {
2351978c21dSFangrui Song     if (atEOF())
2361978c21dSFangrui Song       setError("unexpected EOF");
2371978c21dSFangrui Song     else
2383837f427SRui Ueyama       setError(expect + " expected, but got " + tok);
239794366a2SRui Ueyama   }
2401978c21dSFangrui Song }
241794366a2SRui Ueyama 
2422a89356dSFangrui Song ScriptLexer::Token ScriptLexer::till(StringRef tok) {
2432a89356dSFangrui Song   StringRef str = next();
2442a89356dSFangrui Song   if (str == tok)
2452a89356dSFangrui Song     return {};
2462a89356dSFangrui Song   if (!atEOF())
2472a89356dSFangrui Song     return {str};
2482a89356dSFangrui Song   prevTok = {};
2492a89356dSFangrui Song   setError("unexpected EOF");
2502a89356dSFangrui Song   return {};
2512a89356dSFangrui Song }
2522a89356dSFangrui Song 
253794366a2SRui Ueyama // Returns true if S encloses T.
2543837f427SRui Ueyama static bool encloses(StringRef s, StringRef t) {
2553837f427SRui Ueyama   return s.bytes_begin() <= t.bytes_begin() && t.bytes_end() <= s.bytes_end();
256794366a2SRui Ueyama }
257794366a2SRui Ueyama 
258794366a2SRui Ueyama MemoryBufferRef ScriptLexer::getCurrentMB() {
259794366a2SRui Ueyama   // Find input buffer containing the current token.
260ac6abc99SFangrui Song   assert(!mbs.empty());
2613837f427SRui Ueyama   for (MemoryBufferRef mb : mbs)
2621978c21dSFangrui Song     if (encloses(mb.getBuffer(), curBuf.s))
2633837f427SRui Ueyama       return mb;
264794366a2SRui Ueyama   llvm_unreachable("getCurrentMB: failed to find a token");
265794366a2SRui Ueyama }
266