//===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Parser/preprocessor.h"

#include "prescan.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
// Standard headers below are reconstructed from what this file actually uses.
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

namespace Fortran::parser {

Definition::Definition(
    const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
    : replacement_{Tokenize({}, repl, firstToken, tokens)} {}

Definition::Definition(const std::vector<std::string> &argNames,
    const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
    bool isVariadic)
    : isFunctionLike_{true}, isVariadic_{isVariadic}, argNames_{argNames},
      replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}

Definition::Definition(const std::string &predefined, AllSources &sources)
    : isPredefined_{true},
      replacement_{
          predefined, sources.AddCompilerInsertion(predefined).start()} {}

bool Definition::set_isDisabled(bool disable) {
  bool was{isDisabled_};
  isDisabled_ = disable;
  return was;
}

void Definition::Print(llvm::raw_ostream &out, const char *macroName) const {
  if (!isFunctionLike_) {
    // If it's not a function-like macro, then just print the replacement.
    out << ' ' << replacement_.ToString();
    return;
  }

  size_t argCount{argumentCount()};

  out << '(';
  for (size_t i{0}; i != argCount; ++i) {
    if (i != 0) {
      out << ", ";
    }
    out << argNames_[i];
  }
  if (isVariadic_) {
    out << ", ...";
  }
  out << ") ";

  for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) {
    std::string tok{replacement_.TokenAt(i).ToString()};
    if (size_t idx{GetArgumentIndex(tok)}; idx < argCount) {
      out << argNames_[idx];
    } else {
      out << tok;
    }
  }
}

static bool IsLegalIdentifierStart(const CharBlock &cpl) {
  return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
}

TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
    const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
  std::map<std::string, std::string> args;
  char argIndex{'A'};
  for (const std::string &arg : argNames) {
    CHECK(args.find(arg) == args.end());
    args[arg] = "~"s + argIndex++;
  }
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{token.TokenAt(firstToken + j)};
    if (IsLegalIdentifierStart(tok)) {
      auto it{args.find(tok.ToString())};
      if (it != args.end()) {
        result.Put(it->second, token.GetTokenProvenance(j));
        continue;
      }
    }
    result.Put(token, firstToken + j, 1);
  }
  return result;
}

std::size_t Definition::GetArgumentIndex(const CharBlock &token) const {
  if (token.size() >= 2 && token[0] == '~') {
    return static_cast<std::size_t>(token[1] - 'A');
  }
  return argumentCount();
}
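
// Illustration (not part of the implementation): given
//   #define SQUARE(x) ((x)*(x))
// Tokenize() above stores the replacement text as ((~A)*(~A)); each parameter
// name is mapped to a short "~A", "~B", ... placeholder so that Apply(),
// defined later, can substitute the actual argument tokens by index.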

static TokenSequence Stringify(
    const TokenSequence &tokens, AllSources &allSources) {
  TokenSequence result;
  Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
  result.PutNextTokenChar('"', quoteProvenance);
  for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) {
    const CharBlock &token{tokens.TokenAt(j)};
    std::size_t bytes{token.size()};
    for (std::size_t k{0}; k < bytes; ++k) {
      char ch{token[k]};
      Provenance from{tokens.GetTokenProvenance(j, k)};
      if (ch == '"' || ch == '\\') {
        result.PutNextTokenChar(ch, from);
      }
      result.PutNextTokenChar(ch, from);
    }
  }
  result.PutNextTokenChar('"', quoteProvenance);
  result.CloseToken();
  return result;
}

constexpr bool IsTokenPasting(CharBlock opr) {
  return opr.size() == 2 && opr[0] == '#' && opr[1] == '#';
}

static bool AnyTokenPasting(const TokenSequence &text) {
  std::size_t tokens{text.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (IsTokenPasting(text.TokenAt(j))) {
      return true;
    }
  }
  return false;
}

static TokenSequence TokenPasting(TokenSequence &&text) {
  if (!AnyTokenPasting(text)) {
    return std::move(text);
  }
  TokenSequence result;
  std::size_t tokens{text.SizeInTokens()};
  bool pasting{false};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (IsTokenPasting(text.TokenAt(j))) {
      if (!pasting) {
        while (!result.empty() &&
            result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
          result.pop_back();
        }
        if (!result.empty()) {
          result.ReopenLastToken();
          pasting = true;
        }
      }
    } else if (pasting && text.TokenAt(j).IsBlank()) {
    } else {
      result.Put(text, j, 1);
      pasting = false;
    }
  }
  return result;
}

constexpr bool IsDefinedKeyword(CharBlock token) {
  return token.size() == 7 && (token[0] == 'd' || token[0] == 'D') &&
      ToLowerCaseLetters(token.ToString()) == "defined";
}
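
// Illustration (not part of the implementation): with
//   #define STR(x) #x
//   #define CAT(a, b) a##b
// Apply() below stringifies an argument whose parameter is preceded by '#'
// (STR(foo) yields "foo"), and TokenPasting() above fuses the tokens around
// each ## operator (CAT(foo, bar) yields foobar).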

TokenSequence Definition::Apply(const std::vector<TokenSequence> &args,
    Prescanner &prescanner, bool inIfExpression) {
  TokenSequence result;
  bool skipping{false};
  int parenthesesNesting{0};
  std::size_t tokens{replacement_.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{replacement_.TokenAt(j)};
    std::size_t bytes{token.size()};
    if (skipping) {
      char ch{token.OnlyNonBlank()};
      if (ch == '(') {
        ++parenthesesNesting;
      } else if (ch == ')') {
        if (parenthesesNesting > 0) {
          --parenthesesNesting;
        }
        skipping = parenthesesNesting > 0;
      }
      continue;
    }
    if (bytes == 2 && token[0] == '~') { // argument substitution
      std::size_t index{GetArgumentIndex(token)};
      if (index >= args.size()) {
        continue;
      }
      std::size_t prev{j};
      while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) {
        --prev;
      }
      if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 &&
          replacement_.TokenAt(prev - 1)[0] == '#') {
        // stringify argument without macro replacement
        std::size_t resultSize{result.SizeInTokens()};
        while (resultSize > 0 && result.TokenAt(resultSize - 1).IsBlank()) {
          result.pop_back();
          --resultSize;
        }
        CHECK(resultSize > 0 &&
            result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1));
        result.pop_back();
        result.Put(Stringify(args[index], prescanner.allSources()));
      } else {
        const TokenSequence *arg{&args[index]};
        std::optional<TokenSequence> replaced;
        // Don't replace macros in the actual argument if it is preceded or
        // followed by the token-pasting operator ## in the replacement text,
        // or if we have to worry about "defined(X)"/"defined X" in an
        // #if/#elif expression.
        if (!inIfExpression &&
            (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1)))) {
          auto next{replacement_.SkipBlanks(j + 1)};
          if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
            // Apply macro replacement to the actual argument
            replaced = prescanner.preprocessor().MacroReplacement(
                *arg, prescanner, nullptr, inIfExpression);
            if (replaced) {
              arg = &*replaced;
            }
          }
        }
        result.Put(DEREF(arg));
      }
    } else if (bytes == 11 && isVariadic_ &&
        token.ToString() == "__VA_ARGS__") {
      Provenance commaProvenance{
          prescanner.preprocessor().allSources().CompilerInsertionProvenance(
              ',')};
      for (std::size_t k{argumentCount()}; k < args.size(); ++k) {
        if (k > argumentCount()) {
          result.Put(","s, commaProvenance);
        }
        result.Put(args[k]);
      }
    } else if (bytes == 10 && isVariadic_ &&
        token.ToString() == "__VA_OPT__" && j + 2 < tokens &&
        replacement_.TokenAt(j + 1).OnlyNonBlank() == '(' &&
        parenthesesNesting == 0) {
      parenthesesNesting = 1;
      skipping = args.size() == argumentCount();
      ++j;
    } else {
      if (parenthesesNesting > 0) {
        char ch{token.OnlyNonBlank()};
        if (ch == '(') {
          ++parenthesesNesting;
        } else if (ch == ')') {
          if (--parenthesesNesting == 0) {
            skipping = false;
            continue;
          }
        }
      }
      result.Put(replacement_, j);
    }
  }
  return TokenPasting(std::move(result));
}

static std::string FormatTime(const std::time_t &now, const char *format) {
  char buffer[16];
  return {buffer,
      std::strftime(buffer, sizeof buffer, format, std::localtime(&now))};
}

Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {}

void Preprocessor::DefineStandardMacros() {
  // Capture current local date & time once now to avoid having the values
  // of __DATE__ or __TIME__ change during compilation.
  std::time_t now;
  std::time(&now);
  Define("__DATE__"s, FormatTime(now, "\"%h %e %Y\"")); // e.g., "Jun 16 1904"
  Define("__TIME__"s, FormatTime(now, "\"%T\"")); // e.g., "23:59:60"
  // The values of these predefined macros depend on their invocation sites.
  Define("__FILE__"s, "__FILE__"s);
  Define("__LINE__"s, "__LINE__"s);
  Define("__TIMESTAMP__"s, "__TIMESTAMP__"s);
}

void Preprocessor::Define(const std::string &macro, const std::string &value) {
  definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
}

void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
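
// Illustration (not part of the implementation): after DefineStandardMacros(),
//   PRINT *, __FILE__, __LINE__
// is expanded at each point of use, e.g. to
//   PRINT *, "foo.F90", 12
// because __FILE__, __LINE__, and __TIMESTAMP__ are recomputed from the
// provenance of the invocation site in MacroReplacement() below, while
// __DATE__ and __TIME__ were captured once at definition time.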

std::optional<TokenSequence> Preprocessor::MacroReplacement(
    const TokenSequence &input, Prescanner &prescanner,
    std::optional<std::size_t> *partialFunctionLikeMacro,
    bool inIfExpression) {
  // Do quick scan for any use of a defined name.
  if (definitions_.empty()) {
    return std::nullopt;
  }
  std::size_t tokens{input.SizeInTokens()};
  std::size_t j{0};
  for (; j < tokens; ++j) {
    CharBlock token{input.TokenAt(j)};
    if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
        (IsNameDefined(token) ||
            (inIfExpression && IsDefinedKeyword(token)))) {
      break;
    }
  }
  if (j == tokens) {
    return std::nullopt; // input contains nothing that would be replaced
  }
  TokenSequence result{input, 0, j};

  // After rescanning after macro replacement has failed due to an unclosed
  // function-like macro call (no left parenthesis yet, or no closing
  // parenthesis), if tokens remain in the input, append them to the
  // replacement text and attempt to proceed.  Otherwise, return, so that
  // the caller may try again with remaining tokens in its input.
  auto CompleteFunctionLikeMacro{
      [this, &input, &prescanner, &result, &partialFunctionLikeMacro,
          inIfExpression](std::size_t after, const TokenSequence &replacement,
          std::size_t pFLMOffset) {
        if (after < input.SizeInTokens()) {
          result.Put(replacement, 0, pFLMOffset);
          TokenSequence suffix;
          suffix.Put(
              replacement, pFLMOffset, replacement.SizeInTokens() - pFLMOffset);
          suffix.Put(input, after, input.SizeInTokens() - after);
          auto further{ReplaceMacros(
              suffix, prescanner, partialFunctionLikeMacro, inIfExpression)};
          if (partialFunctionLikeMacro && *partialFunctionLikeMacro) {
            // still not closed
            **partialFunctionLikeMacro += result.SizeInTokens();
          }
          result.Put(further);
          return true;
        } else {
          if (partialFunctionLikeMacro) {
            *partialFunctionLikeMacro = pFLMOffset + result.SizeInTokens();
          }
          return false;
        }
      }};

  for (; j < tokens; ++j) {
    CharBlock token{input.TokenAt(j)};
    if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
      result.Put(input, j);
      continue;
    }
    // Process identifier in replacement text.
    auto it{definitions_.find(token)};
    // Is this the X in "defined(X)" or "defined X" in an #if/#elif expression?
    if (inIfExpression) {
      if (auto prev{result.SkipBlanksBackwards(result.SizeInTokens())}) {
        bool ok{true};
        std::optional<std::size_t> rightParenthesis;
        if (result.TokenAt(*prev).OnlyNonBlank() == '(') {
          prev = result.SkipBlanksBackwards(*prev);
          rightParenthesis = input.SkipBlanks(j + 1);
          ok = *rightParenthesis < tokens &&
              input.TokenAt(*rightParenthesis).OnlyNonBlank() == ')';
        }
        if (ok && prev && IsDefinedKeyword(result.TokenAt(*prev))) {
          result = TokenSequence{result, 0, *prev}; // trims off "defined ("
          char truth{it != definitions_.end() ? '1' : '0'};
          result.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
          j = rightParenthesis.value_or(j);
          continue;
        }
      }
    }
    if (it == definitions_.end()) {
      result.Put(input, j);
      continue;
    }
    Definition *def{&it->second};
    if (def->isDisabled()) {
      result.Put(input, j);
      continue;
    }
    if (!def->isFunctionLike()) {
      if (def->isPredefined() && !def->replacement().empty()) {
        std::string repl;
        std::string name{def->replacement().TokenAt(0).ToString()};
        if (name == "__FILE__") {
          repl = "\""s +
              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
        } else if (name == "__LINE__") {
          std::string buf;
          llvm::raw_string_ostream ss{buf};
          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
          repl = ss.str();
        } else if (name == "__TIMESTAMP__") {
          auto path{allSources_.GetPath(
              prescanner.GetCurrentProvenance(), /*topLevel=*/true)};
          llvm::sys::fs::file_status status;
          repl = "??? ??? ?? ??:??:?? ????";
          if (!llvm::sys::fs::status(path, status)) {
            auto modTime{llvm::sys::toTimeT(status.getLastModificationTime())};
            if (std::string time{std::asctime(std::localtime(&modTime))};
                time.size() > 1 && time[time.size() - 1] == '\n') {
              time.erase(time.size() - 1); // clip terminal '\n'
              repl = "\""s + time + '"';
            }
          }
        }
        if (!repl.empty()) {
          ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
          ProvenanceRange call{allSources_.AddMacroCall(
              insert, input.GetTokenProvenanceRange(j), repl)};
          result.Put(repl, call.start());
          continue;
        }
      }
      std::optional<std::size_t> partialFLM;
      def->set_isDisabled(true);
      TokenSequence replaced{TokenPasting(ReplaceMacros(
          def->replacement(), prescanner, &partialFLM, inIfExpression))};
      def->set_isDisabled(false);
      if (partialFLM &&
          CompleteFunctionLikeMacro(j + 1, replaced, *partialFLM)) {
        return result;
      }
      if (!replaced.empty()) {
        ProvenanceRange from{def->replacement().GetProvenanceRange()};
        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
        ProvenanceRange newRange{
            allSources_.AddMacroCall(from, use, replaced.ToString())};
        result.Put(replaced, newRange);
      }
    } else {
      // Possible function-like macro call.  Skip spaces and newlines to see
      // whether '(' is next.
      std::size_t k{j};
      bool leftParen{false};
      while (++k < tokens) {
        const CharBlock &lookAhead{input.TokenAt(k)};
        if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
          leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
          break;
        }
      }
      if (!leftParen) {
        if (partialFunctionLikeMacro) {
          *partialFunctionLikeMacro = result.SizeInTokens();
          result.Put(input, j, tokens - j);
          return result;
        } else {
          result.Put(input, j);
          continue;
        }
      }
      std::vector<std::size_t> argStart{++k};
      for (int nesting{0}; k < tokens; ++k) {
        CharBlock token{input.TokenAt(k)};
        char ch{token.OnlyNonBlank()};
        if (ch == '(') {
          ++nesting;
        } else if (ch == ')') {
          if (nesting == 0) {
            break;
          }
          --nesting;
        } else if (ch == ',' && nesting == 0) {
          argStart.push_back(k + 1);
        }
      }
      if (argStart.size() == 1 && k == argStart[0] &&
          def->argumentCount() == 0) {
        // Subtle: () is zero arguments, not one empty argument,
        // unless one argument was expected.
        argStart.clear();
      }
      if (k >= tokens && partialFunctionLikeMacro) {
        *partialFunctionLikeMacro = result.SizeInTokens();
        result.Put(input, j, tokens - j);
        return result;
      } else if (k >= tokens || argStart.size() < def->argumentCount() ||
          (argStart.size() > def->argumentCount() && !def->isVariadic())) {
        result.Put(input, j);
        continue;
      }
      std::vector<TokenSequence> args;
      for (std::size_t n{0}; n < argStart.size(); ++n) {
        std::size_t at{argStart[n]};
        std::size_t count{
            (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
        args.emplace_back(TokenSequence(input, at, count));
      }
      TokenSequence applied{def->Apply(args, prescanner, inIfExpression)};
      std::optional<std::size_t> partialFLM;
      def->set_isDisabled(true);
      TokenSequence replaced{ReplaceMacros(
          std::move(applied), prescanner, &partialFLM, inIfExpression)};
      def->set_isDisabled(false);
      if (partialFLM &&
          CompleteFunctionLikeMacro(k + 1, replaced, *partialFLM)) {
        return result;
      }
      if (!replaced.empty()) {
        ProvenanceRange from{def->replacement().GetProvenanceRange()};
        ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
        ProvenanceRange newRange{
            allSources_.AddMacroCall(from, use, replaced.ToString())};
        result.Put(replaced, newRange);
      }
      j = k; // advance to the terminal ')'
    }
  }
  return result;
}

TokenSequence Preprocessor::ReplaceMacros(const TokenSequence &tokens,
    Prescanner &prescanner,
    std::optional<std::size_t> *partialFunctionLikeMacro,
    bool inIfExpression) {
  if (std::optional<TokenSequence> repl{MacroReplacement(
          tokens, prescanner, partialFunctionLikeMacro, inIfExpression)}) {
    return std::move(*repl);
  }
  return tokens;
}
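
// Illustration (not part of the implementation): Directive() below handles
// already-tokenized preprocessor lines such as
//   #define PI 3.14159
//   #define MAX(a, b) ((a) > (b) ? (a) : (b))
//   #include "config.h"
//   #if MAX(X, 2) > 3
// dispatching on the directive name that follows '#'.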

void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
  std::size_t tokens{dir.SizeInTokens()};
  std::size_t j{dir.SkipBlanks(0)};
  if (j == tokens) {
    return;
  }
  if (dir.TokenAt(j).ToString() != "#") {
    prescanner.Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
    return;
  }
  j = dir.SkipBlanks(j + 1);
  while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
    --tokens;
  }
  if (j == tokens) {
    return;
  }
  if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
    LineDirective(dir, j, prescanner);
    return;
  }
  std::size_t dirOffset{j};
  std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
  j = dir.SkipBlanks(j + 1);
  CharBlock nameToken;
  if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
    nameToken = dir.TokenAt(j);
  }
  if (dirName == "line") {
    LineDirective(dir, j, prescanner);
  } else if (dirName == "define") {
    if (nameToken.empty()) {
      prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
          "#define: missing or invalid name"_err_en_US);
      return;
    }
    nameToken = SaveTokenAsName(nameToken);
    definitions_.erase(nameToken);
    if (++j < tokens && dir.TokenAt(j).OnlyNonBlank() == '(') {
      j = dir.SkipBlanks(j + 1);
      std::vector<std::string> argName;
      bool isVariadic{false};
      if (dir.TokenAt(j).OnlyNonBlank() != ')') {
        while (true) {
          std::string an{dir.TokenAt(j).ToString()};
          if (an == "...") {
            isVariadic = true;
          } else {
            if (an.empty() || !IsLegalIdentifierStart(an[0])) {
              prescanner.Say(dir.GetTokenProvenanceRange(j),
                  "#define: missing or invalid argument name"_err_en_US);
              return;
            }
            argName.push_back(an);
          }
          j = dir.SkipBlanks(j + 1);
          if (j == tokens) {
            prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
                "#define: malformed argument list"_err_en_US);
            return;
          }
          char punc{dir.TokenAt(j).OnlyNonBlank()};
          if (punc == ')') {
            break;
          }
          if (isVariadic || punc != ',') {
            prescanner.Say(dir.GetTokenProvenanceRange(j),
                "#define: malformed argument list"_err_en_US);
            return;
          }
          j = dir.SkipBlanks(j + 1);
          if (j == tokens) {
            prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
                "#define: malformed argument list"_err_en_US);
            return;
          }
        }
        if (std::set<std::string>(argName.begin(), argName.end()).size() !=
            argName.size()) {
          prescanner.Say(dir.GetTokenProvenance(dirOffset),
              "#define: argument names are not distinct"_err_en_US);
          return;
        }
      }
      j = dir.SkipBlanks(j + 1);
      definitions_.emplace(std::make_pair(
          nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
    } else {
      j = dir.SkipBlanks(j + 1);
      definitions_.emplace(
          std::make_pair(nameToken, Definition{dir, j, tokens - j}));
    }
  } else if (dirName == "undef") {
    if (nameToken.empty()) {
      prescanner.Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "# missing or invalid name"_err_en_US);
    } else {
      if (dir.IsAnythingLeft(++j)) {
        if (prescanner.features().ShouldWarn(
                common::UsageWarning::Portability)) {
          prescanner.Say(common::UsageWarning::Portability,
              dir.GetIntervalProvenanceRange(j, tokens - j),
              "#undef: excess tokens at end of directive"_port_en_US);
        }
      } else {
        definitions_.erase(nameToken);
      }
    }
  } else if (dirName == "ifdef" || dirName == "ifndef") {
    bool doThen{false};
    if (nameToken.empty()) {
      prescanner.Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "#%s: missing name"_err_en_US, dirName);
    } else {
      if (dir.IsAnythingLeft(++j)) {
        if (prescanner.features().ShouldWarn(
                common::UsageWarning::Portability)) {
          prescanner.Say(common::UsageWarning::Portability,
              dir.GetIntervalProvenanceRange(j, tokens - j),
              "#%s: excess tokens at end of directive"_port_en_US, dirName);
        }
      }
      doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
    }
    if (doThen) {
      ifStack_.push(CanDeadElseAppear::Yes);
    } else {
      SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
          dir.GetTokenProvenance(dirOffset));
    }
  } else if (dirName == "if") {
    if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
      ifStack_.push(CanDeadElseAppear::Yes);
    } else {
      SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "else") {
    if (dir.IsAnythingLeft(j)) {
      if (prescanner.features().ShouldWarn(common::UsageWarning::Portability)) {
        prescanner.Say(common::UsageWarning::Portability,
            dir.GetIntervalProvenanceRange(j, tokens - j),
            "#else: excess tokens at end of directive"_port_en_US);
      }
    } else if (ifStack_.empty()) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
    } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
      SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "elif") {
    if (ifStack_.empty()) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
    } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
      SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "endif") {
    if (dir.IsAnythingLeft(j)) {
      if (prescanner.features().ShouldWarn(common::UsageWarning::Portability)) {
        prescanner.Say(common::UsageWarning::Portability,
            dir.GetIntervalProvenanceRange(j, tokens - j),
            "#endif: excess tokens at end of directive"_port_en_US);
      }
    } else if (ifStack_.empty()) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
    }
  } else if (dirName == "error") {
    prescanner.Say(
        dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
        "%s"_err_en_US, dir.ToString());
  } else if (dirName == "warning") {
    prescanner.Say(
        dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
        "%s"_warn_en_US, dir.ToString());
  } else if (dirName == "comment" || dirName == "note") {
    prescanner.Say(
        dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
        "%s"_en_US, dir.ToString());
  } else if (dirName == "include") {
    if (j == tokens) {
      prescanner.Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "#include: missing name of file to include"_err_en_US);
      return;
    }
    std::optional<std::string> prependPath;
    TokenSequence path{dir, j, tokens - j};
    std::string include{path.TokenAt(0).ToString()};
    if (include != "<" && include.substr(0, 1) != "\"" &&
        include.substr(0, 1) != "'") {
      path = ReplaceMacros(path, prescanner);
      include = path.empty() ? ""s : path.TokenAt(0).ToString();
    }
    auto pathTokens{path.SizeInTokens()};
    std::size_t k{0};
    if (include == "<") { // #include <foo>
      k = 1;
      if (k >= pathTokens) {
        prescanner.Say(dir.GetIntervalProvenanceRange(j, pathTokens),
            "#include: file name missing"_err_en_US);
        return;
      }
      while (k < pathTokens && path.TokenAt(k) != ">") {
        ++k;
      }
      if (k >= pathTokens) {
        if (prescanner.features().ShouldWarn(
                common::UsageWarning::Portability)) {
          prescanner.Say(common::UsageWarning::Portability,
              dir.GetIntervalProvenanceRange(j, tokens - j),
              "#include: expected '>' at end of included file"_port_en_US);
        }
      }
      TokenSequence braced{path, 1, k - 1};
      include = braced.ToString();
    } else if ((include.substr(0, 1) == "\"" || include.substr(0, 1) == "'") &&
        include.front() == include.back()) {
      // #include "foo" and #include 'foo'
      include = include.substr(1, include.size() - 2);
      // Start search in directory of file containing the directive
      auto prov{dir.GetTokenProvenanceRange(dirOffset).start()};
      if (const auto *currentFile{allSources_.GetSourceFile(prov)}) {
        prependPath = DirectoryName(currentFile->path());
      }
    } else {
      prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
          "#include %s: expected name of file to include"_err_en_US,
          path.ToString());
      return;
    }
    if (include.empty()) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#include %s: empty include file name"_err_en_US, path.ToString());
      return;
    }
    k = path.SkipBlanks(k + 1);
    if (k < pathTokens && path.TokenAt(k).ToString() != "!") {
      if (prescanner.features().ShouldWarn(common::UsageWarning::Portability)) {
        prescanner.Say(common::UsageWarning::Portability,
            dir.GetIntervalProvenanceRange(j, tokens - j),
            "#include: extra stuff ignored after file name"_port_en_US);
      }
    }
    std::string buf;
    llvm::raw_string_ostream error{buf};
    if (const SourceFile *included{
            allSources_.Open(include, error, std::move(prependPath))}) {
      if (included->bytes() > 0) {
        ProvenanceRange fileRange{
            allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
        Prescanner{prescanner, *this, /*isNestedInIncludeDirective=*/true}
            .set_encoding(included->encoding())
            .Prescan(fileRange);
      }
    } else {
      prescanner.Say(dir.GetTokenProvenanceRange(j), "#include: %s"_err_en_US,
          error.str());
    }
  } else {
    prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
        "#%s: unknown or unimplemented directive"_err_en_US, dirName);
  }
}

void Preprocessor::PrintMacros(llvm::raw_ostream &out) const {
  // std::set is ordered.  Use that to print the macros in alphabetical order.
  std::set<std::string> macroNames;
  for (const auto &[name, _] : definitions_) {
    macroNames.insert(name.ToString());
  }
  for (const std::string &name : macroNames) {
    out << "#define " << name;
    definitions_.at(name).Print(out, name.c_str());
    out << '\n';
  }
}

CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
  names_.push_back(t.ToString());
  return {names_.back().data(), names_.back().size()};
}

bool Preprocessor::IsNameDefined(const CharBlock &token) {
  return definitions_.find(token) != definitions_.end();
}

bool Preprocessor::IsFunctionLikeDefinition(const CharBlock &token) {
  auto it{definitions_.find(token)};
  return it != definitions_.end() && it->second.isFunctionLike();
}

static std::string GetDirectiveName(
    const TokenSequence &line, std::size_t *rest) {
  std::size_t tokens{line.SizeInTokens()};
  std::size_t j{line.SkipBlanks(0)};
  if (j == tokens || line.TokenAt(j).ToString() != "#") {
    *rest = tokens;
    return "";
  }
  j = line.SkipBlanks(j + 1);
  if (j == tokens) {
    *rest = tokens;
    return "";
  }
  *rest = line.SkipBlanks(j + 1);
  return ToLowerCaseLetters(line.TokenAt(j).ToString());
}

void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
    IsElseActive isElseActive, Prescanner &prescanner,
    ProvenanceRange provenanceRange) {
  int nesting{0};
  while (!prescanner.IsAtEnd()) {
    if (!prescanner.IsNextLinePreprocessorDirective()) {
      prescanner.NextLine();
      continue;
    }
    TokenSequence line{prescanner.TokenizePreprocessorDirective()};
    std::size_t rest{0};
    std::string dn{GetDirectiveName(line, &rest)};
    if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
      ++nesting;
    } else if (dn == "endif") {
      if (nesting-- == 0) {
        return;
      }
    } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
      if (dn == "else") {
        ifStack_.push(CanDeadElseAppear::No);
        return;
      }
      if (dn == "elif" &&
          IsIfPredicateTrue(
              line, rest, line.SizeInTokens() - rest, prescanner)) {
        ifStack_.push(CanDeadElseAppear::Yes);
        return;
      }
    }
  }
  prescanner.Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
}
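
// Illustration (not part of the implementation): in
//   #if defined(FOO) && BAR >= 2
// the "defined(FOO)" test is rewritten to 1 or 0 during macro replacement,
// any remaining undefined identifiers evaluate to zero, and the result is
// then computed by ExpressionValue() below using the precedence levels
// documented next.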

// Precedence level codes used here to accommodate mixed Fortran and C:
// 15: parentheses and constants, logical !, bitwise ~
// 14: unary + and -
// 13: **
// 12: *, /, % (modulus)
// 11: + and -
// 10: << and >>
//  9: bitwise &
//  8: bitwise ^
//  7: bitwise |
//  6: relations (.EQ., ==, &c.)
//  5: .NOT.
//  4: .AND., &&
//  3: .OR., ||
//  2: .EQV. and .NEQV. / .XOR.
//  1: ? :
//  0: ,
static std::int64_t ExpressionValue(const TokenSequence &token,
    int minimumPrecedence, std::size_t *atToken,
    std::optional<Message> *error) {
  enum Operator {
    PARENS,
    CONST,
    NOTZERO, // !
    COMPLEMENT, // ~
    UPLUS,
    UMINUS,
    POWER,
    TIMES,
    DIVIDE,
    MODULUS,
    ADD,
    SUBTRACT,
    LEFTSHIFT,
    RIGHTSHIFT,
    BITAND,
    BITXOR,
    BITOR,
    LT,
    LE,
    EQ,
    NE,
    GE,
    GT,
    NOT,
    AND,
    OR,
    EQV,
    NEQV,
    SELECT,
    COMMA
  };
  static const int precedence[]{
      15, 15, 15, 15, // (), 6, !, ~
      14, 14, // unary +, -
      13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
      9, 8, 7, // &, ^, |
      6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
      5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
      1, 0 // ?: and ,
  };
  static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
      11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};

  static std::map<std::string, enum Operator> opNameMap;
  if (opNameMap.empty()) {
    opNameMap["("] = PARENS;
    opNameMap["!"] = NOTZERO;
    opNameMap["~"] = COMPLEMENT;
    opNameMap["**"] = POWER;
    opNameMap["*"] = TIMES;
    opNameMap["/"] = DIVIDE;
    opNameMap["%"] = MODULUS;
    opNameMap["+"] = ADD;
    opNameMap["-"] = SUBTRACT;
    opNameMap["<<"] = LEFTSHIFT;
    opNameMap[">>"] = RIGHTSHIFT;
    opNameMap["&"] = BITAND;
    opNameMap["^"] = BITXOR;
    opNameMap["|"] = BITOR;
    opNameMap[".lt."] = opNameMap["<"] = LT;
    opNameMap[".le."] = opNameMap["<="] = LE;
    opNameMap[".eq."] = opNameMap["=="] = EQ;
    opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
    opNameMap[".ge."] = opNameMap[">="] = GE;
    opNameMap[".gt."] = opNameMap[">"] = GT;
    opNameMap[".not."] = NOT;
    opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
    opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
    opNameMap[".eqv."] = EQV;
    opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
    opNameMap["?"] = SELECT;
    opNameMap[","] = COMMA;
  }

  std::size_t tokens{token.SizeInTokens()};
  CHECK(tokens > 0);
  if (*atToken >= tokens) {
    *error = Message{
        token.GetProvenanceRange(), "incomplete expression"_err_en_US};
    return 0;
  }

  // Parse and evaluate a primary or a unary operator and its operand.
  std::size_t opAt{*atToken};
  std::string t{token.TokenAt(opAt).ToString()};
  enum Operator op;
  std::int64_t left{0};
  if (t == "(") {
    op = PARENS;
  } else if (IsDecimalDigit(t[0])) {
    op = CONST;
    std::size_t consumed{0};
    left = std::stoll(t, &consumed, 0 /*base to be detected*/);
    if (consumed < t.size()) {
      *error = Message{token.GetTokenProvenanceRange(opAt),
          "Uninterpretable numeric constant '%s'"_err_en_US, t};
      return 0;
    }
  } else if (IsLegalIdentifierStart(t[0])) {
    // undefined macro name -> zero
    // TODO: BOZ constants?
    op = CONST;
  } else if (t == "+") {
    op = UPLUS;
  } else if (t == "-") {
    op = UMINUS;
  } else if (t == "." && *atToken + 2 < tokens &&
      ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
      token.TokenAt(*atToken + 2).ToString() == ".") {
    op = NOT;
    *atToken += 2;
  } else {
    auto it{opNameMap.find(t)};
    if (it != opNameMap.end()) {
      op = it->second;
    } else {
      *error = Message{token.GetTokenProvenanceRange(opAt),
          "operand expected in expression"_err_en_US};
      return 0;
    }
  }
  if (precedence[op] < minimumPrecedence) {
    *error = Message{token.GetTokenProvenanceRange(opAt),
        "operator precedence error"_err_en_US};
    return 0;
  }
  ++*atToken;
  if (op != CONST) {
    left = ExpressionValue(token, operandPrecedence[op], atToken, error);
    if (*error) {
      return 0;
    }
    switch (op) {
    case PARENS:
      if (*atToken < tokens && token.TokenAt(*atToken).OnlyNonBlank() == ')') {
        ++*atToken;
        break;
      }
      if (*atToken >= tokens) {
        *error = Message{token.GetProvenanceRange(),
            "')' missing from expression"_err_en_US};
      } else {
        *error = Message{
            token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
      }
      return 0;
    case NOTZERO:
      left = !left;
      break;
    case COMPLEMENT:
      left = ~left;
      break;
    case UPLUS:
      break;
    case UMINUS:
      left = -left;
      break;
    case NOT:
      left = -!left;
      break;
    default:
      CRASH_NO_CASE;
    }
  }

  // Parse and evaluate binary operators and their second operands, if present.
  while (*atToken < tokens) {
    int advance{1};
    t = token.TokenAt(*atToken).ToString();
    if (t == "." && *atToken + 2 < tokens &&
        token.TokenAt(*atToken + 2).ToString() == ".") {
      t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
      advance = 3;
    }
    auto it{opNameMap.find(t)};
    if (it == opNameMap.end()) {
      break;
    }
    op = it->second;
    if (op < POWER || precedence[op] < minimumPrecedence) {
      break;
    }
    opAt = *atToken;
    *atToken += advance;
    std::int64_t right{
        ExpressionValue(token, operandPrecedence[op], atToken, error)};
    if (*error) {
      return 0;
    }
    switch (op) {
    case POWER:
      if (left == 0) {
        if (right < 0) {
          *error = Message{token.GetTokenProvenanceRange(opAt),
              "0 ** negative power"_err_en_US};
        }
      } else if (left != 1 && right != 1) {
        if (right <= 0) {
          left = !right;
        } else {
          std::int64_t power{1};
          for (; right > 0; --right) {
            if ((power * left) / left != power) {
              *error = Message{token.GetTokenProvenanceRange(opAt),
                  "overflow in exponentiation"_err_en_US};
              left = 1;
            }
            power *= left;
          }
          left = power;
        }
      }
      break;
    case TIMES:
      if (left != 0 && right != 0 && ((left * right) / left) != right) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "overflow in multiplication"_err_en_US};
      }
      left = left * right;
      break;
    case DIVIDE:
      if (right == 0) {
        *error = Message{
            token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
        left = 0;
      } else {
        left = left / right;
      }
      break;
    case MODULUS:
      if (right == 0) {
        *error = Message{
            token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
        left = 0;
      } else {
        left = left % right;
      }
      break;
    case ADD:
      if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "overflow in addition"_err_en_US};
      }
      left = left + right;
      break;
    case SUBTRACT:
      if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "overflow in subtraction"_err_en_US};
      }
      left = left - right;
      break;
    case LEFTSHIFT:
      if (right < 0 || right > 64) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "bad left shift count"_err_en_US};
      }
      left = right >= 64 ? 0 : left << right;
      break;
    case RIGHTSHIFT:
      if (right < 0 || right > 64) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "bad right shift count"_err_en_US};
      }
      left = right >= 64 ? 0 : left >> right;
      break;
    case BITAND:
    case AND:
      left = left & right;
      break;
    case BITXOR:
      left = left ^ right;
      break;
    case BITOR:
    case OR:
      left = left | right;
      break;
    case LT:
      left = -(left < right);
      break;
    case LE:
      left = -(left <= right);
      break;
    case EQ:
      left = -(left == right);
      break;
    case NE:
      left = -(left != right);
      break;
    case GE:
      left = -(left >= right);
      break;
    case GT:
      left = -(left > right);
      break;
    case EQV:
      left = -(!left == !right);
      break;
    case NEQV:
      left = -(!left != !right);
      break;
    case SELECT:
      if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "':' required in selection expression"_err_en_US};
        return 0;
      } else {
        ++*atToken;
        std::int64_t third{
            ExpressionValue(token, operandPrecedence[op], atToken, error)};
        left = left != 0 ? right : third;
      }
      break;
    case COMMA:
      left = right;
      break;
    default:
      CRASH_NO_CASE;
    }
  }
  return left;
}

bool Preprocessor::IsIfPredicateTrue(const TokenSequence &directive,
    std::size_t first, std::size_t exprTokens, Prescanner &prescanner) {
  TokenSequence expr{directive, first, exprTokens};
  TokenSequence replaced{
      ReplaceMacros(expr, prescanner, nullptr, /*inIfExpression=*/true)};
  if (replaced.HasBlanks()) {
    replaced.RemoveBlanks();
  }
  if (replaced.empty()) {
    prescanner.Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
    return false;
  }
  std::size_t atToken{0};
  std::optional<Message> error;
  bool result{ExpressionValue(replaced, 0, &atToken, &error) != 0};
  if (error) {
    prescanner.Say(std::move(*error));
  } else if (atToken < replaced.SizeInTokens() &&
      replaced.TokenAt(atToken).ToString() != "!") {
    prescanner.Say(replaced.GetIntervalProvenanceRange(
                       atToken, replaced.SizeInTokens() - atToken),
        atToken == 0 ? "could not parse any expression"_err_en_US
                     : "excess characters after expression"_err_en_US);
  }
  return result;
}

void Preprocessor::LineDirective(
    const TokenSequence &dir, std::size_t j, Prescanner &prescanner) {
  std::size_t tokens{dir.SizeInTokens()};
  const std::string *linePath{nullptr};
  std::optional<int> lineNumber;
  SourceFile *sourceFile{nullptr};
  std::optional<SourcePosition> pos;
  for (; j < tokens; j = dir.SkipBlanks(j + 1)) {
    std::string tstr{dir.TokenAt(j).ToString()};
    Provenance provenance{dir.GetTokenProvenance(j)};
    if (!pos) {
      pos = allSources_.GetSourcePosition(provenance);
    }
    if (!sourceFile && pos) {
      sourceFile = const_cast<SourceFile *>(&*pos->sourceFile);
    }
    if (tstr.front() == '"' && tstr.back() == '"') {
      tstr = tstr.substr(1, tstr.size() - 2);
      if (!tstr.empty() && sourceFile) {
        linePath = &sourceFile->SavePath(std::move(tstr));
      }
    } else if (IsDecimalDigit(tstr[0])) {
      if (!lineNumber) { // ignore later column number
        int ln{0};
        for (char c : tstr) {
          if (IsDecimalDigit(c)) {
            int nln{10 * ln + c - '0'};
            if (nln / 10 == ln && nln % 10 == c - '0') {
              ln = nln;
              continue;
            }
          }
          prescanner.Say(provenance,
              "bad line number '%s' in #line directive"_err_en_US, tstr);
          return;
        }
        lineNumber = ln;
      }
    } else {
      prescanner.Say(
          provenance, "bad token '%s' in #line directive"_err_en_US, tstr);
      return;
    }
  }
  if (lineNumber && sourceFile) {
    CHECK(pos);
    if (!linePath) {
      linePath = &*pos->path;
    }
    sourceFile->LineDirective(pos->trueLineNumber + 1, *linePath, *lineNumber);
  }
}

} // namespace Fortran::parser