10b57cec5SDimitry Andric //===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This is a utility class for instrumentation passes (like AddressSanitizer
100b57cec5SDimitry Andric // or ThreadSanitizer) to avoid instrumenting some functions or global
110b57cec5SDimitry Andric // variables, or to instrument some functions or global variables in a specific
120b57cec5SDimitry Andric // way, based on a user-supplied list.
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric
160b57cec5SDimitry Andric #include "llvm/Support/SpecialCaseList.h"
17*5f757f3fSDimitry Andric #include "llvm/Support/LineIterator.h"
180b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
19480093f4SDimitry Andric #include "llvm/Support/VirtualFileSystem.h"
20*5f757f3fSDimitry Andric #include <stdio.h>
210b57cec5SDimitry Andric #include <string>
220b57cec5SDimitry Andric #include <system_error>
230b57cec5SDimitry Andric #include <utility>
240b57cec5SDimitry Andric
250b57cec5SDimitry Andric namespace llvm {
260b57cec5SDimitry Andric
insert(StringRef Pattern,unsigned LineNumber,bool UseGlobs)27*5f757f3fSDimitry Andric Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
28*5f757f3fSDimitry Andric bool UseGlobs) {
29*5f757f3fSDimitry Andric if (Pattern.empty())
30*5f757f3fSDimitry Andric return createStringError(errc::invalid_argument,
31*5f757f3fSDimitry Andric Twine("Supplied ") +
32*5f757f3fSDimitry Andric (UseGlobs ? "glob" : "regex") + " was blank");
330b57cec5SDimitry Andric
34*5f757f3fSDimitry Andric if (!UseGlobs) {
350b57cec5SDimitry Andric // Replace * with .*
36*5f757f3fSDimitry Andric auto Regexp = Pattern.str();
370b57cec5SDimitry Andric for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
380b57cec5SDimitry Andric pos += strlen(".*")) {
390b57cec5SDimitry Andric Regexp.replace(pos, strlen("*"), ".*");
400b57cec5SDimitry Andric }
410b57cec5SDimitry Andric
420b57cec5SDimitry Andric Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
430b57cec5SDimitry Andric
440b57cec5SDimitry Andric // Check that the regexp is valid.
450b57cec5SDimitry Andric Regex CheckRE(Regexp);
46*5f757f3fSDimitry Andric std::string REError;
470b57cec5SDimitry Andric if (!CheckRE.isValid(REError))
48*5f757f3fSDimitry Andric return createStringError(errc::invalid_argument, REError);
490b57cec5SDimitry Andric
50*5f757f3fSDimitry Andric RegExes.emplace_back(std::make_pair(
51*5f757f3fSDimitry Andric std::make_unique<Regex>(std::move(CheckRE)), LineNumber));
52*5f757f3fSDimitry Andric
53*5f757f3fSDimitry Andric return Error::success();
54*5f757f3fSDimitry Andric }
55*5f757f3fSDimitry Andric
56*5f757f3fSDimitry Andric auto [It, DidEmplace] = Globs.try_emplace(Pattern);
57*5f757f3fSDimitry Andric if (DidEmplace) {
58*5f757f3fSDimitry Andric // We must be sure to use the string in the map rather than the provided
59*5f757f3fSDimitry Andric // reference which could be destroyed before match() is called
60*5f757f3fSDimitry Andric Pattern = It->getKey();
61*5f757f3fSDimitry Andric auto &Pair = It->getValue();
62*5f757f3fSDimitry Andric if (auto Err = GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024)
63*5f757f3fSDimitry Andric .moveInto(Pair.first))
64*5f757f3fSDimitry Andric return Err;
65*5f757f3fSDimitry Andric Pair.second = LineNumber;
66*5f757f3fSDimitry Andric }
67*5f757f3fSDimitry Andric return Error::success();
680b57cec5SDimitry Andric }
690b57cec5SDimitry Andric
match(StringRef Query) const700b57cec5SDimitry Andric unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
71*5f757f3fSDimitry Andric for (const auto &[Pattern, Pair] : Globs)
72*5f757f3fSDimitry Andric if (Pair.first.match(Query))
73*5f757f3fSDimitry Andric return Pair.second;
74*5f757f3fSDimitry Andric for (const auto &[Regex, LineNumber] : RegExes)
75*5f757f3fSDimitry Andric if (Regex->match(Query))
76*5f757f3fSDimitry Andric return LineNumber;
770b57cec5SDimitry Andric return 0;
780b57cec5SDimitry Andric }
790b57cec5SDimitry Andric
80*5f757f3fSDimitry Andric // TODO: Refactor this to return Expected<...>
810b57cec5SDimitry Andric std::unique_ptr<SpecialCaseList>
create(const std::vector<std::string> & Paths,llvm::vfs::FileSystem & FS,std::string & Error)820b57cec5SDimitry Andric SpecialCaseList::create(const std::vector<std::string> &Paths,
83480093f4SDimitry Andric llvm::vfs::FileSystem &FS, std::string &Error) {
840b57cec5SDimitry Andric std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
85480093f4SDimitry Andric if (SCL->createInternal(Paths, FS, Error))
860b57cec5SDimitry Andric return SCL;
870b57cec5SDimitry Andric return nullptr;
880b57cec5SDimitry Andric }
890b57cec5SDimitry Andric
create(const MemoryBuffer * MB,std::string & Error)900b57cec5SDimitry Andric std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
910b57cec5SDimitry Andric std::string &Error) {
920b57cec5SDimitry Andric std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
930b57cec5SDimitry Andric if (SCL->createInternal(MB, Error))
940b57cec5SDimitry Andric return SCL;
950b57cec5SDimitry Andric return nullptr;
960b57cec5SDimitry Andric }
970b57cec5SDimitry Andric
980b57cec5SDimitry Andric std::unique_ptr<SpecialCaseList>
createOrDie(const std::vector<std::string> & Paths,llvm::vfs::FileSystem & FS)99480093f4SDimitry Andric SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
100480093f4SDimitry Andric llvm::vfs::FileSystem &FS) {
1010b57cec5SDimitry Andric std::string Error;
102480093f4SDimitry Andric if (auto SCL = create(Paths, FS, Error))
1030b57cec5SDimitry Andric return SCL;
104349cc55cSDimitry Andric report_fatal_error(Twine(Error));
1050b57cec5SDimitry Andric }
1060b57cec5SDimitry Andric
createInternal(const std::vector<std::string> & Paths,vfs::FileSystem & VFS,std::string & Error)1070b57cec5SDimitry Andric bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
108480093f4SDimitry Andric vfs::FileSystem &VFS, std::string &Error) {
1090b57cec5SDimitry Andric for (const auto &Path : Paths) {
1100b57cec5SDimitry Andric ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
111480093f4SDimitry Andric VFS.getBufferForFile(Path);
1120b57cec5SDimitry Andric if (std::error_code EC = FileOrErr.getError()) {
1130b57cec5SDimitry Andric Error = (Twine("can't open file '") + Path + "': " + EC.message()).str();
1140b57cec5SDimitry Andric return false;
1150b57cec5SDimitry Andric }
1160b57cec5SDimitry Andric std::string ParseError;
117*5f757f3fSDimitry Andric if (!parse(FileOrErr.get().get(), ParseError)) {
1180b57cec5SDimitry Andric Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
1190b57cec5SDimitry Andric return false;
1200b57cec5SDimitry Andric }
1210b57cec5SDimitry Andric }
1220b57cec5SDimitry Andric return true;
1230b57cec5SDimitry Andric }
1240b57cec5SDimitry Andric
createInternal(const MemoryBuffer * MB,std::string & Error)1250b57cec5SDimitry Andric bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
1260b57cec5SDimitry Andric std::string &Error) {
127*5f757f3fSDimitry Andric if (!parse(MB, Error))
1280b57cec5SDimitry Andric return false;
1290b57cec5SDimitry Andric return true;
1300b57cec5SDimitry Andric }
1310b57cec5SDimitry Andric
132*5f757f3fSDimitry Andric Expected<SpecialCaseList::Section *>
addSection(StringRef SectionStr,unsigned LineNo,bool UseGlobs)133*5f757f3fSDimitry Andric SpecialCaseList::addSection(StringRef SectionStr, unsigned LineNo,
134*5f757f3fSDimitry Andric bool UseGlobs) {
135*5f757f3fSDimitry Andric auto [It, DidEmplace] = Sections.try_emplace(SectionStr);
136*5f757f3fSDimitry Andric auto &Section = It->getValue();
137*5f757f3fSDimitry Andric if (DidEmplace)
138*5f757f3fSDimitry Andric if (auto Err = Section.SectionMatcher->insert(SectionStr, LineNo, UseGlobs))
139*5f757f3fSDimitry Andric return createStringError(errc::invalid_argument,
140*5f757f3fSDimitry Andric "malformed section at line " + Twine(LineNo) +
141*5f757f3fSDimitry Andric ": '" + SectionStr +
142*5f757f3fSDimitry Andric "': " + toString(std::move(Err)));
143*5f757f3fSDimitry Andric return &Section;
144*5f757f3fSDimitry Andric }
1450b57cec5SDimitry Andric
parse(const MemoryBuffer * MB,std::string & Error)146*5f757f3fSDimitry Andric bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
147*5f757f3fSDimitry Andric Section *CurrentSection;
148*5f757f3fSDimitry Andric if (auto Err = addSection("*", 1).moveInto(CurrentSection)) {
149*5f757f3fSDimitry Andric Error = toString(std::move(Err));
1500b57cec5SDimitry Andric return false;
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric
153*5f757f3fSDimitry Andric // In https://reviews.llvm.org/D154014 we added glob support and planned to
154*5f757f3fSDimitry Andric // remove regex support in patterns. We temporarily support the original
155*5f757f3fSDimitry Andric // behavior using regexes if "#!special-case-list-v1" is the first line of the
156*5f757f3fSDimitry Andric // file. For more details, see
157*5f757f3fSDimitry Andric // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
158*5f757f3fSDimitry Andric bool UseGlobs = !MB->getBuffer().starts_with("#!special-case-list-v1\n");
1590b57cec5SDimitry Andric
160*5f757f3fSDimitry Andric for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
161*5f757f3fSDimitry Andric !LineIt.is_at_eof(); LineIt++) {
162*5f757f3fSDimitry Andric unsigned LineNo = LineIt.line_number();
163*5f757f3fSDimitry Andric StringRef Line = LineIt->trim();
164*5f757f3fSDimitry Andric if (Line.empty())
165*5f757f3fSDimitry Andric continue;
166*5f757f3fSDimitry Andric
167*5f757f3fSDimitry Andric // Save section names
168*5f757f3fSDimitry Andric if (Line.starts_with("[")) {
169*5f757f3fSDimitry Andric if (!Line.ends_with("]")) {
1700b57cec5SDimitry Andric Error =
171*5f757f3fSDimitry Andric ("malformed section header on line " + Twine(LineNo) + ": " + Line)
1720b57cec5SDimitry Andric .str();
1730b57cec5SDimitry Andric return false;
1740b57cec5SDimitry Andric }
1750b57cec5SDimitry Andric
176*5f757f3fSDimitry Andric if (auto Err = addSection(Line.drop_front().drop_back(), LineNo, UseGlobs)
177*5f757f3fSDimitry Andric .moveInto(CurrentSection)) {
178*5f757f3fSDimitry Andric Error = toString(std::move(Err));
179*5f757f3fSDimitry Andric return false;
180*5f757f3fSDimitry Andric }
1810b57cec5SDimitry Andric continue;
1820b57cec5SDimitry Andric }
1830b57cec5SDimitry Andric
184*5f757f3fSDimitry Andric // Get our prefix and unparsed glob.
185*5f757f3fSDimitry Andric auto [Prefix, Postfix] = Line.split(":");
186*5f757f3fSDimitry Andric if (Postfix.empty()) {
1870b57cec5SDimitry Andric // Missing ':' in the line.
188*5f757f3fSDimitry Andric Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'").str();
1890b57cec5SDimitry Andric return false;
1900b57cec5SDimitry Andric }
1910b57cec5SDimitry Andric
192*5f757f3fSDimitry Andric auto [Pattern, Category] = Postfix.split("=");
193*5f757f3fSDimitry Andric auto &Entry = CurrentSection->Entries[Prefix][Category];
194*5f757f3fSDimitry Andric if (auto Err = Entry.insert(Pattern, LineNo, UseGlobs)) {
195*5f757f3fSDimitry Andric Error =
196*5f757f3fSDimitry Andric (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
197*5f757f3fSDimitry Andric Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))
198*5f757f3fSDimitry Andric .str();
1990b57cec5SDimitry Andric return false;
2000b57cec5SDimitry Andric }
2010b57cec5SDimitry Andric }
2020b57cec5SDimitry Andric return true;
2030b57cec5SDimitry Andric }
2040b57cec5SDimitry Andric
// The destructor is defaulted, but defined out of line here in this file.
20581ad6265SDimitry Andric SpecialCaseList::~SpecialCaseList() = default;
2060b57cec5SDimitry Andric
inSection(StringRef Section,StringRef Prefix,StringRef Query,StringRef Category) const2070b57cec5SDimitry Andric bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
2080b57cec5SDimitry Andric StringRef Query, StringRef Category) const {
2090b57cec5SDimitry Andric return inSectionBlame(Section, Prefix, Query, Category);
2100b57cec5SDimitry Andric }
2110b57cec5SDimitry Andric
inSectionBlame(StringRef Section,StringRef Prefix,StringRef Query,StringRef Category) const2120b57cec5SDimitry Andric unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
2130b57cec5SDimitry Andric StringRef Query,
2140b57cec5SDimitry Andric StringRef Category) const {
215*5f757f3fSDimitry Andric for (const auto &It : Sections) {
216*5f757f3fSDimitry Andric const auto &S = It.getValue();
217*5f757f3fSDimitry Andric if (S.SectionMatcher->match(Section)) {
218*5f757f3fSDimitry Andric unsigned Blame = inSectionBlame(S.Entries, Prefix, Query, Category);
2190b57cec5SDimitry Andric if (Blame)
2200b57cec5SDimitry Andric return Blame;
2210b57cec5SDimitry Andric }
222*5f757f3fSDimitry Andric }
2230b57cec5SDimitry Andric return 0;
2240b57cec5SDimitry Andric }
2250b57cec5SDimitry Andric
inSectionBlame(const SectionEntries & Entries,StringRef Prefix,StringRef Query,StringRef Category) const2260b57cec5SDimitry Andric unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries,
2270b57cec5SDimitry Andric StringRef Prefix, StringRef Query,
2280b57cec5SDimitry Andric StringRef Category) const {
2290b57cec5SDimitry Andric SectionEntries::const_iterator I = Entries.find(Prefix);
2300b57cec5SDimitry Andric if (I == Entries.end()) return 0;
2310b57cec5SDimitry Andric StringMap<Matcher>::const_iterator II = I->second.find(Category);
2320b57cec5SDimitry Andric if (II == I->second.end()) return 0;
2330b57cec5SDimitry Andric
2340b57cec5SDimitry Andric return II->getValue().match(Query);
2350b57cec5SDimitry Andric }
2360b57cec5SDimitry Andric
2370b57cec5SDimitry Andric } // namespace llvm
238