xref: /freebsd-src/contrib/llvm-project/llvm/lib/Support/SpecialCaseList.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a utility class for instrumentation passes (like AddressSanitizer
// or ThreadSanitizer) to avoid instrumenting some functions or global
// variables, or to instrument some functions or global variables in a specific
// way, based on a user-supplied list.
//
//===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "llvm/Support/SpecialCaseList.h"
17*5f757f3fSDimitry Andric #include "llvm/Support/LineIterator.h"
180b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
19480093f4SDimitry Andric #include "llvm/Support/VirtualFileSystem.h"
20*5f757f3fSDimitry Andric #include <stdio.h>
210b57cec5SDimitry Andric #include <string>
220b57cec5SDimitry Andric #include <system_error>
230b57cec5SDimitry Andric #include <utility>
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric namespace llvm {
260b57cec5SDimitry Andric 
insert(StringRef Pattern,unsigned LineNumber,bool UseGlobs)27*5f757f3fSDimitry Andric Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
28*5f757f3fSDimitry Andric                                        bool UseGlobs) {
29*5f757f3fSDimitry Andric   if (Pattern.empty())
30*5f757f3fSDimitry Andric     return createStringError(errc::invalid_argument,
31*5f757f3fSDimitry Andric                              Twine("Supplied ") +
32*5f757f3fSDimitry Andric                                  (UseGlobs ? "glob" : "regex") + " was blank");
330b57cec5SDimitry Andric 
34*5f757f3fSDimitry Andric   if (!UseGlobs) {
350b57cec5SDimitry Andric     // Replace * with .*
36*5f757f3fSDimitry Andric     auto Regexp = Pattern.str();
370b57cec5SDimitry Andric     for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
380b57cec5SDimitry Andric          pos += strlen(".*")) {
390b57cec5SDimitry Andric       Regexp.replace(pos, strlen("*"), ".*");
400b57cec5SDimitry Andric     }
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric     Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric     // Check that the regexp is valid.
450b57cec5SDimitry Andric     Regex CheckRE(Regexp);
46*5f757f3fSDimitry Andric     std::string REError;
470b57cec5SDimitry Andric     if (!CheckRE.isValid(REError))
48*5f757f3fSDimitry Andric       return createStringError(errc::invalid_argument, REError);
490b57cec5SDimitry Andric 
50*5f757f3fSDimitry Andric     RegExes.emplace_back(std::make_pair(
51*5f757f3fSDimitry Andric         std::make_unique<Regex>(std::move(CheckRE)), LineNumber));
52*5f757f3fSDimitry Andric 
53*5f757f3fSDimitry Andric     return Error::success();
54*5f757f3fSDimitry Andric   }
55*5f757f3fSDimitry Andric 
56*5f757f3fSDimitry Andric   auto [It, DidEmplace] = Globs.try_emplace(Pattern);
57*5f757f3fSDimitry Andric   if (DidEmplace) {
58*5f757f3fSDimitry Andric     // We must be sure to use the string in the map rather than the provided
59*5f757f3fSDimitry Andric     // reference which could be destroyed before match() is called
60*5f757f3fSDimitry Andric     Pattern = It->getKey();
61*5f757f3fSDimitry Andric     auto &Pair = It->getValue();
62*5f757f3fSDimitry Andric     if (auto Err = GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024)
63*5f757f3fSDimitry Andric                        .moveInto(Pair.first))
64*5f757f3fSDimitry Andric       return Err;
65*5f757f3fSDimitry Andric     Pair.second = LineNumber;
66*5f757f3fSDimitry Andric   }
67*5f757f3fSDimitry Andric   return Error::success();
680b57cec5SDimitry Andric }
690b57cec5SDimitry Andric 
match(StringRef Query) const700b57cec5SDimitry Andric unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
71*5f757f3fSDimitry Andric   for (const auto &[Pattern, Pair] : Globs)
72*5f757f3fSDimitry Andric     if (Pair.first.match(Query))
73*5f757f3fSDimitry Andric       return Pair.second;
74*5f757f3fSDimitry Andric   for (const auto &[Regex, LineNumber] : RegExes)
75*5f757f3fSDimitry Andric     if (Regex->match(Query))
76*5f757f3fSDimitry Andric       return LineNumber;
770b57cec5SDimitry Andric   return 0;
780b57cec5SDimitry Andric }
790b57cec5SDimitry Andric 
80*5f757f3fSDimitry Andric // TODO: Refactor this to return Expected<...>
810b57cec5SDimitry Andric std::unique_ptr<SpecialCaseList>
create(const std::vector<std::string> & Paths,llvm::vfs::FileSystem & FS,std::string & Error)820b57cec5SDimitry Andric SpecialCaseList::create(const std::vector<std::string> &Paths,
83480093f4SDimitry Andric                         llvm::vfs::FileSystem &FS, std::string &Error) {
840b57cec5SDimitry Andric   std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
85480093f4SDimitry Andric   if (SCL->createInternal(Paths, FS, Error))
860b57cec5SDimitry Andric     return SCL;
870b57cec5SDimitry Andric   return nullptr;
880b57cec5SDimitry Andric }
890b57cec5SDimitry Andric 
create(const MemoryBuffer * MB,std::string & Error)900b57cec5SDimitry Andric std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
910b57cec5SDimitry Andric                                                          std::string &Error) {
920b57cec5SDimitry Andric   std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
930b57cec5SDimitry Andric   if (SCL->createInternal(MB, Error))
940b57cec5SDimitry Andric     return SCL;
950b57cec5SDimitry Andric   return nullptr;
960b57cec5SDimitry Andric }
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric std::unique_ptr<SpecialCaseList>
createOrDie(const std::vector<std::string> & Paths,llvm::vfs::FileSystem & FS)99480093f4SDimitry Andric SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
100480093f4SDimitry Andric                              llvm::vfs::FileSystem &FS) {
1010b57cec5SDimitry Andric   std::string Error;
102480093f4SDimitry Andric   if (auto SCL = create(Paths, FS, Error))
1030b57cec5SDimitry Andric     return SCL;
104349cc55cSDimitry Andric   report_fatal_error(Twine(Error));
1050b57cec5SDimitry Andric }
1060b57cec5SDimitry Andric 
createInternal(const std::vector<std::string> & Paths,vfs::FileSystem & VFS,std::string & Error)1070b57cec5SDimitry Andric bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
108480093f4SDimitry Andric                                      vfs::FileSystem &VFS, std::string &Error) {
1090b57cec5SDimitry Andric   for (const auto &Path : Paths) {
1100b57cec5SDimitry Andric     ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
111480093f4SDimitry Andric         VFS.getBufferForFile(Path);
1120b57cec5SDimitry Andric     if (std::error_code EC = FileOrErr.getError()) {
1130b57cec5SDimitry Andric       Error = (Twine("can't open file '") + Path + "': " + EC.message()).str();
1140b57cec5SDimitry Andric       return false;
1150b57cec5SDimitry Andric     }
1160b57cec5SDimitry Andric     std::string ParseError;
117*5f757f3fSDimitry Andric     if (!parse(FileOrErr.get().get(), ParseError)) {
1180b57cec5SDimitry Andric       Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
1190b57cec5SDimitry Andric       return false;
1200b57cec5SDimitry Andric     }
1210b57cec5SDimitry Andric   }
1220b57cec5SDimitry Andric   return true;
1230b57cec5SDimitry Andric }
1240b57cec5SDimitry Andric 
createInternal(const MemoryBuffer * MB,std::string & Error)1250b57cec5SDimitry Andric bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
1260b57cec5SDimitry Andric                                      std::string &Error) {
127*5f757f3fSDimitry Andric   if (!parse(MB, Error))
1280b57cec5SDimitry Andric     return false;
1290b57cec5SDimitry Andric   return true;
1300b57cec5SDimitry Andric }
1310b57cec5SDimitry Andric 
132*5f757f3fSDimitry Andric Expected<SpecialCaseList::Section *>
addSection(StringRef SectionStr,unsigned LineNo,bool UseGlobs)133*5f757f3fSDimitry Andric SpecialCaseList::addSection(StringRef SectionStr, unsigned LineNo,
134*5f757f3fSDimitry Andric                             bool UseGlobs) {
135*5f757f3fSDimitry Andric   auto [It, DidEmplace] = Sections.try_emplace(SectionStr);
136*5f757f3fSDimitry Andric   auto &Section = It->getValue();
137*5f757f3fSDimitry Andric   if (DidEmplace)
138*5f757f3fSDimitry Andric     if (auto Err = Section.SectionMatcher->insert(SectionStr, LineNo, UseGlobs))
139*5f757f3fSDimitry Andric       return createStringError(errc::invalid_argument,
140*5f757f3fSDimitry Andric                                "malformed section at line " + Twine(LineNo) +
141*5f757f3fSDimitry Andric                                    ": '" + SectionStr +
142*5f757f3fSDimitry Andric                                    "': " + toString(std::move(Err)));
143*5f757f3fSDimitry Andric   return &Section;
144*5f757f3fSDimitry Andric }
1450b57cec5SDimitry Andric 
parse(const MemoryBuffer * MB,std::string & Error)146*5f757f3fSDimitry Andric bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
147*5f757f3fSDimitry Andric   Section *CurrentSection;
148*5f757f3fSDimitry Andric   if (auto Err = addSection("*", 1).moveInto(CurrentSection)) {
149*5f757f3fSDimitry Andric     Error = toString(std::move(Err));
1500b57cec5SDimitry Andric     return false;
1510b57cec5SDimitry Andric   }
1520b57cec5SDimitry Andric 
153*5f757f3fSDimitry Andric   // In https://reviews.llvm.org/D154014 we added glob support and planned to
154*5f757f3fSDimitry Andric   // remove regex support in patterns. We temporarily support the original
155*5f757f3fSDimitry Andric   // behavior using regexes if "#!special-case-list-v1" is the first line of the
156*5f757f3fSDimitry Andric   // file. For more details, see
157*5f757f3fSDimitry Andric   // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
158*5f757f3fSDimitry Andric   bool UseGlobs = !MB->getBuffer().starts_with("#!special-case-list-v1\n");
1590b57cec5SDimitry Andric 
160*5f757f3fSDimitry Andric   for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
161*5f757f3fSDimitry Andric        !LineIt.is_at_eof(); LineIt++) {
162*5f757f3fSDimitry Andric     unsigned LineNo = LineIt.line_number();
163*5f757f3fSDimitry Andric     StringRef Line = LineIt->trim();
164*5f757f3fSDimitry Andric     if (Line.empty())
165*5f757f3fSDimitry Andric       continue;
166*5f757f3fSDimitry Andric 
167*5f757f3fSDimitry Andric     // Save section names
168*5f757f3fSDimitry Andric     if (Line.starts_with("[")) {
169*5f757f3fSDimitry Andric       if (!Line.ends_with("]")) {
1700b57cec5SDimitry Andric         Error =
171*5f757f3fSDimitry Andric             ("malformed section header on line " + Twine(LineNo) + ": " + Line)
1720b57cec5SDimitry Andric                 .str();
1730b57cec5SDimitry Andric         return false;
1740b57cec5SDimitry Andric       }
1750b57cec5SDimitry Andric 
176*5f757f3fSDimitry Andric       if (auto Err = addSection(Line.drop_front().drop_back(), LineNo, UseGlobs)
177*5f757f3fSDimitry Andric                          .moveInto(CurrentSection)) {
178*5f757f3fSDimitry Andric         Error = toString(std::move(Err));
179*5f757f3fSDimitry Andric         return false;
180*5f757f3fSDimitry Andric       }
1810b57cec5SDimitry Andric       continue;
1820b57cec5SDimitry Andric     }
1830b57cec5SDimitry Andric 
184*5f757f3fSDimitry Andric     // Get our prefix and unparsed glob.
185*5f757f3fSDimitry Andric     auto [Prefix, Postfix] = Line.split(":");
186*5f757f3fSDimitry Andric     if (Postfix.empty()) {
1870b57cec5SDimitry Andric       // Missing ':' in the line.
188*5f757f3fSDimitry Andric       Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'").str();
1890b57cec5SDimitry Andric       return false;
1900b57cec5SDimitry Andric     }
1910b57cec5SDimitry Andric 
192*5f757f3fSDimitry Andric     auto [Pattern, Category] = Postfix.split("=");
193*5f757f3fSDimitry Andric     auto &Entry = CurrentSection->Entries[Prefix][Category];
194*5f757f3fSDimitry Andric     if (auto Err = Entry.insert(Pattern, LineNo, UseGlobs)) {
195*5f757f3fSDimitry Andric       Error =
196*5f757f3fSDimitry Andric           (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
197*5f757f3fSDimitry Andric            Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))
198*5f757f3fSDimitry Andric               .str();
1990b57cec5SDimitry Andric       return false;
2000b57cec5SDimitry Andric     }
2010b57cec5SDimitry Andric   }
2020b57cec5SDimitry Andric   return true;
2030b57cec5SDimitry Andric }
2040b57cec5SDimitry Andric 
20581ad6265SDimitry Andric SpecialCaseList::~SpecialCaseList() = default;
2060b57cec5SDimitry Andric 
inSection(StringRef Section,StringRef Prefix,StringRef Query,StringRef Category) const2070b57cec5SDimitry Andric bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
2080b57cec5SDimitry Andric                                 StringRef Query, StringRef Category) const {
2090b57cec5SDimitry Andric   return inSectionBlame(Section, Prefix, Query, Category);
2100b57cec5SDimitry Andric }
2110b57cec5SDimitry Andric 
inSectionBlame(StringRef Section,StringRef Prefix,StringRef Query,StringRef Category) const2120b57cec5SDimitry Andric unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
2130b57cec5SDimitry Andric                                          StringRef Query,
2140b57cec5SDimitry Andric                                          StringRef Category) const {
215*5f757f3fSDimitry Andric   for (const auto &It : Sections) {
216*5f757f3fSDimitry Andric     const auto &S = It.getValue();
217*5f757f3fSDimitry Andric     if (S.SectionMatcher->match(Section)) {
218*5f757f3fSDimitry Andric       unsigned Blame = inSectionBlame(S.Entries, Prefix, Query, Category);
2190b57cec5SDimitry Andric       if (Blame)
2200b57cec5SDimitry Andric         return Blame;
2210b57cec5SDimitry Andric     }
222*5f757f3fSDimitry Andric   }
2230b57cec5SDimitry Andric   return 0;
2240b57cec5SDimitry Andric }
2250b57cec5SDimitry Andric 
inSectionBlame(const SectionEntries & Entries,StringRef Prefix,StringRef Query,StringRef Category) const2260b57cec5SDimitry Andric unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries,
2270b57cec5SDimitry Andric                                          StringRef Prefix, StringRef Query,
2280b57cec5SDimitry Andric                                          StringRef Category) const {
2290b57cec5SDimitry Andric   SectionEntries::const_iterator I = Entries.find(Prefix);
2300b57cec5SDimitry Andric   if (I == Entries.end()) return 0;
2310b57cec5SDimitry Andric   StringMap<Matcher>::const_iterator II = I->second.find(Category);
2320b57cec5SDimitry Andric   if (II == I->second.end()) return 0;
2330b57cec5SDimitry Andric 
2340b57cec5SDimitry Andric   return II->getValue().match(Query);
2350b57cec5SDimitry Andric }
2360b57cec5SDimitry Andric 
2370b57cec5SDimitry Andric } // namespace llvm
238