1 //===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the SarifDiagnostics object. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "clang/Analysis/PathDiagnostic.h" 14 #include "clang/Basic/FileManager.h" 15 #include "clang/Basic/Version.h" 16 #include "clang/Lex/Preprocessor.h" 17 #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/StringMap.h" 20 #include "llvm/Support/ConvertUTF.h" 21 #include "llvm/Support/JSON.h" 22 #include "llvm/Support/Path.h" 23 24 using namespace llvm; 25 using namespace clang; 26 using namespace ento; 27 28 namespace { 29 class SarifDiagnostics : public PathDiagnosticConsumer { 30 std::string OutputFile; 31 const LangOptions &LO; 32 33 public: 34 SarifDiagnostics(const std::string &Output, const LangOptions &LO) 35 : OutputFile(Output), LO(LO) {} 36 ~SarifDiagnostics() override = default; 37 38 void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags, 39 FilesMade *FM) override; 40 41 StringRef getName() const override { return "SarifDiagnostics"; } 42 PathGenerationScheme getGenerationScheme() const override { return Minimal; } 43 bool supportsLogicalOpControlFlow() const override { return true; } 44 bool supportsCrossFileDiagnostics() const override { return true; } 45 }; 46 } // end anonymous namespace 47 48 void ento::createSarifDiagnosticConsumer( 49 PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C, 50 const std::string &Output, const Preprocessor &PP, 51 const cross_tu::CrossTranslationUnitContext &CTU) { 52 53 // TODO: Emit an error here. 54 if (Output.empty()) 55 return; 56 57 C.push_back(new SarifDiagnostics(Output, PP.getLangOpts())); 58 createTextMinimalPathDiagnosticConsumer(std::move(DiagOpts), C, Output, PP, 59 CTU); 60 } 61 62 static StringRef getFileName(const FileEntry &FE) { 63 StringRef Filename = FE.tryGetRealPathName(); 64 if (Filename.empty()) 65 Filename = FE.getName(); 66 return Filename; 67 } 68 69 static std::string percentEncodeURICharacter(char C) { 70 // RFC 3986 claims alpha, numeric, and this handful of 71 // characters are not reserved for the path component and 72 // should be written out directly. Otherwise, percent 73 // encode the character and write that out instead of the 74 // reserved character. 75 if (llvm::isAlnum(C) || 76 StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) 77 return std::string(&C, 1); 78 return "%" + llvm::toHex(StringRef(&C, 1)); 79 } 80 81 static std::string fileNameToURI(StringRef Filename) { 82 llvm::SmallString<32> Ret = StringRef("file://"); 83 84 // Get the root name to see if it has a URI authority. 85 StringRef Root = sys::path::root_name(Filename); 86 if (Root.startswith("//")) { 87 // There is an authority, so add it to the URI. 88 Ret += Root.drop_front(2).str(); 89 } else if (!Root.empty()) { 90 // There is no authority, so end the component and add the root to the URI. 91 Ret += Twine("/" + Root).str(); 92 } 93 94 auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); 95 assert(Iter != End && "Expected there to be a non-root path component."); 96 // Add the rest of the path components, encoding any reserved characters; 97 // we skip past the first path component, as it was handled it above. 98 std::for_each(++Iter, End, [&Ret](StringRef Component) { 99 // For reasons unknown to me, we may get a backslash with Windows native 100 // paths for the initial backslash following the drive component, which 101 // we need to ignore as a URI path part. 102 if (Component == "\\") 103 return; 104 105 // Add the separator between the previous path part and the one being 106 // currently processed. 107 Ret += "/"; 108 109 // URI encode the part. 110 for (char C : Component) { 111 Ret += percentEncodeURICharacter(C); 112 } 113 }); 114 115 return std::string(Ret); 116 } 117 118 static json::Object createArtifactLocation(const FileEntry &FE) { 119 return json::Object{{"uri", fileNameToURI(getFileName(FE))}}; 120 } 121 122 static json::Object createArtifact(const FileEntry &FE) { 123 return json::Object{{"location", createArtifactLocation(FE)}, 124 {"roles", json::Array{"resultFile"}}, 125 {"length", FE.getSize()}, 126 {"mimeType", "text/plain"}}; 127 } 128 129 static json::Object createArtifactLocation(const FileEntry &FE, 130 json::Array &Artifacts) { 131 std::string FileURI = fileNameToURI(getFileName(FE)); 132 133 // See if the Artifacts array contains this URI already. If it does not, 134 // create a new artifact object to add to the array. 135 auto I = llvm::find_if(Artifacts, [&](const json::Value &File) { 136 if (const json::Object *Obj = File.getAsObject()) { 137 if (const json::Object *FileLoc = Obj->getObject("location")) { 138 Optional<StringRef> URI = FileLoc->getString("uri"); 139 return URI && URI->equals(FileURI); 140 } 141 } 142 return false; 143 }); 144 145 // Calculate the index within the artifact array so it can be stored in 146 // the JSON object. 147 auto Index = static_cast<unsigned>(std::distance(Artifacts.begin(), I)); 148 if (I == Artifacts.end()) 149 Artifacts.push_back(createArtifact(FE)); 150 151 return json::Object{{"uri", FileURI}, {"index", Index}}; 152 } 153 154 static unsigned int adjustColumnPos(const SourceManager &SM, SourceLocation Loc, 155 unsigned int TokenLen = 0) { 156 assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); 157 158 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedExpansionLoc(Loc); 159 assert(LocInfo.second > SM.getExpansionColumnNumber(Loc) && 160 "position in file is before column number?"); 161 162 Optional<MemoryBufferRef> Buf = SM.getBufferOrNone(LocInfo.first); 163 assert(Buf && "got an invalid buffer for the location's file"); 164 assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && 165 "token extends past end of buffer?"); 166 167 // Adjust the offset to be the start of the line, since we'll be counting 168 // Unicode characters from there until our column offset. 169 unsigned int Off = LocInfo.second - (SM.getExpansionColumnNumber(Loc) - 1); 170 unsigned int Ret = 1; 171 while (Off < (LocInfo.second + TokenLen)) { 172 Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); 173 Ret++; 174 } 175 176 return Ret; 177 } 178 179 static json::Object createTextRegion(const LangOptions &LO, SourceRange R, 180 const SourceManager &SM) { 181 json::Object Region{ 182 {"startLine", SM.getExpansionLineNumber(R.getBegin())}, 183 {"startColumn", adjustColumnPos(SM, R.getBegin())}, 184 }; 185 if (R.getBegin() == R.getEnd()) { 186 Region["endColumn"] = adjustColumnPos(SM, R.getBegin()); 187 } else { 188 Region["endLine"] = SM.getExpansionLineNumber(R.getEnd()); 189 Region["endColumn"] = adjustColumnPos( 190 SM, R.getEnd(), 191 Lexer::MeasureTokenLength(R.getEnd(), SM, LO)); 192 } 193 return Region; 194 } 195 196 static json::Object createPhysicalLocation(const LangOptions &LO, 197 SourceRange R, const FileEntry &FE, 198 const SourceManager &SMgr, 199 json::Array &Artifacts) { 200 return json::Object{ 201 {{"artifactLocation", createArtifactLocation(FE, Artifacts)}, 202 {"region", createTextRegion(LO, R, SMgr)}}}; 203 } 204 205 enum class Importance { Important, Essential, Unimportant }; 206 207 static StringRef importanceToStr(Importance I) { 208 switch (I) { 209 case Importance::Important: 210 return "important"; 211 case Importance::Essential: 212 return "essential"; 213 case Importance::Unimportant: 214 return "unimportant"; 215 } 216 llvm_unreachable("Fully covered switch is not so fully covered"); 217 } 218 219 static json::Object createThreadFlowLocation(json::Object &&Location, 220 Importance I) { 221 return json::Object{{"location", std::move(Location)}, 222 {"importance", importanceToStr(I)}}; 223 } 224 225 static json::Object createMessage(StringRef Text) { 226 return json::Object{{"text", Text.str()}}; 227 } 228 229 static json::Object createLocation(json::Object &&PhysicalLocation, 230 StringRef Message = "") { 231 json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; 232 if (!Message.empty()) 233 Ret.insert({"message", createMessage(Message)}); 234 return Ret; 235 } 236 237 static Importance calculateImportance(const PathDiagnosticPiece &Piece) { 238 switch (Piece.getKind()) { 239 case PathDiagnosticPiece::Call: 240 case PathDiagnosticPiece::Macro: 241 case PathDiagnosticPiece::Note: 242 case PathDiagnosticPiece::PopUp: 243 // FIXME: What should be reported here? 244 break; 245 case PathDiagnosticPiece::Event: 246 return Piece.getTagStr() == "ConditionBRVisitor" ? Importance::Important 247 : Importance::Essential; 248 case PathDiagnosticPiece::ControlFlow: 249 return Importance::Unimportant; 250 } 251 return Importance::Unimportant; 252 } 253 254 static json::Object createThreadFlow(const LangOptions &LO, 255 const PathPieces &Pieces, 256 json::Array &Artifacts) { 257 const SourceManager &SMgr = Pieces.front()->getLocation().getManager(); 258 json::Array Locations; 259 for (const auto &Piece : Pieces) { 260 const PathDiagnosticLocation &P = Piece->getLocation(); 261 Locations.push_back(createThreadFlowLocation( 262 createLocation(createPhysicalLocation( 263 LO, P.asRange(), 264 *P.asLocation().getExpansionLoc().getFileEntry(), 265 SMgr, Artifacts), 266 Piece->getString()), 267 calculateImportance(*Piece))); 268 } 269 return json::Object{{"locations", std::move(Locations)}}; 270 } 271 272 static json::Object createCodeFlow(const LangOptions &LO, 273 const PathPieces &Pieces, 274 json::Array &Artifacts) { 275 return json::Object{ 276 {"threadFlows", json::Array{createThreadFlow(LO, Pieces, Artifacts)}}}; 277 } 278 279 static json::Object createResult(const LangOptions &LO, 280 const PathDiagnostic &Diag, 281 json::Array &Artifacts, 282 const StringMap<unsigned> &RuleMapping) { 283 const PathPieces &Path = Diag.path.flatten(false); 284 const SourceManager &SMgr = Path.front()->getLocation().getManager(); 285 286 auto Iter = RuleMapping.find(Diag.getCheckerName()); 287 assert(Iter != RuleMapping.end() && "Rule ID is not in the array index map?"); 288 289 return json::Object{ 290 {"message", createMessage(Diag.getVerboseDescription())}, 291 {"codeFlows", json::Array{createCodeFlow(LO, Path, Artifacts)}}, 292 {"locations", 293 json::Array{createLocation(createPhysicalLocation( 294 LO, Diag.getLocation().asRange(), 295 *Diag.getLocation().asLocation().getExpansionLoc().getFileEntry(), 296 SMgr, Artifacts))}}, 297 {"ruleIndex", Iter->getValue()}, 298 {"ruleId", Diag.getCheckerName()}}; 299 } 300 301 static StringRef getRuleDescription(StringRef CheckName) { 302 return llvm::StringSwitch<StringRef>(CheckName) 303 #define GET_CHECKERS 304 #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \ 305 .Case(FULLNAME, HELPTEXT) 306 #include "clang/StaticAnalyzer/Checkers/Checkers.inc" 307 #undef CHECKER 308 #undef GET_CHECKERS 309 ; 310 } 311 312 static StringRef getRuleHelpURIStr(StringRef CheckName) { 313 return llvm::StringSwitch<StringRef>(CheckName) 314 #define GET_CHECKERS 315 #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \ 316 .Case(FULLNAME, DOC_URI) 317 #include "clang/StaticAnalyzer/Checkers/Checkers.inc" 318 #undef CHECKER 319 #undef GET_CHECKERS 320 ; 321 } 322 323 static json::Object createRule(const PathDiagnostic &Diag) { 324 StringRef CheckName = Diag.getCheckerName(); 325 json::Object Ret{ 326 {"fullDescription", createMessage(getRuleDescription(CheckName))}, 327 {"name", CheckName}, 328 {"id", CheckName}}; 329 330 std::string RuleURI = std::string(getRuleHelpURIStr(CheckName)); 331 if (!RuleURI.empty()) 332 Ret["helpUri"] = RuleURI; 333 334 return Ret; 335 } 336 337 static json::Array createRules(std::vector<const PathDiagnostic *> &Diags, 338 StringMap<unsigned> &RuleMapping) { 339 json::Array Rules; 340 llvm::StringSet<> Seen; 341 342 llvm::for_each(Diags, [&](const PathDiagnostic *D) { 343 StringRef RuleID = D->getCheckerName(); 344 std::pair<llvm::StringSet<>::iterator, bool> P = Seen.insert(RuleID); 345 if (P.second) { 346 RuleMapping[RuleID] = Rules.size(); // Maps RuleID to an Array Index. 347 Rules.push_back(createRule(*D)); 348 } 349 }); 350 351 return Rules; 352 } 353 354 static json::Object createTool(std::vector<const PathDiagnostic *> &Diags, 355 StringMap<unsigned> &RuleMapping) { 356 return json::Object{ 357 {"driver", json::Object{{"name", "clang"}, 358 {"fullName", "clang static analyzer"}, 359 {"language", "en-US"}, 360 {"version", getClangFullVersion()}, 361 {"rules", createRules(Diags, RuleMapping)}}}}; 362 } 363 364 static json::Object createRun(const LangOptions &LO, 365 std::vector<const PathDiagnostic *> &Diags) { 366 json::Array Results, Artifacts; 367 StringMap<unsigned> RuleMapping; 368 json::Object Tool = createTool(Diags, RuleMapping); 369 370 llvm::for_each(Diags, [&](const PathDiagnostic *D) { 371 Results.push_back(createResult(LO, *D, Artifacts, RuleMapping)); 372 }); 373 374 return json::Object{{"tool", std::move(Tool)}, 375 {"results", std::move(Results)}, 376 {"artifacts", std::move(Artifacts)}, 377 {"columnKind", "unicodeCodePoints"}}; 378 } 379 380 void SarifDiagnostics::FlushDiagnosticsImpl( 381 std::vector<const PathDiagnostic *> &Diags, FilesMade *) { 382 // We currently overwrite the file if it already exists. However, it may be 383 // useful to add a feature someday that allows the user to append a run to an 384 // existing SARIF file. One danger from that approach is that the size of the 385 // file can become large very quickly, so decoding into JSON to append a run 386 // may be an expensive operation. 387 std::error_code EC; 388 llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::OF_Text); 389 if (EC) { 390 llvm::errs() << "warning: could not create file: " << EC.message() << '\n'; 391 return; 392 } 393 json::Object Sarif{ 394 {"$schema", 395 "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"}, 396 {"version", "2.1.0"}, 397 {"runs", json::Array{createRun(LO, Diags)}}}; 398 OS << llvm::formatv("{0:2}\n", json::Value(std::move(Sarif))); 399 } 400