1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // FileCheck does a line-by line check of a file that validates whether it 10 // contains the expected content. This is useful for regression tests etc. 11 // 12 // This file implements most of the API that will be used by the FileCheck utility 13 // as well as various unittests. 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/FileCheck/FileCheck.h" 17 #include "FileCheckImpl.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/Support/CheckedArithmetic.h" 22 #include "llvm/Support/FormatVariadic.h" 23 #include <cstdint> 24 #include <list> 25 #include <set> 26 #include <tuple> 27 #include <utility> 28 29 using namespace llvm; 30 31 StringRef ExpressionFormat::toString() const { 32 switch (Value) { 33 case Kind::NoFormat: 34 return StringRef("<none>"); 35 case Kind::Unsigned: 36 return StringRef("%u"); 37 case Kind::Signed: 38 return StringRef("%d"); 39 case Kind::HexUpper: 40 return StringRef("%X"); 41 case Kind::HexLower: 42 return StringRef("%x"); 43 } 44 llvm_unreachable("unknown expression format"); 45 } 46 47 Expected<std::string> ExpressionFormat::getWildcardRegex() const { 48 auto CreatePrecisionRegex = [this](StringRef S) { 49 return (S + Twine('{') + Twine(Precision) + "}").str(); 50 }; 51 52 switch (Value) { 53 case Kind::Unsigned: 54 if (Precision) 55 return CreatePrecisionRegex("([1-9][0-9]*)?[0-9]"); 56 return std::string("[0-9]+"); 57 case Kind::Signed: 58 if (Precision) 59 return CreatePrecisionRegex("-?([1-9][0-9]*)?[0-9]"); 60 return 
std::string("-?[0-9]+"); 61 case Kind::HexUpper: 62 if (Precision) 63 return CreatePrecisionRegex("([1-9A-F][0-9A-F]*)?[0-9A-F]"); 64 return std::string("[0-9A-F]+"); 65 case Kind::HexLower: 66 if (Precision) 67 return CreatePrecisionRegex("([1-9a-f][0-9a-f]*)?[0-9a-f]"); 68 return std::string("[0-9a-f]+"); 69 default: 70 return createStringError(std::errc::invalid_argument, 71 "trying to match value with invalid format"); 72 } 73 } 74 75 Expected<std::string> 76 ExpressionFormat::getMatchingString(ExpressionValue IntegerValue) const { 77 uint64_t AbsoluteValue; 78 StringRef SignPrefix = IntegerValue.isNegative() ? "-" : ""; 79 80 if (Value == Kind::Signed) { 81 Expected<int64_t> SignedValue = IntegerValue.getSignedValue(); 82 if (!SignedValue) 83 return SignedValue.takeError(); 84 if (*SignedValue < 0) 85 AbsoluteValue = cantFail(IntegerValue.getAbsolute().getUnsignedValue()); 86 else 87 AbsoluteValue = *SignedValue; 88 } else { 89 Expected<uint64_t> UnsignedValue = IntegerValue.getUnsignedValue(); 90 if (!UnsignedValue) 91 return UnsignedValue.takeError(); 92 AbsoluteValue = *UnsignedValue; 93 } 94 95 std::string AbsoluteValueStr; 96 switch (Value) { 97 case Kind::Unsigned: 98 case Kind::Signed: 99 AbsoluteValueStr = utostr(AbsoluteValue); 100 break; 101 case Kind::HexUpper: 102 case Kind::HexLower: 103 AbsoluteValueStr = utohexstr(AbsoluteValue, Value == Kind::HexLower); 104 break; 105 default: 106 return createStringError(std::errc::invalid_argument, 107 "trying to match value with invalid format"); 108 } 109 110 if (Precision > AbsoluteValueStr.size()) { 111 unsigned LeadingZeros = Precision - AbsoluteValueStr.size(); 112 return (Twine(SignPrefix) + std::string(LeadingZeros, '0') + 113 AbsoluteValueStr) 114 .str(); 115 } 116 117 return (Twine(SignPrefix) + AbsoluteValueStr).str(); 118 } 119 120 Expected<ExpressionValue> 121 ExpressionFormat::valueFromStringRepr(StringRef StrVal, 122 const SourceMgr &SM) const { 123 bool ValueIsSigned = Value == Kind::Signed; 
124 StringRef OverflowErrorStr = "unable to represent numeric value"; 125 if (ValueIsSigned) { 126 int64_t SignedValue; 127 128 if (StrVal.getAsInteger(10, SignedValue)) 129 return ErrorDiagnostic::get(SM, StrVal, OverflowErrorStr); 130 131 return ExpressionValue(SignedValue); 132 } 133 134 bool Hex = Value == Kind::HexUpper || Value == Kind::HexLower; 135 uint64_t UnsignedValue; 136 if (StrVal.getAsInteger(Hex ? 16 : 10, UnsignedValue)) 137 return ErrorDiagnostic::get(SM, StrVal, OverflowErrorStr); 138 139 return ExpressionValue(UnsignedValue); 140 } 141 142 static int64_t getAsSigned(uint64_t UnsignedValue) { 143 // Use memcpy to reinterpret the bitpattern in Value since casting to 144 // signed is implementation-defined if the unsigned value is too big to be 145 // represented in the signed type and using an union violates type aliasing 146 // rules. 147 int64_t SignedValue; 148 memcpy(&SignedValue, &UnsignedValue, sizeof(SignedValue)); 149 return SignedValue; 150 } 151 152 Expected<int64_t> ExpressionValue::getSignedValue() const { 153 if (Negative) 154 return getAsSigned(Value); 155 156 if (Value > (uint64_t)std::numeric_limits<int64_t>::max()) 157 return make_error<OverflowError>(); 158 159 // Value is in the representable range of int64_t so we can use cast. 160 return static_cast<int64_t>(Value); 161 } 162 163 Expected<uint64_t> ExpressionValue::getUnsignedValue() const { 164 if (Negative) 165 return make_error<OverflowError>(); 166 167 return Value; 168 } 169 170 ExpressionValue ExpressionValue::getAbsolute() const { 171 if (!Negative) 172 return *this; 173 174 int64_t SignedValue = getAsSigned(Value); 175 int64_t MaxInt64 = std::numeric_limits<int64_t>::max(); 176 // Absolute value can be represented as int64_t. 177 if (SignedValue >= -MaxInt64) 178 return ExpressionValue(-getAsSigned(Value)); 179 180 // -X == -(max int64_t + Rem), negate each component independently. 
181 SignedValue += MaxInt64; 182 uint64_t RemainingValueAbsolute = -SignedValue; 183 return ExpressionValue(MaxInt64 + RemainingValueAbsolute); 184 } 185 186 Expected<ExpressionValue> llvm::operator+(const ExpressionValue &LeftOperand, 187 const ExpressionValue &RightOperand) { 188 if (LeftOperand.isNegative() && RightOperand.isNegative()) { 189 int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); 190 int64_t RightValue = cantFail(RightOperand.getSignedValue()); 191 Optional<int64_t> Result = checkedAdd<int64_t>(LeftValue, RightValue); 192 if (!Result) 193 return make_error<OverflowError>(); 194 195 return ExpressionValue(*Result); 196 } 197 198 // (-A) + B == B - A. 199 if (LeftOperand.isNegative()) 200 return RightOperand - LeftOperand.getAbsolute(); 201 202 // A + (-B) == A - B. 203 if (RightOperand.isNegative()) 204 return LeftOperand - RightOperand.getAbsolute(); 205 206 // Both values are positive at this point. 207 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 208 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 209 Optional<uint64_t> Result = 210 checkedAddUnsigned<uint64_t>(LeftValue, RightValue); 211 if (!Result) 212 return make_error<OverflowError>(); 213 214 return ExpressionValue(*Result); 215 } 216 217 Expected<ExpressionValue> llvm::operator-(const ExpressionValue &LeftOperand, 218 const ExpressionValue &RightOperand) { 219 // Result will be negative and thus might underflow. 220 if (LeftOperand.isNegative() && !RightOperand.isNegative()) { 221 int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); 222 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 223 // Result <= -1 - (max int64_t) which overflows on 1- and 2-complement. 
224 if (RightValue > (uint64_t)std::numeric_limits<int64_t>::max()) 225 return make_error<OverflowError>(); 226 Optional<int64_t> Result = 227 checkedSub(LeftValue, static_cast<int64_t>(RightValue)); 228 if (!Result) 229 return make_error<OverflowError>(); 230 231 return ExpressionValue(*Result); 232 } 233 234 // (-A) - (-B) == B - A. 235 if (LeftOperand.isNegative()) 236 return RightOperand.getAbsolute() - LeftOperand.getAbsolute(); 237 238 // A - (-B) == A + B. 239 if (RightOperand.isNegative()) 240 return LeftOperand + RightOperand.getAbsolute(); 241 242 // Both values are positive at this point. 243 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 244 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 245 if (LeftValue >= RightValue) 246 return ExpressionValue(LeftValue - RightValue); 247 else { 248 uint64_t AbsoluteDifference = RightValue - LeftValue; 249 uint64_t MaxInt64 = std::numeric_limits<int64_t>::max(); 250 // Value might underflow. 251 if (AbsoluteDifference > MaxInt64) { 252 AbsoluteDifference -= MaxInt64; 253 int64_t Result = -MaxInt64; 254 int64_t MinInt64 = std::numeric_limits<int64_t>::min(); 255 // Underflow, tested by: 256 // abs(Result + (max int64_t)) > abs((min int64_t) + (max int64_t)) 257 if (AbsoluteDifference > static_cast<uint64_t>(-(MinInt64 - Result))) 258 return make_error<OverflowError>(); 259 Result -= static_cast<int64_t>(AbsoluteDifference); 260 return ExpressionValue(Result); 261 } 262 263 return ExpressionValue(-static_cast<int64_t>(AbsoluteDifference)); 264 } 265 } 266 267 Expected<ExpressionValue> llvm::operator*(const ExpressionValue &LeftOperand, 268 const ExpressionValue &RightOperand) { 269 // -A * -B == A * B 270 if (LeftOperand.isNegative() && RightOperand.isNegative()) 271 return LeftOperand.getAbsolute() * RightOperand.getAbsolute(); 272 273 // A * -B == -B * A 274 if (RightOperand.isNegative()) 275 return RightOperand * LeftOperand; 276 277 assert(!RightOperand.isNegative() && "Unexpected 
negative operand!"); 278 279 // Result will be negative and can underflow. 280 if (LeftOperand.isNegative()) { 281 auto Result = LeftOperand.getAbsolute() * RightOperand.getAbsolute(); 282 if (!Result) 283 return Result; 284 285 return ExpressionValue(0) - *Result; 286 } 287 288 // Result will be positive and can overflow. 289 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 290 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 291 Optional<uint64_t> Result = 292 checkedMulUnsigned<uint64_t>(LeftValue, RightValue); 293 if (!Result) 294 return make_error<OverflowError>(); 295 296 return ExpressionValue(*Result); 297 } 298 299 Expected<ExpressionValue> llvm::operator/(const ExpressionValue &LeftOperand, 300 const ExpressionValue &RightOperand) { 301 // -A / -B == A / B 302 if (LeftOperand.isNegative() && RightOperand.isNegative()) 303 return LeftOperand.getAbsolute() / RightOperand.getAbsolute(); 304 305 // Check for divide by zero. 306 if (RightOperand == ExpressionValue(0)) 307 return make_error<OverflowError>(); 308 309 // Result will be negative and can underflow. 
310 if (LeftOperand.isNegative() || RightOperand.isNegative()) 311 return ExpressionValue(0) - 312 cantFail(LeftOperand.getAbsolute() / RightOperand.getAbsolute()); 313 314 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 315 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 316 return ExpressionValue(LeftValue / RightValue); 317 } 318 319 Expected<ExpressionValue> llvm::max(const ExpressionValue &LeftOperand, 320 const ExpressionValue &RightOperand) { 321 if (LeftOperand.isNegative() && RightOperand.isNegative()) { 322 int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); 323 int64_t RightValue = cantFail(RightOperand.getSignedValue()); 324 return ExpressionValue(std::max(LeftValue, RightValue)); 325 } 326 327 if (!LeftOperand.isNegative() && !RightOperand.isNegative()) { 328 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 329 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 330 return ExpressionValue(std::max(LeftValue, RightValue)); 331 } 332 333 if (LeftOperand.isNegative()) 334 return RightOperand; 335 336 return LeftOperand; 337 } 338 339 Expected<ExpressionValue> llvm::min(const ExpressionValue &LeftOperand, 340 const ExpressionValue &RightOperand) { 341 if (cantFail(max(LeftOperand, RightOperand)) == LeftOperand) 342 return RightOperand; 343 344 return LeftOperand; 345 } 346 347 Expected<ExpressionValue> NumericVariableUse::eval() const { 348 Optional<ExpressionValue> Value = Variable->getValue(); 349 if (Value) 350 return *Value; 351 352 return make_error<UndefVarError>(getExpressionStr()); 353 } 354 355 Expected<ExpressionValue> BinaryOperation::eval() const { 356 Expected<ExpressionValue> LeftOp = LeftOperand->eval(); 357 Expected<ExpressionValue> RightOp = RightOperand->eval(); 358 359 // Bubble up any error (e.g. undefined variables) in the recursive 360 // evaluation. 
361 if (!LeftOp || !RightOp) { 362 Error Err = Error::success(); 363 if (!LeftOp) 364 Err = joinErrors(std::move(Err), LeftOp.takeError()); 365 if (!RightOp) 366 Err = joinErrors(std::move(Err), RightOp.takeError()); 367 return std::move(Err); 368 } 369 370 return EvalBinop(*LeftOp, *RightOp); 371 } 372 373 Expected<ExpressionFormat> 374 BinaryOperation::getImplicitFormat(const SourceMgr &SM) const { 375 Expected<ExpressionFormat> LeftFormat = LeftOperand->getImplicitFormat(SM); 376 Expected<ExpressionFormat> RightFormat = RightOperand->getImplicitFormat(SM); 377 if (!LeftFormat || !RightFormat) { 378 Error Err = Error::success(); 379 if (!LeftFormat) 380 Err = joinErrors(std::move(Err), LeftFormat.takeError()); 381 if (!RightFormat) 382 Err = joinErrors(std::move(Err), RightFormat.takeError()); 383 return std::move(Err); 384 } 385 386 if (*LeftFormat != ExpressionFormat::Kind::NoFormat && 387 *RightFormat != ExpressionFormat::Kind::NoFormat && 388 *LeftFormat != *RightFormat) 389 return ErrorDiagnostic::get( 390 SM, getExpressionStr(), 391 "implicit format conflict between '" + LeftOperand->getExpressionStr() + 392 "' (" + LeftFormat->toString() + ") and '" + 393 RightOperand->getExpressionStr() + "' (" + RightFormat->toString() + 394 "), need an explicit format specifier"); 395 396 return *LeftFormat != ExpressionFormat::Kind::NoFormat ? *LeftFormat 397 : *RightFormat; 398 } 399 400 Expected<std::string> NumericSubstitution::getResult() const { 401 assert(ExpressionPointer->getAST() != nullptr && 402 "Substituting empty expression"); 403 Expected<ExpressionValue> EvaluatedValue = 404 ExpressionPointer->getAST()->eval(); 405 if (!EvaluatedValue) 406 return EvaluatedValue.takeError(); 407 ExpressionFormat Format = ExpressionPointer->getFormat(); 408 return Format.getMatchingString(*EvaluatedValue); 409 } 410 411 Expected<std::string> StringSubstitution::getResult() const { 412 // Look up the value and escape it so that we can put it into the regex. 
413 Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr); 414 if (!VarVal) 415 return VarVal.takeError(); 416 return Regex::escape(*VarVal); 417 } 418 419 bool Pattern::isValidVarNameStart(char C) { return C == '_' || isAlpha(C); } 420 421 Expected<Pattern::VariableProperties> 422 Pattern::parseVariable(StringRef &Str, const SourceMgr &SM) { 423 if (Str.empty()) 424 return ErrorDiagnostic::get(SM, Str, "empty variable name"); 425 426 size_t I = 0; 427 bool IsPseudo = Str[0] == '@'; 428 429 // Global vars start with '$'. 430 if (Str[0] == '$' || IsPseudo) 431 ++I; 432 433 if (!isValidVarNameStart(Str[I++])) 434 return ErrorDiagnostic::get(SM, Str, "invalid variable name"); 435 436 for (size_t E = Str.size(); I != E; ++I) 437 // Variable names are composed of alphanumeric characters and underscores. 438 if (Str[I] != '_' && !isAlnum(Str[I])) 439 break; 440 441 StringRef Name = Str.take_front(I); 442 Str = Str.substr(I); 443 return VariableProperties {Name, IsPseudo}; 444 } 445 446 // StringRef holding all characters considered as horizontal whitespaces by 447 // FileCheck input canonicalization. 448 constexpr StringLiteral SpaceChars = " \t"; 449 450 // Parsing helper function that strips the first character in S and returns it. 
static char popFront(StringRef &S) {
  // No emptiness check is done here; callers in this file only call this on
  // strings they have already checked to be non-empty.
  char C = S.front();
  S = S.drop_front();
  return C;
}

// Definitions of the static ID members of the error classes used in this
// file.
char OverflowError::ID = 0;
char UndefVarError::ID = 0;
char ErrorDiagnostic::ID = 0;
char NotFoundError::ID = 0;

// Parses a numeric variable definition from Expr, which must contain only the
// variable name optionally followed by horizontal whitespace.
//
// Returns the variable already registered under that name in
// Context->GlobalNumericVariableTable if there is one (its implicit format
// must then equal ImplicitFormat), otherwise a variable newly created through
// Context->makeNumericVariable. Returns a diagnostic if the name is invalid,
// is a pseudo variable, collides with an existing string variable, or is
// followed by unexpected characters.
Expected<NumericVariable *> Pattern::parseNumericVariableDefinition(
    StringRef &Expr, FileCheckPatternContext *Context,
    Optional<size_t> LineNumber, ExpressionFormat ImplicitFormat,
    const SourceMgr &SM) {
  Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
  if (!ParseVarResult)
    return ParseVarResult.takeError();
  StringRef Name = ParseVarResult->Name;

  if (ParseVarResult->IsPseudo)
    return ErrorDiagnostic::get(
        SM, Name, "definition of pseudo numeric variable unsupported");

  // Detect collisions between string and numeric variables when the latter
  // is created later than the former.
  if (Context->DefinedVariableTable.find(Name) !=
      Context->DefinedVariableTable.end())
    return ErrorDiagnostic::get(
        SM, Name, "string variable with name '" + Name + "' already exists");

  Expr = Expr.ltrim(SpaceChars);
  if (!Expr.empty())
    return ErrorDiagnostic::get(
        SM, Expr, "unexpected characters after numeric variable name");

  NumericVariable *DefinedNumericVariable;
  auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
  if (VarTableIter != Context->GlobalNumericVariableTable.end()) {
    DefinedNumericVariable = VarTableIter->second;
    // Note: Expr is empty at this point, so it only provides the location of
    // the diagnostic (just past the variable name).
    if (DefinedNumericVariable->getImplicitFormat() != ImplicitFormat)
      return ErrorDiagnostic::get(
          SM, Expr, "format different from previous variable definition");
  } else
    DefinedNumericVariable =
        Context->makeNumericVariable(Name, ImplicitFormat, LineNumber);

  return DefinedNumericVariable;
}

// Creates the AST node for a use of the numeric variable called Name.
//
// The only pseudo variable accepted is "@LINE". Returns a diagnostic for any
// other pseudo variable, or if the variable was defined on the same line
// (LineNumber) as this use.
Expected<std::unique_ptr<NumericVariableUse>> Pattern::parseNumericVariableUse(
    StringRef Name, bool IsPseudo, Optional<size_t> LineNumber,
    FileCheckPatternContext *Context, const SourceMgr &SM) {
  if (IsPseudo && !Name.equals("@LINE"))
    return ErrorDiagnostic::get(
        SM, Name, "invalid pseudo numeric variable '" + Name + "'");

  // Numeric variable definitions and uses are parsed in the order in which
  // they appear in the CHECK patterns. For each definition, the pointer to the
  // class instance of the corresponding numeric variable definition is stored
  // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer
  // we get below is null, it means no such variable was defined before. When
  // that happens, we create a dummy variable so that parsing can continue. All
  // uses of undefined variables, whether string or numeric, are then diagnosed
  // in printSubstitutions() after failing to match.
  auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
  NumericVariable *NumericVariable;
  if (VarTableIter != Context->GlobalNumericVariableTable.end())
    NumericVariable = VarTableIter->second;
  else {
    NumericVariable = Context->makeNumericVariable(
        Name, ExpressionFormat(ExpressionFormat::Kind::Unsigned));
    Context->GlobalNumericVariableTable[Name] = NumericVariable;
  }

  // A variable cannot be used in the directive that defines it.
  Optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber();
  if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
    return ErrorDiagnostic::get(
        SM, Name,
        "numeric variable '" + Name +
            "' defined earlier in the same CHECK directive");

  return std::make_unique<NumericVariableUse>(Name, NumericVariable);
}

// Parses a single operand at the start of Expr and strips it from Expr.
//
// Depending on AO the operand may be a parenthesized expression, a function
// call, a numeric variable use or an integer literal. MaybeInvalidConstraint
// only affects the wording of the final diagnostic: it is set when the text
// that failed to parse could also have been a misspelled matching constraint.
Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand(
    StringRef &Expr, AllowedOperand AO, bool MaybeInvalidConstraint,
    Optional<size_t> LineNumber, FileCheckPatternContext *Context,
    const SourceMgr &SM) {
  if (Expr.startswith("(")) {
    if (AO != AllowedOperand::Any)
      return ErrorDiagnostic::get(
          SM, Expr, "parenthesized expression not permitted here");
    return parseParenExpr(Expr, LineNumber, Context, SM);
  }

  if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
    // Try to parse as a numeric variable use.
    Expected<Pattern::VariableProperties> ParseVarResult =
        parseVariable(Expr, SM);
    if (ParseVarResult) {
      // Try to parse a function call.
      if (Expr.ltrim(SpaceChars).startswith("(")) {
        if (AO != AllowedOperand::Any)
          return ErrorDiagnostic::get(SM, ParseVarResult->Name,
                                      "unexpected function call");

        return parseCallExpr(Expr, ParseVarResult->Name, LineNumber, Context,
                             SM);
      }

      return parseNumericVariableUse(ParseVarResult->Name,
                                     ParseVarResult->IsPseudo, LineNumber,
                                     Context, SM);
    }

    if (AO == AllowedOperand::LineVar)
      return ParseVarResult.takeError();
    // Ignore the error and retry parsing as a literal.
    consumeError(ParseVarResult.takeError());
  }

  // Otherwise, parse it as a literal.
  int64_t SignedLiteralValue;
  uint64_t UnsignedLiteralValue;
  StringRef SaveExpr = Expr;
  // Accept both signed and unsigned literals: try to parse an unsigned
  // literal first (base 10 only for legacy literals, auto-detected radix
  // otherwise) and fall back to a signed literal when any operand is allowed.
  if (!Expr.consumeInteger((AO == AllowedOperand::LegacyLiteral) ? 10 : 0,
                           UnsignedLiteralValue))
    return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
                                               UnsignedLiteralValue);
  Expr = SaveExpr;
  if (AO == AllowedOperand::Any && !Expr.consumeInteger(0, SignedLiteralValue))
    return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
                                               SignedLiteralValue);

  return ErrorDiagnostic::get(
      SM, Expr,
      Twine("invalid ") +
          (MaybeInvalidConstraint ? "matching constraint or " : "") +
          "operand format");
}

// Parses a parenthesized expression "( <operand> { <op> <operand> } )" at the
// start of Expr and strips it, closing parenthesis included, from Expr.
// Returns a diagnostic if the expression is empty, malformed or unterminated.
Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseParenExpr(StringRef &Expr, Optional<size_t> LineNumber,
                        FileCheckPatternContext *Context, const SourceMgr &SM) {
  Expr = Expr.ltrim(SpaceChars);
  assert(Expr.startswith("("));

  // Skip the opening parenthesis and any whitespace following it.
  Expr.consume_front("(");
  Expr = Expr.ltrim(SpaceChars);
  if (Expr.empty())
    return ErrorDiagnostic::get(SM, Expr, "missing operand in expression");

  // Note: parseNumericOperand handles nested opening parentheses.
  Expected<std::unique_ptr<ExpressionAST>> SubExprResult = parseNumericOperand(
      Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
      Context, SM);
  Expr = Expr.ltrim(SpaceChars);
  // Fold any following "<op> <operand>" pairs into the sub-expression until
  // the closing parenthesis, the end of input or an error is reached.
  while (SubExprResult && !Expr.empty() && !Expr.startswith(")")) {
    StringRef OrigExpr = Expr;
    SubExprResult = parseBinop(OrigExpr, Expr, std::move(*SubExprResult), false,
                               LineNumber, Context, SM);
    Expr = Expr.ltrim(SpaceChars);
  }
  if (!SubExprResult)
    return SubExprResult;

  if (!Expr.consume_front(")")) {
    return ErrorDiagnostic::get(SM, Expr,
                                "missing ')' at end of nested expression");
  }
  return SubExprResult;
}

// Parses "<op> <operand>" at the start of RemainingExpr and combines it with
// the already parsed LeftOp into a BinaryOperation. Only '+' and '-' are
// accepted as operators here.
//
// Expr is the text of the whole expression parsed so far, starting at the
// left operand; the part of it consumed up to the end of the right operand
// becomes the expression string of the new node. If RemainingExpr holds
// nothing but whitespace, LeftOp is returned unchanged.
Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseBinop(StringRef Expr, StringRef &RemainingExpr,
                    std::unique_ptr<ExpressionAST> LeftOp,
                    bool IsLegacyLineExpr, Optional<size_t> LineNumber,
                    FileCheckPatternContext *Context, const SourceMgr &SM) {
  RemainingExpr = RemainingExpr.ltrim(SpaceChars);
  if (RemainingExpr.empty())
    return std::move(LeftOp);

  // Check if this is a supported operation and select a function to perform
  // it.
  SMLoc OpLoc = SMLoc::getFromPointer(RemainingExpr.data());
  char Operator = popFront(RemainingExpr);
  binop_eval_t EvalBinop;
  switch (Operator) {
  case '+':
    EvalBinop = operator+;
    break;
  case '-':
    EvalBinop = operator-;
    break;
  default:
    return ErrorDiagnostic::get(
        SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'");
  }

  // Parse right operand.
  RemainingExpr = RemainingExpr.ltrim(SpaceChars);
  if (RemainingExpr.empty())
    return ErrorDiagnostic::get(SM, RemainingExpr,
                                "missing operand in expression");
  // The second operand in a legacy @LINE expression is always a literal.
  AllowedOperand AO =
      IsLegacyLineExpr ? AllowedOperand::LegacyLiteral : AllowedOperand::Any;
  Expected<std::unique_ptr<ExpressionAST>> RightOpResult =
      parseNumericOperand(RemainingExpr, AO, /*MaybeInvalidConstraint=*/false,
                          LineNumber, Context, SM);
  if (!RightOpResult)
    return RightOpResult;

  // Keep only the text consumed so far as this operation's expression string.
  Expr = Expr.drop_back(RemainingExpr.size());
  return std::make_unique<BinaryOperation>(Expr, EvalBinop, std::move(LeftOp),
                                           std::move(*RightOpResult));
}

// Parses the argument list of a call to the function FuncName. Expr must
// start (after optional whitespace) at the opening parenthesis and is
// stripped up to and including the closing one.
//
// The known functions are add, div, max, min, mul and sub, all of which take
// exactly two arguments. Returns a diagnostic for an unknown function, a
// missing argument, an unterminated call or a wrong number of arguments.
Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseCallExpr(StringRef &Expr, StringRef FuncName,
                       Optional<size_t> LineNumber,
                       FileCheckPatternContext *Context, const SourceMgr &SM) {
  Expr = Expr.ltrim(SpaceChars);
  assert(Expr.startswith("("));

  auto OptFunc = StringSwitch<Optional<binop_eval_t>>(FuncName)
                     .Case("add", operator+)
                     .Case("div", operator/)
                     .Case("max", max)
                     .Case("min", min)
                     .Case("mul", operator*)
                     .Case("sub", operator-)
                     .Default(None);

  if (!OptFunc)
    return ErrorDiagnostic::get(
        SM, FuncName, Twine("call to undefined function '") + FuncName + "'");

  Expr.consume_front("(");
  Expr = Expr.ltrim(SpaceChars);

  // Parse call arguments, which are comma separated.
  SmallVector<std::unique_ptr<ExpressionAST>, 4> Args;
  while (!Expr.empty() && !Expr.startswith(")")) {
    if (Expr.startswith(","))
      return ErrorDiagnostic::get(SM, Expr, "missing argument");

    // Parse the argument, which is an arbitrary expression.
    StringRef OuterBinOpExpr = Expr;
    Expected<std::unique_ptr<ExpressionAST>> Arg = parseNumericOperand(
        Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
        Context, SM);
    while (Arg && !Expr.empty()) {
      Expr = Expr.ltrim(SpaceChars);
      // Have we reached an argument terminator?
      if (Expr.startswith(",") || Expr.startswith(")"))
        break;

      // Arg = Arg <op> <expr>
      Arg = parseBinop(OuterBinOpExpr, Expr, std::move(*Arg), false, LineNumber,
                       Context, SM);
    }

    // Prefer an expression error over a generic invalid argument message.
    if (!Arg)
      return Arg.takeError();
    Args.push_back(std::move(*Arg));

    // Have we parsed all available arguments?
    Expr = Expr.ltrim(SpaceChars);
    if (!Expr.consume_front(","))
      break;

    // A comma must be followed by another argument.
    Expr = Expr.ltrim(SpaceChars);
    if (Expr.startswith(")"))
      return ErrorDiagnostic::get(SM, Expr, "missing argument");
  }

  if (!Expr.consume_front(")"))
    return ErrorDiagnostic::get(SM, Expr,
                                "missing ')' at end of call expression");

  const unsigned NumArgs = Args.size();
  // NOTE(review): Expr now holds the text *after* the call, so the
  // BinaryOperation below is given the remaining input rather than the text
  // of the call as its expression string (compare with parseBinop, which
  // passes the consumed text) - confirm whether this is intended.
  if (NumArgs == 2)
    return std::make_unique<BinaryOperation>(Expr, *OptFunc, std::move(Args[0]),
                                             std::move(Args[1]));

  // TODO: Support more than binop_eval_t.
  return ErrorDiagnostic::get(SM, FuncName,
                              Twine("function '") + FuncName +
                                  Twine("' takes 2 arguments but ") +
                                  Twine(NumArgs) + " given");
}

// Parses the content of a numeric substitution block, i.e. the text between
// "[[#" and "]]", which has the general shape
//   [<format> ","] [<variable definition> ":"] ["=="] [<expression>]
//
// On success returns the parsed Expression (whose AST is null when the block
// contains no expression) and sets DefinedNumericVariable to the variable
// being defined, or to None if the block defines no variable. IsLegacyLineExpr
// restricts the expression to the legacy "@LINE [+|- <literal>]" form.
Expected<std::unique_ptr<Expression>> Pattern::parseNumericSubstitutionBlock(
    StringRef Expr, Optional<NumericVariable *> &DefinedNumericVariable,
    bool IsLegacyLineExpr, Optional<size_t> LineNumber,
    FileCheckPatternContext *Context, const SourceMgr &SM) {
  std::unique_ptr<ExpressionAST> ExpressionASTPointer = nullptr;
  StringRef DefExpr = StringRef();
  DefinedNumericVariable = None;
  ExpressionFormat ExplicitFormat = ExpressionFormat();
  unsigned Precision = 0;

  // Parse format specifier (NOTE: ',' is also an argument separator). A comma
  // only ends a format specifier if it appears before the first '('.
  size_t FormatSpecEnd = Expr.find(',');
  size_t FunctionStart = Expr.find('(');
  if (FormatSpecEnd != StringRef::npos && FormatSpecEnd < FunctionStart) {
    StringRef FormatExpr = Expr.take_front(FormatSpecEnd);
    Expr = Expr.drop_front(FormatSpecEnd + 1);
    FormatExpr = FormatExpr.trim(SpaceChars);
    if (!FormatExpr.consume_front("%"))
      return ErrorDiagnostic::get(
          SM, FormatExpr,
          "invalid matching format specification in expression");

    // Parse precision.
    if (FormatExpr.consume_front(".")) {
      if (FormatExpr.consumeInteger(10, Precision))
        return ErrorDiagnostic::get(SM, FormatExpr,
                                    "invalid precision in format specifier");
    }

    if (!FormatExpr.empty()) {
      // Check for unknown matching format specifier and set matching format in
      // class instance representing this expression.
      SMLoc FmtLoc = SMLoc::getFromPointer(FormatExpr.data());
      switch (popFront(FormatExpr)) {
      case 'u':
        ExplicitFormat =
            ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision);
        break;
      case 'd':
        ExplicitFormat =
            ExpressionFormat(ExpressionFormat::Kind::Signed, Precision);
        break;
      case 'x':
        ExplicitFormat =
            ExpressionFormat(ExpressionFormat::Kind::HexLower, Precision);
        break;
      case 'X':
        ExplicitFormat =
            ExpressionFormat(ExpressionFormat::Kind::HexUpper, Precision);
        break;
      default:
        return ErrorDiagnostic::get(SM, FmtLoc,
                                    "invalid format specifier in expression");
      }
    }

    // Nothing may follow the conversion specifier.
    FormatExpr = FormatExpr.ltrim(SpaceChars);
    if (!FormatExpr.empty())
      return ErrorDiagnostic::get(
          SM, FormatExpr,
          "invalid matching format specification in expression");
  }

  // Save variable definition expression if any.
  size_t DefEnd = Expr.find(':');
  if (DefEnd != StringRef::npos) {
    DefExpr = Expr.substr(0, DefEnd);
    Expr = Expr.substr(DefEnd + 1);
  }

  // Parse matching constraint.
  Expr = Expr.ltrim(SpaceChars);
  bool HasParsedValidConstraint = false;
  if (Expr.consume_front("=="))
    HasParsedValidConstraint = true;

  // Parse the expression itself.
  Expr = Expr.ltrim(SpaceChars);
  if (Expr.empty()) {
    if (HasParsedValidConstraint)
      return ErrorDiagnostic::get(
          SM, Expr, "empty numeric expression should not have a constraint");
  } else {
    Expr = Expr.rtrim(SpaceChars);
    StringRef OuterBinOpExpr = Expr;
    // The first operand in a legacy @LINE expression is always the @LINE
    // pseudo variable.
    AllowedOperand AO =
        IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
    Expected<std::unique_ptr<ExpressionAST>> ParseResult = parseNumericOperand(
        Expr, AO, !HasParsedValidConstraint, LineNumber, Context, SM);
    while (ParseResult && !Expr.empty()) {
      ParseResult = parseBinop(OuterBinOpExpr, Expr, std::move(*ParseResult),
                               IsLegacyLineExpr, LineNumber, Context, SM);
      // Legacy @LINE expressions only allow 2 operands.
      if (ParseResult && IsLegacyLineExpr && !Expr.empty())
        return ErrorDiagnostic::get(
            SM, Expr,
            "unexpected characters at end of expression '" + Expr + "'");
    }
    if (!ParseResult)
      return ParseResult.takeError();
    ExpressionASTPointer = std::move(*ParseResult);
  }

  // Select format of the expression, i.e. (i) its explicit format, if any,
  // otherwise (ii) its implicit format, if any, otherwise (iii) the default
  // format (unsigned). Error out in case of conflicting implicit format
  // without explicit format.
  ExpressionFormat Format;
  if (ExplicitFormat)
    Format = ExplicitFormat;
  else if (ExpressionASTPointer) {
    Expected<ExpressionFormat> ImplicitFormat =
        ExpressionASTPointer->getImplicitFormat(SM);
    if (!ImplicitFormat)
      return ImplicitFormat.takeError();
    Format = *ImplicitFormat;
  }
  if (!Format)
    Format = ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision);

  std::unique_ptr<Expression> ExpressionPointer =
      std::make_unique<Expression>(std::move(ExpressionASTPointer), Format);

  // Parse the numeric variable definition. This is done last because the
  // variable takes the format selected above for the expression.
  if (DefEnd != StringRef::npos) {
    DefExpr = DefExpr.ltrim(SpaceChars);
    Expected<NumericVariable *> ParseResult = parseNumericVariableDefinition(
        DefExpr, Context, LineNumber, ExpressionPointer->getFormat(), SM);

    if (!ParseResult)
      return ParseResult.takeError();
    DefinedNumericVariable = *ParseResult;
  }

  return std::move(ExpressionPointer);
}

bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
                           SourceMgr &SM, const FileCheckRequest &Req) {
  bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
  IgnoreCase = Req.IgnoreCase;

  PatternLoc = SMLoc::getFromPointer(PatternStr.data());

  if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
    // Ignore trailing whitespace.
    while (!PatternStr.empty() &&
           (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
      PatternStr = PatternStr.substr(0, PatternStr.size() - 1);

  // Check that there is something on the line.
  if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
    SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
                    "found empty check string with prefix '" + Prefix + ":'");
    return true;
  }

  if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
    SM.PrintMessage(
        PatternLoc, SourceMgr::DK_Error,
        "found non-empty check string for empty check with prefix '" + Prefix +
            ":'");
    return true;
  }

  if (CheckTy == Check::CheckEmpty) {
    RegExStr = "(\n$)";
    return false;
  }

  // If literal check, set fixed string.
  if (CheckTy.isLiteralMatch()) {
    FixedStr = PatternStr;
    return false;
  }

  // Check to see if this is a fixed string, or if it has regex pieces.
927 if (!MatchFullLinesHere && 928 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 929 PatternStr.find("[[") == StringRef::npos))) { 930 FixedStr = PatternStr; 931 return false; 932 } 933 934 if (MatchFullLinesHere) { 935 RegExStr += '^'; 936 if (!Req.NoCanonicalizeWhiteSpace) 937 RegExStr += " *"; 938 } 939 940 // Paren value #0 is for the fully matched string. Any new parenthesized 941 // values add from there. 942 unsigned CurParen = 1; 943 944 // Otherwise, there is at least one regex piece. Build up the regex pattern 945 // by escaping scary characters in fixed strings, building up one big regex. 946 while (!PatternStr.empty()) { 947 // RegEx matches. 948 if (PatternStr.startswith("{{")) { 949 // This is the start of a regex match. Scan for the }}. 950 size_t End = PatternStr.find("}}"); 951 if (End == StringRef::npos) { 952 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 953 SourceMgr::DK_Error, 954 "found start of regex string with no end '}}'"); 955 return true; 956 } 957 958 // Enclose {{}} patterns in parens just like [[]] even though we're not 959 // capturing the result for any purpose. This is required in case the 960 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 961 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 962 RegExStr += '('; 963 ++CurParen; 964 965 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 966 return true; 967 RegExStr += ')'; 968 969 PatternStr = PatternStr.substr(End + 2); 970 continue; 971 } 972 973 // String and numeric substitution blocks. Pattern substitution blocks come 974 // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some 975 // other regex) and assigns it to the string variable 'foo'. The latter 976 // substitutes foo's value. Numeric substitution blocks recognize the same 977 // form as string ones, but start with a '#' sign after the double 978 // brackets. 
They also accept a combined form which sets a numeric variable 979 // to the evaluation of an expression. Both string and numeric variable 980 // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be 981 // valid, as this helps catch some common errors. 982 if (PatternStr.startswith("[[")) { 983 StringRef UnparsedPatternStr = PatternStr.substr(2); 984 // Find the closing bracket pair ending the match. End is going to be an 985 // offset relative to the beginning of the match string. 986 size_t End = FindRegexVarEnd(UnparsedPatternStr, SM); 987 StringRef MatchStr = UnparsedPatternStr.substr(0, End); 988 bool IsNumBlock = MatchStr.consume_front("#"); 989 990 if (End == StringRef::npos) { 991 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 992 SourceMgr::DK_Error, 993 "Invalid substitution block, no ]] found"); 994 return true; 995 } 996 // Strip the substitution block we are parsing. End points to the start 997 // of the "]]" closing the expression so account for it in computing the 998 // index of the first unparsed character. 999 PatternStr = UnparsedPatternStr.substr(End + 2); 1000 1001 bool IsDefinition = false; 1002 bool SubstNeeded = false; 1003 // Whether the substitution block is a legacy use of @LINE with string 1004 // substitution block syntax. 1005 bool IsLegacyLineExpr = false; 1006 StringRef DefName; 1007 StringRef SubstStr; 1008 std::string MatchRegexp; 1009 size_t SubstInsertIdx = RegExStr.size(); 1010 1011 // Parse string variable or legacy @LINE expression. 1012 if (!IsNumBlock) { 1013 size_t VarEndIdx = MatchStr.find(':'); 1014 size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t"); 1015 if (SpacePos != StringRef::npos) { 1016 SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos), 1017 SourceMgr::DK_Error, "unexpected whitespace"); 1018 return true; 1019 } 1020 1021 // Get the name (e.g. "foo") and verify it is well formed. 
StringRef OrigMatchStr = MatchStr;
        Expected<Pattern::VariableProperties> ParseVarResult =
            parseVariable(MatchStr, SM);
        if (!ParseVarResult) {
          logAllUnhandledErrors(ParseVarResult.takeError(), errs());
          return true;
        }
        StringRef Name = ParseVarResult->Name;
        bool IsPseudo = ParseVarResult->IsPseudo;

        // A ':' in the block means it defines a variable; otherwise it is a
        // use that needs a substitution.
        IsDefinition = (VarEndIdx != StringRef::npos);
        SubstNeeded = !IsDefinition;
        if (IsDefinition) {
          // Pseudo variables cannot be defined, and the name must be
          // immediately followed by the ':'.
          if ((IsPseudo || !MatchStr.consume_front(":"))) {
            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                            SourceMgr::DK_Error,
                            "invalid name in string variable definition");
            return true;
          }

          // Detect collisions between string and numeric variables when the
          // former is created later than the latter.
          if (Context->GlobalNumericVariableTable.find(Name) !=
              Context->GlobalNumericVariableTable.end()) {
            SM.PrintMessage(
                SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
                "numeric variable with name '" + Name + "' already exists");
            return true;
          }
          DefName = Name;
          // What remains after the ':' is the regex the variable captures.
          MatchRegexp = MatchStr.str();
        } else {
          if (IsPseudo) {
            // Use of a pseudo variable in string-block syntax: restore the
            // whole block text and hand it to the numeric block parser below
            // as a legacy @LINE expression.
            MatchStr = OrigMatchStr;
            IsLegacyLineExpr = IsNumBlock = true;
          } else
            SubstStr = Name;
        }
      }

      // Parse numeric substitution block.
      std::unique_ptr<Expression> ExpressionPointer;
      Optional<NumericVariable *> DefinedNumericVariable;
      if (IsNumBlock) {
        Expected<std::unique_ptr<Expression>> ParseResult =
            parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable,
                                          IsLegacyLineExpr, LineNumber, Context,
                                          SM);
        if (!ParseResult) {
          logAllUnhandledErrors(ParseResult.takeError(), errs());
          return true;
        }
        ExpressionPointer = std::move(*ParseResult);
        // Only blocks that actually contain an expression need substituting.
        SubstNeeded = ExpressionPointer->getAST() != nullptr;
        if (DefinedNumericVariable) {
          IsDefinition = true;
          DefName = (*DefinedNumericVariable)->getName();
        }
        if (SubstNeeded)
          SubstStr = MatchStr;
        else {
          // No expression: match any value that is valid for the block's
          // format.
          ExpressionFormat Format = ExpressionPointer->getFormat();
          MatchRegexp = cantFail(Format.getWildcardRegex());
        }
      }

      // Handle variable definition: [[<def>:(...)]] and [[#(...)<def>:(...)]].
      if (IsDefinition) {
        RegExStr += '(';
        // Keep the insertion point after the paren that was just appended.
        ++SubstInsertIdx;

        if (IsNumBlock) {
          NumericVariableMatch NumericVariableDefinition = {
              *DefinedNumericVariable, CurParen};
          NumericVariableDefs[DefName] = NumericVariableDefinition;
          // This store is done here rather than in match() to allow
          // parseNumericVariableUse() to get the pointer to the class instance
          // of the right variable definition corresponding to a given numeric
          // variable use.
          Context->GlobalNumericVariableTable[DefName] =
              *DefinedNumericVariable;
        } else {
          VariableDefs[DefName] = CurParen;
          // Mark string variable as defined to detect collisions between
          // string and numeric variables in parseNumericVariableUse() and
          // defineCmdlineVariables() when the latter is created later than the
          // former. We cannot reuse GlobalVariableTable for this by populating
          // it with an empty string since we would then lose the ability to
          // detect the use of an undefined variable in match().
          Context->DefinedVariableTable[DefName] = true;
        }

        ++CurParen;
      }

      // Append the captured/wildcard regex, if any; AddRegExToRegEx validates
      // it and reports an error itself.
      if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM))
        return true;

      if (IsDefinition)
        RegExStr += ')';

      // Handle substitutions: [[foo]] and [[#<foo expr>]].
      if (SubstNeeded) {
        // Handle substitution of string variables that were defined earlier on
        // the same line by emitting a backreference. Expressions do not
        // support substituting a numeric variable defined on the same line.
        if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) {
          unsigned CaptureParenGroup = VariableDefs[SubstStr];
          // Only capture groups 1 to 9 can be referenced this way.
          if (CaptureParenGroup < 1 || CaptureParenGroup > 9) {
            SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()),
                            SourceMgr::DK_Error,
                            "Can't back-reference more than 9 variables");
            return true;
          }
          AddBackrefToRegEx(CaptureParenGroup);
        } else {
          // Handle substitution of string variables ([[<var>]]) defined in
          // previous CHECK patterns, and substitution of expressions.
          Substitution *Substitution =
              IsNumBlock
                  ? Context->makeNumericSubstitution(
                        SubstStr, std::move(ExpressionPointer), SubstInsertIdx)
                  : Context->makeStringSubstitution(SubstStr, SubstInsertIdx);
          Substitutions.push_back(Substitution);
        }
      }
    }

    // Handle fixed string matches.
    // Find the end, which is the start of the next regex.
1152 size_t FixedMatchEnd = PatternStr.find("{{"); 1153 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 1154 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 1155 PatternStr = PatternStr.substr(FixedMatchEnd); 1156 } 1157 1158 if (MatchFullLinesHere) { 1159 if (!Req.NoCanonicalizeWhiteSpace) 1160 RegExStr += " *"; 1161 RegExStr += '$'; 1162 } 1163 1164 return false; 1165 } 1166 1167 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 1168 Regex R(RS); 1169 std::string Error; 1170 if (!R.isValid(Error)) { 1171 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 1172 "invalid regex: " + Error); 1173 return true; 1174 } 1175 1176 RegExStr += RS.str(); 1177 CurParen += R.getNumMatches(); 1178 return false; 1179 } 1180 1181 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 1182 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 1183 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 1184 RegExStr += Backref; 1185 } 1186 1187 Expected<size_t> Pattern::match(StringRef Buffer, size_t &MatchLen, 1188 const SourceMgr &SM) const { 1189 // If this is the EOF pattern, match it immediately. 1190 if (CheckTy == Check::CheckEOF) { 1191 MatchLen = 0; 1192 return Buffer.size(); 1193 } 1194 1195 // If this is a fixed string pattern, just match it now. 1196 if (!FixedStr.empty()) { 1197 MatchLen = FixedStr.size(); 1198 size_t Pos = 1199 IgnoreCase ? Buffer.find_lower(FixedStr) : Buffer.find(FixedStr); 1200 if (Pos == StringRef::npos) 1201 return make_error<NotFoundError>(); 1202 return Pos; 1203 } 1204 1205 // Regex match. 1206 1207 // If there are substitutions, we need to create a temporary string with the 1208 // actual value. 
1209 StringRef RegExToMatch = RegExStr; 1210 std::string TmpStr; 1211 if (!Substitutions.empty()) { 1212 TmpStr = RegExStr; 1213 if (LineNumber) 1214 Context->LineVariable->setValue(ExpressionValue(*LineNumber)); 1215 1216 size_t InsertOffset = 0; 1217 // Substitute all string variables and expressions whose values are only 1218 // now known. Use of string variables defined on the same line are handled 1219 // by back-references. 1220 for (const auto &Substitution : Substitutions) { 1221 // Substitute and check for failure (e.g. use of undefined variable). 1222 Expected<std::string> Value = Substitution->getResult(); 1223 if (!Value) { 1224 // Convert to an ErrorDiagnostic to get location information. This is 1225 // done here rather than PrintNoMatch since now we know which 1226 // substitution block caused the overflow. 1227 Error Err = 1228 handleErrors(Value.takeError(), [&](const OverflowError &E) { 1229 return ErrorDiagnostic::get(SM, Substitution->getFromString(), 1230 "unable to substitute variable or " 1231 "numeric expression: overflow error"); 1232 }); 1233 return std::move(Err); 1234 } 1235 1236 // Plop it into the regex at the adjusted offset. 1237 TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset, 1238 Value->begin(), Value->end()); 1239 InsertOffset += Value->size(); 1240 } 1241 1242 // Match the newly constructed regex. 1243 RegExToMatch = TmpStr; 1244 } 1245 1246 SmallVector<StringRef, 4> MatchInfo; 1247 unsigned int Flags = Regex::Newline; 1248 if (IgnoreCase) 1249 Flags |= Regex::IgnoreCase; 1250 if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo)) 1251 return make_error<NotFoundError>(); 1252 1253 // Successful regex match. 1254 assert(!MatchInfo.empty() && "Didn't get any match"); 1255 StringRef FullMatch = MatchInfo[0]; 1256 1257 // If this defines any string variables, remember their values. 
1258 for (const auto &VariableDef : VariableDefs) { 1259 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 1260 Context->GlobalVariableTable[VariableDef.first] = 1261 MatchInfo[VariableDef.second]; 1262 } 1263 1264 // If this defines any numeric variables, remember their values. 1265 for (const auto &NumericVariableDef : NumericVariableDefs) { 1266 const NumericVariableMatch &NumericVariableMatch = 1267 NumericVariableDef.getValue(); 1268 unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup; 1269 assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error"); 1270 NumericVariable *DefinedNumericVariable = 1271 NumericVariableMatch.DefinedNumericVariable; 1272 1273 StringRef MatchedValue = MatchInfo[CaptureParenGroup]; 1274 ExpressionFormat Format = DefinedNumericVariable->getImplicitFormat(); 1275 Expected<ExpressionValue> Value = 1276 Format.valueFromStringRepr(MatchedValue, SM); 1277 if (!Value) 1278 return Value.takeError(); 1279 DefinedNumericVariable->setValue(*Value, MatchedValue); 1280 } 1281 1282 // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after 1283 // the required preceding newline, which is consumed by the pattern in the 1284 // case of CHECK-EMPTY but not CHECK-NEXT. 1285 size_t MatchStartSkip = CheckTy == Check::CheckEmpty; 1286 MatchLen = FullMatch.size() - MatchStartSkip; 1287 return FullMatch.data() - Buffer.data() + MatchStartSkip; 1288 } 1289 1290 unsigned Pattern::computeMatchDistance(StringRef Buffer) const { 1291 // Just compute the number of matching characters. For regular expressions, we 1292 // just compare against the regex itself and hope for the best. 1293 // 1294 // FIXME: One easy improvement here is have the regex lib generate a single 1295 // example regular expression which matches, and use that as the example 1296 // string. 
1297 StringRef ExampleString(FixedStr); 1298 if (ExampleString.empty()) 1299 ExampleString = RegExStr; 1300 1301 // Only compare up to the first line in the buffer, or the string size. 1302 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 1303 BufferPrefix = BufferPrefix.split('\n').first; 1304 return BufferPrefix.edit_distance(ExampleString); 1305 } 1306 1307 void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, 1308 SMRange Range, 1309 FileCheckDiag::MatchType MatchTy, 1310 std::vector<FileCheckDiag> *Diags) const { 1311 // Print what we know about substitutions. 1312 if (!Substitutions.empty()) { 1313 for (const auto &Substitution : Substitutions) { 1314 SmallString<256> Msg; 1315 raw_svector_ostream OS(Msg); 1316 Expected<std::string> MatchedValue = Substitution->getResult(); 1317 1318 // Substitution failed or is not known at match time, print the undefined 1319 // variables it uses. 1320 if (!MatchedValue) { 1321 bool UndefSeen = false; 1322 handleAllErrors( 1323 MatchedValue.takeError(), [](const NotFoundError &E) {}, 1324 // Handled in PrintNoMatch(). 1325 [](const ErrorDiagnostic &E) {}, 1326 // Handled in match(). 1327 [](const OverflowError &E) {}, 1328 [&](const UndefVarError &E) { 1329 if (!UndefSeen) { 1330 OS << "uses undefined variable(s):"; 1331 UndefSeen = true; 1332 } 1333 OS << " "; 1334 E.log(OS); 1335 }); 1336 } else { 1337 // Substitution succeeded. Print substituted value. 1338 OS << "with \""; 1339 OS.write_escaped(Substitution->getFromString()) << "\" equal to \""; 1340 OS.write_escaped(*MatchedValue) << "\""; 1341 } 1342 1343 // We report only the start of the match/search range to suggest we are 1344 // reporting the substitutions as set at the start of the match/search. 1345 // Indicating a non-zero-length range might instead seem to imply that the 1346 // substitution matches or was captured from exactly that range. 
1347 if (Diags) 1348 Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, 1349 SMRange(Range.Start, Range.Start), OS.str()); 1350 else 1351 SM.PrintMessage(Range.Start, SourceMgr::DK_Note, OS.str()); 1352 } 1353 } 1354 } 1355 1356 void Pattern::printVariableDefs(const SourceMgr &SM, 1357 FileCheckDiag::MatchType MatchTy, 1358 std::vector<FileCheckDiag> *Diags) const { 1359 if (VariableDefs.empty() && NumericVariableDefs.empty()) 1360 return; 1361 // Build list of variable captures. 1362 struct VarCapture { 1363 StringRef Name; 1364 SMRange Range; 1365 }; 1366 SmallVector<VarCapture, 2> VarCaptures; 1367 for (const auto &VariableDef : VariableDefs) { 1368 VarCapture VC; 1369 VC.Name = VariableDef.first; 1370 StringRef Value = Context->GlobalVariableTable[VC.Name]; 1371 SMLoc Start = SMLoc::getFromPointer(Value.data()); 1372 SMLoc End = SMLoc::getFromPointer(Value.data() + Value.size()); 1373 VC.Range = SMRange(Start, End); 1374 VarCaptures.push_back(VC); 1375 } 1376 for (const auto &VariableDef : NumericVariableDefs) { 1377 VarCapture VC; 1378 VC.Name = VariableDef.getKey(); 1379 StringRef StrValue = VariableDef.getValue() 1380 .DefinedNumericVariable->getStringValue() 1381 .getValue(); 1382 SMLoc Start = SMLoc::getFromPointer(StrValue.data()); 1383 SMLoc End = SMLoc::getFromPointer(StrValue.data() + StrValue.size()); 1384 VC.Range = SMRange(Start, End); 1385 VarCaptures.push_back(VC); 1386 } 1387 // Sort variable captures by the order in which they matched the input. 1388 // Ranges shouldn't be overlapping, so we can just compare the start. 1389 std::sort(VarCaptures.begin(), VarCaptures.end(), 1390 [](const VarCapture &A, const VarCapture &B) { 1391 assert(A.Range.Start != B.Range.Start && 1392 "unexpected overlapping variable captures"); 1393 return A.Range.Start.getPointer() < B.Range.Start.getPointer(); 1394 }); 1395 // Create notes for the sorted captures. 
1396 for (const VarCapture &VC : VarCaptures) { 1397 SmallString<256> Msg; 1398 raw_svector_ostream OS(Msg); 1399 OS << "captured var \"" << VC.Name << "\""; 1400 if (Diags) 1401 Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, VC.Range, OS.str()); 1402 else 1403 SM.PrintMessage(VC.Range.Start, SourceMgr::DK_Note, OS.str(), VC.Range); 1404 } 1405 } 1406 1407 static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy, 1408 const SourceMgr &SM, SMLoc Loc, 1409 Check::FileCheckType CheckTy, 1410 StringRef Buffer, size_t Pos, size_t Len, 1411 std::vector<FileCheckDiag> *Diags, 1412 bool AdjustPrevDiags = false) { 1413 SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos); 1414 SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len); 1415 SMRange Range(Start, End); 1416 if (Diags) { 1417 if (AdjustPrevDiags) { 1418 SMLoc CheckLoc = Diags->rbegin()->CheckLoc; 1419 for (auto I = Diags->rbegin(), E = Diags->rend(); 1420 I != E && I->CheckLoc == CheckLoc; ++I) 1421 I->MatchTy = MatchTy; 1422 } else 1423 Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range); 1424 } 1425 return Range; 1426 } 1427 1428 void Pattern::printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, 1429 std::vector<FileCheckDiag> *Diags) const { 1430 // Attempt to find the closest/best fuzzy match. Usually an error happens 1431 // because some string in the output didn't exactly match. In these cases, we 1432 // would like to show the user a best guess at what "should have" matched, to 1433 // save them having to actually check the input manually. 1434 size_t NumLinesForward = 0; 1435 size_t Best = StringRef::npos; 1436 double BestQuality = 0; 1437 1438 // Use an arbitrary 4k limit on how far we will search. 1439 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 1440 if (Buffer[i] == '\n') 1441 ++NumLinesForward; 1442 1443 // Patterns have leading whitespace stripped, so skip whitespace when 1444 // looking for something which looks like a pattern. 
1445 if (Buffer[i] == ' ' || Buffer[i] == '\t') 1446 continue; 1447 1448 // Compute the "quality" of this match as an arbitrary combination of the 1449 // match distance and the number of lines skipped to get to this match. 1450 unsigned Distance = computeMatchDistance(Buffer.substr(i)); 1451 double Quality = Distance + (NumLinesForward / 100.); 1452 1453 if (Quality < BestQuality || Best == StringRef::npos) { 1454 Best = i; 1455 BestQuality = Quality; 1456 } 1457 } 1458 1459 // Print the "possible intended match here" line if we found something 1460 // reasonable and not equal to what we showed in the "scanning from here" 1461 // line. 1462 if (Best && Best != StringRef::npos && BestQuality < 50) { 1463 SMRange MatchRange = 1464 ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(), 1465 getCheckTy(), Buffer, Best, 0, Diags); 1466 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, 1467 "possible intended match here"); 1468 1469 // FIXME: If we wanted to be really friendly we would show why the match 1470 // failed, as it can be hard to spot simple one character differences. 1471 } 1472 } 1473 1474 Expected<StringRef> 1475 FileCheckPatternContext::getPatternVarValue(StringRef VarName) { 1476 auto VarIter = GlobalVariableTable.find(VarName); 1477 if (VarIter == GlobalVariableTable.end()) 1478 return make_error<UndefVarError>(VarName); 1479 1480 return VarIter->second; 1481 } 1482 1483 template <class... Types> 1484 NumericVariable *FileCheckPatternContext::makeNumericVariable(Types... 
args) { 1485 NumericVariables.push_back(std::make_unique<NumericVariable>(args...)); 1486 return NumericVariables.back().get(); 1487 } 1488 1489 Substitution * 1490 FileCheckPatternContext::makeStringSubstitution(StringRef VarName, 1491 size_t InsertIdx) { 1492 Substitutions.push_back( 1493 std::make_unique<StringSubstitution>(this, VarName, InsertIdx)); 1494 return Substitutions.back().get(); 1495 } 1496 1497 Substitution *FileCheckPatternContext::makeNumericSubstitution( 1498 StringRef ExpressionStr, std::unique_ptr<Expression> Expression, 1499 size_t InsertIdx) { 1500 Substitutions.push_back(std::make_unique<NumericSubstitution>( 1501 this, ExpressionStr, std::move(Expression), InsertIdx)); 1502 return Substitutions.back().get(); 1503 } 1504 1505 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 1506 // Offset keeps track of the current offset within the input Str 1507 size_t Offset = 0; 1508 // [...] Nesting depth 1509 size_t BracketDepth = 0; 1510 1511 while (!Str.empty()) { 1512 if (Str.startswith("]]") && BracketDepth == 0) 1513 return Offset; 1514 if (Str[0] == '\\') { 1515 // Backslash escapes the next char within regexes, so skip them both. 1516 Str = Str.substr(2); 1517 Offset += 2; 1518 } else { 1519 switch (Str[0]) { 1520 default: 1521 break; 1522 case '[': 1523 BracketDepth++; 1524 break; 1525 case ']': 1526 if (BracketDepth == 0) { 1527 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 1528 SourceMgr::DK_Error, 1529 "missing closing \"]\" for regex variable"); 1530 exit(1); 1531 } 1532 BracketDepth--; 1533 break; 1534 } 1535 Str = Str.substr(1); 1536 Offset++; 1537 } 1538 } 1539 1540 return StringRef::npos; 1541 } 1542 1543 StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB, 1544 SmallVectorImpl<char> &OutputBuffer) { 1545 OutputBuffer.reserve(MB.getBufferSize()); 1546 1547 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 1548 Ptr != End; ++Ptr) { 1549 // Eliminate trailing dosish \r. 
1550 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 1551 continue; 1552 } 1553 1554 // If current char is not a horizontal whitespace or if horizontal 1555 // whitespace canonicalization is disabled, dump it to output as is. 1556 if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 1557 OutputBuffer.push_back(*Ptr); 1558 continue; 1559 } 1560 1561 // Otherwise, add one space and advance over neighboring space. 1562 OutputBuffer.push_back(' '); 1563 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 1564 ++Ptr; 1565 } 1566 1567 // Add a null byte and then return all but that byte. 1568 OutputBuffer.push_back('\0'); 1569 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 1570 } 1571 1572 FileCheckDiag::FileCheckDiag(const SourceMgr &SM, 1573 const Check::FileCheckType &CheckTy, 1574 SMLoc CheckLoc, MatchType MatchTy, 1575 SMRange InputRange, StringRef Note) 1576 : CheckTy(CheckTy), CheckLoc(CheckLoc), MatchTy(MatchTy), Note(Note) { 1577 auto Start = SM.getLineAndColumn(InputRange.Start); 1578 auto End = SM.getLineAndColumn(InputRange.End); 1579 InputStartLine = Start.first; 1580 InputStartCol = Start.second; 1581 InputEndLine = End.first; 1582 InputEndCol = End.second; 1583 } 1584 1585 static bool IsPartOfWord(char c) { 1586 return (isAlnum(c) || c == '-' || c == '_'); 1587 } 1588 1589 Check::FileCheckType &Check::FileCheckType::setCount(int C) { 1590 assert(Count > 0 && "zero and negative counts are not supported"); 1591 assert((C == 1 || Kind == CheckPlain) && 1592 "count supported only for plain CHECK directives"); 1593 Count = C; 1594 return *this; 1595 } 1596 1597 std::string Check::FileCheckType::getModifiersDescription() const { 1598 if (Modifiers.none()) 1599 return ""; 1600 std::string Ret; 1601 raw_string_ostream OS(Ret); 1602 OS << '{'; 1603 if (isLiteralMatch()) 1604 OS << "LITERAL"; 1605 OS << '}'; 1606 return OS.str(); 1607 } 1608 1609 std::string Check::FileCheckType::getDescription(StringRef Prefix) 
const { 1610 // Append directive modifiers. 1611 auto WithModifiers = [this, Prefix](StringRef Str) -> std::string { 1612 return (Prefix + Str + getModifiersDescription()).str(); 1613 }; 1614 1615 switch (Kind) { 1616 case Check::CheckNone: 1617 return "invalid"; 1618 case Check::CheckPlain: 1619 if (Count > 1) 1620 return WithModifiers("-COUNT"); 1621 return WithModifiers(""); 1622 case Check::CheckNext: 1623 return WithModifiers("-NEXT"); 1624 case Check::CheckSame: 1625 return WithModifiers("-SAME"); 1626 case Check::CheckNot: 1627 return WithModifiers("-NOT"); 1628 case Check::CheckDAG: 1629 return WithModifiers("-DAG"); 1630 case Check::CheckLabel: 1631 return WithModifiers("-LABEL"); 1632 case Check::CheckEmpty: 1633 return WithModifiers("-EMPTY"); 1634 case Check::CheckComment: 1635 return std::string(Prefix); 1636 case Check::CheckEOF: 1637 return "implicit EOF"; 1638 case Check::CheckBadNot: 1639 return "bad NOT"; 1640 case Check::CheckBadCount: 1641 return "bad COUNT"; 1642 } 1643 llvm_unreachable("unknown FileCheckType"); 1644 } 1645 1646 static std::pair<Check::FileCheckType, StringRef> 1647 FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { 1648 if (Buffer.size() <= Prefix.size()) 1649 return {Check::CheckNone, StringRef()}; 1650 1651 StringRef Rest = Buffer.drop_front(Prefix.size()); 1652 // Check for comment. 1653 if (llvm::is_contained(Req.CommentPrefixes, Prefix)) { 1654 if (Rest.consume_front(":")) 1655 return {Check::CheckComment, Rest}; 1656 // Ignore a comment prefix if it has a suffix like "-NOT". 1657 return {Check::CheckNone, StringRef()}; 1658 } 1659 1660 auto ConsumeModifiers = [&](Check::FileCheckType Ret) 1661 -> std::pair<Check::FileCheckType, StringRef> { 1662 if (Rest.consume_front(":")) 1663 return {Ret, Rest}; 1664 if (!Rest.consume_front("{")) 1665 return {Check::CheckNone, StringRef()}; 1666 1667 // Parse the modifiers, speparated by commas. 1668 do { 1669 // Allow whitespace in modifiers list. 
1670 Rest = Rest.ltrim(); 1671 if (Rest.consume_front("LITERAL")) 1672 Ret.setLiteralMatch(); 1673 else 1674 return {Check::CheckNone, Rest}; 1675 // Allow whitespace in modifiers list. 1676 Rest = Rest.ltrim(); 1677 } while (Rest.consume_front(",")); 1678 if (!Rest.consume_front("}:")) 1679 return {Check::CheckNone, Rest}; 1680 return {Ret, Rest}; 1681 }; 1682 1683 // Verify that the prefix is followed by directive modifiers or a colon. 1684 if (Rest.consume_front(":")) 1685 return {Check::CheckPlain, Rest}; 1686 if (Rest.front() == '{') 1687 return ConsumeModifiers(Check::CheckPlain); 1688 1689 if (!Rest.consume_front("-")) 1690 return {Check::CheckNone, StringRef()}; 1691 1692 if (Rest.consume_front("COUNT-")) { 1693 int64_t Count; 1694 if (Rest.consumeInteger(10, Count)) 1695 // Error happened in parsing integer. 1696 return {Check::CheckBadCount, Rest}; 1697 if (Count <= 0 || Count > INT32_MAX) 1698 return {Check::CheckBadCount, Rest}; 1699 if (Rest.front() != ':' && Rest.front() != '{') 1700 return {Check::CheckBadCount, Rest}; 1701 return ConsumeModifiers( 1702 Check::FileCheckType(Check::CheckPlain).setCount(Count)); 1703 } 1704 1705 // You can't combine -NOT with another suffix. 
1706 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 1707 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 1708 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") || 1709 Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:")) 1710 return {Check::CheckBadNot, Rest}; 1711 1712 if (Rest.consume_front("NEXT")) 1713 return ConsumeModifiers(Check::CheckNext); 1714 1715 if (Rest.consume_front("SAME")) 1716 return ConsumeModifiers(Check::CheckSame); 1717 1718 if (Rest.consume_front("NOT")) 1719 return ConsumeModifiers(Check::CheckNot); 1720 1721 if (Rest.consume_front("DAG")) 1722 return ConsumeModifiers(Check::CheckDAG); 1723 1724 if (Rest.consume_front("LABEL")) 1725 return ConsumeModifiers(Check::CheckLabel); 1726 1727 if (Rest.consume_front("EMPTY")) 1728 return ConsumeModifiers(Check::CheckEmpty); 1729 1730 return {Check::CheckNone, Rest}; 1731 } 1732 1733 // From the given position, find the next character after the word. 1734 static size_t SkipWord(StringRef Str, size_t Loc) { 1735 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 1736 ++Loc; 1737 return Loc; 1738 } 1739 1740 /// Searches the buffer for the first prefix in the prefix regular expression. 1741 /// 1742 /// This searches the buffer using the provided regular expression, however it 1743 /// enforces constraints beyond that: 1744 /// 1) The found prefix must not be a suffix of something that looks like 1745 /// a valid prefix. 1746 /// 2) The found prefix must be followed by a valid check type suffix using \c 1747 /// FindCheckType above. 1748 /// 1749 /// \returns a pair of StringRefs into the Buffer, which combines: 1750 /// - the first match of the regular expression to satisfy these two is 1751 /// returned, 1752 /// otherwise an empty StringRef is returned to indicate failure. 
1753 /// - buffer rewound to the location right after parsed suffix, for parsing 1754 /// to continue from 1755 /// 1756 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 1757 /// start at the beginning of the returned prefix, increment \p LineNumber for 1758 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 1759 /// check found by examining the suffix. 1760 /// 1761 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 1762 /// is unspecified. 1763 static std::pair<StringRef, StringRef> 1764 FindFirstMatchingPrefix(const FileCheckRequest &Req, Regex &PrefixRE, 1765 StringRef &Buffer, unsigned &LineNumber, 1766 Check::FileCheckType &CheckTy) { 1767 SmallVector<StringRef, 2> Matches; 1768 1769 while (!Buffer.empty()) { 1770 // Find the first (longest) match using the RE. 1771 if (!PrefixRE.match(Buffer, &Matches)) 1772 // No match at all, bail. 1773 return {StringRef(), StringRef()}; 1774 1775 StringRef Prefix = Matches[0]; 1776 Matches.clear(); 1777 1778 assert(Prefix.data() >= Buffer.data() && 1779 Prefix.data() < Buffer.data() + Buffer.size() && 1780 "Prefix doesn't start inside of buffer!"); 1781 size_t Loc = Prefix.data() - Buffer.data(); 1782 StringRef Skipped = Buffer.substr(0, Loc); 1783 Buffer = Buffer.drop_front(Loc); 1784 LineNumber += Skipped.count('\n'); 1785 1786 // Check that the matched prefix isn't a suffix of some other check-like 1787 // word. 1788 // FIXME: This is a very ad-hoc check. it would be better handled in some 1789 // other way. Among other things it seems hard to distinguish between 1790 // intentional and unintentional uses of this feature. 1791 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 1792 // Now extract the type. 1793 StringRef AfterSuffix; 1794 std::tie(CheckTy, AfterSuffix) = FindCheckType(Req, Buffer, Prefix); 1795 1796 // If we've found a valid check type for this prefix, we're done. 
1797 if (CheckTy != Check::CheckNone) 1798 return {Prefix, AfterSuffix}; 1799 } 1800 1801 // If we didn't successfully find a prefix, we need to skip this invalid 1802 // prefix and continue scanning. We directly skip the prefix that was 1803 // matched and any additional parts of that check-like word. 1804 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 1805 } 1806 1807 // We ran out of buffer while skipping partial matches so give up. 1808 return {StringRef(), StringRef()}; 1809 } 1810 1811 void FileCheckPatternContext::createLineVariable() { 1812 assert(!LineVariable && "@LINE pseudo numeric variable already created"); 1813 StringRef LineName = "@LINE"; 1814 LineVariable = makeNumericVariable( 1815 LineName, ExpressionFormat(ExpressionFormat::Kind::Unsigned)); 1816 GlobalNumericVariableTable[LineName] = LineVariable; 1817 } 1818 1819 FileCheck::FileCheck(FileCheckRequest Req) 1820 : Req(Req), PatternContext(std::make_unique<FileCheckPatternContext>()), 1821 CheckStrings(std::make_unique<std::vector<FileCheckString>>()) {} 1822 1823 FileCheck::~FileCheck() = default; 1824 1825 bool FileCheck::readCheckFile( 1826 SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 1827 std::pair<unsigned, unsigned> *ImpPatBufferIDRange) { 1828 if (ImpPatBufferIDRange) 1829 ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0; 1830 1831 Error DefineError = 1832 PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM); 1833 if (DefineError) { 1834 logAllUnhandledErrors(std::move(DefineError), errs()); 1835 return true; 1836 } 1837 1838 PatternContext->createLineVariable(); 1839 1840 std::vector<Pattern> ImplicitNegativeChecks; 1841 for (StringRef PatternString : Req.ImplicitCheckNot) { 1842 // Create a buffer with fake command line content in order to display the 1843 // command line option responsible for the specific implicit CHECK-NOT. 
1844 std::string Prefix = "-implicit-check-not='"; 1845 std::string Suffix = "'"; 1846 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 1847 (Prefix + PatternString + Suffix).str(), "command line"); 1848 1849 StringRef PatternInBuffer = 1850 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 1851 unsigned BufferID = SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 1852 if (ImpPatBufferIDRange) { 1853 if (ImpPatBufferIDRange->first == ImpPatBufferIDRange->second) { 1854 ImpPatBufferIDRange->first = BufferID; 1855 ImpPatBufferIDRange->second = BufferID + 1; 1856 } else { 1857 assert(BufferID == ImpPatBufferIDRange->second && 1858 "expected consecutive source buffer IDs"); 1859 ++ImpPatBufferIDRange->second; 1860 } 1861 } 1862 1863 ImplicitNegativeChecks.push_back( 1864 Pattern(Check::CheckNot, PatternContext.get())); 1865 ImplicitNegativeChecks.back().parsePattern(PatternInBuffer, 1866 "IMPLICIT-CHECK", SM, Req); 1867 } 1868 1869 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 1870 1871 // LineNumber keeps track of the line on which CheckPrefix instances are 1872 // found. 1873 unsigned LineNumber = 1; 1874 1875 std::set<StringRef> PrefixesNotFound(Req.CheckPrefixes.begin(), 1876 Req.CheckPrefixes.end()); 1877 const size_t DistinctPrefixes = PrefixesNotFound.size(); 1878 while (true) { 1879 Check::FileCheckType CheckTy; 1880 1881 // See if a prefix occurs in the memory buffer. 
1882 StringRef UsedPrefix; 1883 StringRef AfterSuffix; 1884 std::tie(UsedPrefix, AfterSuffix) = 1885 FindFirstMatchingPrefix(Req, PrefixRE, Buffer, LineNumber, CheckTy); 1886 if (UsedPrefix.empty()) 1887 break; 1888 if (CheckTy != Check::CheckComment) 1889 PrefixesNotFound.erase(UsedPrefix); 1890 1891 assert(UsedPrefix.data() == Buffer.data() && 1892 "Failed to move Buffer's start forward, or pointed prefix outside " 1893 "of the buffer!"); 1894 assert(AfterSuffix.data() >= Buffer.data() && 1895 AfterSuffix.data() < Buffer.data() + Buffer.size() && 1896 "Parsing after suffix doesn't start inside of buffer!"); 1897 1898 // Location to use for error messages. 1899 const char *UsedPrefixStart = UsedPrefix.data(); 1900 1901 // Skip the buffer to the end of parsed suffix (or just prefix, if no good 1902 // suffix was processed). 1903 Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size()) 1904 : AfterSuffix; 1905 1906 // Complain about useful-looking but unsupported suffixes. 1907 if (CheckTy == Check::CheckBadNot) { 1908 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 1909 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 1910 return true; 1911 } 1912 1913 // Complain about invalid count specification. 1914 if (CheckTy == Check::CheckBadCount) { 1915 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 1916 "invalid count in -COUNT specification on prefix '" + 1917 UsedPrefix + "'"); 1918 return true; 1919 } 1920 1921 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 1922 // leading whitespace. 1923 if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) 1924 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 1925 1926 // Scan ahead to the end of line. 1927 size_t EOL = Buffer.find_first_of("\n\r"); 1928 1929 // Remember the location of the start of the pattern, for diagnostics. 
1930 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 1931 1932 // Extract the pattern from the buffer. 1933 StringRef PatternBuffer = Buffer.substr(0, EOL); 1934 Buffer = Buffer.substr(EOL); 1935 1936 // If this is a comment, we're done. 1937 if (CheckTy == Check::CheckComment) 1938 continue; 1939 1940 // Parse the pattern. 1941 Pattern P(CheckTy, PatternContext.get(), LineNumber); 1942 if (P.parsePattern(PatternBuffer, UsedPrefix, SM, Req)) 1943 return true; 1944 1945 // Verify that CHECK-LABEL lines do not define or use variables 1946 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 1947 SM.PrintMessage( 1948 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 1949 "found '" + UsedPrefix + "-LABEL:'" 1950 " with variable definition or use"); 1951 return true; 1952 } 1953 1954 // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. 1955 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || 1956 CheckTy == Check::CheckEmpty) && 1957 CheckStrings->empty()) { 1958 StringRef Type = CheckTy == Check::CheckNext 1959 ? "NEXT" 1960 : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; 1961 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 1962 SourceMgr::DK_Error, 1963 "found '" + UsedPrefix + "-" + Type + 1964 "' without previous '" + UsedPrefix + ": line"); 1965 return true; 1966 } 1967 1968 // Handle CHECK-DAG/-NOT. 1969 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 1970 DagNotMatches.push_back(P); 1971 continue; 1972 } 1973 1974 // Okay, add the string we captured to the output vector and move on. 1975 CheckStrings->emplace_back(P, UsedPrefix, PatternLoc); 1976 std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); 1977 DagNotMatches = ImplicitNegativeChecks; 1978 } 1979 1980 // When there are no used prefixes we report an error except in the case that 1981 // no prefix is specified explicitly but -implicit-check-not is specified. 
1982 const bool NoPrefixesFound = PrefixesNotFound.size() == DistinctPrefixes; 1983 const bool SomePrefixesUnexpectedlyNotUsed = 1984 !Req.AllowUnusedPrefixes && !PrefixesNotFound.empty(); 1985 if ((NoPrefixesFound || SomePrefixesUnexpectedlyNotUsed) && 1986 (ImplicitNegativeChecks.empty() || !Req.IsDefaultCheckPrefix)) { 1987 errs() << "error: no check strings found with prefix" 1988 << (PrefixesNotFound.size() > 1 ? "es " : " "); 1989 bool First = true; 1990 for (StringRef MissingPrefix : PrefixesNotFound) { 1991 if (!First) 1992 errs() << ", "; 1993 errs() << "\'" << MissingPrefix << ":'"; 1994 First = false; 1995 } 1996 errs() << '\n'; 1997 return true; 1998 } 1999 2000 // Add an EOF pattern for any trailing --implicit-check-not/CHECK-DAG/-NOTs, 2001 // and use the first prefix as a filler for the error message. 2002 if (!DagNotMatches.empty()) { 2003 CheckStrings->emplace_back( 2004 Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1), 2005 *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data())); 2006 std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); 2007 } 2008 2009 return false; 2010 } 2011 2012 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, 2013 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 2014 int MatchedCount, StringRef Buffer, size_t MatchPos, 2015 size_t MatchLen, const FileCheckRequest &Req, 2016 std::vector<FileCheckDiag> *Diags) { 2017 bool PrintDiag = true; 2018 if (ExpectedMatch) { 2019 if (!Req.Verbose) 2020 return; 2021 if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF) 2022 return; 2023 // Due to their verbosity, we don't print verbose diagnostics here if we're 2024 // gathering them for a different rendering, but we always print other 2025 // diagnostics. 2026 PrintDiag = !Diags; 2027 } 2028 FileCheckDiag::MatchType MatchTy = ExpectedMatch 2029 ? 
FileCheckDiag::MatchFoundAndExpected 2030 : FileCheckDiag::MatchFoundButExcluded; 2031 SMRange MatchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(), 2032 Buffer, MatchPos, MatchLen, Diags); 2033 if (Diags) { 2034 Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, Diags); 2035 Pat.printVariableDefs(SM, MatchTy, Diags); 2036 } 2037 if (!PrintDiag) 2038 return; 2039 2040 std::string Message = formatv("{0}: {1} string found in input", 2041 Pat.getCheckTy().getDescription(Prefix), 2042 (ExpectedMatch ? "expected" : "excluded")) 2043 .str(); 2044 if (Pat.getCount() > 1) 2045 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); 2046 2047 SM.PrintMessage( 2048 Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message); 2049 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here", 2050 {MatchRange}); 2051 Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, nullptr); 2052 Pat.printVariableDefs(SM, MatchTy, nullptr); 2053 } 2054 2055 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, 2056 const FileCheckString &CheckStr, int MatchedCount, 2057 StringRef Buffer, size_t MatchPos, size_t MatchLen, 2058 FileCheckRequest &Req, 2059 std::vector<FileCheckDiag> *Diags) { 2060 PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, 2061 MatchedCount, Buffer, MatchPos, MatchLen, Req, Diags); 2062 } 2063 2064 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, 2065 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 2066 int MatchedCount, StringRef Buffer, 2067 bool VerboseVerbose, std::vector<FileCheckDiag> *Diags, 2068 Error MatchErrors) { 2069 assert(MatchErrors && "Called on successful match"); 2070 bool PrintDiag = true; 2071 if (!ExpectedMatch) { 2072 if (!VerboseVerbose) { 2073 consumeError(std::move(MatchErrors)); 2074 return; 2075 } 2076 // Due to their verbosity, we don't print verbose diagnostics here if we're 2077 // gathering them for a different rendering, 
but we always print other 2078 // diagnostics. 2079 PrintDiag = !Diags; 2080 } 2081 2082 // If the current position is at the end of a line, advance to the start of 2083 // the next line. 2084 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 2085 FileCheckDiag::MatchType MatchTy = ExpectedMatch 2086 ? FileCheckDiag::MatchNoneButExpected 2087 : FileCheckDiag::MatchNoneAndExcluded; 2088 SMRange SearchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(), 2089 Buffer, 0, Buffer.size(), Diags); 2090 if (Diags) 2091 Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, Diags); 2092 if (!PrintDiag) { 2093 consumeError(std::move(MatchErrors)); 2094 return; 2095 } 2096 2097 MatchErrors = handleErrors(std::move(MatchErrors), 2098 [](const ErrorDiagnostic &E) { E.log(errs()); }); 2099 2100 // No problem matching the string per se. 2101 if (!MatchErrors) 2102 return; 2103 consumeError(std::move(MatchErrors)); 2104 2105 // Print "not found" diagnostic. 2106 std::string Message = formatv("{0}: {1} string not found in input", 2107 Pat.getCheckTy().getDescription(Prefix), 2108 (ExpectedMatch ? "expected" : "excluded")) 2109 .str(); 2110 if (Pat.getCount() > 1) 2111 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); 2112 SM.PrintMessage( 2113 Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message); 2114 2115 // Print the "scanning from here" line. 2116 SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here"); 2117 2118 // Allow the pattern to print additional information if desired. 
2119 Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, nullptr); 2120 2121 if (ExpectedMatch) 2122 Pat.printFuzzyMatch(SM, Buffer, Diags); 2123 } 2124 2125 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, 2126 const FileCheckString &CheckStr, int MatchedCount, 2127 StringRef Buffer, bool VerboseVerbose, 2128 std::vector<FileCheckDiag> *Diags, Error MatchErrors) { 2129 PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, 2130 MatchedCount, Buffer, VerboseVerbose, Diags, 2131 std::move(MatchErrors)); 2132 } 2133 2134 /// Counts the number of newlines in the specified range. 2135 static unsigned CountNumNewlinesBetween(StringRef Range, 2136 const char *&FirstNewLine) { 2137 unsigned NumNewLines = 0; 2138 while (1) { 2139 // Scan for newline. 2140 Range = Range.substr(Range.find_first_of("\n\r")); 2141 if (Range.empty()) 2142 return NumNewLines; 2143 2144 ++NumNewLines; 2145 2146 // Handle \n\r and \r\n as a single newline. 2147 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 2148 (Range[0] != Range[1])) 2149 Range = Range.substr(1); 2150 Range = Range.substr(1); 2151 2152 if (NumNewLines == 1) 2153 FirstNewLine = Range.begin(); 2154 } 2155 } 2156 2157 size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer, 2158 bool IsLabelScanMode, size_t &MatchLen, 2159 FileCheckRequest &Req, 2160 std::vector<FileCheckDiag> *Diags) const { 2161 size_t LastPos = 0; 2162 std::vector<const Pattern *> NotStrings; 2163 2164 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 2165 // bounds; we have not processed variable definitions within the bounded block 2166 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 2167 // over the block again (including the last CHECK-LABEL) in normal mode. 2168 if (!IsLabelScanMode) { 2169 // Match "dag strings" (with mixed "not strings" if any). 
2170 LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags); 2171 if (LastPos == StringRef::npos) 2172 return StringRef::npos; 2173 } 2174 2175 // Match itself from the last position after matching CHECK-DAG. 2176 size_t LastMatchEnd = LastPos; 2177 size_t FirstMatchPos = 0; 2178 // Go match the pattern Count times. Majority of patterns only match with 2179 // count 1 though. 2180 assert(Pat.getCount() != 0 && "pattern count can not be zero"); 2181 for (int i = 1; i <= Pat.getCount(); i++) { 2182 StringRef MatchBuffer = Buffer.substr(LastMatchEnd); 2183 size_t CurrentMatchLen; 2184 // get a match at current start point 2185 Expected<size_t> MatchResult = Pat.match(MatchBuffer, CurrentMatchLen, SM); 2186 2187 // report 2188 if (!MatchResult) { 2189 PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags, 2190 MatchResult.takeError()); 2191 return StringRef::npos; 2192 } 2193 size_t MatchPos = *MatchResult; 2194 PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req, 2195 Diags); 2196 if (i == 1) 2197 FirstMatchPos = LastPos + MatchPos; 2198 2199 // move start point after the match 2200 LastMatchEnd += MatchPos + CurrentMatchLen; 2201 } 2202 // Full match len counts from first match pos. 2203 MatchLen = LastMatchEnd - FirstMatchPos; 2204 2205 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 2206 // or CHECK-NOT 2207 if (!IsLabelScanMode) { 2208 size_t MatchPos = FirstMatchPos - LastPos; 2209 StringRef MatchBuffer = Buffer.substr(LastPos); 2210 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 2211 2212 // If this check is a "CHECK-NEXT", verify that the previous match was on 2213 // the previous line (i.e. that there is one newline between them). 
2214 if (CheckNext(SM, SkippedRegion)) { 2215 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, 2216 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, 2217 Diags, Req.Verbose); 2218 return StringRef::npos; 2219 } 2220 2221 // If this check is a "CHECK-SAME", verify that the previous match was on 2222 // the same line (i.e. that there is no newline between them). 2223 if (CheckSame(SM, SkippedRegion)) { 2224 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, 2225 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, 2226 Diags, Req.Verbose); 2227 return StringRef::npos; 2228 } 2229 2230 // If this match had "not strings", verify that they don't exist in the 2231 // skipped region. 2232 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) 2233 return StringRef::npos; 2234 } 2235 2236 return FirstMatchPos; 2237 } 2238 2239 bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 2240 if (Pat.getCheckTy() != Check::CheckNext && 2241 Pat.getCheckTy() != Check::CheckEmpty) 2242 return false; 2243 2244 Twine CheckName = 2245 Prefix + 2246 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT"); 2247 2248 // Count the number of newlines between the previous match and this one. 
2249 const char *FirstNewLine = nullptr; 2250 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 2251 2252 if (NumNewLines == 0) { 2253 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2254 CheckName + ": is on the same line as previous match"); 2255 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2256 "'next' match was here"); 2257 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2258 "previous match ended here"); 2259 return true; 2260 } 2261 2262 if (NumNewLines != 1) { 2263 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2264 CheckName + 2265 ": is not on the line after the previous match"); 2266 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2267 "'next' match was here"); 2268 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2269 "previous match ended here"); 2270 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 2271 "non-matching line after previous match is here"); 2272 return true; 2273 } 2274 2275 return false; 2276 } 2277 2278 bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 2279 if (Pat.getCheckTy() != Check::CheckSame) 2280 return false; 2281 2282 // Count the number of newlines between the previous match and this one. 
2283 const char *FirstNewLine = nullptr; 2284 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 2285 2286 if (NumNewLines != 0) { 2287 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2288 Prefix + 2289 "-SAME: is not on the same line as the previous match"); 2290 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2291 "'next' match was here"); 2292 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2293 "previous match ended here"); 2294 return true; 2295 } 2296 2297 return false; 2298 } 2299 2300 bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 2301 const std::vector<const Pattern *> &NotStrings, 2302 const FileCheckRequest &Req, 2303 std::vector<FileCheckDiag> *Diags) const { 2304 bool DirectiveFail = false; 2305 for (const Pattern *Pat : NotStrings) { 2306 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 2307 2308 size_t MatchLen = 0; 2309 Expected<size_t> MatchResult = Pat->match(Buffer, MatchLen, SM); 2310 2311 if (!MatchResult) { 2312 PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, 2313 Req.VerboseVerbose, Diags, MatchResult.takeError()); 2314 continue; 2315 } 2316 size_t Pos = *MatchResult; 2317 2318 PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen, 2319 Req, Diags); 2320 DirectiveFail = true; 2321 } 2322 2323 return DirectiveFail; 2324 } 2325 2326 size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 2327 std::vector<const Pattern *> &NotStrings, 2328 const FileCheckRequest &Req, 2329 std::vector<FileCheckDiag> *Diags) const { 2330 if (DagNotStrings.empty()) 2331 return 0; 2332 2333 // The start of the search range. 2334 size_t StartPos = 0; 2335 2336 struct MatchRange { 2337 size_t Pos; 2338 size_t End; 2339 }; 2340 // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match 2341 // ranges are erased from this list once they are no longer in the search 2342 // range. 
2343 std::list<MatchRange> MatchRanges; 2344 2345 // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG 2346 // group, so we don't use a range-based for loop here. 2347 for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end(); 2348 PatItr != PatEnd; ++PatItr) { 2349 const Pattern &Pat = *PatItr; 2350 assert((Pat.getCheckTy() == Check::CheckDAG || 2351 Pat.getCheckTy() == Check::CheckNot) && 2352 "Invalid CHECK-DAG or CHECK-NOT!"); 2353 2354 if (Pat.getCheckTy() == Check::CheckNot) { 2355 NotStrings.push_back(&Pat); 2356 continue; 2357 } 2358 2359 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 2360 2361 // CHECK-DAG always matches from the start. 2362 size_t MatchLen = 0, MatchPos = StartPos; 2363 2364 // Search for a match that doesn't overlap a previous match in this 2365 // CHECK-DAG group. 2366 for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) { 2367 StringRef MatchBuffer = Buffer.substr(MatchPos); 2368 Expected<size_t> MatchResult = Pat.match(MatchBuffer, MatchLen, SM); 2369 // With a group of CHECK-DAGs, a single mismatching means the match on 2370 // that group of CHECK-DAGs fails immediately. 2371 if (!MatchResult) { 2372 PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer, 2373 Req.VerboseVerbose, Diags, MatchResult.takeError()); 2374 return StringRef::npos; 2375 } 2376 size_t MatchPosBuf = *MatchResult; 2377 // Re-calc it as the offset relative to the start of the original string. 2378 MatchPos += MatchPosBuf; 2379 if (Req.VerboseVerbose) 2380 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos, 2381 MatchLen, Req, Diags); 2382 MatchRange M{MatchPos, MatchPos + MatchLen}; 2383 if (Req.AllowDeprecatedDagOverlap) { 2384 // We don't need to track all matches in this mode, so we just maintain 2385 // one match range that encompasses the current CHECK-DAG group's 2386 // matches. 
2387 if (MatchRanges.empty()) 2388 MatchRanges.insert(MatchRanges.end(), M); 2389 else { 2390 auto Block = MatchRanges.begin(); 2391 Block->Pos = std::min(Block->Pos, M.Pos); 2392 Block->End = std::max(Block->End, M.End); 2393 } 2394 break; 2395 } 2396 // Iterate previous matches until overlapping match or insertion point. 2397 bool Overlap = false; 2398 for (; MI != ME; ++MI) { 2399 if (M.Pos < MI->End) { 2400 // !Overlap => New match has no overlap and is before this old match. 2401 // Overlap => New match overlaps this old match. 2402 Overlap = MI->Pos < M.End; 2403 break; 2404 } 2405 } 2406 if (!Overlap) { 2407 // Insert non-overlapping match into list. 2408 MatchRanges.insert(MI, M); 2409 break; 2410 } 2411 if (Req.VerboseVerbose) { 2412 // Due to their verbosity, we don't print verbose diagnostics here if 2413 // we're gathering them for a different rendering, but we always print 2414 // other diagnostics. 2415 if (!Diags) { 2416 SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos); 2417 SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End); 2418 SMRange OldRange(OldStart, OldEnd); 2419 SM.PrintMessage(OldStart, SourceMgr::DK_Note, 2420 "match discarded, overlaps earlier DAG match here", 2421 {OldRange}); 2422 } else { 2423 SMLoc CheckLoc = Diags->rbegin()->CheckLoc; 2424 for (auto I = Diags->rbegin(), E = Diags->rend(); 2425 I != E && I->CheckLoc == CheckLoc; ++I) 2426 I->MatchTy = FileCheckDiag::MatchFoundButDiscarded; 2427 } 2428 } 2429 MatchPos = MI->End; 2430 } 2431 if (!Req.VerboseVerbose) 2432 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos, 2433 MatchLen, Req, Diags); 2434 2435 // Handle the end of a CHECK-DAG group. 2436 if (std::next(PatItr) == PatEnd || 2437 std::next(PatItr)->getCheckTy() == Check::CheckNot) { 2438 if (!NotStrings.empty()) { 2439 // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to 2440 // CHECK-DAG, verify that there are no 'not' strings occurred in that 2441 // region. 
2442 StringRef SkippedRegion = 2443 Buffer.slice(StartPos, MatchRanges.begin()->Pos); 2444 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) 2445 return StringRef::npos; 2446 // Clear "not strings". 2447 NotStrings.clear(); 2448 } 2449 // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the 2450 // end of this CHECK-DAG group's match range. 2451 StartPos = MatchRanges.rbegin()->End; 2452 // Don't waste time checking for (impossible) overlaps before that. 2453 MatchRanges.clear(); 2454 } 2455 } 2456 2457 return StartPos; 2458 } 2459 2460 static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes, 2461 ArrayRef<StringRef> SuppliedPrefixes) { 2462 for (StringRef Prefix : SuppliedPrefixes) { 2463 if (Prefix.empty()) { 2464 errs() << "error: supplied " << Kind << " prefix must not be the empty " 2465 << "string\n"; 2466 return false; 2467 } 2468 static const Regex Validator("^[a-zA-Z0-9_-]*$"); 2469 if (!Validator.match(Prefix)) { 2470 errs() << "error: supplied " << Kind << " prefix must start with a " 2471 << "letter and contain only alphanumeric characters, hyphens, and " 2472 << "underscores: '" << Prefix << "'\n"; 2473 return false; 2474 } 2475 if (!UniquePrefixes.insert(Prefix).second) { 2476 errs() << "error: supplied " << Kind << " prefix must be unique among " 2477 << "check and comment prefixes: '" << Prefix << "'\n"; 2478 return false; 2479 } 2480 } 2481 return true; 2482 } 2483 2484 static const char *DefaultCheckPrefixes[] = {"CHECK"}; 2485 static const char *DefaultCommentPrefixes[] = {"COM", "RUN"}; 2486 2487 bool FileCheck::ValidateCheckPrefixes() { 2488 StringSet<> UniquePrefixes; 2489 // Add default prefixes to catch user-supplied duplicates of them below. 
2490 if (Req.CheckPrefixes.empty()) { 2491 for (const char *Prefix : DefaultCheckPrefixes) 2492 UniquePrefixes.insert(Prefix); 2493 } 2494 if (Req.CommentPrefixes.empty()) { 2495 for (const char *Prefix : DefaultCommentPrefixes) 2496 UniquePrefixes.insert(Prefix); 2497 } 2498 // Do not validate the default prefixes, or diagnostics about duplicates might 2499 // incorrectly indicate that they were supplied by the user. 2500 if (!ValidatePrefixes("check", UniquePrefixes, Req.CheckPrefixes)) 2501 return false; 2502 if (!ValidatePrefixes("comment", UniquePrefixes, Req.CommentPrefixes)) 2503 return false; 2504 return true; 2505 } 2506 2507 Regex FileCheck::buildCheckPrefixRegex() { 2508 if (Req.CheckPrefixes.empty()) { 2509 for (const char *Prefix : DefaultCheckPrefixes) 2510 Req.CheckPrefixes.push_back(Prefix); 2511 Req.IsDefaultCheckPrefix = true; 2512 } 2513 if (Req.CommentPrefixes.empty()) { 2514 for (const char *Prefix : DefaultCommentPrefixes) 2515 Req.CommentPrefixes.push_back(Prefix); 2516 } 2517 2518 // We already validated the contents of CheckPrefixes and CommentPrefixes so 2519 // just concatenate them as alternatives. 2520 SmallString<32> PrefixRegexStr; 2521 for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) { 2522 if (I != 0) 2523 PrefixRegexStr.push_back('|'); 2524 PrefixRegexStr.append(Req.CheckPrefixes[I]); 2525 } 2526 for (StringRef Prefix : Req.CommentPrefixes) { 2527 PrefixRegexStr.push_back('|'); 2528 PrefixRegexStr.append(Prefix); 2529 } 2530 2531 return Regex(PrefixRegexStr); 2532 } 2533 2534 Error FileCheckPatternContext::defineCmdlineVariables( 2535 ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) { 2536 assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() && 2537 "Overriding defined variable with command-line variable definitions"); 2538 2539 if (CmdlineDefines.empty()) 2540 return Error::success(); 2541 2542 // Create a string representing the vector of command-line definitions. 
Each 2543 // definition is on its own line and prefixed with a definition number to 2544 // clarify which definition a given diagnostic corresponds to. 2545 unsigned I = 0; 2546 Error Errs = Error::success(); 2547 std::string CmdlineDefsDiag; 2548 SmallVector<std::pair<size_t, size_t>, 4> CmdlineDefsIndices; 2549 for (StringRef CmdlineDef : CmdlineDefines) { 2550 std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str(); 2551 size_t EqIdx = CmdlineDef.find('='); 2552 if (EqIdx == StringRef::npos) { 2553 CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0)); 2554 continue; 2555 } 2556 // Numeric variable definition. 2557 if (CmdlineDef[0] == '#') { 2558 // Append a copy of the command-line definition adapted to use the same 2559 // format as in the input file to be able to reuse 2560 // parseNumericSubstitutionBlock. 2561 CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str(); 2562 std::string SubstitutionStr = std::string(CmdlineDef); 2563 SubstitutionStr[EqIdx] = ':'; 2564 CmdlineDefsIndices.push_back( 2565 std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size())); 2566 CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str(); 2567 } else { 2568 CmdlineDefsDiag += DefPrefix; 2569 CmdlineDefsIndices.push_back( 2570 std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size())); 2571 CmdlineDefsDiag += (CmdlineDef + "\n").str(); 2572 } 2573 } 2574 2575 // Create a buffer with fake command line content in order to display 2576 // parsing diagnostic with location information and point to the 2577 // global definition with invalid syntax. 
2578 std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer = 2579 MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines"); 2580 StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer(); 2581 SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc()); 2582 2583 for (std::pair<size_t, size_t> CmdlineDefIndices : CmdlineDefsIndices) { 2584 StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first, 2585 CmdlineDefIndices.second); 2586 if (CmdlineDef.empty()) { 2587 Errs = joinErrors( 2588 std::move(Errs), 2589 ErrorDiagnostic::get(SM, CmdlineDef, 2590 "missing equal sign in global definition")); 2591 continue; 2592 } 2593 2594 // Numeric variable definition. 2595 if (CmdlineDef[0] == '#') { 2596 // Now parse the definition both to check that the syntax is correct and 2597 // to create the necessary class instance. 2598 StringRef CmdlineDefExpr = CmdlineDef.substr(1); 2599 Optional<NumericVariable *> DefinedNumericVariable; 2600 Expected<std::unique_ptr<Expression>> ExpressionResult = 2601 Pattern::parseNumericSubstitutionBlock( 2602 CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM); 2603 if (!ExpressionResult) { 2604 Errs = joinErrors(std::move(Errs), ExpressionResult.takeError()); 2605 continue; 2606 } 2607 std::unique_ptr<Expression> Expression = std::move(*ExpressionResult); 2608 // Now evaluate the expression whose value this variable should be set 2609 // to, since the expression of a command-line variable definition should 2610 // only use variables defined earlier on the command-line. If not, this 2611 // is an error and we report it. 2612 Expected<ExpressionValue> Value = Expression->getAST()->eval(); 2613 if (!Value) { 2614 Errs = joinErrors(std::move(Errs), Value.takeError()); 2615 continue; 2616 } 2617 2618 assert(DefinedNumericVariable && "No variable defined"); 2619 (*DefinedNumericVariable)->setValue(*Value); 2620 2621 // Record this variable definition. 
2622 GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] = 2623 *DefinedNumericVariable; 2624 } else { 2625 // String variable definition. 2626 std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('='); 2627 StringRef CmdlineName = CmdlineNameVal.first; 2628 StringRef OrigCmdlineName = CmdlineName; 2629 Expected<Pattern::VariableProperties> ParseVarResult = 2630 Pattern::parseVariable(CmdlineName, SM); 2631 if (!ParseVarResult) { 2632 Errs = joinErrors(std::move(Errs), ParseVarResult.takeError()); 2633 continue; 2634 } 2635 // Check that CmdlineName does not denote a pseudo variable is only 2636 // composed of the parsed numeric variable. This catches cases like 2637 // "FOO+2" in a "FOO+2=10" definition. 2638 if (ParseVarResult->IsPseudo || !CmdlineName.empty()) { 2639 Errs = joinErrors(std::move(Errs), 2640 ErrorDiagnostic::get( 2641 SM, OrigCmdlineName, 2642 "invalid name in string variable definition '" + 2643 OrigCmdlineName + "'")); 2644 continue; 2645 } 2646 StringRef Name = ParseVarResult->Name; 2647 2648 // Detect collisions between string and numeric variables when the former 2649 // is created later than the latter. 2650 if (GlobalNumericVariableTable.find(Name) != 2651 GlobalNumericVariableTable.end()) { 2652 Errs = joinErrors(std::move(Errs), 2653 ErrorDiagnostic::get(SM, Name, 2654 "numeric variable with name '" + 2655 Name + "' already exists")); 2656 continue; 2657 } 2658 GlobalVariableTable.insert(CmdlineNameVal); 2659 // Mark the string variable as defined to detect collisions between 2660 // string and numeric variables in defineCmdlineVariables when the latter 2661 // is created later than the former. We cannot reuse GlobalVariableTable 2662 // for this by populating it with an empty string since we would then 2663 // lose the ability to detect the use of an undefined variable in 2664 // match(). 
2665 DefinedVariableTable[Name] = true; 2666 } 2667 } 2668 2669 return Errs; 2670 } 2671 2672 void FileCheckPatternContext::clearLocalVars() { 2673 SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars; 2674 for (const StringMapEntry<StringRef> &Var : GlobalVariableTable) 2675 if (Var.first()[0] != '$') 2676 LocalPatternVars.push_back(Var.first()); 2677 2678 // Numeric substitution reads the value of a variable directly, not via 2679 // GlobalNumericVariableTable. Therefore, we clear local variables by 2680 // clearing their value which will lead to a numeric substitution failure. We 2681 // also mark the variable for removal from GlobalNumericVariableTable since 2682 // this is what defineCmdlineVariables checks to decide that no global 2683 // variable has been defined. 2684 for (const auto &Var : GlobalNumericVariableTable) 2685 if (Var.first()[0] != '$') { 2686 Var.getValue()->clearValue(); 2687 LocalNumericVars.push_back(Var.first()); 2688 } 2689 2690 for (const auto &Var : LocalPatternVars) 2691 GlobalVariableTable.erase(Var); 2692 for (const auto &Var : LocalNumericVars) 2693 GlobalNumericVariableTable.erase(Var); 2694 } 2695 2696 bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer, 2697 std::vector<FileCheckDiag> *Diags) { 2698 bool ChecksFailed = false; 2699 2700 unsigned i = 0, j = 0, e = CheckStrings->size(); 2701 while (true) { 2702 StringRef CheckRegion; 2703 if (j == e) { 2704 CheckRegion = Buffer; 2705 } else { 2706 const FileCheckString &CheckLabelStr = (*CheckStrings)[j]; 2707 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 2708 ++j; 2709 continue; 2710 } 2711 2712 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 2713 size_t MatchLabelLen = 0; 2714 size_t MatchLabelPos = 2715 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags); 2716 if (MatchLabelPos == StringRef::npos) 2717 // Immediately bail if CHECK-LABEL fails, nothing else we can do. 
2718 return false; 2719 2720 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 2721 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 2722 ++j; 2723 } 2724 2725 // Do not clear the first region as it's the one before the first 2726 // CHECK-LABEL and it would clear variables defined on the command-line 2727 // before they get used. 2728 if (i != 0 && Req.EnableVarScope) 2729 PatternContext->clearLocalVars(); 2730 2731 for (; i != j; ++i) { 2732 const FileCheckString &CheckStr = (*CheckStrings)[i]; 2733 2734 // Check each string within the scanned region, including a second check 2735 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 2736 size_t MatchLen = 0; 2737 size_t MatchPos = 2738 CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags); 2739 2740 if (MatchPos == StringRef::npos) { 2741 ChecksFailed = true; 2742 i = j; 2743 break; 2744 } 2745 2746 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 2747 } 2748 2749 if (j == e) 2750 break; 2751 } 2752 2753 // Success if no checks failed. 2754 return !ChecksFailed; 2755 } 2756