1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // FileCheck does a line-by line check of a file that validates whether it 10 // contains the expected content. This is useful for regression tests etc. 11 // 12 // This file implements most of the API that will be used by the FileCheck utility 13 // as well as various unittests. 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/FileCheck/FileCheck.h" 17 #include "FileCheckImpl.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/Support/CheckedArithmetic.h" 22 #include "llvm/Support/FormatVariadic.h" 23 #include <cstdint> 24 #include <list> 25 #include <set> 26 #include <tuple> 27 #include <utility> 28 29 using namespace llvm; 30 31 StringRef ExpressionFormat::toString() const { 32 switch (Value) { 33 case Kind::NoFormat: 34 return StringRef("<none>"); 35 case Kind::Unsigned: 36 return StringRef("%u"); 37 case Kind::Signed: 38 return StringRef("%d"); 39 case Kind::HexUpper: 40 return StringRef("%X"); 41 case Kind::HexLower: 42 return StringRef("%x"); 43 } 44 llvm_unreachable("unknown expression format"); 45 } 46 47 Expected<std::string> ExpressionFormat::getWildcardRegex() const { 48 StringRef AlternateFormPrefix = AlternateForm ? 
StringRef("0x") : StringRef(); 49 50 auto CreatePrecisionRegex = [&](StringRef S) { 51 return (Twine(AlternateFormPrefix) + S + Twine('{') + Twine(Precision) + 52 "}") 53 .str(); 54 }; 55 56 switch (Value) { 57 case Kind::Unsigned: 58 if (Precision) 59 return CreatePrecisionRegex("([1-9][0-9]*)?[0-9]"); 60 return std::string("[0-9]+"); 61 case Kind::Signed: 62 if (Precision) 63 return CreatePrecisionRegex("-?([1-9][0-9]*)?[0-9]"); 64 return std::string("-?[0-9]+"); 65 case Kind::HexUpper: 66 if (Precision) 67 return CreatePrecisionRegex("([1-9A-F][0-9A-F]*)?[0-9A-F]"); 68 return (Twine(AlternateFormPrefix) + Twine("[0-9A-F]+")).str(); 69 case Kind::HexLower: 70 if (Precision) 71 return CreatePrecisionRegex("([1-9a-f][0-9a-f]*)?[0-9a-f]"); 72 return (Twine(AlternateFormPrefix) + Twine("[0-9a-f]+")).str(); 73 default: 74 return createStringError(std::errc::invalid_argument, 75 "trying to match value with invalid format"); 76 } 77 } 78 79 Expected<std::string> 80 ExpressionFormat::getMatchingString(ExpressionValue IntegerValue) const { 81 uint64_t AbsoluteValue; 82 StringRef SignPrefix = IntegerValue.isNegative() ? 
"-" : ""; 83 84 if (Value == Kind::Signed) { 85 Expected<int64_t> SignedValue = IntegerValue.getSignedValue(); 86 if (!SignedValue) 87 return SignedValue.takeError(); 88 if (*SignedValue < 0) 89 AbsoluteValue = cantFail(IntegerValue.getAbsolute().getUnsignedValue()); 90 else 91 AbsoluteValue = *SignedValue; 92 } else { 93 Expected<uint64_t> UnsignedValue = IntegerValue.getUnsignedValue(); 94 if (!UnsignedValue) 95 return UnsignedValue.takeError(); 96 AbsoluteValue = *UnsignedValue; 97 } 98 99 std::string AbsoluteValueStr; 100 switch (Value) { 101 case Kind::Unsigned: 102 case Kind::Signed: 103 AbsoluteValueStr = utostr(AbsoluteValue); 104 break; 105 case Kind::HexUpper: 106 case Kind::HexLower: 107 AbsoluteValueStr = utohexstr(AbsoluteValue, Value == Kind::HexLower); 108 break; 109 default: 110 return createStringError(std::errc::invalid_argument, 111 "trying to match value with invalid format"); 112 } 113 114 StringRef AlternateFormPrefix = AlternateForm ? StringRef("0x") : StringRef(); 115 116 if (Precision > AbsoluteValueStr.size()) { 117 unsigned LeadingZeros = Precision - AbsoluteValueStr.size(); 118 return (Twine(SignPrefix) + Twine(AlternateFormPrefix) + 119 std::string(LeadingZeros, '0') + AbsoluteValueStr) 120 .str(); 121 } 122 123 return (Twine(SignPrefix) + Twine(AlternateFormPrefix) + AbsoluteValueStr) 124 .str(); 125 } 126 127 Expected<ExpressionValue> 128 ExpressionFormat::valueFromStringRepr(StringRef StrVal, 129 const SourceMgr &SM) const { 130 bool ValueIsSigned = Value == Kind::Signed; 131 // Both the FileCheck utility and library only call this method with a valid 132 // value in StrVal. This is guaranteed by the regex returned by 133 // getWildcardRegex() above. Only underflow and overflow errors can thus 134 // occur. However new uses of this method could be added in the future so 135 // the error message does not make assumptions about StrVal. 
136 StringRef IntegerParseErrorStr = "unable to represent numeric value"; 137 if (ValueIsSigned) { 138 int64_t SignedValue; 139 140 if (StrVal.getAsInteger(10, SignedValue)) 141 return ErrorDiagnostic::get(SM, StrVal, IntegerParseErrorStr); 142 143 return ExpressionValue(SignedValue); 144 } 145 146 bool Hex = Value == Kind::HexUpper || Value == Kind::HexLower; 147 uint64_t UnsignedValue; 148 bool MissingFormPrefix = AlternateForm && !StrVal.consume_front("0x"); 149 if (StrVal.getAsInteger(Hex ? 16 : 10, UnsignedValue)) 150 return ErrorDiagnostic::get(SM, StrVal, IntegerParseErrorStr); 151 152 // Error out for a missing prefix only now that we know we have an otherwise 153 // valid integer. For example, "-0x18" is reported above instead. 154 if (MissingFormPrefix) 155 return ErrorDiagnostic::get(SM, StrVal, "missing alternate form prefix"); 156 157 return ExpressionValue(UnsignedValue); 158 } 159 160 static int64_t getAsSigned(uint64_t UnsignedValue) { 161 // Use memcpy to reinterpret the bitpattern in Value since casting to 162 // signed is implementation-defined if the unsigned value is too big to be 163 // represented in the signed type and using an union violates type aliasing 164 // rules. 165 int64_t SignedValue; 166 memcpy(&SignedValue, &UnsignedValue, sizeof(SignedValue)); 167 return SignedValue; 168 } 169 170 Expected<int64_t> ExpressionValue::getSignedValue() const { 171 if (Negative) 172 return getAsSigned(Value); 173 174 if (Value > (uint64_t)std::numeric_limits<int64_t>::max()) 175 return make_error<OverflowError>(); 176 177 // Value is in the representable range of int64_t so we can use cast. 
178 return static_cast<int64_t>(Value); 179 } 180 181 Expected<uint64_t> ExpressionValue::getUnsignedValue() const { 182 if (Negative) 183 return make_error<OverflowError>(); 184 185 return Value; 186 } 187 188 ExpressionValue ExpressionValue::getAbsolute() const { 189 if (!Negative) 190 return *this; 191 192 int64_t SignedValue = getAsSigned(Value); 193 int64_t MaxInt64 = std::numeric_limits<int64_t>::max(); 194 // Absolute value can be represented as int64_t. 195 if (SignedValue >= -MaxInt64) 196 return ExpressionValue(-getAsSigned(Value)); 197 198 // -X == -(max int64_t + Rem), negate each component independently. 199 SignedValue += MaxInt64; 200 uint64_t RemainingValueAbsolute = -SignedValue; 201 return ExpressionValue(MaxInt64 + RemainingValueAbsolute); 202 } 203 204 Expected<ExpressionValue> llvm::operator+(const ExpressionValue &LeftOperand, 205 const ExpressionValue &RightOperand) { 206 if (LeftOperand.isNegative() && RightOperand.isNegative()) { 207 int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); 208 int64_t RightValue = cantFail(RightOperand.getSignedValue()); 209 Optional<int64_t> Result = checkedAdd<int64_t>(LeftValue, RightValue); 210 if (!Result) 211 return make_error<OverflowError>(); 212 213 return ExpressionValue(*Result); 214 } 215 216 // (-A) + B == B - A. 217 if (LeftOperand.isNegative()) 218 return RightOperand - LeftOperand.getAbsolute(); 219 220 // A + (-B) == A - B. 221 if (RightOperand.isNegative()) 222 return LeftOperand - RightOperand.getAbsolute(); 223 224 // Both values are positive at this point. 
225 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 226 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 227 Optional<uint64_t> Result = 228 checkedAddUnsigned<uint64_t>(LeftValue, RightValue); 229 if (!Result) 230 return make_error<OverflowError>(); 231 232 return ExpressionValue(*Result); 233 } 234 235 Expected<ExpressionValue> llvm::operator-(const ExpressionValue &LeftOperand, 236 const ExpressionValue &RightOperand) { 237 // Result will be negative and thus might underflow. 238 if (LeftOperand.isNegative() && !RightOperand.isNegative()) { 239 int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); 240 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 241 // Result <= -1 - (max int64_t) which overflows on 1- and 2-complement. 242 if (RightValue > (uint64_t)std::numeric_limits<int64_t>::max()) 243 return make_error<OverflowError>(); 244 Optional<int64_t> Result = 245 checkedSub(LeftValue, static_cast<int64_t>(RightValue)); 246 if (!Result) 247 return make_error<OverflowError>(); 248 249 return ExpressionValue(*Result); 250 } 251 252 // (-A) - (-B) == B - A. 253 if (LeftOperand.isNegative()) 254 return RightOperand.getAbsolute() - LeftOperand.getAbsolute(); 255 256 // A - (-B) == A + B. 257 if (RightOperand.isNegative()) 258 return LeftOperand + RightOperand.getAbsolute(); 259 260 // Both values are positive at this point. 261 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 262 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 263 if (LeftValue >= RightValue) 264 return ExpressionValue(LeftValue - RightValue); 265 else { 266 uint64_t AbsoluteDifference = RightValue - LeftValue; 267 uint64_t MaxInt64 = std::numeric_limits<int64_t>::max(); 268 // Value might underflow. 
269 if (AbsoluteDifference > MaxInt64) { 270 AbsoluteDifference -= MaxInt64; 271 int64_t Result = -MaxInt64; 272 int64_t MinInt64 = std::numeric_limits<int64_t>::min(); 273 // Underflow, tested by: 274 // abs(Result + (max int64_t)) > abs((min int64_t) + (max int64_t)) 275 if (AbsoluteDifference > static_cast<uint64_t>(-(MinInt64 - Result))) 276 return make_error<OverflowError>(); 277 Result -= static_cast<int64_t>(AbsoluteDifference); 278 return ExpressionValue(Result); 279 } 280 281 return ExpressionValue(-static_cast<int64_t>(AbsoluteDifference)); 282 } 283 } 284 285 Expected<ExpressionValue> llvm::operator*(const ExpressionValue &LeftOperand, 286 const ExpressionValue &RightOperand) { 287 // -A * -B == A * B 288 if (LeftOperand.isNegative() && RightOperand.isNegative()) 289 return LeftOperand.getAbsolute() * RightOperand.getAbsolute(); 290 291 // A * -B == -B * A 292 if (RightOperand.isNegative()) 293 return RightOperand * LeftOperand; 294 295 assert(!RightOperand.isNegative() && "Unexpected negative operand!"); 296 297 // Result will be negative and can underflow. 298 if (LeftOperand.isNegative()) { 299 auto Result = LeftOperand.getAbsolute() * RightOperand.getAbsolute(); 300 if (!Result) 301 return Result; 302 303 return ExpressionValue(0) - *Result; 304 } 305 306 // Result will be positive and can overflow. 307 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 308 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 309 Optional<uint64_t> Result = 310 checkedMulUnsigned<uint64_t>(LeftValue, RightValue); 311 if (!Result) 312 return make_error<OverflowError>(); 313 314 return ExpressionValue(*Result); 315 } 316 317 Expected<ExpressionValue> llvm::operator/(const ExpressionValue &LeftOperand, 318 const ExpressionValue &RightOperand) { 319 // -A / -B == A / B 320 if (LeftOperand.isNegative() && RightOperand.isNegative()) 321 return LeftOperand.getAbsolute() / RightOperand.getAbsolute(); 322 323 // Check for divide by zero. 
324 if (RightOperand == ExpressionValue(0)) 325 return make_error<OverflowError>(); 326 327 // Result will be negative and can underflow. 328 if (LeftOperand.isNegative() || RightOperand.isNegative()) 329 return ExpressionValue(0) - 330 cantFail(LeftOperand.getAbsolute() / RightOperand.getAbsolute()); 331 332 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 333 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 334 return ExpressionValue(LeftValue / RightValue); 335 } 336 337 Expected<ExpressionValue> llvm::max(const ExpressionValue &LeftOperand, 338 const ExpressionValue &RightOperand) { 339 if (LeftOperand.isNegative() && RightOperand.isNegative()) { 340 int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); 341 int64_t RightValue = cantFail(RightOperand.getSignedValue()); 342 return ExpressionValue(std::max(LeftValue, RightValue)); 343 } 344 345 if (!LeftOperand.isNegative() && !RightOperand.isNegative()) { 346 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 347 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 348 return ExpressionValue(std::max(LeftValue, RightValue)); 349 } 350 351 if (LeftOperand.isNegative()) 352 return RightOperand; 353 354 return LeftOperand; 355 } 356 357 Expected<ExpressionValue> llvm::min(const ExpressionValue &LeftOperand, 358 const ExpressionValue &RightOperand) { 359 if (cantFail(max(LeftOperand, RightOperand)) == LeftOperand) 360 return RightOperand; 361 362 return LeftOperand; 363 } 364 365 Expected<ExpressionValue> NumericVariableUse::eval() const { 366 Optional<ExpressionValue> Value = Variable->getValue(); 367 if (Value) 368 return *Value; 369 370 return make_error<UndefVarError>(getExpressionStr()); 371 } 372 373 Expected<ExpressionValue> BinaryOperation::eval() const { 374 Expected<ExpressionValue> LeftOp = LeftOperand->eval(); 375 Expected<ExpressionValue> RightOp = RightOperand->eval(); 376 377 // Bubble up any error (e.g. 
undefined variables) in the recursive 378 // evaluation. 379 if (!LeftOp || !RightOp) { 380 Error Err = Error::success(); 381 if (!LeftOp) 382 Err = joinErrors(std::move(Err), LeftOp.takeError()); 383 if (!RightOp) 384 Err = joinErrors(std::move(Err), RightOp.takeError()); 385 return std::move(Err); 386 } 387 388 return EvalBinop(*LeftOp, *RightOp); 389 } 390 391 Expected<ExpressionFormat> 392 BinaryOperation::getImplicitFormat(const SourceMgr &SM) const { 393 Expected<ExpressionFormat> LeftFormat = LeftOperand->getImplicitFormat(SM); 394 Expected<ExpressionFormat> RightFormat = RightOperand->getImplicitFormat(SM); 395 if (!LeftFormat || !RightFormat) { 396 Error Err = Error::success(); 397 if (!LeftFormat) 398 Err = joinErrors(std::move(Err), LeftFormat.takeError()); 399 if (!RightFormat) 400 Err = joinErrors(std::move(Err), RightFormat.takeError()); 401 return std::move(Err); 402 } 403 404 if (*LeftFormat != ExpressionFormat::Kind::NoFormat && 405 *RightFormat != ExpressionFormat::Kind::NoFormat && 406 *LeftFormat != *RightFormat) 407 return ErrorDiagnostic::get( 408 SM, getExpressionStr(), 409 "implicit format conflict between '" + LeftOperand->getExpressionStr() + 410 "' (" + LeftFormat->toString() + ") and '" + 411 RightOperand->getExpressionStr() + "' (" + RightFormat->toString() + 412 "), need an explicit format specifier"); 413 414 return *LeftFormat != ExpressionFormat::Kind::NoFormat ? 
*LeftFormat 415 : *RightFormat; 416 } 417 418 Expected<std::string> NumericSubstitution::getResult() const { 419 assert(ExpressionPointer->getAST() != nullptr && 420 "Substituting empty expression"); 421 Expected<ExpressionValue> EvaluatedValue = 422 ExpressionPointer->getAST()->eval(); 423 if (!EvaluatedValue) 424 return EvaluatedValue.takeError(); 425 ExpressionFormat Format = ExpressionPointer->getFormat(); 426 return Format.getMatchingString(*EvaluatedValue); 427 } 428 429 Expected<std::string> StringSubstitution::getResult() const { 430 // Look up the value and escape it so that we can put it into the regex. 431 Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr); 432 if (!VarVal) 433 return VarVal.takeError(); 434 return Regex::escape(*VarVal); 435 } 436 437 bool Pattern::isValidVarNameStart(char C) { return C == '_' || isAlpha(C); } 438 439 Expected<Pattern::VariableProperties> 440 Pattern::parseVariable(StringRef &Str, const SourceMgr &SM) { 441 if (Str.empty()) 442 return ErrorDiagnostic::get(SM, Str, "empty variable name"); 443 444 size_t I = 0; 445 bool IsPseudo = Str[0] == '@'; 446 447 // Global vars start with '$'. 448 if (Str[0] == '$' || IsPseudo) 449 ++I; 450 451 if (!isValidVarNameStart(Str[I++])) 452 return ErrorDiagnostic::get(SM, Str, "invalid variable name"); 453 454 for (size_t E = Str.size(); I != E; ++I) 455 // Variable names are composed of alphanumeric characters and underscores. 456 if (Str[I] != '_' && !isAlnum(Str[I])) 457 break; 458 459 StringRef Name = Str.take_front(I); 460 Str = Str.substr(I); 461 return VariableProperties {Name, IsPseudo}; 462 } 463 464 // StringRef holding all characters considered as horizontal whitespaces by 465 // FileCheck input canonicalization. 466 constexpr StringLiteral SpaceChars = " \t"; 467 468 // Parsing helper function that strips the first character in S and returns it. 
469 static char popFront(StringRef &S) { 470 char C = S.front(); 471 S = S.drop_front(); 472 return C; 473 } 474 475 char OverflowError::ID = 0; 476 char UndefVarError::ID = 0; 477 char ErrorDiagnostic::ID = 0; 478 char NotFoundError::ID = 0; 479 480 Expected<NumericVariable *> Pattern::parseNumericVariableDefinition( 481 StringRef &Expr, FileCheckPatternContext *Context, 482 Optional<size_t> LineNumber, ExpressionFormat ImplicitFormat, 483 const SourceMgr &SM) { 484 Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM); 485 if (!ParseVarResult) 486 return ParseVarResult.takeError(); 487 StringRef Name = ParseVarResult->Name; 488 489 if (ParseVarResult->IsPseudo) 490 return ErrorDiagnostic::get( 491 SM, Name, "definition of pseudo numeric variable unsupported"); 492 493 // Detect collisions between string and numeric variables when the latter 494 // is created later than the former. 495 if (Context->DefinedVariableTable.find(Name) != 496 Context->DefinedVariableTable.end()) 497 return ErrorDiagnostic::get( 498 SM, Name, "string variable with name '" + Name + "' already exists"); 499 500 Expr = Expr.ltrim(SpaceChars); 501 if (!Expr.empty()) 502 return ErrorDiagnostic::get( 503 SM, Expr, "unexpected characters after numeric variable name"); 504 505 NumericVariable *DefinedNumericVariable; 506 auto VarTableIter = Context->GlobalNumericVariableTable.find(Name); 507 if (VarTableIter != Context->GlobalNumericVariableTable.end()) { 508 DefinedNumericVariable = VarTableIter->second; 509 if (DefinedNumericVariable->getImplicitFormat() != ImplicitFormat) 510 return ErrorDiagnostic::get( 511 SM, Expr, "format different from previous variable definition"); 512 } else 513 DefinedNumericVariable = 514 Context->makeNumericVariable(Name, ImplicitFormat, LineNumber); 515 516 return DefinedNumericVariable; 517 } 518 519 Expected<std::unique_ptr<NumericVariableUse>> Pattern::parseNumericVariableUse( 520 StringRef Name, bool IsPseudo, Optional<size_t> LineNumber, 521 
FileCheckPatternContext *Context, const SourceMgr &SM) { 522 if (IsPseudo && !Name.equals("@LINE")) 523 return ErrorDiagnostic::get( 524 SM, Name, "invalid pseudo numeric variable '" + Name + "'"); 525 526 // Numeric variable definitions and uses are parsed in the order in which 527 // they appear in the CHECK patterns. For each definition, the pointer to the 528 // class instance of the corresponding numeric variable definition is stored 529 // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer 530 // we get below is null, it means no such variable was defined before. When 531 // that happens, we create a dummy variable so that parsing can continue. All 532 // uses of undefined variables, whether string or numeric, are then diagnosed 533 // in printSubstitutions() after failing to match. 534 auto VarTableIter = Context->GlobalNumericVariableTable.find(Name); 535 NumericVariable *NumericVariable; 536 if (VarTableIter != Context->GlobalNumericVariableTable.end()) 537 NumericVariable = VarTableIter->second; 538 else { 539 NumericVariable = Context->makeNumericVariable( 540 Name, ExpressionFormat(ExpressionFormat::Kind::Unsigned)); 541 Context->GlobalNumericVariableTable[Name] = NumericVariable; 542 } 543 544 Optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber(); 545 if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber) 546 return ErrorDiagnostic::get( 547 SM, Name, 548 "numeric variable '" + Name + 549 "' defined earlier in the same CHECK directive"); 550 551 return std::make_unique<NumericVariableUse>(Name, NumericVariable); 552 } 553 554 Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand( 555 StringRef &Expr, AllowedOperand AO, bool MaybeInvalidConstraint, 556 Optional<size_t> LineNumber, FileCheckPatternContext *Context, 557 const SourceMgr &SM) { 558 if (Expr.startswith("(")) { 559 if (AO != AllowedOperand::Any) 560 return ErrorDiagnostic::get( 561 SM, Expr, "parenthesized expression not 
permitted here"); 562 return parseParenExpr(Expr, LineNumber, Context, SM); 563 } 564 565 if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) { 566 // Try to parse as a numeric variable use. 567 Expected<Pattern::VariableProperties> ParseVarResult = 568 parseVariable(Expr, SM); 569 if (ParseVarResult) { 570 // Try to parse a function call. 571 if (Expr.ltrim(SpaceChars).startswith("(")) { 572 if (AO != AllowedOperand::Any) 573 return ErrorDiagnostic::get(SM, ParseVarResult->Name, 574 "unexpected function call"); 575 576 return parseCallExpr(Expr, ParseVarResult->Name, LineNumber, Context, 577 SM); 578 } 579 580 return parseNumericVariableUse(ParseVarResult->Name, 581 ParseVarResult->IsPseudo, LineNumber, 582 Context, SM); 583 } 584 585 if (AO == AllowedOperand::LineVar) 586 return ParseVarResult.takeError(); 587 // Ignore the error and retry parsing as a literal. 588 consumeError(ParseVarResult.takeError()); 589 } 590 591 // Otherwise, parse it as a literal. 592 int64_t SignedLiteralValue; 593 uint64_t UnsignedLiteralValue; 594 StringRef SaveExpr = Expr; 595 // Accept both signed and unsigned literal, default to signed literal. 596 if (!Expr.consumeInteger((AO == AllowedOperand::LegacyLiteral) ? 10 : 0, 597 UnsignedLiteralValue)) 598 return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()), 599 UnsignedLiteralValue); 600 Expr = SaveExpr; 601 if (AO == AllowedOperand::Any && !Expr.consumeInteger(0, SignedLiteralValue)) 602 return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()), 603 SignedLiteralValue); 604 605 return ErrorDiagnostic::get( 606 SM, Expr, 607 Twine("invalid ") + 608 (MaybeInvalidConstraint ? 
"matching constraint or " : "") + 609 "operand format"); 610 } 611 612 Expected<std::unique_ptr<ExpressionAST>> 613 Pattern::parseParenExpr(StringRef &Expr, Optional<size_t> LineNumber, 614 FileCheckPatternContext *Context, const SourceMgr &SM) { 615 Expr = Expr.ltrim(SpaceChars); 616 assert(Expr.startswith("(")); 617 618 // Parse right operand. 619 Expr.consume_front("("); 620 Expr = Expr.ltrim(SpaceChars); 621 if (Expr.empty()) 622 return ErrorDiagnostic::get(SM, Expr, "missing operand in expression"); 623 624 // Note: parseNumericOperand handles nested opening parentheses. 625 Expected<std::unique_ptr<ExpressionAST>> SubExprResult = parseNumericOperand( 626 Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber, 627 Context, SM); 628 Expr = Expr.ltrim(SpaceChars); 629 while (SubExprResult && !Expr.empty() && !Expr.startswith(")")) { 630 StringRef OrigExpr = Expr; 631 SubExprResult = parseBinop(OrigExpr, Expr, std::move(*SubExprResult), false, 632 LineNumber, Context, SM); 633 Expr = Expr.ltrim(SpaceChars); 634 } 635 if (!SubExprResult) 636 return SubExprResult; 637 638 if (!Expr.consume_front(")")) { 639 return ErrorDiagnostic::get(SM, Expr, 640 "missing ')' at end of nested expression"); 641 } 642 return SubExprResult; 643 } 644 645 Expected<std::unique_ptr<ExpressionAST>> 646 Pattern::parseBinop(StringRef Expr, StringRef &RemainingExpr, 647 std::unique_ptr<ExpressionAST> LeftOp, 648 bool IsLegacyLineExpr, Optional<size_t> LineNumber, 649 FileCheckPatternContext *Context, const SourceMgr &SM) { 650 RemainingExpr = RemainingExpr.ltrim(SpaceChars); 651 if (RemainingExpr.empty()) 652 return std::move(LeftOp); 653 654 // Check if this is a supported operation and select a function to perform 655 // it. 
656 SMLoc OpLoc = SMLoc::getFromPointer(RemainingExpr.data()); 657 char Operator = popFront(RemainingExpr); 658 binop_eval_t EvalBinop; 659 switch (Operator) { 660 case '+': 661 EvalBinop = operator+; 662 break; 663 case '-': 664 EvalBinop = operator-; 665 break; 666 default: 667 return ErrorDiagnostic::get( 668 SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'"); 669 } 670 671 // Parse right operand. 672 RemainingExpr = RemainingExpr.ltrim(SpaceChars); 673 if (RemainingExpr.empty()) 674 return ErrorDiagnostic::get(SM, RemainingExpr, 675 "missing operand in expression"); 676 // The second operand in a legacy @LINE expression is always a literal. 677 AllowedOperand AO = 678 IsLegacyLineExpr ? AllowedOperand::LegacyLiteral : AllowedOperand::Any; 679 Expected<std::unique_ptr<ExpressionAST>> RightOpResult = 680 parseNumericOperand(RemainingExpr, AO, /*MaybeInvalidConstraint=*/false, 681 LineNumber, Context, SM); 682 if (!RightOpResult) 683 return RightOpResult; 684 685 Expr = Expr.drop_back(RemainingExpr.size()); 686 return std::make_unique<BinaryOperation>(Expr, EvalBinop, std::move(LeftOp), 687 std::move(*RightOpResult)); 688 } 689 690 Expected<std::unique_ptr<ExpressionAST>> 691 Pattern::parseCallExpr(StringRef &Expr, StringRef FuncName, 692 Optional<size_t> LineNumber, 693 FileCheckPatternContext *Context, const SourceMgr &SM) { 694 Expr = Expr.ltrim(SpaceChars); 695 assert(Expr.startswith("(")); 696 697 auto OptFunc = StringSwitch<Optional<binop_eval_t>>(FuncName) 698 .Case("add", operator+) 699 .Case("div", operator/) 700 .Case("max", max) 701 .Case("min", min) 702 .Case("mul", operator*) 703 .Case("sub", operator-) 704 .Default(None); 705 706 if (!OptFunc) 707 return ErrorDiagnostic::get( 708 SM, FuncName, Twine("call to undefined function '") + FuncName + "'"); 709 710 Expr.consume_front("("); 711 Expr = Expr.ltrim(SpaceChars); 712 713 // Parse call arguments, which are comma separated. 
714 SmallVector<std::unique_ptr<ExpressionAST>, 4> Args; 715 while (!Expr.empty() && !Expr.startswith(")")) { 716 if (Expr.startswith(",")) 717 return ErrorDiagnostic::get(SM, Expr, "missing argument"); 718 719 // Parse the argument, which is an arbitary expression. 720 StringRef OuterBinOpExpr = Expr; 721 Expected<std::unique_ptr<ExpressionAST>> Arg = parseNumericOperand( 722 Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber, 723 Context, SM); 724 while (Arg && !Expr.empty()) { 725 Expr = Expr.ltrim(SpaceChars); 726 // Have we reached an argument terminator? 727 if (Expr.startswith(",") || Expr.startswith(")")) 728 break; 729 730 // Arg = Arg <op> <expr> 731 Arg = parseBinop(OuterBinOpExpr, Expr, std::move(*Arg), false, LineNumber, 732 Context, SM); 733 } 734 735 // Prefer an expression error over a generic invalid argument message. 736 if (!Arg) 737 return Arg.takeError(); 738 Args.push_back(std::move(*Arg)); 739 740 // Have we parsed all available arguments? 741 Expr = Expr.ltrim(SpaceChars); 742 if (!Expr.consume_front(",")) 743 break; 744 745 Expr = Expr.ltrim(SpaceChars); 746 if (Expr.startswith(")")) 747 return ErrorDiagnostic::get(SM, Expr, "missing argument"); 748 } 749 750 if (!Expr.consume_front(")")) 751 return ErrorDiagnostic::get(SM, Expr, 752 "missing ')' at end of call expression"); 753 754 const unsigned NumArgs = Args.size(); 755 if (NumArgs == 2) 756 return std::make_unique<BinaryOperation>(Expr, *OptFunc, std::move(Args[0]), 757 std::move(Args[1])); 758 759 // TODO: Support more than binop_eval_t. 
760 return ErrorDiagnostic::get(SM, FuncName, 761 Twine("function '") + FuncName + 762 Twine("' takes 2 arguments but ") + 763 Twine(NumArgs) + " given"); 764 } 765 766 Expected<std::unique_ptr<Expression>> Pattern::parseNumericSubstitutionBlock( 767 StringRef Expr, Optional<NumericVariable *> &DefinedNumericVariable, 768 bool IsLegacyLineExpr, Optional<size_t> LineNumber, 769 FileCheckPatternContext *Context, const SourceMgr &SM) { 770 std::unique_ptr<ExpressionAST> ExpressionASTPointer = nullptr; 771 StringRef DefExpr = StringRef(); 772 DefinedNumericVariable = None; 773 ExpressionFormat ExplicitFormat = ExpressionFormat(); 774 unsigned Precision = 0; 775 776 // Parse format specifier (NOTE: ',' is also an argument seperator). 777 size_t FormatSpecEnd = Expr.find(','); 778 size_t FunctionStart = Expr.find('('); 779 if (FormatSpecEnd != StringRef::npos && FormatSpecEnd < FunctionStart) { 780 StringRef FormatExpr = Expr.take_front(FormatSpecEnd); 781 Expr = Expr.drop_front(FormatSpecEnd + 1); 782 FormatExpr = FormatExpr.trim(SpaceChars); 783 if (!FormatExpr.consume_front("%")) 784 return ErrorDiagnostic::get( 785 SM, FormatExpr, 786 "invalid matching format specification in expression"); 787 788 // Parse alternate form flag. 789 SMLoc AlternateFormFlagLoc = SMLoc::getFromPointer(FormatExpr.data()); 790 bool AlternateForm = FormatExpr.consume_front("#"); 791 792 // Parse precision. 793 if (FormatExpr.consume_front(".")) { 794 if (FormatExpr.consumeInteger(10, Precision)) 795 return ErrorDiagnostic::get(SM, FormatExpr, 796 "invalid precision in format specifier"); 797 } 798 799 if (!FormatExpr.empty()) { 800 // Check for unknown matching format specifier and set matching format in 801 // class instance representing this expression. 
802 SMLoc FmtLoc = SMLoc::getFromPointer(FormatExpr.data()); 803 switch (popFront(FormatExpr)) { 804 case 'u': 805 ExplicitFormat = 806 ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision); 807 break; 808 case 'd': 809 ExplicitFormat = 810 ExpressionFormat(ExpressionFormat::Kind::Signed, Precision); 811 break; 812 case 'x': 813 ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexLower, 814 Precision, AlternateForm); 815 break; 816 case 'X': 817 ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexUpper, 818 Precision, AlternateForm); 819 break; 820 default: 821 return ErrorDiagnostic::get(SM, FmtLoc, 822 "invalid format specifier in expression"); 823 } 824 } 825 826 if (AlternateForm && ExplicitFormat != ExpressionFormat::Kind::HexLower && 827 ExplicitFormat != ExpressionFormat::Kind::HexUpper) 828 return ErrorDiagnostic::get( 829 SM, AlternateFormFlagLoc, 830 "alternate form only supported for hex values"); 831 832 FormatExpr = FormatExpr.ltrim(SpaceChars); 833 if (!FormatExpr.empty()) 834 return ErrorDiagnostic::get( 835 SM, FormatExpr, 836 "invalid matching format specification in expression"); 837 } 838 839 // Save variable definition expression if any. 840 size_t DefEnd = Expr.find(':'); 841 if (DefEnd != StringRef::npos) { 842 DefExpr = Expr.substr(0, DefEnd); 843 Expr = Expr.substr(DefEnd + 1); 844 } 845 846 // Parse matching constraint. 847 Expr = Expr.ltrim(SpaceChars); 848 bool HasParsedValidConstraint = false; 849 if (Expr.consume_front("==")) 850 HasParsedValidConstraint = true; 851 852 // Parse the expression itself. 853 Expr = Expr.ltrim(SpaceChars); 854 if (Expr.empty()) { 855 if (HasParsedValidConstraint) 856 return ErrorDiagnostic::get( 857 SM, Expr, "empty numeric expression should not have a constraint"); 858 } else { 859 Expr = Expr.rtrim(SpaceChars); 860 StringRef OuterBinOpExpr = Expr; 861 // The first operand in a legacy @LINE expression is always the @LINE 862 // pseudo variable. 
863 AllowedOperand AO = 864 IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any; 865 Expected<std::unique_ptr<ExpressionAST>> ParseResult = parseNumericOperand( 866 Expr, AO, !HasParsedValidConstraint, LineNumber, Context, SM); 867 while (ParseResult && !Expr.empty()) { 868 ParseResult = parseBinop(OuterBinOpExpr, Expr, std::move(*ParseResult), 869 IsLegacyLineExpr, LineNumber, Context, SM); 870 // Legacy @LINE expressions only allow 2 operands. 871 if (ParseResult && IsLegacyLineExpr && !Expr.empty()) 872 return ErrorDiagnostic::get( 873 SM, Expr, 874 "unexpected characters at end of expression '" + Expr + "'"); 875 } 876 if (!ParseResult) 877 return ParseResult.takeError(); 878 ExpressionASTPointer = std::move(*ParseResult); 879 } 880 881 // Select format of the expression, i.e. (i) its explicit format, if any, 882 // otherwise (ii) its implicit format, if any, otherwise (iii) the default 883 // format (unsigned). Error out in case of conflicting implicit format 884 // without explicit format. 885 ExpressionFormat Format; 886 if (ExplicitFormat) 887 Format = ExplicitFormat; 888 else if (ExpressionASTPointer) { 889 Expected<ExpressionFormat> ImplicitFormat = 890 ExpressionASTPointer->getImplicitFormat(SM); 891 if (!ImplicitFormat) 892 return ImplicitFormat.takeError(); 893 Format = *ImplicitFormat; 894 } 895 if (!Format) 896 Format = ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision); 897 898 std::unique_ptr<Expression> ExpressionPointer = 899 std::make_unique<Expression>(std::move(ExpressionASTPointer), Format); 900 901 // Parse the numeric variable definition. 
/// Parses the text \p PatternStr of a check directive introduced by \p Prefix
/// and records how it must be matched: either as a fixed string (FixedStr) or
/// as a regular expression (RegExStr) together with the variable definitions
/// (VariableDefs, NumericVariableDefs) and the substitutions (Substitutions)
/// found in it. \p Req supplies the whitespace, full-line and case options.
/// \returns true after a diagnostic has been emitted if the pattern is
/// invalid, false on success.
bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
                           SourceMgr &SM, const FileCheckRequest &Req) {
  // Full-line matching is never applied to CHECK-NOT patterns.
  bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
  IgnoreCase = Req.IgnoreCase;

  PatternLoc = SMLoc::getFromPointer(PatternStr.data());

  // Trailing whitespace is kept only when both strict whitespace and full-line
  // matching were requested.
  if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
    // Ignore trailing whitespace.
    while (!PatternStr.empty() &&
           (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
      PatternStr = PatternStr.substr(0, PatternStr.size() - 1);

  // Check that there is something on the line.
  if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
    SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
                    "found empty check string with prefix '" + Prefix + ":'");
    return true;
  }

  // Conversely, a CHECK-EMPTY directive must not carry any pattern text.
  if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
    SM.PrintMessage(
        PatternLoc, SourceMgr::DK_Error,
        "found non-empty check string for empty check with prefix '" + Prefix +
            ":'");
    return true;
  }

  // CHECK-EMPTY is matched with a regex that consumes the preceding newline;
  // match() compensates for that extra character.
  if (CheckTy == Check::CheckEmpty) {
    RegExStr = "(\n$)";
    return false;
  }

  // If literal check, set fixed string.
  if (CheckTy.isLiteralMatch()) {
    FixedStr = PatternStr;
    return false;
  }

  // Check to see if this is a fixed string, or if it has regex pieces.
  if (!MatchFullLinesHere &&
      (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
                                 PatternStr.find("[[") == StringRef::npos))) {
    FixedStr = PatternStr;
    return false;
  }

  // Anchor the regex at the start of the line, tolerating leading spaces
  // unless whitespace canonicalization is disabled.
  if (MatchFullLinesHere) {
    RegExStr += '^';
    if (!Req.NoCanonicalizeWhiteSpace)
      RegExStr += " *";
  }

  // Paren value #0 is for the fully matched string.  Any new parenthesized
  // values add from there.
  unsigned CurParen = 1;

  // Otherwise, there is at least one regex piece.  Build up the regex pattern
  // by escaping scary characters in fixed strings, building up one big regex.
  while (!PatternStr.empty()) {
    // RegEx matches.
    if (PatternStr.startswith("{{")) {
      // This is the start of a regex match.  Scan for the }}.
      size_t End = PatternStr.find("}}");
      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "found start of regex string with no end '}}'");
        return true;
      }

      // Enclose {{}} patterns in parens just like [[]] even though we're not
      // capturing the result for any purpose.  This is required in case the
      // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
      // want this to turn into: "abc(x|z)def" not "abcx|zdef".
      RegExStr += '(';
      ++CurParen;

      // AddRegExToRegEx() validates the regex text, appends it and advances
      // CurParen by the number of groups it contains.
      if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
        return true;
      RegExStr += ')';

      PatternStr = PatternStr.substr(End + 2);
      continue;
    }

    // String and numeric substitution blocks. Pattern substitution blocks come
    // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some
    // other regex) and assigns it to the string variable 'foo'. The latter
    // substitutes foo's value. Numeric substitution blocks recognize the same
    // form as string ones, but start with a '#' sign after the double
    // brackets. They also accept a combined form which sets a numeric variable
    // to the evaluation of an expression. Both string and numeric variable
    // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be
    // valid, as this helps catch some common errors.
    if (PatternStr.startswith("[[")) {
      StringRef UnparsedPatternStr = PatternStr.substr(2);
      // Find the closing bracket pair ending the match.  End is going to be an
      // offset relative to the beginning of the match string.
      size_t End = FindRegexVarEnd(UnparsedPatternStr, SM);
      StringRef MatchStr = UnparsedPatternStr.substr(0, End);
      bool IsNumBlock = MatchStr.consume_front("#");

      // MatchStr and IsNumBlock computed above are not used on this error
      // path.
      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "Invalid substitution block, no ]] found");
        return true;
      }
      // Strip the substitution block we are parsing. End points to the start
      // of the "]]" closing the expression so account for it in computing the
      // index of the first unparsed character.
      PatternStr = UnparsedPatternStr.substr(End + 2);

      bool IsDefinition = false;
      bool SubstNeeded = false;
      // Whether the substitution block is a legacy use of @LINE with string
      // substitution block syntax.
      bool IsLegacyLineExpr = false;
      StringRef DefName;
      StringRef SubstStr;
      std::string MatchRegexp;
      // Offset in RegExStr at which a substituted value will be inserted by
      // match(); bumped below when an opening paren is emitted first.
      size_t SubstInsertIdx = RegExStr.size();

      // Parse string variable or legacy @LINE expression.
      if (!IsNumBlock) {
        // Whitespace is rejected only in the part before the first ':' (or in
        // the whole block when there is no ':').
        size_t VarEndIdx = MatchStr.find(':');
        size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t");
        if (SpacePos != StringRef::npos) {
          SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos),
                          SourceMgr::DK_Error, "unexpected whitespace");
          return true;
        }

        // Get the name (e.g. "foo") and verify it is well formed.
        // OrigMatchStr keeps the text as it was before parseVariable() so the
        // legacy @LINE path below can hand it, unmodified, to the numeric
        // parser.
        StringRef OrigMatchStr = MatchStr;
        Expected<Pattern::VariableProperties> ParseVarResult =
            parseVariable(MatchStr, SM);
        if (!ParseVarResult) {
          logAllUnhandledErrors(ParseVarResult.takeError(), errs());
          return true;
        }
        StringRef Name = ParseVarResult->Name;
        bool IsPseudo = ParseVarResult->IsPseudo;

        // A ':' anywhere in the block makes it a definition; otherwise it is
        // a use that needs a substitution.
        IsDefinition = (VarEndIdx != StringRef::npos);
        SubstNeeded = !IsDefinition;
        if (IsDefinition) {
          // Pseudo variables cannot be defined, and the name must be
          // immediately followed by the ':'.
          if ((IsPseudo || !MatchStr.consume_front(":"))) {
            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                            SourceMgr::DK_Error,
                            "invalid name in string variable definition");
            return true;
          }

          // Detect collisions between string and numeric variables when the
          // former is created later than the latter.
          if (Context->GlobalNumericVariableTable.find(Name) !=
              Context->GlobalNumericVariableTable.end()) {
            SM.PrintMessage(
                SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
                "numeric variable with name '" + Name + "' already exists");
            return true;
          }
          DefName = Name;
          // What remains after the ':' is the regex the variable must match.
          MatchRegexp = MatchStr.str();
        } else {
          if (IsPseudo) {
            // Legacy [[@LINE...]] syntax: reparse the whole block as a
            // numeric expression below.
            MatchStr = OrigMatchStr;
            IsLegacyLineExpr = IsNumBlock = true;
          } else
            SubstStr = Name;
        }
      }

      // Parse numeric substitution block.
      std::unique_ptr<Expression> ExpressionPointer;
      Optional<NumericVariable *> DefinedNumericVariable;
      if (IsNumBlock) {
        Expected<std::unique_ptr<Expression>> ParseResult =
            parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable,
                                          IsLegacyLineExpr, LineNumber, Context,
                                          SM);
        if (!ParseResult) {
          logAllUnhandledErrors(ParseResult.takeError(), errs());
          return true;
        }
        ExpressionPointer = std::move(*ParseResult);
        // Only a block that carries an expression AST needs a substitution;
        // one without is matched with the wildcard regex of its format.
        SubstNeeded = ExpressionPointer->getAST() != nullptr;
        if (DefinedNumericVariable) {
          IsDefinition = true;
          DefName = (*DefinedNumericVariable)->getName();
        }
        if (SubstNeeded)
          SubstStr = MatchStr;
        else {
          ExpressionFormat Format = ExpressionPointer->getFormat();
          MatchRegexp = cantFail(Format.getWildcardRegex());
        }
      }

      // Handle variable definition: [[<def>:(...)]] and [[#(...)<def>:(...)]].
      if (IsDefinition) {
        RegExStr += '(';
        // The value, if any, must be inserted after the paren just added.
        ++SubstInsertIdx;

        if (IsNumBlock) {
          NumericVariableMatch NumericVariableDefinition = {
              *DefinedNumericVariable, CurParen};
          NumericVariableDefs[DefName] = NumericVariableDefinition;
          // This store is done here rather than in match() to allow
          // parseNumericVariableUse() to get the pointer to the class instance
          // of the right variable definition corresponding to a given numeric
          // variable use.
          Context->GlobalNumericVariableTable[DefName] =
              *DefinedNumericVariable;
        } else {
          VariableDefs[DefName] = CurParen;
          // Mark string variable as defined to detect collisions between
          // string and numeric variables in parseNumericVariableUse() and
          // defineCmdlineVariables() when the latter is created later than the
          // former. We cannot reuse GlobalVariableTable for this by populating
          // it with an empty string since we would then lose the ability to
          // detect the use of an undefined variable in match().
          Context->DefinedVariableTable[DefName] = true;
        }

        ++CurParen;
      }

      // Emit the regex to match, if any; AddRegExToRegEx() also accounts for
      // the capture groups it contains.
      if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM))
        return true;

      if (IsDefinition)
        RegExStr += ')';

      // Handle substitutions: [[foo]] and [[#<foo expr>]].
      if (SubstNeeded) {
        // Handle substitution of string variables that were defined earlier on
        // the same line by emitting a backreference. Expressions do not
        // support substituting a numeric variable defined on the same line.
        if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) {
          unsigned CaptureParenGroup = VariableDefs[SubstStr];
          // Backreferences are emitted as a backslash followed by one digit.
          if (CaptureParenGroup < 1 || CaptureParenGroup > 9) {
            SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()),
                            SourceMgr::DK_Error,
                            "Can't back-reference more than 9 variables");
            return true;
          }
          AddBackrefToRegEx(CaptureParenGroup);
        } else {
          // Handle substitution of string variables ([[<var>]]) defined in
          // previous CHECK patterns, and substitution of expressions.
          Substitution *Substitution =
              IsNumBlock
                  ? Context->makeNumericSubstitution(
                        SubstStr, std::move(ExpressionPointer), SubstInsertIdx)
                  : Context->makeStringSubstitution(SubstStr, SubstInsertIdx);
          Substitutions.push_back(Substitution);
        }
      }
    }

    // Handle fixed string matches.
    // Find the end, which is the start of the next regex.
    size_t FixedMatchEnd = PatternStr.find("{{");
    FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
    RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
    PatternStr = PatternStr.substr(FixedMatchEnd);
  }

  // Anchor the regex at the end of the line, tolerating trailing spaces
  // unless whitespace canonicalization is disabled.
  if (MatchFullLinesHere) {
    if (!Req.NoCanonicalizeWhiteSpace)
      RegExStr += " *";
    RegExStr += '$';
  }

  return false;
}
1180 size_t FixedMatchEnd = PatternStr.find("{{"); 1181 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 1182 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 1183 PatternStr = PatternStr.substr(FixedMatchEnd); 1184 } 1185 1186 if (MatchFullLinesHere) { 1187 if (!Req.NoCanonicalizeWhiteSpace) 1188 RegExStr += " *"; 1189 RegExStr += '$'; 1190 } 1191 1192 return false; 1193 } 1194 1195 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 1196 Regex R(RS); 1197 std::string Error; 1198 if (!R.isValid(Error)) { 1199 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 1200 "invalid regex: " + Error); 1201 return true; 1202 } 1203 1204 RegExStr += RS.str(); 1205 CurParen += R.getNumMatches(); 1206 return false; 1207 } 1208 1209 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 1210 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 1211 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 1212 RegExStr += Backref; 1213 } 1214 1215 Expected<size_t> Pattern::match(StringRef Buffer, size_t &MatchLen, 1216 const SourceMgr &SM) const { 1217 // If this is the EOF pattern, match it immediately. 1218 if (CheckTy == Check::CheckEOF) { 1219 MatchLen = 0; 1220 return Buffer.size(); 1221 } 1222 1223 // If this is a fixed string pattern, just match it now. 1224 if (!FixedStr.empty()) { 1225 MatchLen = FixedStr.size(); 1226 size_t Pos = 1227 IgnoreCase ? Buffer.find_lower(FixedStr) : Buffer.find(FixedStr); 1228 if (Pos == StringRef::npos) 1229 return make_error<NotFoundError>(); 1230 return Pos; 1231 } 1232 1233 // Regex match. 1234 1235 // If there are substitutions, we need to create a temporary string with the 1236 // actual value. 
1237 StringRef RegExToMatch = RegExStr; 1238 std::string TmpStr; 1239 if (!Substitutions.empty()) { 1240 TmpStr = RegExStr; 1241 if (LineNumber) 1242 Context->LineVariable->setValue(ExpressionValue(*LineNumber)); 1243 1244 size_t InsertOffset = 0; 1245 // Substitute all string variables and expressions whose values are only 1246 // now known. Use of string variables defined on the same line are handled 1247 // by back-references. 1248 for (const auto &Substitution : Substitutions) { 1249 // Substitute and check for failure (e.g. use of undefined variable). 1250 Expected<std::string> Value = Substitution->getResult(); 1251 if (!Value) { 1252 // Convert to an ErrorDiagnostic to get location information. This is 1253 // done here rather than PrintNoMatch since now we know which 1254 // substitution block caused the overflow. 1255 Error Err = 1256 handleErrors(Value.takeError(), [&](const OverflowError &E) { 1257 return ErrorDiagnostic::get(SM, Substitution->getFromString(), 1258 "unable to substitute variable or " 1259 "numeric expression: overflow error"); 1260 }); 1261 return std::move(Err); 1262 } 1263 1264 // Plop it into the regex at the adjusted offset. 1265 TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset, 1266 Value->begin(), Value->end()); 1267 InsertOffset += Value->size(); 1268 } 1269 1270 // Match the newly constructed regex. 1271 RegExToMatch = TmpStr; 1272 } 1273 1274 SmallVector<StringRef, 4> MatchInfo; 1275 unsigned int Flags = Regex::Newline; 1276 if (IgnoreCase) 1277 Flags |= Regex::IgnoreCase; 1278 if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo)) 1279 return make_error<NotFoundError>(); 1280 1281 // Successful regex match. 1282 assert(!MatchInfo.empty() && "Didn't get any match"); 1283 StringRef FullMatch = MatchInfo[0]; 1284 1285 // If this defines any string variables, remember their values. 
1286 for (const auto &VariableDef : VariableDefs) { 1287 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 1288 Context->GlobalVariableTable[VariableDef.first] = 1289 MatchInfo[VariableDef.second]; 1290 } 1291 1292 // If this defines any numeric variables, remember their values. 1293 for (const auto &NumericVariableDef : NumericVariableDefs) { 1294 const NumericVariableMatch &NumericVariableMatch = 1295 NumericVariableDef.getValue(); 1296 unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup; 1297 assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error"); 1298 NumericVariable *DefinedNumericVariable = 1299 NumericVariableMatch.DefinedNumericVariable; 1300 1301 StringRef MatchedValue = MatchInfo[CaptureParenGroup]; 1302 ExpressionFormat Format = DefinedNumericVariable->getImplicitFormat(); 1303 Expected<ExpressionValue> Value = 1304 Format.valueFromStringRepr(MatchedValue, SM); 1305 if (!Value) 1306 return Value.takeError(); 1307 DefinedNumericVariable->setValue(*Value, MatchedValue); 1308 } 1309 1310 // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after 1311 // the required preceding newline, which is consumed by the pattern in the 1312 // case of CHECK-EMPTY but not CHECK-NEXT. 1313 size_t MatchStartSkip = CheckTy == Check::CheckEmpty; 1314 MatchLen = FullMatch.size() - MatchStartSkip; 1315 return FullMatch.data() - Buffer.data() + MatchStartSkip; 1316 } 1317 1318 unsigned Pattern::computeMatchDistance(StringRef Buffer) const { 1319 // Just compute the number of matching characters. For regular expressions, we 1320 // just compare against the regex itself and hope for the best. 1321 // 1322 // FIXME: One easy improvement here is have the regex lib generate a single 1323 // example regular expression which matches, and use that as the example 1324 // string. 
1325 StringRef ExampleString(FixedStr); 1326 if (ExampleString.empty()) 1327 ExampleString = RegExStr; 1328 1329 // Only compare up to the first line in the buffer, or the string size. 1330 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 1331 BufferPrefix = BufferPrefix.split('\n').first; 1332 return BufferPrefix.edit_distance(ExampleString); 1333 } 1334 1335 void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, 1336 SMRange Range, 1337 FileCheckDiag::MatchType MatchTy, 1338 std::vector<FileCheckDiag> *Diags) const { 1339 // Print what we know about substitutions. 1340 if (!Substitutions.empty()) { 1341 for (const auto &Substitution : Substitutions) { 1342 SmallString<256> Msg; 1343 raw_svector_ostream OS(Msg); 1344 Expected<std::string> MatchedValue = Substitution->getResult(); 1345 1346 // Substitution failed or is not known at match time, print the undefined 1347 // variables it uses. 1348 if (!MatchedValue) { 1349 bool UndefSeen = false; 1350 handleAllErrors( 1351 MatchedValue.takeError(), [](const NotFoundError &E) {}, 1352 // Handled in PrintNoMatch(). 1353 [](const ErrorDiagnostic &E) {}, 1354 // Handled in match(). 1355 [](const OverflowError &E) {}, 1356 [&](const UndefVarError &E) { 1357 if (!UndefSeen) { 1358 OS << "uses undefined variable(s):"; 1359 UndefSeen = true; 1360 } 1361 OS << " "; 1362 E.log(OS); 1363 }); 1364 } else { 1365 // Substitution succeeded. Print substituted value. 1366 OS << "with \""; 1367 OS.write_escaped(Substitution->getFromString()) << "\" equal to \""; 1368 OS.write_escaped(*MatchedValue) << "\""; 1369 } 1370 1371 // We report only the start of the match/search range to suggest we are 1372 // reporting the substitutions as set at the start of the match/search. 1373 // Indicating a non-zero-length range might instead seem to imply that the 1374 // substitution matches or was captured from exactly that range. 
1375 if (Diags) 1376 Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, 1377 SMRange(Range.Start, Range.Start), OS.str()); 1378 else 1379 SM.PrintMessage(Range.Start, SourceMgr::DK_Note, OS.str()); 1380 } 1381 } 1382 } 1383 1384 void Pattern::printVariableDefs(const SourceMgr &SM, 1385 FileCheckDiag::MatchType MatchTy, 1386 std::vector<FileCheckDiag> *Diags) const { 1387 if (VariableDefs.empty() && NumericVariableDefs.empty()) 1388 return; 1389 // Build list of variable captures. 1390 struct VarCapture { 1391 StringRef Name; 1392 SMRange Range; 1393 }; 1394 SmallVector<VarCapture, 2> VarCaptures; 1395 for (const auto &VariableDef : VariableDefs) { 1396 VarCapture VC; 1397 VC.Name = VariableDef.first; 1398 StringRef Value = Context->GlobalVariableTable[VC.Name]; 1399 SMLoc Start = SMLoc::getFromPointer(Value.data()); 1400 SMLoc End = SMLoc::getFromPointer(Value.data() + Value.size()); 1401 VC.Range = SMRange(Start, End); 1402 VarCaptures.push_back(VC); 1403 } 1404 for (const auto &VariableDef : NumericVariableDefs) { 1405 VarCapture VC; 1406 VC.Name = VariableDef.getKey(); 1407 StringRef StrValue = VariableDef.getValue() 1408 .DefinedNumericVariable->getStringValue() 1409 .getValue(); 1410 SMLoc Start = SMLoc::getFromPointer(StrValue.data()); 1411 SMLoc End = SMLoc::getFromPointer(StrValue.data() + StrValue.size()); 1412 VC.Range = SMRange(Start, End); 1413 VarCaptures.push_back(VC); 1414 } 1415 // Sort variable captures by the order in which they matched the input. 1416 // Ranges shouldn't be overlapping, so we can just compare the start. 1417 llvm::sort(VarCaptures, [](const VarCapture &A, const VarCapture &B) { 1418 assert(A.Range.Start != B.Range.Start && 1419 "unexpected overlapping variable captures"); 1420 return A.Range.Start.getPointer() < B.Range.Start.getPointer(); 1421 }); 1422 // Create notes for the sorted captures. 
1423 for (const VarCapture &VC : VarCaptures) { 1424 SmallString<256> Msg; 1425 raw_svector_ostream OS(Msg); 1426 OS << "captured var \"" << VC.Name << "\""; 1427 if (Diags) 1428 Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, VC.Range, OS.str()); 1429 else 1430 SM.PrintMessage(VC.Range.Start, SourceMgr::DK_Note, OS.str(), VC.Range); 1431 } 1432 } 1433 1434 static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy, 1435 const SourceMgr &SM, SMLoc Loc, 1436 Check::FileCheckType CheckTy, 1437 StringRef Buffer, size_t Pos, size_t Len, 1438 std::vector<FileCheckDiag> *Diags, 1439 bool AdjustPrevDiags = false) { 1440 SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos); 1441 SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len); 1442 SMRange Range(Start, End); 1443 if (Diags) { 1444 if (AdjustPrevDiags) { 1445 SMLoc CheckLoc = Diags->rbegin()->CheckLoc; 1446 for (auto I = Diags->rbegin(), E = Diags->rend(); 1447 I != E && I->CheckLoc == CheckLoc; ++I) 1448 I->MatchTy = MatchTy; 1449 } else 1450 Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range); 1451 } 1452 return Range; 1453 } 1454 1455 void Pattern::printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, 1456 std::vector<FileCheckDiag> *Diags) const { 1457 // Attempt to find the closest/best fuzzy match. Usually an error happens 1458 // because some string in the output didn't exactly match. In these cases, we 1459 // would like to show the user a best guess at what "should have" matched, to 1460 // save them having to actually check the input manually. 1461 size_t NumLinesForward = 0; 1462 size_t Best = StringRef::npos; 1463 double BestQuality = 0; 1464 1465 // Use an arbitrary 4k limit on how far we will search. 1466 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 1467 if (Buffer[i] == '\n') 1468 ++NumLinesForward; 1469 1470 // Patterns have leading whitespace stripped, so skip whitespace when 1471 // looking for something which looks like a pattern. 
1472 if (Buffer[i] == ' ' || Buffer[i] == '\t') 1473 continue; 1474 1475 // Compute the "quality" of this match as an arbitrary combination of the 1476 // match distance and the number of lines skipped to get to this match. 1477 unsigned Distance = computeMatchDistance(Buffer.substr(i)); 1478 double Quality = Distance + (NumLinesForward / 100.); 1479 1480 if (Quality < BestQuality || Best == StringRef::npos) { 1481 Best = i; 1482 BestQuality = Quality; 1483 } 1484 } 1485 1486 // Print the "possible intended match here" line if we found something 1487 // reasonable and not equal to what we showed in the "scanning from here" 1488 // line. 1489 if (Best && Best != StringRef::npos && BestQuality < 50) { 1490 SMRange MatchRange = 1491 ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(), 1492 getCheckTy(), Buffer, Best, 0, Diags); 1493 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, 1494 "possible intended match here"); 1495 1496 // FIXME: If we wanted to be really friendly we would show why the match 1497 // failed, as it can be hard to spot simple one character differences. 1498 } 1499 } 1500 1501 Expected<StringRef> 1502 FileCheckPatternContext::getPatternVarValue(StringRef VarName) { 1503 auto VarIter = GlobalVariableTable.find(VarName); 1504 if (VarIter == GlobalVariableTable.end()) 1505 return make_error<UndefVarError>(VarName); 1506 1507 return VarIter->second; 1508 } 1509 1510 template <class... Types> 1511 NumericVariable *FileCheckPatternContext::makeNumericVariable(Types... 
args) { 1512 NumericVariables.push_back(std::make_unique<NumericVariable>(args...)); 1513 return NumericVariables.back().get(); 1514 } 1515 1516 Substitution * 1517 FileCheckPatternContext::makeStringSubstitution(StringRef VarName, 1518 size_t InsertIdx) { 1519 Substitutions.push_back( 1520 std::make_unique<StringSubstitution>(this, VarName, InsertIdx)); 1521 return Substitutions.back().get(); 1522 } 1523 1524 Substitution *FileCheckPatternContext::makeNumericSubstitution( 1525 StringRef ExpressionStr, std::unique_ptr<Expression> Expression, 1526 size_t InsertIdx) { 1527 Substitutions.push_back(std::make_unique<NumericSubstitution>( 1528 this, ExpressionStr, std::move(Expression), InsertIdx)); 1529 return Substitutions.back().get(); 1530 } 1531 1532 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 1533 // Offset keeps track of the current offset within the input Str 1534 size_t Offset = 0; 1535 // [...] Nesting depth 1536 size_t BracketDepth = 0; 1537 1538 while (!Str.empty()) { 1539 if (Str.startswith("]]") && BracketDepth == 0) 1540 return Offset; 1541 if (Str[0] == '\\') { 1542 // Backslash escapes the next char within regexes, so skip them both. 1543 Str = Str.substr(2); 1544 Offset += 2; 1545 } else { 1546 switch (Str[0]) { 1547 default: 1548 break; 1549 case '[': 1550 BracketDepth++; 1551 break; 1552 case ']': 1553 if (BracketDepth == 0) { 1554 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 1555 SourceMgr::DK_Error, 1556 "missing closing \"]\" for regex variable"); 1557 exit(1); 1558 } 1559 BracketDepth--; 1560 break; 1561 } 1562 Str = Str.substr(1); 1563 Offset++; 1564 } 1565 } 1566 1567 return StringRef::npos; 1568 } 1569 1570 StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB, 1571 SmallVectorImpl<char> &OutputBuffer) { 1572 OutputBuffer.reserve(MB.getBufferSize()); 1573 1574 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 1575 Ptr != End; ++Ptr) { 1576 // Eliminate trailing dosish \r. 
1577 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 1578 continue; 1579 } 1580 1581 // If current char is not a horizontal whitespace or if horizontal 1582 // whitespace canonicalization is disabled, dump it to output as is. 1583 if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 1584 OutputBuffer.push_back(*Ptr); 1585 continue; 1586 } 1587 1588 // Otherwise, add one space and advance over neighboring space. 1589 OutputBuffer.push_back(' '); 1590 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 1591 ++Ptr; 1592 } 1593 1594 // Add a null byte and then return all but that byte. 1595 OutputBuffer.push_back('\0'); 1596 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 1597 } 1598 1599 FileCheckDiag::FileCheckDiag(const SourceMgr &SM, 1600 const Check::FileCheckType &CheckTy, 1601 SMLoc CheckLoc, MatchType MatchTy, 1602 SMRange InputRange, StringRef Note) 1603 : CheckTy(CheckTy), CheckLoc(CheckLoc), MatchTy(MatchTy), Note(Note) { 1604 auto Start = SM.getLineAndColumn(InputRange.Start); 1605 auto End = SM.getLineAndColumn(InputRange.End); 1606 InputStartLine = Start.first; 1607 InputStartCol = Start.second; 1608 InputEndLine = End.first; 1609 InputEndCol = End.second; 1610 } 1611 1612 static bool IsPartOfWord(char c) { 1613 return (isAlnum(c) || c == '-' || c == '_'); 1614 } 1615 1616 Check::FileCheckType &Check::FileCheckType::setCount(int C) { 1617 assert(Count > 0 && "zero and negative counts are not supported"); 1618 assert((C == 1 || Kind == CheckPlain) && 1619 "count supported only for plain CHECK directives"); 1620 Count = C; 1621 return *this; 1622 } 1623 1624 std::string Check::FileCheckType::getModifiersDescription() const { 1625 if (Modifiers.none()) 1626 return ""; 1627 std::string Ret; 1628 raw_string_ostream OS(Ret); 1629 OS << '{'; 1630 if (isLiteralMatch()) 1631 OS << "LITERAL"; 1632 OS << '}'; 1633 return OS.str(); 1634 } 1635 1636 std::string Check::FileCheckType::getDescription(StringRef Prefix) 
const { 1637 // Append directive modifiers. 1638 auto WithModifiers = [this, Prefix](StringRef Str) -> std::string { 1639 return (Prefix + Str + getModifiersDescription()).str(); 1640 }; 1641 1642 switch (Kind) { 1643 case Check::CheckNone: 1644 return "invalid"; 1645 case Check::CheckPlain: 1646 if (Count > 1) 1647 return WithModifiers("-COUNT"); 1648 return WithModifiers(""); 1649 case Check::CheckNext: 1650 return WithModifiers("-NEXT"); 1651 case Check::CheckSame: 1652 return WithModifiers("-SAME"); 1653 case Check::CheckNot: 1654 return WithModifiers("-NOT"); 1655 case Check::CheckDAG: 1656 return WithModifiers("-DAG"); 1657 case Check::CheckLabel: 1658 return WithModifiers("-LABEL"); 1659 case Check::CheckEmpty: 1660 return WithModifiers("-EMPTY"); 1661 case Check::CheckComment: 1662 return std::string(Prefix); 1663 case Check::CheckEOF: 1664 return "implicit EOF"; 1665 case Check::CheckBadNot: 1666 return "bad NOT"; 1667 case Check::CheckBadCount: 1668 return "bad COUNT"; 1669 } 1670 llvm_unreachable("unknown FileCheckType"); 1671 } 1672 1673 static std::pair<Check::FileCheckType, StringRef> 1674 FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { 1675 if (Buffer.size() <= Prefix.size()) 1676 return {Check::CheckNone, StringRef()}; 1677 1678 StringRef Rest = Buffer.drop_front(Prefix.size()); 1679 // Check for comment. 1680 if (llvm::is_contained(Req.CommentPrefixes, Prefix)) { 1681 if (Rest.consume_front(":")) 1682 return {Check::CheckComment, Rest}; 1683 // Ignore a comment prefix if it has a suffix like "-NOT". 1684 return {Check::CheckNone, StringRef()}; 1685 } 1686 1687 auto ConsumeModifiers = [&](Check::FileCheckType Ret) 1688 -> std::pair<Check::FileCheckType, StringRef> { 1689 if (Rest.consume_front(":")) 1690 return {Ret, Rest}; 1691 if (!Rest.consume_front("{")) 1692 return {Check::CheckNone, StringRef()}; 1693 1694 // Parse the modifiers, speparated by commas. 1695 do { 1696 // Allow whitespace in modifiers list. 
1697 Rest = Rest.ltrim(); 1698 if (Rest.consume_front("LITERAL")) 1699 Ret.setLiteralMatch(); 1700 else 1701 return {Check::CheckNone, Rest}; 1702 // Allow whitespace in modifiers list. 1703 Rest = Rest.ltrim(); 1704 } while (Rest.consume_front(",")); 1705 if (!Rest.consume_front("}:")) 1706 return {Check::CheckNone, Rest}; 1707 return {Ret, Rest}; 1708 }; 1709 1710 // Verify that the prefix is followed by directive modifiers or a colon. 1711 if (Rest.consume_front(":")) 1712 return {Check::CheckPlain, Rest}; 1713 if (Rest.front() == '{') 1714 return ConsumeModifiers(Check::CheckPlain); 1715 1716 if (!Rest.consume_front("-")) 1717 return {Check::CheckNone, StringRef()}; 1718 1719 if (Rest.consume_front("COUNT-")) { 1720 int64_t Count; 1721 if (Rest.consumeInteger(10, Count)) 1722 // Error happened in parsing integer. 1723 return {Check::CheckBadCount, Rest}; 1724 if (Count <= 0 || Count > INT32_MAX) 1725 return {Check::CheckBadCount, Rest}; 1726 if (Rest.front() != ':' && Rest.front() != '{') 1727 return {Check::CheckBadCount, Rest}; 1728 return ConsumeModifiers( 1729 Check::FileCheckType(Check::CheckPlain).setCount(Count)); 1730 } 1731 1732 // You can't combine -NOT with another suffix. 
1733 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 1734 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 1735 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") || 1736 Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:")) 1737 return {Check::CheckBadNot, Rest}; 1738 1739 if (Rest.consume_front("NEXT")) 1740 return ConsumeModifiers(Check::CheckNext); 1741 1742 if (Rest.consume_front("SAME")) 1743 return ConsumeModifiers(Check::CheckSame); 1744 1745 if (Rest.consume_front("NOT")) 1746 return ConsumeModifiers(Check::CheckNot); 1747 1748 if (Rest.consume_front("DAG")) 1749 return ConsumeModifiers(Check::CheckDAG); 1750 1751 if (Rest.consume_front("LABEL")) 1752 return ConsumeModifiers(Check::CheckLabel); 1753 1754 if (Rest.consume_front("EMPTY")) 1755 return ConsumeModifiers(Check::CheckEmpty); 1756 1757 return {Check::CheckNone, Rest}; 1758 } 1759 1760 // From the given position, find the next character after the word. 1761 static size_t SkipWord(StringRef Str, size_t Loc) { 1762 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 1763 ++Loc; 1764 return Loc; 1765 } 1766 1767 /// Searches the buffer for the first prefix in the prefix regular expression. 1768 /// 1769 /// This searches the buffer using the provided regular expression, however it 1770 /// enforces constraints beyond that: 1771 /// 1) The found prefix must not be a suffix of something that looks like 1772 /// a valid prefix. 1773 /// 2) The found prefix must be followed by a valid check type suffix using \c 1774 /// FindCheckType above. 1775 /// 1776 /// \returns a pair of StringRefs into the Buffer, which combines: 1777 /// - the first match of the regular expression to satisfy these two is 1778 /// returned, 1779 /// otherwise an empty StringRef is returned to indicate failure. 
1780 /// - buffer rewound to the location right after parsed suffix, for parsing 1781 /// to continue from 1782 /// 1783 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 1784 /// start at the beginning of the returned prefix, increment \p LineNumber for 1785 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 1786 /// check found by examining the suffix. 1787 /// 1788 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 1789 /// is unspecified. 1790 static std::pair<StringRef, StringRef> 1791 FindFirstMatchingPrefix(const FileCheckRequest &Req, Regex &PrefixRE, 1792 StringRef &Buffer, unsigned &LineNumber, 1793 Check::FileCheckType &CheckTy) { 1794 SmallVector<StringRef, 2> Matches; 1795 1796 while (!Buffer.empty()) { 1797 // Find the first (longest) match using the RE. 1798 if (!PrefixRE.match(Buffer, &Matches)) 1799 // No match at all, bail. 1800 return {StringRef(), StringRef()}; 1801 1802 StringRef Prefix = Matches[0]; 1803 Matches.clear(); 1804 1805 assert(Prefix.data() >= Buffer.data() && 1806 Prefix.data() < Buffer.data() + Buffer.size() && 1807 "Prefix doesn't start inside of buffer!"); 1808 size_t Loc = Prefix.data() - Buffer.data(); 1809 StringRef Skipped = Buffer.substr(0, Loc); 1810 Buffer = Buffer.drop_front(Loc); 1811 LineNumber += Skipped.count('\n'); 1812 1813 // Check that the matched prefix isn't a suffix of some other check-like 1814 // word. 1815 // FIXME: This is a very ad-hoc check. it would be better handled in some 1816 // other way. Among other things it seems hard to distinguish between 1817 // intentional and unintentional uses of this feature. 1818 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 1819 // Now extract the type. 1820 StringRef AfterSuffix; 1821 std::tie(CheckTy, AfterSuffix) = FindCheckType(Req, Buffer, Prefix); 1822 1823 // If we've found a valid check type for this prefix, we're done. 
1824 if (CheckTy != Check::CheckNone) 1825 return {Prefix, AfterSuffix}; 1826 } 1827 1828 // If we didn't successfully find a prefix, we need to skip this invalid 1829 // prefix and continue scanning. We directly skip the prefix that was 1830 // matched and any additional parts of that check-like word. 1831 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 1832 } 1833 1834 // We ran out of buffer while skipping partial matches so give up. 1835 return {StringRef(), StringRef()}; 1836 } 1837 1838 void FileCheckPatternContext::createLineVariable() { 1839 assert(!LineVariable && "@LINE pseudo numeric variable already created"); 1840 StringRef LineName = "@LINE"; 1841 LineVariable = makeNumericVariable( 1842 LineName, ExpressionFormat(ExpressionFormat::Kind::Unsigned)); 1843 GlobalNumericVariableTable[LineName] = LineVariable; 1844 } 1845 1846 FileCheck::FileCheck(FileCheckRequest Req) 1847 : Req(Req), PatternContext(std::make_unique<FileCheckPatternContext>()), 1848 CheckStrings(std::make_unique<std::vector<FileCheckString>>()) {} 1849 1850 FileCheck::~FileCheck() = default; 1851 1852 bool FileCheck::readCheckFile( 1853 SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 1854 std::pair<unsigned, unsigned> *ImpPatBufferIDRange) { 1855 if (ImpPatBufferIDRange) 1856 ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0; 1857 1858 Error DefineError = 1859 PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM); 1860 if (DefineError) { 1861 logAllUnhandledErrors(std::move(DefineError), errs()); 1862 return true; 1863 } 1864 1865 PatternContext->createLineVariable(); 1866 1867 std::vector<Pattern> ImplicitNegativeChecks; 1868 for (StringRef PatternString : Req.ImplicitCheckNot) { 1869 // Create a buffer with fake command line content in order to display the 1870 // command line option responsible for the specific implicit CHECK-NOT. 
1871 std::string Prefix = "-implicit-check-not='"; 1872 std::string Suffix = "'"; 1873 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 1874 (Prefix + PatternString + Suffix).str(), "command line"); 1875 1876 StringRef PatternInBuffer = 1877 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 1878 unsigned BufferID = SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 1879 if (ImpPatBufferIDRange) { 1880 if (ImpPatBufferIDRange->first == ImpPatBufferIDRange->second) { 1881 ImpPatBufferIDRange->first = BufferID; 1882 ImpPatBufferIDRange->second = BufferID + 1; 1883 } else { 1884 assert(BufferID == ImpPatBufferIDRange->second && 1885 "expected consecutive source buffer IDs"); 1886 ++ImpPatBufferIDRange->second; 1887 } 1888 } 1889 1890 ImplicitNegativeChecks.push_back( 1891 Pattern(Check::CheckNot, PatternContext.get())); 1892 ImplicitNegativeChecks.back().parsePattern(PatternInBuffer, 1893 "IMPLICIT-CHECK", SM, Req); 1894 } 1895 1896 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 1897 1898 // LineNumber keeps track of the line on which CheckPrefix instances are 1899 // found. 1900 unsigned LineNumber = 1; 1901 1902 std::set<StringRef> PrefixesNotFound(Req.CheckPrefixes.begin(), 1903 Req.CheckPrefixes.end()); 1904 const size_t DistinctPrefixes = PrefixesNotFound.size(); 1905 while (true) { 1906 Check::FileCheckType CheckTy; 1907 1908 // See if a prefix occurs in the memory buffer. 
1909 StringRef UsedPrefix; 1910 StringRef AfterSuffix; 1911 std::tie(UsedPrefix, AfterSuffix) = 1912 FindFirstMatchingPrefix(Req, PrefixRE, Buffer, LineNumber, CheckTy); 1913 if (UsedPrefix.empty()) 1914 break; 1915 if (CheckTy != Check::CheckComment) 1916 PrefixesNotFound.erase(UsedPrefix); 1917 1918 assert(UsedPrefix.data() == Buffer.data() && 1919 "Failed to move Buffer's start forward, or pointed prefix outside " 1920 "of the buffer!"); 1921 assert(AfterSuffix.data() >= Buffer.data() && 1922 AfterSuffix.data() < Buffer.data() + Buffer.size() && 1923 "Parsing after suffix doesn't start inside of buffer!"); 1924 1925 // Location to use for error messages. 1926 const char *UsedPrefixStart = UsedPrefix.data(); 1927 1928 // Skip the buffer to the end of parsed suffix (or just prefix, if no good 1929 // suffix was processed). 1930 Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size()) 1931 : AfterSuffix; 1932 1933 // Complain about useful-looking but unsupported suffixes. 1934 if (CheckTy == Check::CheckBadNot) { 1935 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 1936 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 1937 return true; 1938 } 1939 1940 // Complain about invalid count specification. 1941 if (CheckTy == Check::CheckBadCount) { 1942 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 1943 "invalid count in -COUNT specification on prefix '" + 1944 UsedPrefix + "'"); 1945 return true; 1946 } 1947 1948 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 1949 // leading whitespace. 1950 if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) 1951 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 1952 1953 // Scan ahead to the end of line. 1954 size_t EOL = Buffer.find_first_of("\n\r"); 1955 1956 // Remember the location of the start of the pattern, for diagnostics. 
1957 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 1958 1959 // Extract the pattern from the buffer. 1960 StringRef PatternBuffer = Buffer.substr(0, EOL); 1961 Buffer = Buffer.substr(EOL); 1962 1963 // If this is a comment, we're done. 1964 if (CheckTy == Check::CheckComment) 1965 continue; 1966 1967 // Parse the pattern. 1968 Pattern P(CheckTy, PatternContext.get(), LineNumber); 1969 if (P.parsePattern(PatternBuffer, UsedPrefix, SM, Req)) 1970 return true; 1971 1972 // Verify that CHECK-LABEL lines do not define or use variables 1973 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 1974 SM.PrintMessage( 1975 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 1976 "found '" + UsedPrefix + "-LABEL:'" 1977 " with variable definition or use"); 1978 return true; 1979 } 1980 1981 // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. 1982 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || 1983 CheckTy == Check::CheckEmpty) && 1984 CheckStrings->empty()) { 1985 StringRef Type = CheckTy == Check::CheckNext 1986 ? "NEXT" 1987 : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; 1988 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 1989 SourceMgr::DK_Error, 1990 "found '" + UsedPrefix + "-" + Type + 1991 "' without previous '" + UsedPrefix + ": line"); 1992 return true; 1993 } 1994 1995 // Handle CHECK-DAG/-NOT. 1996 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 1997 DagNotMatches.push_back(P); 1998 continue; 1999 } 2000 2001 // Okay, add the string we captured to the output vector and move on. 2002 CheckStrings->emplace_back(P, UsedPrefix, PatternLoc); 2003 std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); 2004 DagNotMatches = ImplicitNegativeChecks; 2005 } 2006 2007 // When there are no used prefixes we report an error except in the case that 2008 // no prefix is specified explicitly but -implicit-check-not is specified. 
  // NoPrefixesFound: no check prefix was removed from PrefixesNotFound, i.e.
  // no non-comment directive was seen at all.
  const bool NoPrefixesFound = PrefixesNotFound.size() == DistinctPrefixes;
  const bool SomePrefixesUnexpectedlyNotUsed =
      !Req.AllowUnusedPrefixes && !PrefixesNotFound.empty();
  if ((NoPrefixesFound || SomePrefixesUnexpectedlyNotUsed) &&
      (ImplicitNegativeChecks.empty() || !Req.IsDefaultCheckPrefix)) {
    errs() << "error: no check strings found with prefix"
           << (PrefixesNotFound.size() > 1 ? "es " : " ");
    bool First = true;
    for (StringRef MissingPrefix : PrefixesNotFound) {
      if (!First)
        errs() << ", ";
      errs() << "\'" << MissingPrefix << ":'";
      First = false;
    }
    errs() << '\n';
    return true;
  }

  // Add an EOF pattern for any trailing --implicit-check-not/CHECK-DAG/-NOTs,
  // and use the first prefix as a filler for the error message.
  if (!DagNotMatches.empty()) {
    CheckStrings->emplace_back(
        Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1),
        *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data()));
    std::swap(DagNotMatches, CheckStrings->back().DagNotStrings);
  }

  return false;
}

/// Reports that pattern \p Pat matched \p Buffer at \p MatchPos for
/// \p MatchLen characters.
///
/// \p ExpectedMatch tells whether the match was wanted. A wanted match is
/// reported as a remark, and only if Req.Verbose is set (Req.VerboseVerbose
/// for the implicit EOF pattern); an unwanted match is reported as an error.
/// The match is always passed to ProcessMatchResult with \p Diags. When
/// \p Diags is non-null, substitutions and variable definitions are recorded
/// there and the textual output of a wanted match is skipped.
///
/// \p Prefix and \p Loc identify the directive; \p MatchedCount is the
/// ordinal of this match and is only printed when the pattern count exceeds
/// one.
static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
                       StringRef Prefix, SMLoc Loc, const Pattern &Pat,
                       int MatchedCount, StringRef Buffer, size_t MatchPos,
                       size_t MatchLen, const FileCheckRequest &Req,
                       std::vector<FileCheckDiag> *Diags) {
  bool PrintDiag = true;
  if (ExpectedMatch) {
    if (!Req.Verbose)
      return;
    if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
      return;
    // Due to their verbosity, we don't print verbose diagnostics here if we're
    // gathering them for a different rendering, but we always print other
    // diagnostics.
    PrintDiag = !Diags;
  }
  FileCheckDiag::MatchType MatchTy = ExpectedMatch
                                         ? FileCheckDiag::MatchFoundAndExpected
                                         : FileCheckDiag::MatchFoundButExcluded;
  SMRange MatchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(),
                                          Buffer, MatchPos, MatchLen, Diags);
  if (Diags) {
    Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, Diags);
    Pat.printVariableDefs(SM, MatchTy, Diags);
  }
  if (!PrintDiag)
    return;

  std::string Message = formatv("{0}: {1} string found in input",
                                Pat.getCheckTy().getDescription(Prefix),
                                (ExpectedMatch ? "expected" : "excluded"))
                            .str();
  if (Pat.getCount() > 1)
    Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();

  SM.PrintMessage(
      Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message);
  SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here",
                  {MatchRange});
  Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, nullptr);
  Pat.printVariableDefs(SM, MatchTy, nullptr);
}

/// Convenience overload that takes the prefix, location and pattern from
/// \p CheckStr.
static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
                       const FileCheckString &CheckStr, int MatchedCount,
                       StringRef Buffer, size_t MatchPos, size_t MatchLen,
                       FileCheckRequest &Req,
                       std::vector<FileCheckDiag> *Diags) {
  PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
             MatchedCount, Buffer, MatchPos, MatchLen, Req, Diags);
}

/// Reports that pattern \p Pat did not match anywhere in \p Buffer.
///
/// \p ExpectedMatch tells whether a match was wanted. A missing wanted match
/// is reported as an error; a missing excluded match is reported as a remark
/// and only if \p VerboseVerbose is set. When \p Diags is non-null the search
/// range and substitutions are recorded there, and the textual output for an
/// excluded pattern is skipped.
///
/// \p MatchErrors must hold the failure returned by the match attempt. It is
/// always consumed. ErrorDiagnostic errors are logged to errs(), and if no
/// other error remains nothing further is printed.
static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
                         StringRef Prefix, SMLoc Loc, const Pattern &Pat,
                         int MatchedCount, StringRef Buffer,
                         bool VerboseVerbose, std::vector<FileCheckDiag> *Diags,
                         Error MatchErrors) {
  assert(MatchErrors && "Called on successful match");
  bool PrintDiag = true;
  if (!ExpectedMatch) {
    if (!VerboseVerbose) {
      consumeError(std::move(MatchErrors));
      return;
    }
    // Due to their verbosity, we don't print verbose diagnostics here if we're
    // gathering them for a different rendering, but we always print other
    // diagnostics.
    PrintDiag = !Diags;
  }

  FileCheckDiag::MatchType MatchTy = ExpectedMatch
                                         ? FileCheckDiag::MatchNoneButExpected
                                         : FileCheckDiag::MatchNoneAndExcluded;
  SMRange SearchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(),
                                           Buffer, 0, Buffer.size(), Diags);
  if (Diags)
    Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, Diags);
  if (!PrintDiag) {
    consumeError(std::move(MatchErrors));
    return;
  }

  // Log the ErrorDiagnostic errors; whatever is left over is handed back.
  MatchErrors = handleErrors(std::move(MatchErrors),
                             [](const ErrorDiagnostic &E) { E.log(errs()); });

  // No problem matching the string per se.
  if (!MatchErrors)
    return;
  consumeError(std::move(MatchErrors));

  // Print "not found" diagnostic.
  std::string Message = formatv("{0}: {1} string not found in input",
                                Pat.getCheckTy().getDescription(Prefix),
                                (ExpectedMatch ? "expected" : "excluded"))
                            .str();
  if (Pat.getCount() > 1)
    Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
  SM.PrintMessage(
      Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message);

  // Print the "scanning from here" line.
  SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here");

  // Allow the pattern to print additional information if desired.
2143 Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, nullptr); 2144 2145 if (ExpectedMatch) 2146 Pat.printFuzzyMatch(SM, Buffer, Diags); 2147 } 2148 2149 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, 2150 const FileCheckString &CheckStr, int MatchedCount, 2151 StringRef Buffer, bool VerboseVerbose, 2152 std::vector<FileCheckDiag> *Diags, Error MatchErrors) { 2153 PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, 2154 MatchedCount, Buffer, VerboseVerbose, Diags, 2155 std::move(MatchErrors)); 2156 } 2157 2158 /// Counts the number of newlines in the specified range. 2159 static unsigned CountNumNewlinesBetween(StringRef Range, 2160 const char *&FirstNewLine) { 2161 unsigned NumNewLines = 0; 2162 while (1) { 2163 // Scan for newline. 2164 Range = Range.substr(Range.find_first_of("\n\r")); 2165 if (Range.empty()) 2166 return NumNewLines; 2167 2168 ++NumNewLines; 2169 2170 // Handle \n\r and \r\n as a single newline. 2171 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 2172 (Range[0] != Range[1])) 2173 Range = Range.substr(1); 2174 Range = Range.substr(1); 2175 2176 if (NumNewLines == 1) 2177 FirstNewLine = Range.begin(); 2178 } 2179 } 2180 2181 size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer, 2182 bool IsLabelScanMode, size_t &MatchLen, 2183 FileCheckRequest &Req, 2184 std::vector<FileCheckDiag> *Diags) const { 2185 size_t LastPos = 0; 2186 std::vector<const Pattern *> NotStrings; 2187 2188 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 2189 // bounds; we have not processed variable definitions within the bounded block 2190 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 2191 // over the block again (including the last CHECK-LABEL) in normal mode. 2192 if (!IsLabelScanMode) { 2193 // Match "dag strings" (with mixed "not strings" if any). 
2194 LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags); 2195 if (LastPos == StringRef::npos) 2196 return StringRef::npos; 2197 } 2198 2199 // Match itself from the last position after matching CHECK-DAG. 2200 size_t LastMatchEnd = LastPos; 2201 size_t FirstMatchPos = 0; 2202 // Go match the pattern Count times. Majority of patterns only match with 2203 // count 1 though. 2204 assert(Pat.getCount() != 0 && "pattern count can not be zero"); 2205 for (int i = 1; i <= Pat.getCount(); i++) { 2206 StringRef MatchBuffer = Buffer.substr(LastMatchEnd); 2207 size_t CurrentMatchLen; 2208 // get a match at current start point 2209 Expected<size_t> MatchResult = Pat.match(MatchBuffer, CurrentMatchLen, SM); 2210 2211 // report 2212 if (!MatchResult) { 2213 PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags, 2214 MatchResult.takeError()); 2215 return StringRef::npos; 2216 } 2217 size_t MatchPos = *MatchResult; 2218 PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req, 2219 Diags); 2220 if (i == 1) 2221 FirstMatchPos = LastPos + MatchPos; 2222 2223 // move start point after the match 2224 LastMatchEnd += MatchPos + CurrentMatchLen; 2225 } 2226 // Full match len counts from first match pos. 2227 MatchLen = LastMatchEnd - FirstMatchPos; 2228 2229 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 2230 // or CHECK-NOT 2231 if (!IsLabelScanMode) { 2232 size_t MatchPos = FirstMatchPos - LastPos; 2233 StringRef MatchBuffer = Buffer.substr(LastPos); 2234 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 2235 2236 // If this check is a "CHECK-NEXT", verify that the previous match was on 2237 // the previous line (i.e. that there is one newline between them). 
2238 if (CheckNext(SM, SkippedRegion)) { 2239 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, 2240 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, 2241 Diags, Req.Verbose); 2242 return StringRef::npos; 2243 } 2244 2245 // If this check is a "CHECK-SAME", verify that the previous match was on 2246 // the same line (i.e. that there is no newline between them). 2247 if (CheckSame(SM, SkippedRegion)) { 2248 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, 2249 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, 2250 Diags, Req.Verbose); 2251 return StringRef::npos; 2252 } 2253 2254 // If this match had "not strings", verify that they don't exist in the 2255 // skipped region. 2256 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) 2257 return StringRef::npos; 2258 } 2259 2260 return FirstMatchPos; 2261 } 2262 2263 bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 2264 if (Pat.getCheckTy() != Check::CheckNext && 2265 Pat.getCheckTy() != Check::CheckEmpty) 2266 return false; 2267 2268 Twine CheckName = 2269 Prefix + 2270 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT"); 2271 2272 // Count the number of newlines between the previous match and this one. 
2273 const char *FirstNewLine = nullptr; 2274 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 2275 2276 if (NumNewLines == 0) { 2277 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2278 CheckName + ": is on the same line as previous match"); 2279 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2280 "'next' match was here"); 2281 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2282 "previous match ended here"); 2283 return true; 2284 } 2285 2286 if (NumNewLines != 1) { 2287 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2288 CheckName + 2289 ": is not on the line after the previous match"); 2290 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2291 "'next' match was here"); 2292 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2293 "previous match ended here"); 2294 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 2295 "non-matching line after previous match is here"); 2296 return true; 2297 } 2298 2299 return false; 2300 } 2301 2302 bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 2303 if (Pat.getCheckTy() != Check::CheckSame) 2304 return false; 2305 2306 // Count the number of newlines between the previous match and this one. 
  const char *FirstNewLine = nullptr;
  unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);

  if (NumNewLines != 0) {
    SM.PrintMessage(Loc, SourceMgr::DK_Error,
                    Prefix +
                        "-SAME: is not on the same line as the previous match");
    SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
                    "'next' match was here");
    SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
                    "previous match ended here");
    return true;
  }

  return false;
}

/// Verifies that none of the CHECK-NOT patterns in \p NotStrings matches
/// inside \p Buffer.
///
/// Every pattern is tried, so all offending matches are reported rather than
/// just the first one.
///
/// \returns true if at least one pattern matched, false otherwise.
bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
                               const std::vector<const Pattern *> &NotStrings,
                               const FileCheckRequest &Req,
                               std::vector<FileCheckDiag> *Diags) const {
  bool DirectiveFail = false;
  for (const Pattern *Pat : NotStrings) {
    assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");

    size_t MatchLen = 0;
    Expected<size_t> MatchResult = Pat->match(Buffer, MatchLen, SM);

    // For an excluded pattern, failing to match is the good outcome.
    if (!MatchResult) {
      PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer,
                   Req.VerboseVerbose, Diags, MatchResult.takeError());
      continue;
    }
    size_t Pos = *MatchResult;

    PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen,
               Req, Diags);
    DirectiveFail = true;
  }

  return DirectiveFail;
}

/// Matches the CHECK-DAG and CHECK-NOT patterns attached to this check
/// string (DagNotStrings) against \p Buffer.
///
/// CHECK-NOT patterns are collected in \p NotStrings. At the end of each
/// CHECK-DAG group the collected patterns are checked against the text
/// between the start of the search range and the first match of the group,
/// and then cleared; patterns that follow the last CHECK-DAG are left in
/// \p NotStrings for the caller.
///
/// \returns the offset in \p Buffer where the last CHECK-DAG group's matches
/// end (0 if there is no CHECK-DAG), or StringRef::npos on failure.
size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
                                 std::vector<const Pattern *> &NotStrings,
                                 const FileCheckRequest &Req,
                                 std::vector<FileCheckDiag> *Diags) const {
  if (DagNotStrings.empty())
    return 0;

  // The start of the search range.
  size_t StartPos = 0;

  struct MatchRange {
    size_t Pos;
    size_t End;
  };
  // A sorted list of ranges for non-overlapping CHECK-DAG matches.  Match
  // ranges are erased from this list once they are no longer in the search
  // range.
  std::list<MatchRange> MatchRanges;

  // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
  // group, so we don't use a range-based for loop here.
  for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
       PatItr != PatEnd; ++PatItr) {
    const Pattern &Pat = *PatItr;
    assert((Pat.getCheckTy() == Check::CheckDAG ||
            Pat.getCheckTy() == Check::CheckNot) &&
           "Invalid CHECK-DAG or CHECK-NOT!");

    if (Pat.getCheckTy() == Check::CheckNot) {
      NotStrings.push_back(&Pat);
      continue;
    }

    assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");

    // CHECK-DAG always matches from the start.
    size_t MatchLen = 0, MatchPos = StartPos;

    // Search for a match that doesn't overlap a previous match in this
    // CHECK-DAG group.
    // The loop has no exit condition of its own: it is left by `break` once a
    // match has been accepted, or by `return` when the pattern cannot be
    // matched.
    for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
      StringRef MatchBuffer = Buffer.substr(MatchPos);
      Expected<size_t> MatchResult = Pat.match(MatchBuffer, MatchLen, SM);
      // With a group of CHECK-DAGs, a single mismatching means the match on
      // that group of CHECK-DAGs fails immediately.
      if (!MatchResult) {
        PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer,
                     Req.VerboseVerbose, Diags, MatchResult.takeError());
        return StringRef::npos;
      }
      size_t MatchPosBuf = *MatchResult;
      // Re-calc it as the offset relative to the start of the original string.
      MatchPos += MatchPosBuf;
      if (Req.VerboseVerbose)
        PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
                   MatchLen, Req, Diags);
      MatchRange M{MatchPos, MatchPos + MatchLen};
      if (Req.AllowDeprecatedDagOverlap) {
        // We don't need to track all matches in this mode, so we just maintain
        // one match range that encompasses the current CHECK-DAG group's
        // matches.
        if (MatchRanges.empty())
          MatchRanges.insert(MatchRanges.end(), M);
        else {
          auto Block = MatchRanges.begin();
          Block->Pos = std::min(Block->Pos, M.Pos);
          Block->End = std::max(Block->End, M.End);
        }
        break;
      }
      // Iterate previous matches until overlapping match or insertion point.
      bool Overlap = false;
      for (; MI != ME; ++MI) {
        if (M.Pos < MI->End) {
          // !Overlap => New match has no overlap and is before this old match.
          // Overlap => New match overlaps this old match.
          Overlap = MI->Pos < M.End;
          break;
        }
      }
      if (!Overlap) {
        // Insert non-overlapping match into list.
        MatchRanges.insert(MI, M);
        break;
      }
      if (Req.VerboseVerbose) {
        // Due to their verbosity, we don't print verbose diagnostics here if
        // we're gathering them for a different rendering, but we always print
        // other diagnostics.
        if (!Diags) {
          SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
          SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
          SMRange OldRange(OldStart, OldEnd);
          SM.PrintMessage(OldStart, SourceMgr::DK_Note,
                          "match discarded, overlaps earlier DAG match here",
                          {OldRange});
        } else {
          // Re-tag the trailing diagnostics that share the most recent check
          // location as discarded.
          SMLoc CheckLoc = Diags->rbegin()->CheckLoc;
          for (auto I = Diags->rbegin(), E = Diags->rend();
               I != E && I->CheckLoc == CheckLoc; ++I)
            I->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
        }
      }
      // Retry the search from the end of the match that was overlapped.
      MatchPos = MI->End;
    }
    // In VerboseVerbose mode every candidate was already printed inside the
    // loop; otherwise only the accepted match is reported, here.
    if (!Req.VerboseVerbose)
      PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
                 MatchLen, Req, Diags);

    // Handle the end of a CHECK-DAG group.
    if (std::next(PatItr) == PatEnd ||
        std::next(PatItr)->getCheckTy() == Check::CheckNot) {
      if (!NotStrings.empty()) {
        // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
        // CHECK-DAG, verify that there are no 'not' strings occurred in that
        // region.
        StringRef SkippedRegion =
            Buffer.slice(StartPos, MatchRanges.begin()->Pos);
        if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
          return StringRef::npos;
        // Clear "not strings".
        NotStrings.clear();
      }
      // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
      // end of this CHECK-DAG group's match range.
      StartPos = MatchRanges.rbegin()->End;
      // Don't waste time checking for (impossible) overlaps before that.
      MatchRanges.clear();
    }
  }

  return StartPos;
}

/// Validates the user-supplied prefixes in \p SuppliedPrefixes.
///
/// Each prefix must be non-empty, must match the validator expression below,
/// and must not already be present in \p UniquePrefixes, to which it is then
/// added. \p Kind ("check" or "comment" in the callers below) is only used
/// in the error messages.
///
/// \returns true if all prefixes are valid; otherwise prints an error for
/// the first invalid prefix and returns false.
static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes,
                             ArrayRef<StringRef> SuppliedPrefixes) {
  for (StringRef Prefix : SuppliedPrefixes) {
    if (Prefix.empty()) {
      errs() << "error: supplied " << Kind << " prefix must not be the empty "
             << "string\n";
      return false;
    }
    // NOTE(review): this expression also accepts a prefix that starts with a
    // digit, a hyphen or an underscore, although the message below says the
    // prefix must start with a letter - confirm which of the two is intended.
    static const Regex Validator("^[a-zA-Z0-9_-]*$");
    if (!Validator.match(Prefix)) {
      errs() << "error: supplied " << Kind << " prefix must start with a "
             << "letter and contain only alphanumeric characters, hyphens, and "
             << "underscores: '" << Prefix << "'\n";
      return false;
    }
    if (!UniquePrefixes.insert(Prefix).second) {
      errs() << "error: supplied " << Kind << " prefix must be unique among "
             << "check and comment prefixes: '" << Prefix << "'\n";
      return false;
    }
  }
  return true;
}

// Prefixes used when the request does not supply any of the respective kind.
static const char *DefaultCheckPrefixes[] = {"CHECK"};
static const char *DefaultCommentPrefixes[] = {"COM", "RUN"};

/// Validates the check and comment prefixes of the request.
///
/// \returns true if they are all well-formed and distinct, false (after an
/// error has been printed) otherwise.
bool FileCheck::ValidateCheckPrefixes() {
  StringSet<> UniquePrefixes;
  // Add default prefixes to catch user-supplied duplicates of them below.
  if (Req.CheckPrefixes.empty()) {
    for (const char *Prefix : DefaultCheckPrefixes)
      UniquePrefixes.insert(Prefix);
  }
  if (Req.CommentPrefixes.empty()) {
    for (const char *Prefix : DefaultCommentPrefixes)
      UniquePrefixes.insert(Prefix);
  }
  // Do not validate the default prefixes, or diagnostics about duplicates might
  // incorrectly indicate that they were supplied by the user.
  if (!ValidatePrefixes("check", UniquePrefixes, Req.CheckPrefixes))
    return false;
  if (!ValidatePrefixes("comment", UniquePrefixes, Req.CommentPrefixes))
    return false;
  return true;
}

/// Builds the regular expression that matches any check or comment prefix.
///
/// Empty prefix lists in the request are first filled with the defaults, and
/// Req.IsDefaultCheckPrefix is set when the default check prefix is used.
/// The prefixes are joined with '|' without escaping.
Regex FileCheck::buildCheckPrefixRegex() {
  if (Req.CheckPrefixes.empty()) {
    for (const char *Prefix : DefaultCheckPrefixes)
      Req.CheckPrefixes.push_back(Prefix);
    Req.IsDefaultCheckPrefix = true;
  }
  if (Req.CommentPrefixes.empty()) {
    for (const char *Prefix : DefaultCommentPrefixes)
      Req.CommentPrefixes.push_back(Prefix);
  }

  // We already validated the contents of CheckPrefixes and CommentPrefixes so
  // just concatenate them as alternatives.
  SmallString<32> PrefixRegexStr;
  for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) {
    if (I != 0)
      PrefixRegexStr.push_back('|');
    PrefixRegexStr.append(Req.CheckPrefixes[I]);
  }
  for (StringRef Prefix : Req.CommentPrefixes) {
    PrefixRegexStr.push_back('|');
    PrefixRegexStr.append(Prefix);
  }

  return Regex(PrefixRegexStr);
}

Error FileCheckPatternContext::defineCmdlineVariables(
    ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) {
  assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() &&
         "Overriding defined variable with command-line variable definitions");

  if (CmdlineDefines.empty())
    return Error::success();

  // Create a string representing the vector of command-line definitions. Each
  // definition is on its own line and prefixed with a definition number to
  // clarify which definition a given diagnostic corresponds to.
  unsigned I = 0;
  Error Errs = Error::success();
  std::string CmdlineDefsDiag;
  SmallVector<std::pair<size_t, size_t>, 4> CmdlineDefsIndices;
  for (StringRef CmdlineDef : CmdlineDefines) {
    std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str();
    size_t EqIdx = CmdlineDef.find('=');
    if (EqIdx == StringRef::npos) {
      CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0));
      continue;
    }
    // Numeric variable definition.
    if (CmdlineDef[0] == '#') {
      // Append a copy of the command-line definition adapted to use the same
      // format as in the input file to be able to reuse
      // parseNumericSubstitutionBlock.
      CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str();
      std::string SubstitutionStr = std::string(CmdlineDef);
      SubstitutionStr[EqIdx] = ':';
      CmdlineDefsIndices.push_back(
          std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size()));
      CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str();
    } else {
      CmdlineDefsDiag += DefPrefix;
      CmdlineDefsIndices.push_back(
          std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size()));
      CmdlineDefsDiag += (CmdlineDef + "\n").str();
    }
  }

  // Create a buffer with fake command line content in order to display
  // parsing diagnostic with location information and point to the
  // global definition with invalid syntax.
2602 std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer = 2603 MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines"); 2604 StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer(); 2605 SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc()); 2606 2607 for (std::pair<size_t, size_t> CmdlineDefIndices : CmdlineDefsIndices) { 2608 StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first, 2609 CmdlineDefIndices.second); 2610 if (CmdlineDef.empty()) { 2611 Errs = joinErrors( 2612 std::move(Errs), 2613 ErrorDiagnostic::get(SM, CmdlineDef, 2614 "missing equal sign in global definition")); 2615 continue; 2616 } 2617 2618 // Numeric variable definition. 2619 if (CmdlineDef[0] == '#') { 2620 // Now parse the definition both to check that the syntax is correct and 2621 // to create the necessary class instance. 2622 StringRef CmdlineDefExpr = CmdlineDef.substr(1); 2623 Optional<NumericVariable *> DefinedNumericVariable; 2624 Expected<std::unique_ptr<Expression>> ExpressionResult = 2625 Pattern::parseNumericSubstitutionBlock( 2626 CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM); 2627 if (!ExpressionResult) { 2628 Errs = joinErrors(std::move(Errs), ExpressionResult.takeError()); 2629 continue; 2630 } 2631 std::unique_ptr<Expression> Expression = std::move(*ExpressionResult); 2632 // Now evaluate the expression whose value this variable should be set 2633 // to, since the expression of a command-line variable definition should 2634 // only use variables defined earlier on the command-line. If not, this 2635 // is an error and we report it. 2636 Expected<ExpressionValue> Value = Expression->getAST()->eval(); 2637 if (!Value) { 2638 Errs = joinErrors(std::move(Errs), Value.takeError()); 2639 continue; 2640 } 2641 2642 assert(DefinedNumericVariable && "No variable defined"); 2643 (*DefinedNumericVariable)->setValue(*Value); 2644 2645 // Record this variable definition. 
2646 GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] = 2647 *DefinedNumericVariable; 2648 } else { 2649 // String variable definition. 2650 std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('='); 2651 StringRef CmdlineName = CmdlineNameVal.first; 2652 StringRef OrigCmdlineName = CmdlineName; 2653 Expected<Pattern::VariableProperties> ParseVarResult = 2654 Pattern::parseVariable(CmdlineName, SM); 2655 if (!ParseVarResult) { 2656 Errs = joinErrors(std::move(Errs), ParseVarResult.takeError()); 2657 continue; 2658 } 2659 // Check that CmdlineName does not denote a pseudo variable is only 2660 // composed of the parsed numeric variable. This catches cases like 2661 // "FOO+2" in a "FOO+2=10" definition. 2662 if (ParseVarResult->IsPseudo || !CmdlineName.empty()) { 2663 Errs = joinErrors(std::move(Errs), 2664 ErrorDiagnostic::get( 2665 SM, OrigCmdlineName, 2666 "invalid name in string variable definition '" + 2667 OrigCmdlineName + "'")); 2668 continue; 2669 } 2670 StringRef Name = ParseVarResult->Name; 2671 2672 // Detect collisions between string and numeric variables when the former 2673 // is created later than the latter. 2674 if (GlobalNumericVariableTable.find(Name) != 2675 GlobalNumericVariableTable.end()) { 2676 Errs = joinErrors(std::move(Errs), 2677 ErrorDiagnostic::get(SM, Name, 2678 "numeric variable with name '" + 2679 Name + "' already exists")); 2680 continue; 2681 } 2682 GlobalVariableTable.insert(CmdlineNameVal); 2683 // Mark the string variable as defined to detect collisions between 2684 // string and numeric variables in defineCmdlineVariables when the latter 2685 // is created later than the former. We cannot reuse GlobalVariableTable 2686 // for this by populating it with an empty string since we would then 2687 // lose the ability to detect the use of an undefined variable in 2688 // match(). 
2689 DefinedVariableTable[Name] = true; 2690 } 2691 } 2692 2693 return Errs; 2694 } 2695 2696 void FileCheckPatternContext::clearLocalVars() { 2697 SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars; 2698 for (const StringMapEntry<StringRef> &Var : GlobalVariableTable) 2699 if (Var.first()[0] != '$') 2700 LocalPatternVars.push_back(Var.first()); 2701 2702 // Numeric substitution reads the value of a variable directly, not via 2703 // GlobalNumericVariableTable. Therefore, we clear local variables by 2704 // clearing their value which will lead to a numeric substitution failure. We 2705 // also mark the variable for removal from GlobalNumericVariableTable since 2706 // this is what defineCmdlineVariables checks to decide that no global 2707 // variable has been defined. 2708 for (const auto &Var : GlobalNumericVariableTable) 2709 if (Var.first()[0] != '$') { 2710 Var.getValue()->clearValue(); 2711 LocalNumericVars.push_back(Var.first()); 2712 } 2713 2714 for (const auto &Var : LocalPatternVars) 2715 GlobalVariableTable.erase(Var); 2716 for (const auto &Var : LocalNumericVars) 2717 GlobalNumericVariableTable.erase(Var); 2718 } 2719 2720 bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer, 2721 std::vector<FileCheckDiag> *Diags) { 2722 bool ChecksFailed = false; 2723 2724 unsigned i = 0, j = 0, e = CheckStrings->size(); 2725 while (true) { 2726 StringRef CheckRegion; 2727 if (j == e) { 2728 CheckRegion = Buffer; 2729 } else { 2730 const FileCheckString &CheckLabelStr = (*CheckStrings)[j]; 2731 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 2732 ++j; 2733 continue; 2734 } 2735 2736 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 2737 size_t MatchLabelLen = 0; 2738 size_t MatchLabelPos = 2739 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags); 2740 if (MatchLabelPos == StringRef::npos) 2741 // Immediately bail if CHECK-LABEL fails, nothing else we can do. 
2742 return false; 2743 2744 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 2745 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 2746 ++j; 2747 } 2748 2749 // Do not clear the first region as it's the one before the first 2750 // CHECK-LABEL and it would clear variables defined on the command-line 2751 // before they get used. 2752 if (i != 0 && Req.EnableVarScope) 2753 PatternContext->clearLocalVars(); 2754 2755 for (; i != j; ++i) { 2756 const FileCheckString &CheckStr = (*CheckStrings)[i]; 2757 2758 // Check each string within the scanned region, including a second check 2759 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 2760 size_t MatchLen = 0; 2761 size_t MatchPos = 2762 CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags); 2763 2764 if (MatchPos == StringRef::npos) { 2765 ChecksFailed = true; 2766 i = j; 2767 break; 2768 } 2769 2770 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 2771 } 2772 2773 if (j == e) 2774 break; 2775 } 2776 2777 // Success if no checks failed. 2778 return !ChecksFailed; 2779 } 2780