1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // FileCheck does a line-by line check of a file that validates whether it 10 // contains the expected content. This is useful for regression tests etc. 11 // 12 // This file implements most of the API that will be used by the FileCheck utility 13 // as well as various unittests. 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/FileCheck/FileCheck.h" 17 #include "FileCheckImpl.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/Support/CheckedArithmetic.h" 22 #include "llvm/Support/FormatVariadic.h" 23 #include <cstdint> 24 #include <list> 25 #include <tuple> 26 #include <utility> 27 28 using namespace llvm; 29 30 StringRef ExpressionFormat::toString() const { 31 switch (Value) { 32 case Kind::NoFormat: 33 return StringRef("<none>"); 34 case Kind::Unsigned: 35 return StringRef("%u"); 36 case Kind::Signed: 37 return StringRef("%d"); 38 case Kind::HexUpper: 39 return StringRef("%X"); 40 case Kind::HexLower: 41 return StringRef("%x"); 42 } 43 llvm_unreachable("unknown expression format"); 44 } 45 46 Expected<std::string> ExpressionFormat::getWildcardRegex() const { 47 auto CreatePrecisionRegex = [this](StringRef S) { 48 return (S + Twine('{') + Twine(Precision) + "}").str(); 49 }; 50 51 switch (Value) { 52 case Kind::Unsigned: 53 if (Precision) 54 return CreatePrecisionRegex("([1-9][0-9]*)?[0-9]"); 55 return std::string("[0-9]+"); 56 case Kind::Signed: 57 if (Precision) 58 return CreatePrecisionRegex("-?([1-9][0-9]*)?[0-9]"); 59 return std::string("-?[0-9]+"); 
60 case Kind::HexUpper: 61 if (Precision) 62 return CreatePrecisionRegex("([1-9A-F][0-9A-F]*)?[0-9A-F]"); 63 return std::string("[0-9A-F]+"); 64 case Kind::HexLower: 65 if (Precision) 66 return CreatePrecisionRegex("([1-9a-f][0-9a-f]*)?[0-9a-f]"); 67 return std::string("[0-9a-f]+"); 68 default: 69 return createStringError(std::errc::invalid_argument, 70 "trying to match value with invalid format"); 71 } 72 } 73 74 Expected<std::string> 75 ExpressionFormat::getMatchingString(ExpressionValue IntegerValue) const { 76 uint64_t AbsoluteValue; 77 StringRef SignPrefix = IntegerValue.isNegative() ? "-" : ""; 78 79 if (Value == Kind::Signed) { 80 Expected<int64_t> SignedValue = IntegerValue.getSignedValue(); 81 if (!SignedValue) 82 return SignedValue.takeError(); 83 if (*SignedValue < 0) 84 AbsoluteValue = cantFail(IntegerValue.getAbsolute().getUnsignedValue()); 85 else 86 AbsoluteValue = *SignedValue; 87 } else { 88 Expected<uint64_t> UnsignedValue = IntegerValue.getUnsignedValue(); 89 if (!UnsignedValue) 90 return UnsignedValue.takeError(); 91 AbsoluteValue = *UnsignedValue; 92 } 93 94 std::string AbsoluteValueStr; 95 switch (Value) { 96 case Kind::Unsigned: 97 case Kind::Signed: 98 AbsoluteValueStr = utostr(AbsoluteValue); 99 break; 100 case Kind::HexUpper: 101 case Kind::HexLower: 102 AbsoluteValueStr = utohexstr(AbsoluteValue, Value == Kind::HexLower); 103 break; 104 default: 105 return createStringError(std::errc::invalid_argument, 106 "trying to match value with invalid format"); 107 } 108 109 if (Precision > AbsoluteValueStr.size()) { 110 unsigned LeadingZeros = Precision - AbsoluteValueStr.size(); 111 return (Twine(SignPrefix) + std::string(LeadingZeros, '0') + 112 AbsoluteValueStr) 113 .str(); 114 } 115 116 return (Twine(SignPrefix) + AbsoluteValueStr).str(); 117 } 118 119 Expected<ExpressionValue> 120 ExpressionFormat::valueFromStringRepr(StringRef StrVal, 121 const SourceMgr &SM) const { 122 bool ValueIsSigned = Value == Kind::Signed; 123 StringRef 
OverflowErrorStr = "unable to represent numeric value"; 124 if (ValueIsSigned) { 125 int64_t SignedValue; 126 127 if (StrVal.getAsInteger(10, SignedValue)) 128 return ErrorDiagnostic::get(SM, StrVal, OverflowErrorStr); 129 130 return ExpressionValue(SignedValue); 131 } 132 133 bool Hex = Value == Kind::HexUpper || Value == Kind::HexLower; 134 uint64_t UnsignedValue; 135 if (StrVal.getAsInteger(Hex ? 16 : 10, UnsignedValue)) 136 return ErrorDiagnostic::get(SM, StrVal, OverflowErrorStr); 137 138 return ExpressionValue(UnsignedValue); 139 } 140 141 static int64_t getAsSigned(uint64_t UnsignedValue) { 142 // Use memcpy to reinterpret the bitpattern in Value since casting to 143 // signed is implementation-defined if the unsigned value is too big to be 144 // represented in the signed type and using an union violates type aliasing 145 // rules. 146 int64_t SignedValue; 147 memcpy(&SignedValue, &UnsignedValue, sizeof(SignedValue)); 148 return SignedValue; 149 } 150 151 Expected<int64_t> ExpressionValue::getSignedValue() const { 152 if (Negative) 153 return getAsSigned(Value); 154 155 if (Value > (uint64_t)std::numeric_limits<int64_t>::max()) 156 return make_error<OverflowError>(); 157 158 // Value is in the representable range of int64_t so we can use cast. 159 return static_cast<int64_t>(Value); 160 } 161 162 Expected<uint64_t> ExpressionValue::getUnsignedValue() const { 163 if (Negative) 164 return make_error<OverflowError>(); 165 166 return Value; 167 } 168 169 ExpressionValue ExpressionValue::getAbsolute() const { 170 if (!Negative) 171 return *this; 172 173 int64_t SignedValue = getAsSigned(Value); 174 int64_t MaxInt64 = std::numeric_limits<int64_t>::max(); 175 // Absolute value can be represented as int64_t. 176 if (SignedValue >= -MaxInt64) 177 return ExpressionValue(-getAsSigned(Value)); 178 179 // -X == -(max int64_t + Rem), negate each component independently. 
180 SignedValue += MaxInt64; 181 uint64_t RemainingValueAbsolute = -SignedValue; 182 return ExpressionValue(MaxInt64 + RemainingValueAbsolute); 183 } 184 185 Expected<ExpressionValue> llvm::operator+(const ExpressionValue &LeftOperand, 186 const ExpressionValue &RightOperand) { 187 if (LeftOperand.isNegative() && RightOperand.isNegative()) { 188 int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); 189 int64_t RightValue = cantFail(RightOperand.getSignedValue()); 190 Optional<int64_t> Result = checkedAdd<int64_t>(LeftValue, RightValue); 191 if (!Result) 192 return make_error<OverflowError>(); 193 194 return ExpressionValue(*Result); 195 } 196 197 // (-A) + B == B - A. 198 if (LeftOperand.isNegative()) 199 return RightOperand - LeftOperand.getAbsolute(); 200 201 // A + (-B) == A - B. 202 if (RightOperand.isNegative()) 203 return LeftOperand - RightOperand.getAbsolute(); 204 205 // Both values are positive at this point. 206 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 207 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 208 Optional<uint64_t> Result = 209 checkedAddUnsigned<uint64_t>(LeftValue, RightValue); 210 if (!Result) 211 return make_error<OverflowError>(); 212 213 return ExpressionValue(*Result); 214 } 215 216 Expected<ExpressionValue> llvm::operator-(const ExpressionValue &LeftOperand, 217 const ExpressionValue &RightOperand) { 218 // Result will be negative and thus might underflow. 219 if (LeftOperand.isNegative() && !RightOperand.isNegative()) { 220 int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); 221 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 222 // Result <= -1 - (max int64_t) which overflows on 1- and 2-complement. 
223 if (RightValue > (uint64_t)std::numeric_limits<int64_t>::max()) 224 return make_error<OverflowError>(); 225 Optional<int64_t> Result = 226 checkedSub(LeftValue, static_cast<int64_t>(RightValue)); 227 if (!Result) 228 return make_error<OverflowError>(); 229 230 return ExpressionValue(*Result); 231 } 232 233 // (-A) - (-B) == B - A. 234 if (LeftOperand.isNegative()) 235 return RightOperand.getAbsolute() - LeftOperand.getAbsolute(); 236 237 // A - (-B) == A + B. 238 if (RightOperand.isNegative()) 239 return LeftOperand + RightOperand.getAbsolute(); 240 241 // Both values are positive at this point. 242 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 243 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 244 if (LeftValue >= RightValue) 245 return ExpressionValue(LeftValue - RightValue); 246 else { 247 uint64_t AbsoluteDifference = RightValue - LeftValue; 248 uint64_t MaxInt64 = std::numeric_limits<int64_t>::max(); 249 // Value might underflow. 250 if (AbsoluteDifference > MaxInt64) { 251 AbsoluteDifference -= MaxInt64; 252 int64_t Result = -MaxInt64; 253 int64_t MinInt64 = std::numeric_limits<int64_t>::min(); 254 // Underflow, tested by: 255 // abs(Result + (max int64_t)) > abs((min int64_t) + (max int64_t)) 256 if (AbsoluteDifference > static_cast<uint64_t>(-(MinInt64 - Result))) 257 return make_error<OverflowError>(); 258 Result -= static_cast<int64_t>(AbsoluteDifference); 259 return ExpressionValue(Result); 260 } 261 262 return ExpressionValue(-static_cast<int64_t>(AbsoluteDifference)); 263 } 264 } 265 266 Expected<ExpressionValue> llvm::operator*(const ExpressionValue &LeftOperand, 267 const ExpressionValue &RightOperand) { 268 // -A * -B == A * B 269 if (LeftOperand.isNegative() && RightOperand.isNegative()) 270 return LeftOperand.getAbsolute() * RightOperand.getAbsolute(); 271 272 // A * -B == -B * A 273 if (RightOperand.isNegative()) 274 return RightOperand * LeftOperand; 275 276 assert(!RightOperand.isNegative() && "Unexpected 
negative operand!"); 277 278 // Result will be negative and can underflow. 279 if (LeftOperand.isNegative()) { 280 auto Result = LeftOperand.getAbsolute() * RightOperand.getAbsolute(); 281 if (!Result) 282 return Result; 283 284 return ExpressionValue(0) - *Result; 285 } 286 287 // Result will be positive and can overflow. 288 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 289 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 290 Optional<uint64_t> Result = 291 checkedMulUnsigned<uint64_t>(LeftValue, RightValue); 292 if (!Result) 293 return make_error<OverflowError>(); 294 295 return ExpressionValue(*Result); 296 } 297 298 Expected<ExpressionValue> llvm::operator/(const ExpressionValue &LeftOperand, 299 const ExpressionValue &RightOperand) { 300 // -A / -B == A / B 301 if (LeftOperand.isNegative() && RightOperand.isNegative()) 302 return LeftOperand.getAbsolute() / RightOperand.getAbsolute(); 303 304 // Check for divide by zero. 305 if (RightOperand == ExpressionValue(0)) 306 return make_error<OverflowError>(); 307 308 // Result will be negative and can underflow. 
309 if (LeftOperand.isNegative() || RightOperand.isNegative()) 310 return ExpressionValue(0) - 311 cantFail(LeftOperand.getAbsolute() / RightOperand.getAbsolute()); 312 313 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 314 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 315 return ExpressionValue(LeftValue / RightValue); 316 } 317 318 Expected<ExpressionValue> llvm::max(const ExpressionValue &LeftOperand, 319 const ExpressionValue &RightOperand) { 320 if (LeftOperand.isNegative() && RightOperand.isNegative()) { 321 int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); 322 int64_t RightValue = cantFail(RightOperand.getSignedValue()); 323 return ExpressionValue(std::max(LeftValue, RightValue)); 324 } 325 326 if (!LeftOperand.isNegative() && !RightOperand.isNegative()) { 327 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 328 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 329 return ExpressionValue(std::max(LeftValue, RightValue)); 330 } 331 332 if (LeftOperand.isNegative()) 333 return RightOperand; 334 335 return LeftOperand; 336 } 337 338 Expected<ExpressionValue> llvm::min(const ExpressionValue &LeftOperand, 339 const ExpressionValue &RightOperand) { 340 if (cantFail(max(LeftOperand, RightOperand)) == LeftOperand) 341 return RightOperand; 342 343 return LeftOperand; 344 } 345 346 Expected<ExpressionValue> NumericVariableUse::eval() const { 347 Optional<ExpressionValue> Value = Variable->getValue(); 348 if (Value) 349 return *Value; 350 351 return make_error<UndefVarError>(getExpressionStr()); 352 } 353 354 Expected<ExpressionValue> BinaryOperation::eval() const { 355 Expected<ExpressionValue> LeftOp = LeftOperand->eval(); 356 Expected<ExpressionValue> RightOp = RightOperand->eval(); 357 358 // Bubble up any error (e.g. undefined variables) in the recursive 359 // evaluation. 
360 if (!LeftOp || !RightOp) { 361 Error Err = Error::success(); 362 if (!LeftOp) 363 Err = joinErrors(std::move(Err), LeftOp.takeError()); 364 if (!RightOp) 365 Err = joinErrors(std::move(Err), RightOp.takeError()); 366 return std::move(Err); 367 } 368 369 return EvalBinop(*LeftOp, *RightOp); 370 } 371 372 Expected<ExpressionFormat> 373 BinaryOperation::getImplicitFormat(const SourceMgr &SM) const { 374 Expected<ExpressionFormat> LeftFormat = LeftOperand->getImplicitFormat(SM); 375 Expected<ExpressionFormat> RightFormat = RightOperand->getImplicitFormat(SM); 376 if (!LeftFormat || !RightFormat) { 377 Error Err = Error::success(); 378 if (!LeftFormat) 379 Err = joinErrors(std::move(Err), LeftFormat.takeError()); 380 if (!RightFormat) 381 Err = joinErrors(std::move(Err), RightFormat.takeError()); 382 return std::move(Err); 383 } 384 385 if (*LeftFormat != ExpressionFormat::Kind::NoFormat && 386 *RightFormat != ExpressionFormat::Kind::NoFormat && 387 *LeftFormat != *RightFormat) 388 return ErrorDiagnostic::get( 389 SM, getExpressionStr(), 390 "implicit format conflict between '" + LeftOperand->getExpressionStr() + 391 "' (" + LeftFormat->toString() + ") and '" + 392 RightOperand->getExpressionStr() + "' (" + RightFormat->toString() + 393 "), need an explicit format specifier"); 394 395 return *LeftFormat != ExpressionFormat::Kind::NoFormat ? *LeftFormat 396 : *RightFormat; 397 } 398 399 Expected<std::string> NumericSubstitution::getResult() const { 400 assert(ExpressionPointer->getAST() != nullptr && 401 "Substituting empty expression"); 402 Expected<ExpressionValue> EvaluatedValue = 403 ExpressionPointer->getAST()->eval(); 404 if (!EvaluatedValue) 405 return EvaluatedValue.takeError(); 406 ExpressionFormat Format = ExpressionPointer->getFormat(); 407 return Format.getMatchingString(*EvaluatedValue); 408 } 409 410 Expected<std::string> StringSubstitution::getResult() const { 411 // Look up the value and escape it so that we can put it into the regex. 
412 Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr); 413 if (!VarVal) 414 return VarVal.takeError(); 415 return Regex::escape(*VarVal); 416 } 417 418 bool Pattern::isValidVarNameStart(char C) { return C == '_' || isAlpha(C); } 419 420 Expected<Pattern::VariableProperties> 421 Pattern::parseVariable(StringRef &Str, const SourceMgr &SM) { 422 if (Str.empty()) 423 return ErrorDiagnostic::get(SM, Str, "empty variable name"); 424 425 size_t I = 0; 426 bool IsPseudo = Str[0] == '@'; 427 428 // Global vars start with '$'. 429 if (Str[0] == '$' || IsPseudo) 430 ++I; 431 432 if (!isValidVarNameStart(Str[I++])) 433 return ErrorDiagnostic::get(SM, Str, "invalid variable name"); 434 435 for (size_t E = Str.size(); I != E; ++I) 436 // Variable names are composed of alphanumeric characters and underscores. 437 if (Str[I] != '_' && !isAlnum(Str[I])) 438 break; 439 440 StringRef Name = Str.take_front(I); 441 Str = Str.substr(I); 442 return VariableProperties {Name, IsPseudo}; 443 } 444 445 // StringRef holding all characters considered as horizontal whitespaces by 446 // FileCheck input canonicalization. 447 constexpr StringLiteral SpaceChars = " \t"; 448 449 // Parsing helper function that strips the first character in S and returns it. 
// Callers in this file check that S is non-empty before calling (see
// parseBinop and parseNumericSubstitutionBlock).
static char popFront(StringRef &S) {
  char C = S.front();
  S = S.drop_front();
  return C;
}

// Definitions of the static ID members of the error classes used in this file.
char OverflowError::ID = 0;
char UndefVarError::ID = 0;
char ErrorDiagnostic::ID = 0;
char NotFoundError::ID = 0;

// Parses Expr as the definition of a numeric variable, i.e. a variable name
// followed by nothing but optional whitespace. Returns the matching entry of
// Context->GlobalNumericVariableTable if the name is already known (its
// implicit format must then equal ImplicitFormat), otherwise a variable newly
// created through Context->makeNumericVariable. Returns a diagnostic for
// pseudo variables, for names already used by a string variable, for trailing
// characters and for format mismatches.
Expected<NumericVariable *> Pattern::parseNumericVariableDefinition(
    StringRef &Expr, FileCheckPatternContext *Context,
    Optional<size_t> LineNumber, ExpressionFormat ImplicitFormat,
    const SourceMgr &SM) {
  Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
  if (!ParseVarResult)
    return ParseVarResult.takeError();
  StringRef Name = ParseVarResult->Name;

  if (ParseVarResult->IsPseudo)
    return ErrorDiagnostic::get(
        SM, Name, "definition of pseudo numeric variable unsupported");

  // Detect collisions between string and numeric variables when the latter
  // is created later than the former.
  if (Context->DefinedVariableTable.find(Name) !=
      Context->DefinedVariableTable.end())
    return ErrorDiagnostic::get(
        SM, Name, "string variable with name '" + Name + "' already exists");

  Expr = Expr.ltrim(SpaceChars);
  if (!Expr.empty())
    return ErrorDiagnostic::get(
        SM, Expr, "unexpected characters after numeric variable name");

  // Reuse the existing variable object when the name is already in the table.
  NumericVariable *DefinedNumericVariable;
  auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
  if (VarTableIter != Context->GlobalNumericVariableTable.end()) {
    DefinedNumericVariable = VarTableIter->second;
    if (DefinedNumericVariable->getImplicitFormat() != ImplicitFormat)
      return ErrorDiagnostic::get(
          SM, Expr, "format different from previous variable definition");
  } else
    DefinedNumericVariable =
        Context->makeNumericVariable(Name, ImplicitFormat, LineNumber);

  return DefinedNumericVariable;
}

// Creates the AST node for a use of the numeric variable Name. The only
// pseudo variable accepted is @LINE. Returns a diagnostic if the variable was
// defined on the same line (LineNumber) as this use.
Expected<std::unique_ptr<NumericVariableUse>> Pattern::parseNumericVariableUse(
    StringRef Name, bool IsPseudo, Optional<size_t> LineNumber,
    FileCheckPatternContext *Context, const SourceMgr &SM) {
  if (IsPseudo && !Name.equals("@LINE"))
    return ErrorDiagnostic::get(
        SM, Name, "invalid pseudo numeric variable '" + Name + "'");

  // Numeric variable definitions and uses are parsed in the order in which
  // they appear in the CHECK patterns. For each definition, the pointer to the
  // class instance of the corresponding numeric variable definition is stored
  // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer
  // we get below is null, it means no such variable was defined before. When
  // that happens, we create a dummy variable so that parsing can continue. All
  // uses of undefined variables, whether string or numeric, are then diagnosed
  // in printSubstitutions() after failing to match.
  auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
  NumericVariable *NumericVariable;
  if (VarTableIter != Context->GlobalNumericVariableTable.end())
    NumericVariable = VarTableIter->second;
  else {
    NumericVariable = Context->makeNumericVariable(
        Name, ExpressionFormat(ExpressionFormat::Kind::Unsigned));
    Context->GlobalNumericVariableTable[Name] = NumericVariable;
  }

  Optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber();
  if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
    return ErrorDiagnostic::get(
        SM, Name,
        "numeric variable '" + Name +
            "' defined earlier in the same CHECK directive");

  return std::make_unique<NumericVariableUse>(Name, NumericVariable);
}

// Parses a single operand at the start of Expr and strips it from Expr. An
// operand is a parenthesized expression, a function call, a numeric variable
// use or an integer literal; AO restricts which of these are accepted.
// MaybeInvalidConstraint only affects the wording of the final diagnostic.
Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand(
    StringRef &Expr, AllowedOperand AO, bool MaybeInvalidConstraint,
    Optional<size_t> LineNumber, FileCheckPatternContext *Context,
    const SourceMgr &SM) {
  if (Expr.startswith("(")) {
    if (AO != AllowedOperand::Any)
      return ErrorDiagnostic::get(
          SM, Expr, "parenthesized expression not permitted here");
    return parseParenExpr(Expr, LineNumber, Context, SM);
  }

  if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
    // Try to parse as a numeric variable use.
    Expected<Pattern::VariableProperties> ParseVarResult =
        parseVariable(Expr, SM);
    if (ParseVarResult) {
      // Try to parse a function call.
      if (Expr.ltrim(SpaceChars).startswith("(")) {
        if (AO != AllowedOperand::Any)
          return ErrorDiagnostic::get(SM, ParseVarResult->Name,
                                      "unexpected function call");

        return parseCallExpr(Expr, ParseVarResult->Name, LineNumber, Context,
                             SM);
      }

      return parseNumericVariableUse(ParseVarResult->Name,
                                     ParseVarResult->IsPseudo, LineNumber,
                                     Context, SM);
    }

    if (AO == AllowedOperand::LineVar)
      return ParseVarResult.takeError();
    // Ignore the error and retry parsing as a literal.
    consumeError(ParseVarResult.takeError());
  }

  // Otherwise, parse it as a literal.
  int64_t SignedLiteralValue;
  uint64_t UnsignedLiteralValue;
  StringRef SaveExpr = Expr;
  // Accept both signed and unsigned literal, default to signed literal.
  // The unsigned parse is attempted first; legacy literals are parsed in base
  // 10, all others with radix 0.
  if (!Expr.consumeInteger((AO == AllowedOperand::LegacyLiteral) ? 10 : 0,
                           UnsignedLiteralValue))
    return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
                                               UnsignedLiteralValue);
  // Restore Expr before retrying the parse as a signed literal.
  Expr = SaveExpr;
  if (AO == AllowedOperand::Any && !Expr.consumeInteger(0, SignedLiteralValue))
    return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
                                               SignedLiteralValue);

  return ErrorDiagnostic::get(
      SM, Expr,
      Twine("invalid ") +
          (MaybeInvalidConstraint ? "matching constraint or " : "") +
          "operand format");
}

// Parses a parenthesized expression at the start of Expr, which must start
// with '(' after optional whitespace, and strips it from Expr, closing ')'
// included. Returns the AST of the enclosed expression.
Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseParenExpr(StringRef &Expr, Optional<size_t> LineNumber,
                        FileCheckPatternContext *Context, const SourceMgr &SM) {
  Expr = Expr.ltrim(SpaceChars);
  assert(Expr.startswith("("));

  // Parse right operand.
  Expr.consume_front("(");
  Expr = Expr.ltrim(SpaceChars);
  if (Expr.empty())
    return ErrorDiagnostic::get(SM, Expr, "missing operand in expression");

  // Note: parseNumericOperand handles nested opening parentheses.
  Expected<std::unique_ptr<ExpressionAST>> SubExprResult = parseNumericOperand(
      Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
      Context, SM);
  Expr = Expr.ltrim(SpaceChars);
  // Fold successive "<op> <operand>" pairs into the result until the closing
  // parenthesis, the end of input or an error is reached.
  while (SubExprResult && !Expr.empty() && !Expr.startswith(")")) {
    StringRef OrigExpr = Expr;
    SubExprResult = parseBinop(OrigExpr, Expr, std::move(*SubExprResult), false,
                               LineNumber, Context, SM);
    Expr = Expr.ltrim(SpaceChars);
  }
  if (!SubExprResult)
    return SubExprResult;

  if (!Expr.consume_front(")")) {
    return ErrorDiagnostic::get(SM, Expr,
                                "missing ')' at end of nested expression");
  }
  return SubExprResult;
}

// Parses one "<op> <operand>" pair at the start of RemainingExpr and returns a
// BinaryOperation combining LeftOp with the parsed right operand. Only '+'
// and '-' are supported. Returns LeftOp unchanged if RemainingExpr holds
// nothing but whitespace. Expr is the text of the enclosing expression; the
// part of it that precedes the final RemainingExpr is recorded as the
// operation's expression string.
Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseBinop(StringRef Expr, StringRef &RemainingExpr,
                    std::unique_ptr<ExpressionAST> LeftOp,
                    bool IsLegacyLineExpr, Optional<size_t> LineNumber,
                    FileCheckPatternContext *Context, const SourceMgr &SM) {
  RemainingExpr = RemainingExpr.ltrim(SpaceChars);
  if (RemainingExpr.empty())
    return std::move(LeftOp);

  // Check if this is a supported operation and select a function to perform
  // it.
  SMLoc OpLoc = SMLoc::getFromPointer(RemainingExpr.data());
  char Operator = popFront(RemainingExpr);
  binop_eval_t EvalBinop;
  switch (Operator) {
  case '+':
    EvalBinop = operator+;
    break;
  case '-':
    EvalBinop = operator-;
    break;
  default:
    return ErrorDiagnostic::get(
        SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'");
  }

  // Parse right operand.
  RemainingExpr = RemainingExpr.ltrim(SpaceChars);
  if (RemainingExpr.empty())
    return ErrorDiagnostic::get(SM, RemainingExpr,
                                "missing operand in expression");
  // The second operand in a legacy @LINE expression is always a literal.
  AllowedOperand AO =
      IsLegacyLineExpr ? AllowedOperand::LegacyLiteral : AllowedOperand::Any;
  Expected<std::unique_ptr<ExpressionAST>> RightOpResult =
      parseNumericOperand(RemainingExpr, AO, /*MaybeInvalidConstraint=*/false,
                          LineNumber, Context, SM);
  if (!RightOpResult)
    return RightOpResult;

  Expr = Expr.drop_back(RemainingExpr.size());
  return std::make_unique<BinaryOperation>(Expr, EvalBinop, std::move(LeftOp),
                                           std::move(*RightOpResult));
}

// Parses the argument list of a call to function FuncName at the start of
// Expr, which must start with '(' after optional whitespace, and strips it
// from Expr. The supported functions are add, div, max, min, mul and sub, each
// taking exactly two arguments.
Expected<std::unique_ptr<ExpressionAST>>
Pattern::parseCallExpr(StringRef &Expr, StringRef FuncName,
                       Optional<size_t> LineNumber,
                       FileCheckPatternContext *Context, const SourceMgr &SM) {
  Expr = Expr.ltrim(SpaceChars);
  assert(Expr.startswith("("));

  // Map the function name to the routine evaluating it.
  auto OptFunc = StringSwitch<Optional<binop_eval_t>>(FuncName)
                     .Case("add", operator+)
                     .Case("div", operator/)
                     .Case("max", max)
                     .Case("min", min)
                     .Case("mul", operator*)
                     .Case("sub", operator-)
                     .Default(None);

  if (!OptFunc)
    return ErrorDiagnostic::get(
        SM, FuncName, Twine("call to undefined function '") + FuncName + "'");

  Expr.consume_front("(");
  Expr = Expr.ltrim(SpaceChars);

  // Parse call arguments, which are comma separated.
  SmallVector<std::unique_ptr<ExpressionAST>, 4> Args;
  while (!Expr.empty() && !Expr.startswith(")")) {
    if (Expr.startswith(","))
      return ErrorDiagnostic::get(SM, Expr, "missing argument");

    // Parse the argument, which is an arbitrary expression.
    StringRef OuterBinOpExpr = Expr;
    Expected<std::unique_ptr<ExpressionAST>> Arg = parseNumericOperand(
        Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
        Context, SM);
    while (Arg && !Expr.empty()) {
      Expr = Expr.ltrim(SpaceChars);
      // Have we reached an argument terminator?
      if (Expr.startswith(",") || Expr.startswith(")"))
        break;

      // Arg = Arg <op> <expr>
      Arg = parseBinop(OuterBinOpExpr, Expr, std::move(*Arg), false, LineNumber,
                       Context, SM);
    }

    // Prefer an expression error over a generic invalid argument message.
    if (!Arg)
      return Arg.takeError();
    Args.push_back(std::move(*Arg));

    // Have we parsed all available arguments?
    Expr = Expr.ltrim(SpaceChars);
    if (!Expr.consume_front(","))
      break;

    // A comma must be followed by another argument.
    Expr = Expr.ltrim(SpaceChars);
    if (Expr.startswith(")"))
      return ErrorDiagnostic::get(SM, Expr, "missing argument");
  }

  if (!Expr.consume_front(")"))
    return ErrorDiagnostic::get(SM, Expr,
                                "missing ')' at end of call expression");

  // NOTE(review): Expr now holds the text following the call, and that is what
  // gets recorded as the operation's expression string below - confirm this is
  // intended.
  const unsigned NumArgs = Args.size();
  if (NumArgs == 2)
    return std::make_unique<BinaryOperation>(Expr, *OptFunc, std::move(Args[0]),
                                             std::move(Args[1]));

  // TODO: Support more than binop_eval_t.
  return ErrorDiagnostic::get(SM, FuncName,
                              Twine("function '") + FuncName +
                                  Twine("' takes 2 arguments but ") +
                                  Twine(NumArgs) + " given");
}

// Parses the content of a numeric substitution block. Expr may hold, in this
// order: a format specifier followed by ',', a variable definition followed
// by ':', a "==" matching constraint and an expression. Returns the parsed
// expression with its selected format, and sets DefinedNumericVariable to the
// variable being defined, or to None if the block defines no variable.
// IsLegacyLineExpr restricts the expression to "@LINE", optionally followed by
// an operator and a literal.
Expected<std::unique_ptr<Expression>> Pattern::parseNumericSubstitutionBlock(
    StringRef Expr, Optional<NumericVariable *> &DefinedNumericVariable,
    bool IsLegacyLineExpr, Optional<size_t> LineNumber,
    FileCheckPatternContext *Context, const SourceMgr &SM) {
  std::unique_ptr<ExpressionAST> ExpressionASTPointer = nullptr;
  StringRef DefExpr = StringRef();
  DefinedNumericVariable = None;
  ExpressionFormat ExplicitFormat = ExpressionFormat();
  unsigned Precision = 0;

  // Parse format specifier (NOTE: ',' is also an argument separator).
  // A ',' only introduces a format specifier if it comes before the first '('.
  size_t FormatSpecEnd = Expr.find(',');
  size_t FunctionStart = Expr.find('(');
  if (FormatSpecEnd != StringRef::npos && FormatSpecEnd < FunctionStart) {
    StringRef FormatExpr = Expr.take_front(FormatSpecEnd);
    Expr = Expr.drop_front(FormatSpecEnd + 1);
    FormatExpr = FormatExpr.trim(SpaceChars);
    if (!FormatExpr.consume_front("%"))
      return ErrorDiagnostic::get(
          SM, FormatExpr,
          "invalid matching format specification in expression");

    // Parse precision.
    if (FormatExpr.consume_front(".")) {
      if (FormatExpr.consumeInteger(10, Precision))
        return ErrorDiagnostic::get(SM, FormatExpr,
                                    "invalid precision in format specifier");
    }

    if (!FormatExpr.empty()) {
      // Check for unknown matching format specifier and set matching format in
      // class instance representing this expression.
      SMLoc FmtLoc = SMLoc::getFromPointer(FormatExpr.data());
      switch (popFront(FormatExpr)) {
      case 'u':
        ExplicitFormat =
            ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision);
        break;
      case 'd':
        ExplicitFormat =
            ExpressionFormat(ExpressionFormat::Kind::Signed, Precision);
        break;
      case 'x':
        ExplicitFormat =
            ExpressionFormat(ExpressionFormat::Kind::HexLower, Precision);
        break;
      case 'X':
        ExplicitFormat =
            ExpressionFormat(ExpressionFormat::Kind::HexUpper, Precision);
        break;
      default:
        return ErrorDiagnostic::get(SM, FmtLoc,
                                    "invalid format specifier in expression");
      }
    }

    // Nothing but whitespace may follow the conversion character.
    FormatExpr = FormatExpr.ltrim(SpaceChars);
    if (!FormatExpr.empty())
      return ErrorDiagnostic::get(
          SM, FormatExpr,
          "invalid matching format specification in expression");
  }

  // Save variable definition expression if any.
  size_t DefEnd = Expr.find(':');
  if (DefEnd != StringRef::npos) {
    DefExpr = Expr.substr(0, DefEnd);
    Expr = Expr.substr(DefEnd + 1);
  }

  // Parse matching constraint.
  Expr = Expr.ltrim(SpaceChars);
  bool HasParsedValidConstraint = false;
  if (Expr.consume_front("=="))
    HasParsedValidConstraint = true;

  // Parse the expression itself.
  Expr = Expr.ltrim(SpaceChars);
  if (Expr.empty()) {
    if (HasParsedValidConstraint)
      return ErrorDiagnostic::get(
          SM, Expr, "empty numeric expression should not have a constraint");
  } else {
    Expr = Expr.rtrim(SpaceChars);
    StringRef OuterBinOpExpr = Expr;
    // The first operand in a legacy @LINE expression is always the @LINE
    // pseudo variable.
    AllowedOperand AO =
        IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
    Expected<std::unique_ptr<ExpressionAST>> ParseResult = parseNumericOperand(
        Expr, AO, !HasParsedValidConstraint, LineNumber, Context, SM);
    while (ParseResult && !Expr.empty()) {
      ParseResult = parseBinop(OuterBinOpExpr, Expr, std::move(*ParseResult),
                               IsLegacyLineExpr, LineNumber, Context, SM);
      // Legacy @LINE expressions only allow 2 operands.
      if (ParseResult && IsLegacyLineExpr && !Expr.empty())
        return ErrorDiagnostic::get(
            SM, Expr,
            "unexpected characters at end of expression '" + Expr + "'");
    }
    if (!ParseResult)
      return ParseResult.takeError();
    ExpressionASTPointer = std::move(*ParseResult);
  }

  // Select format of the expression, i.e. (i) its explicit format, if any,
  // otherwise (ii) its implicit format, if any, otherwise (iii) the default
  // format (unsigned). Error out in case of conflicting implicit format
  // without explicit format.
  ExpressionFormat Format;
  if (ExplicitFormat)
    Format = ExplicitFormat;
  else if (ExpressionASTPointer) {
    Expected<ExpressionFormat> ImplicitFormat =
        ExpressionASTPointer->getImplicitFormat(SM);
    if (!ImplicitFormat)
      return ImplicitFormat.takeError();
    Format = *ImplicitFormat;
  }
  if (!Format)
    Format = ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision);

  std::unique_ptr<Expression> ExpressionPointer =
      std::make_unique<Expression>(std::move(ExpressionASTPointer), Format);

  // Parse the numeric variable definition.
  if (DefEnd != StringRef::npos) {
    DefExpr = DefExpr.ltrim(SpaceChars);
    Expected<NumericVariable *> ParseResult = parseNumericVariableDefinition(
        DefExpr, Context, LineNumber, ExpressionPointer->getFormat(), SM);

    if (!ParseResult)
      return ParseResult.takeError();
    DefinedNumericVariable = *ParseResult;
  }

  return std::move(ExpressionPointer);
}

bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
                           SourceMgr &SM, const FileCheckRequest &Req) {
  bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
  IgnoreCase = Req.IgnoreCase;

  PatternLoc = SMLoc::getFromPointer(PatternStr.data());

  if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
    // Ignore trailing whitespace.
    while (!PatternStr.empty() &&
           (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
      PatternStr = PatternStr.substr(0, PatternStr.size() - 1);

  // Check that there is something on the line.
  if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
    SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
                    "found empty check string with prefix '" + Prefix + ":'");
    return true;
  }

  if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
    SM.PrintMessage(
        PatternLoc, SourceMgr::DK_Error,
        "found non-empty check string for empty check with prefix '" + Prefix +
            ":'");
    return true;
  }

  if (CheckTy == Check::CheckEmpty) {
    RegExStr = "(\n$)";
    return false;
  }

  // Check to see if this is a fixed string, or if it has regex pieces.
  if (!MatchFullLinesHere &&
      (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
                                 PatternStr.find("[[") == StringRef::npos))) {
    FixedStr = PatternStr;
    return false;
  }

  if (MatchFullLinesHere) {
    RegExStr += '^';
    if (!Req.NoCanonicalizeWhiteSpace)
      RegExStr += " *";
  }

  // Paren value #0 is for the fully matched string.
Any new parenthesized 934 // values add from there. 935 unsigned CurParen = 1; 936 937 // Otherwise, there is at least one regex piece. Build up the regex pattern 938 // by escaping scary characters in fixed strings, building up one big regex. 939 while (!PatternStr.empty()) { 940 // RegEx matches. 941 if (PatternStr.startswith("{{")) { 942 // This is the start of a regex match. Scan for the }}. 943 size_t End = PatternStr.find("}}"); 944 if (End == StringRef::npos) { 945 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 946 SourceMgr::DK_Error, 947 "found start of regex string with no end '}}'"); 948 return true; 949 } 950 951 // Enclose {{}} patterns in parens just like [[]] even though we're not 952 // capturing the result for any purpose. This is required in case the 953 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 954 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 955 RegExStr += '('; 956 ++CurParen; 957 958 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 959 return true; 960 RegExStr += ')'; 961 962 PatternStr = PatternStr.substr(End + 2); 963 continue; 964 } 965 966 // String and numeric substitution blocks. Pattern substitution blocks come 967 // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some 968 // other regex) and assigns it to the string variable 'foo'. The latter 969 // substitutes foo's value. Numeric substitution blocks recognize the same 970 // form as string ones, but start with a '#' sign after the double 971 // brackets. They also accept a combined form which sets a numeric variable 972 // to the evaluation of an expression. Both string and numeric variable 973 // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be 974 // valid, as this helps catch some common errors. 975 if (PatternStr.startswith("[[")) { 976 StringRef UnparsedPatternStr = PatternStr.substr(2); 977 // Find the closing bracket pair ending the match. 
End is going to be an 978 // offset relative to the beginning of the match string. 979 size_t End = FindRegexVarEnd(UnparsedPatternStr, SM); 980 StringRef MatchStr = UnparsedPatternStr.substr(0, End); 981 bool IsNumBlock = MatchStr.consume_front("#"); 982 983 if (End == StringRef::npos) { 984 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 985 SourceMgr::DK_Error, 986 "Invalid substitution block, no ]] found"); 987 return true; 988 } 989 // Strip the substitution block we are parsing. End points to the start 990 // of the "]]" closing the expression so account for it in computing the 991 // index of the first unparsed character. 992 PatternStr = UnparsedPatternStr.substr(End + 2); 993 994 bool IsDefinition = false; 995 bool SubstNeeded = false; 996 // Whether the substitution block is a legacy use of @LINE with string 997 // substitution block syntax. 998 bool IsLegacyLineExpr = false; 999 StringRef DefName; 1000 StringRef SubstStr; 1001 std::string MatchRegexp; 1002 size_t SubstInsertIdx = RegExStr.size(); 1003 1004 // Parse string variable or legacy @LINE expression. 1005 if (!IsNumBlock) { 1006 size_t VarEndIdx = MatchStr.find(":"); 1007 size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t"); 1008 if (SpacePos != StringRef::npos) { 1009 SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos), 1010 SourceMgr::DK_Error, "unexpected whitespace"); 1011 return true; 1012 } 1013 1014 // Get the name (e.g. "foo") and verify it is well formed. 
1015 StringRef OrigMatchStr = MatchStr; 1016 Expected<Pattern::VariableProperties> ParseVarResult = 1017 parseVariable(MatchStr, SM); 1018 if (!ParseVarResult) { 1019 logAllUnhandledErrors(ParseVarResult.takeError(), errs()); 1020 return true; 1021 } 1022 StringRef Name = ParseVarResult->Name; 1023 bool IsPseudo = ParseVarResult->IsPseudo; 1024 1025 IsDefinition = (VarEndIdx != StringRef::npos); 1026 SubstNeeded = !IsDefinition; 1027 if (IsDefinition) { 1028 if ((IsPseudo || !MatchStr.consume_front(":"))) { 1029 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 1030 SourceMgr::DK_Error, 1031 "invalid name in string variable definition"); 1032 return true; 1033 } 1034 1035 // Detect collisions between string and numeric variables when the 1036 // former is created later than the latter. 1037 if (Context->GlobalNumericVariableTable.find(Name) != 1038 Context->GlobalNumericVariableTable.end()) { 1039 SM.PrintMessage( 1040 SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 1041 "numeric variable with name '" + Name + "' already exists"); 1042 return true; 1043 } 1044 DefName = Name; 1045 MatchRegexp = MatchStr.str(); 1046 } else { 1047 if (IsPseudo) { 1048 MatchStr = OrigMatchStr; 1049 IsLegacyLineExpr = IsNumBlock = true; 1050 } else 1051 SubstStr = Name; 1052 } 1053 } 1054 1055 // Parse numeric substitution block. 
1056 std::unique_ptr<Expression> ExpressionPointer; 1057 Optional<NumericVariable *> DefinedNumericVariable; 1058 if (IsNumBlock) { 1059 Expected<std::unique_ptr<Expression>> ParseResult = 1060 parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable, 1061 IsLegacyLineExpr, LineNumber, Context, 1062 SM); 1063 if (!ParseResult) { 1064 logAllUnhandledErrors(ParseResult.takeError(), errs()); 1065 return true; 1066 } 1067 ExpressionPointer = std::move(*ParseResult); 1068 SubstNeeded = ExpressionPointer->getAST() != nullptr; 1069 if (DefinedNumericVariable) { 1070 IsDefinition = true; 1071 DefName = (*DefinedNumericVariable)->getName(); 1072 } 1073 if (SubstNeeded) 1074 SubstStr = MatchStr; 1075 else { 1076 ExpressionFormat Format = ExpressionPointer->getFormat(); 1077 MatchRegexp = cantFail(Format.getWildcardRegex()); 1078 } 1079 } 1080 1081 // Handle variable definition: [[<def>:(...)]] and [[#(...)<def>:(...)]]. 1082 if (IsDefinition) { 1083 RegExStr += '('; 1084 ++SubstInsertIdx; 1085 1086 if (IsNumBlock) { 1087 NumericVariableMatch NumericVariableDefinition = { 1088 *DefinedNumericVariable, CurParen}; 1089 NumericVariableDefs[DefName] = NumericVariableDefinition; 1090 // This store is done here rather than in match() to allow 1091 // parseNumericVariableUse() to get the pointer to the class instance 1092 // of the right variable definition corresponding to a given numeric 1093 // variable use. 1094 Context->GlobalNumericVariableTable[DefName] = 1095 *DefinedNumericVariable; 1096 } else { 1097 VariableDefs[DefName] = CurParen; 1098 // Mark string variable as defined to detect collisions between 1099 // string and numeric variables in parseNumericVariableUse() and 1100 // defineCmdlineVariables() when the latter is created later than the 1101 // former. We cannot reuse GlobalVariableTable for this by populating 1102 // it with an empty string since we would then lose the ability to 1103 // detect the use of an undefined variable in match(). 
1104 Context->DefinedVariableTable[DefName] = true; 1105 } 1106 1107 ++CurParen; 1108 } 1109 1110 if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM)) 1111 return true; 1112 1113 if (IsDefinition) 1114 RegExStr += ')'; 1115 1116 // Handle substitutions: [[foo]] and [[#<foo expr>]]. 1117 if (SubstNeeded) { 1118 // Handle substitution of string variables that were defined earlier on 1119 // the same line by emitting a backreference. Expressions do not 1120 // support substituting a numeric variable defined on the same line. 1121 if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) { 1122 unsigned CaptureParenGroup = VariableDefs[SubstStr]; 1123 if (CaptureParenGroup < 1 || CaptureParenGroup > 9) { 1124 SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()), 1125 SourceMgr::DK_Error, 1126 "Can't back-reference more than 9 variables"); 1127 return true; 1128 } 1129 AddBackrefToRegEx(CaptureParenGroup); 1130 } else { 1131 // Handle substitution of string variables ([[<var>]]) defined in 1132 // previous CHECK patterns, and substitution of expressions. 1133 Substitution *Substitution = 1134 IsNumBlock 1135 ? Context->makeNumericSubstitution( 1136 SubstStr, std::move(ExpressionPointer), SubstInsertIdx) 1137 : Context->makeStringSubstitution(SubstStr, SubstInsertIdx); 1138 Substitutions.push_back(Substitution); 1139 } 1140 } 1141 } 1142 1143 // Handle fixed string matches. 1144 // Find the end, which is the start of the next regex. 
1145 size_t FixedMatchEnd = PatternStr.find("{{"); 1146 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 1147 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 1148 PatternStr = PatternStr.substr(FixedMatchEnd); 1149 } 1150 1151 if (MatchFullLinesHere) { 1152 if (!Req.NoCanonicalizeWhiteSpace) 1153 RegExStr += " *"; 1154 RegExStr += '$'; 1155 } 1156 1157 return false; 1158 } 1159 1160 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 1161 Regex R(RS); 1162 std::string Error; 1163 if (!R.isValid(Error)) { 1164 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 1165 "invalid regex: " + Error); 1166 return true; 1167 } 1168 1169 RegExStr += RS.str(); 1170 CurParen += R.getNumMatches(); 1171 return false; 1172 } 1173 1174 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 1175 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 1176 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 1177 RegExStr += Backref; 1178 } 1179 1180 Expected<size_t> Pattern::match(StringRef Buffer, size_t &MatchLen, 1181 const SourceMgr &SM) const { 1182 // If this is the EOF pattern, match it immediately. 1183 if (CheckTy == Check::CheckEOF) { 1184 MatchLen = 0; 1185 return Buffer.size(); 1186 } 1187 1188 // If this is a fixed string pattern, just match it now. 1189 if (!FixedStr.empty()) { 1190 MatchLen = FixedStr.size(); 1191 size_t Pos = 1192 IgnoreCase ? Buffer.find_lower(FixedStr) : Buffer.find(FixedStr); 1193 if (Pos == StringRef::npos) 1194 return make_error<NotFoundError>(); 1195 return Pos; 1196 } 1197 1198 // Regex match. 1199 1200 // If there are substitutions, we need to create a temporary string with the 1201 // actual value. 
// NOTE(review): continuation of the regex path of Pattern::match(), whose
// beginning lies before this span.
  StringRef RegExToMatch = RegExStr;
  std::string TmpStr;
  if (!Substitutions.empty()) {
    TmpStr = RegExStr;
    // Give the line pseudo variable the line number of this pattern before
    // evaluating substitutions.
    if (LineNumber)
      Context->LineVariable->setValue(ExpressionValue(*LineNumber));

    // Number of characters inserted so far; insertion indices were recorded
    // relative to the regex without any substituted value.
    size_t InsertOffset = 0;
    // Substitute all string variables and expressions whose values are only
    // now known. Use of string variables defined on the same line are handled
    // by back-references.
    for (const auto &Substitution : Substitutions) {
      // Substitute and check for failure (e.g. use of undefined variable).
      Expected<std::string> Value = Substitution->getResult();
      if (!Value) {
        // Convert to an ErrorDiagnostic to get location information. This is
        // done here rather than PrintNoMatch since now we know which
        // substitution block caused the overflow.
        Error Err =
            handleErrors(Value.takeError(), [&](const OverflowError &E) {
              return ErrorDiagnostic::get(SM, Substitution->getFromString(),
                                          "unable to substitute variable or "
                                          "numeric expression: overflow error");
            });
        return std::move(Err);
      }

      // Plop it into the regex at the adjusted offset.
      TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset,
                    Value->begin(), Value->end());
      InsertOffset += Value->size();
    }

    // Match the newly constructed regex.
    RegExToMatch = TmpStr;
  }

  SmallVector<StringRef, 4> MatchInfo;
  unsigned int Flags = Regex::Newline;
  if (IgnoreCase)
    Flags |= Regex::IgnoreCase;
  if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo))
    return make_error<NotFoundError>();

  // Successful regex match.
  assert(!MatchInfo.empty() && "Didn't get any match");
  StringRef FullMatch = MatchInfo[0];

  // If this defines any string variables, remember their values.
  for (const auto &VariableDef : VariableDefs) {
    assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
    Context->GlobalVariableTable[VariableDef.first] =
        MatchInfo[VariableDef.second];
  }

  // If this defines any numeric variables, remember their values.
  for (const auto &NumericVariableDef : NumericVariableDefs) {
    const NumericVariableMatch &NumericVariableMatch =
        NumericVariableDef.getValue();
    unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup;
    assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error");
    NumericVariable *DefinedNumericVariable =
        NumericVariableMatch.DefinedNumericVariable;

    // Convert the captured text back into a value using the variable's format.
    StringRef MatchedValue = MatchInfo[CaptureParenGroup];
    ExpressionFormat Format = DefinedNumericVariable->getImplicitFormat();
    Expected<ExpressionValue> Value =
        Format.valueFromStringRepr(MatchedValue, SM);
    if (!Value)
      return Value.takeError();
    DefinedNumericVariable->setValue(*Value, MatchedValue);
  }

  // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
  // the required preceding newline, which is consumed by the pattern in the
  // case of CHECK-EMPTY but not CHECK-NEXT.
  size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
  MatchLen = FullMatch.size() - MatchStartSkip;
  return FullMatch.data() - Buffer.data() + MatchStartSkip;
}

/// Computes how far the start of \p Buffer is from this pattern.
///
/// \returns the edit distance between the pattern text (FixedStr, or RegExStr
/// when FixedStr is empty) and the part of the first line of \p Buffer that is
/// no longer than that text.
unsigned Pattern::computeMatchDistance(StringRef Buffer) const {
  // Just compute the number of matching characters. For regular expressions, we
  // just compare against the regex itself and hope for the best.
  //
  // FIXME: One easy improvement here is have the regex lib generate a single
  // example regular expression which matches, and use that as the example
  // string.
  StringRef ExampleString(FixedStr);
  if (ExampleString.empty())
    ExampleString = RegExStr;

  // Only compare up to the first line in the buffer, or the string size.
  StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
  BufferPrefix = BufferPrefix.split('\n').first;
  return BufferPrefix.edit_distance(ExampleString);
}

/// Emits one note per substitution of this pattern, either as a FileCheckDiag
/// appended to \p Diags (when non-null) or as a message printed through \p SM.
/// Each note gives the substituted value or lists the undefined variables used
/// by the substitution.
void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
                                 SMRange Range,
                                 FileCheckDiag::MatchType MatchTy,
                                 std::vector<FileCheckDiag> *Diags) const {
  // Print what we know about substitutions.
  if (!Substitutions.empty()) {
    for (const auto &Substitution : Substitutions) {
      SmallString<256> Msg;
      raw_svector_ostream OS(Msg);
      Expected<std::string> MatchedValue = Substitution->getResult();

      // Substitution failed or is not known at match time, print the undefined
      // variables it uses.
      if (!MatchedValue) {
        bool UndefSeen = false;
        handleAllErrors(
            MatchedValue.takeError(), [](const NotFoundError &E) {},
            // Handled in PrintNoMatch().
            [](const ErrorDiagnostic &E) {},
            // Handled in match().
            [](const OverflowError &E) {},
            [&](const UndefVarError &E) {
              if (!UndefSeen) {
                OS << "uses undefined variable(s):";
                UndefSeen = true;
              }
              OS << " ";
              E.log(OS);
            });
      } else {
        // Substitution succeeded.  Print substituted value.
        OS << "with \"";
        OS.write_escaped(Substitution->getFromString()) << "\" equal to \"";
        OS.write_escaped(*MatchedValue) << "\"";
      }

      // We report only the start of the match/search range to suggest we are
      // reporting the substitutions as set at the start of the match/search.
      // Indicating a non-zero-length range might instead seem to imply that the
      // substitution matches or was captured from exactly that range.
1340 if (Diags) 1341 Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, 1342 SMRange(Range.Start, Range.Start), OS.str()); 1343 else 1344 SM.PrintMessage(Range.Start, SourceMgr::DK_Note, OS.str()); 1345 } 1346 } 1347 } 1348 1349 void Pattern::printVariableDefs(const SourceMgr &SM, 1350 FileCheckDiag::MatchType MatchTy, 1351 std::vector<FileCheckDiag> *Diags) const { 1352 if (VariableDefs.empty() && NumericVariableDefs.empty()) 1353 return; 1354 // Build list of variable captures. 1355 struct VarCapture { 1356 StringRef Name; 1357 SMRange Range; 1358 }; 1359 SmallVector<VarCapture, 2> VarCaptures; 1360 for (const auto &VariableDef : VariableDefs) { 1361 VarCapture VC; 1362 VC.Name = VariableDef.first; 1363 StringRef Value = Context->GlobalVariableTable[VC.Name]; 1364 SMLoc Start = SMLoc::getFromPointer(Value.data()); 1365 SMLoc End = SMLoc::getFromPointer(Value.data() + Value.size()); 1366 VC.Range = SMRange(Start, End); 1367 VarCaptures.push_back(VC); 1368 } 1369 for (const auto &VariableDef : NumericVariableDefs) { 1370 VarCapture VC; 1371 VC.Name = VariableDef.getKey(); 1372 StringRef StrValue = VariableDef.getValue() 1373 .DefinedNumericVariable->getStringValue() 1374 .getValue(); 1375 SMLoc Start = SMLoc::getFromPointer(StrValue.data()); 1376 SMLoc End = SMLoc::getFromPointer(StrValue.data() + StrValue.size()); 1377 VC.Range = SMRange(Start, End); 1378 VarCaptures.push_back(VC); 1379 } 1380 // Sort variable captures by the order in which they matched the input. 1381 // Ranges shouldn't be overlapping, so we can just compare the start. 1382 std::sort(VarCaptures.begin(), VarCaptures.end(), 1383 [](const VarCapture &A, const VarCapture &B) { 1384 assert(A.Range.Start != B.Range.Start && 1385 "unexpected overlapping variable captures"); 1386 return A.Range.Start.getPointer() < B.Range.Start.getPointer(); 1387 }); 1388 // Create notes for the sorted captures. 
1389 for (const VarCapture &VC : VarCaptures) { 1390 SmallString<256> Msg; 1391 raw_svector_ostream OS(Msg); 1392 OS << "captured var \"" << VC.Name << "\""; 1393 if (Diags) 1394 Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, VC.Range, OS.str()); 1395 else 1396 SM.PrintMessage(VC.Range.Start, SourceMgr::DK_Note, OS.str(), VC.Range); 1397 } 1398 } 1399 1400 static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy, 1401 const SourceMgr &SM, SMLoc Loc, 1402 Check::FileCheckType CheckTy, 1403 StringRef Buffer, size_t Pos, size_t Len, 1404 std::vector<FileCheckDiag> *Diags, 1405 bool AdjustPrevDiags = false) { 1406 SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos); 1407 SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len); 1408 SMRange Range(Start, End); 1409 if (Diags) { 1410 if (AdjustPrevDiags) { 1411 SMLoc CheckLoc = Diags->rbegin()->CheckLoc; 1412 for (auto I = Diags->rbegin(), E = Diags->rend(); 1413 I != E && I->CheckLoc == CheckLoc; ++I) 1414 I->MatchTy = MatchTy; 1415 } else 1416 Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range); 1417 } 1418 return Range; 1419 } 1420 1421 void Pattern::printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, 1422 std::vector<FileCheckDiag> *Diags) const { 1423 // Attempt to find the closest/best fuzzy match. Usually an error happens 1424 // because some string in the output didn't exactly match. In these cases, we 1425 // would like to show the user a best guess at what "should have" matched, to 1426 // save them having to actually check the input manually. 1427 size_t NumLinesForward = 0; 1428 size_t Best = StringRef::npos; 1429 double BestQuality = 0; 1430 1431 // Use an arbitrary 4k limit on how far we will search. 1432 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 1433 if (Buffer[i] == '\n') 1434 ++NumLinesForward; 1435 1436 // Patterns have leading whitespace stripped, so skip whitespace when 1437 // looking for something which looks like a pattern. 
1438 if (Buffer[i] == ' ' || Buffer[i] == '\t') 1439 continue; 1440 1441 // Compute the "quality" of this match as an arbitrary combination of the 1442 // match distance and the number of lines skipped to get to this match. 1443 unsigned Distance = computeMatchDistance(Buffer.substr(i)); 1444 double Quality = Distance + (NumLinesForward / 100.); 1445 1446 if (Quality < BestQuality || Best == StringRef::npos) { 1447 Best = i; 1448 BestQuality = Quality; 1449 } 1450 } 1451 1452 // Print the "possible intended match here" line if we found something 1453 // reasonable and not equal to what we showed in the "scanning from here" 1454 // line. 1455 if (Best && Best != StringRef::npos && BestQuality < 50) { 1456 SMRange MatchRange = 1457 ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(), 1458 getCheckTy(), Buffer, Best, 0, Diags); 1459 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, 1460 "possible intended match here"); 1461 1462 // FIXME: If we wanted to be really friendly we would show why the match 1463 // failed, as it can be hard to spot simple one character differences. 1464 } 1465 } 1466 1467 Expected<StringRef> 1468 FileCheckPatternContext::getPatternVarValue(StringRef VarName) { 1469 auto VarIter = GlobalVariableTable.find(VarName); 1470 if (VarIter == GlobalVariableTable.end()) 1471 return make_error<UndefVarError>(VarName); 1472 1473 return VarIter->second; 1474 } 1475 1476 template <class... Types> 1477 NumericVariable *FileCheckPatternContext::makeNumericVariable(Types... 
args) { 1478 NumericVariables.push_back(std::make_unique<NumericVariable>(args...)); 1479 return NumericVariables.back().get(); 1480 } 1481 1482 Substitution * 1483 FileCheckPatternContext::makeStringSubstitution(StringRef VarName, 1484 size_t InsertIdx) { 1485 Substitutions.push_back( 1486 std::make_unique<StringSubstitution>(this, VarName, InsertIdx)); 1487 return Substitutions.back().get(); 1488 } 1489 1490 Substitution *FileCheckPatternContext::makeNumericSubstitution( 1491 StringRef ExpressionStr, std::unique_ptr<Expression> Expression, 1492 size_t InsertIdx) { 1493 Substitutions.push_back(std::make_unique<NumericSubstitution>( 1494 this, ExpressionStr, std::move(Expression), InsertIdx)); 1495 return Substitutions.back().get(); 1496 } 1497 1498 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 1499 // Offset keeps track of the current offset within the input Str 1500 size_t Offset = 0; 1501 // [...] Nesting depth 1502 size_t BracketDepth = 0; 1503 1504 while (!Str.empty()) { 1505 if (Str.startswith("]]") && BracketDepth == 0) 1506 return Offset; 1507 if (Str[0] == '\\') { 1508 // Backslash escapes the next char within regexes, so skip them both. 1509 Str = Str.substr(2); 1510 Offset += 2; 1511 } else { 1512 switch (Str[0]) { 1513 default: 1514 break; 1515 case '[': 1516 BracketDepth++; 1517 break; 1518 case ']': 1519 if (BracketDepth == 0) { 1520 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 1521 SourceMgr::DK_Error, 1522 "missing closing \"]\" for regex variable"); 1523 exit(1); 1524 } 1525 BracketDepth--; 1526 break; 1527 } 1528 Str = Str.substr(1); 1529 Offset++; 1530 } 1531 } 1532 1533 return StringRef::npos; 1534 } 1535 1536 StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB, 1537 SmallVectorImpl<char> &OutputBuffer) { 1538 OutputBuffer.reserve(MB.getBufferSize()); 1539 1540 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 1541 Ptr != End; ++Ptr) { 1542 // Eliminate trailing dosish \r. 
1543 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 1544 continue; 1545 } 1546 1547 // If current char is not a horizontal whitespace or if horizontal 1548 // whitespace canonicalization is disabled, dump it to output as is. 1549 if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 1550 OutputBuffer.push_back(*Ptr); 1551 continue; 1552 } 1553 1554 // Otherwise, add one space and advance over neighboring space. 1555 OutputBuffer.push_back(' '); 1556 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 1557 ++Ptr; 1558 } 1559 1560 // Add a null byte and then return all but that byte. 1561 OutputBuffer.push_back('\0'); 1562 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 1563 } 1564 1565 FileCheckDiag::FileCheckDiag(const SourceMgr &SM, 1566 const Check::FileCheckType &CheckTy, 1567 SMLoc CheckLoc, MatchType MatchTy, 1568 SMRange InputRange, StringRef Note) 1569 : CheckTy(CheckTy), CheckLoc(CheckLoc), MatchTy(MatchTy), Note(Note) { 1570 auto Start = SM.getLineAndColumn(InputRange.Start); 1571 auto End = SM.getLineAndColumn(InputRange.End); 1572 InputStartLine = Start.first; 1573 InputStartCol = Start.second; 1574 InputEndLine = End.first; 1575 InputEndCol = End.second; 1576 } 1577 1578 static bool IsPartOfWord(char c) { 1579 return (isAlnum(c) || c == '-' || c == '_'); 1580 } 1581 1582 Check::FileCheckType &Check::FileCheckType::setCount(int C) { 1583 assert(Count > 0 && "zero and negative counts are not supported"); 1584 assert((C == 1 || Kind == CheckPlain) && 1585 "count supported only for plain CHECK directives"); 1586 Count = C; 1587 return *this; 1588 } 1589 1590 std::string Check::FileCheckType::getDescription(StringRef Prefix) const { 1591 switch (Kind) { 1592 case Check::CheckNone: 1593 return "invalid"; 1594 case Check::CheckPlain: 1595 if (Count > 1) 1596 return Prefix.str() + "-COUNT"; 1597 return std::string(Prefix); 1598 case Check::CheckNext: 1599 return Prefix.str() + "-NEXT"; 1600 case 
Check::CheckSame: 1601 return Prefix.str() + "-SAME"; 1602 case Check::CheckNot: 1603 return Prefix.str() + "-NOT"; 1604 case Check::CheckDAG: 1605 return Prefix.str() + "-DAG"; 1606 case Check::CheckLabel: 1607 return Prefix.str() + "-LABEL"; 1608 case Check::CheckEmpty: 1609 return Prefix.str() + "-EMPTY"; 1610 case Check::CheckComment: 1611 return std::string(Prefix); 1612 case Check::CheckEOF: 1613 return "implicit EOF"; 1614 case Check::CheckBadNot: 1615 return "bad NOT"; 1616 case Check::CheckBadCount: 1617 return "bad COUNT"; 1618 } 1619 llvm_unreachable("unknown FileCheckType"); 1620 } 1621 1622 static std::pair<Check::FileCheckType, StringRef> 1623 FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { 1624 if (Buffer.size() <= Prefix.size()) 1625 return {Check::CheckNone, StringRef()}; 1626 1627 char NextChar = Buffer[Prefix.size()]; 1628 1629 StringRef Rest = Buffer.drop_front(Prefix.size() + 1); 1630 1631 // Check for comment. 1632 if (llvm::is_contained(Req.CommentPrefixes, Prefix)) { 1633 if (NextChar == ':') 1634 return {Check::CheckComment, Rest}; 1635 // Ignore a comment prefix if it has a suffix like "-NOT". 1636 return {Check::CheckNone, StringRef()}; 1637 } 1638 1639 // Verify that the : is present after the prefix. 1640 if (NextChar == ':') 1641 return {Check::CheckPlain, Rest}; 1642 1643 if (NextChar != '-') 1644 return {Check::CheckNone, StringRef()}; 1645 1646 if (Rest.consume_front("COUNT-")) { 1647 int64_t Count; 1648 if (Rest.consumeInteger(10, Count)) 1649 // Error happened in parsing integer. 
1650 return {Check::CheckBadCount, Rest}; 1651 if (Count <= 0 || Count > INT32_MAX) 1652 return {Check::CheckBadCount, Rest}; 1653 if (!Rest.consume_front(":")) 1654 return {Check::CheckBadCount, Rest}; 1655 return {Check::FileCheckType(Check::CheckPlain).setCount(Count), Rest}; 1656 } 1657 1658 if (Rest.consume_front("NEXT:")) 1659 return {Check::CheckNext, Rest}; 1660 1661 if (Rest.consume_front("SAME:")) 1662 return {Check::CheckSame, Rest}; 1663 1664 if (Rest.consume_front("NOT:")) 1665 return {Check::CheckNot, Rest}; 1666 1667 if (Rest.consume_front("DAG:")) 1668 return {Check::CheckDAG, Rest}; 1669 1670 if (Rest.consume_front("LABEL:")) 1671 return {Check::CheckLabel, Rest}; 1672 1673 if (Rest.consume_front("EMPTY:")) 1674 return {Check::CheckEmpty, Rest}; 1675 1676 // You can't combine -NOT with another suffix. 1677 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 1678 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 1679 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") || 1680 Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:")) 1681 return {Check::CheckBadNot, Rest}; 1682 1683 return {Check::CheckNone, Rest}; 1684 } 1685 1686 // From the given position, find the next character after the word. 1687 static size_t SkipWord(StringRef Str, size_t Loc) { 1688 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 1689 ++Loc; 1690 return Loc; 1691 } 1692 1693 /// Searches the buffer for the first prefix in the prefix regular expression. 1694 /// 1695 /// This searches the buffer using the provided regular expression, however it 1696 /// enforces constraints beyond that: 1697 /// 1) The found prefix must not be a suffix of something that looks like 1698 /// a valid prefix. 1699 /// 2) The found prefix must be followed by a valid check type suffix using \c 1700 /// FindCheckType above. 
1701 /// 1702 /// \returns a pair of StringRefs into the Buffer, which combines: 1703 /// - the first match of the regular expression to satisfy these two is 1704 /// returned, 1705 /// otherwise an empty StringRef is returned to indicate failure. 1706 /// - buffer rewound to the location right after parsed suffix, for parsing 1707 /// to continue from 1708 /// 1709 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 1710 /// start at the beginning of the returned prefix, increment \p LineNumber for 1711 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 1712 /// check found by examining the suffix. 1713 /// 1714 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 1715 /// is unspecified. 1716 static std::pair<StringRef, StringRef> 1717 FindFirstMatchingPrefix(const FileCheckRequest &Req, Regex &PrefixRE, 1718 StringRef &Buffer, unsigned &LineNumber, 1719 Check::FileCheckType &CheckTy) { 1720 SmallVector<StringRef, 2> Matches; 1721 1722 while (!Buffer.empty()) { 1723 // Find the first (longest) match using the RE. 1724 if (!PrefixRE.match(Buffer, &Matches)) 1725 // No match at all, bail. 1726 return {StringRef(), StringRef()}; 1727 1728 StringRef Prefix = Matches[0]; 1729 Matches.clear(); 1730 1731 assert(Prefix.data() >= Buffer.data() && 1732 Prefix.data() < Buffer.data() + Buffer.size() && 1733 "Prefix doesn't start inside of buffer!"); 1734 size_t Loc = Prefix.data() - Buffer.data(); 1735 StringRef Skipped = Buffer.substr(0, Loc); 1736 Buffer = Buffer.drop_front(Loc); 1737 LineNumber += Skipped.count('\n'); 1738 1739 // Check that the matched prefix isn't a suffix of some other check-like 1740 // word. 1741 // FIXME: This is a very ad-hoc check. it would be better handled in some 1742 // other way. Among other things it seems hard to distinguish between 1743 // intentional and unintentional uses of this feature. 
1744 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 1745 // Now extract the type. 1746 StringRef AfterSuffix; 1747 std::tie(CheckTy, AfterSuffix) = FindCheckType(Req, Buffer, Prefix); 1748 1749 // If we've found a valid check type for this prefix, we're done. 1750 if (CheckTy != Check::CheckNone) 1751 return {Prefix, AfterSuffix}; 1752 } 1753 1754 // If we didn't successfully find a prefix, we need to skip this invalid 1755 // prefix and continue scanning. We directly skip the prefix that was 1756 // matched and any additional parts of that check-like word. 1757 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 1758 } 1759 1760 // We ran out of buffer while skipping partial matches so give up. 1761 return {StringRef(), StringRef()}; 1762 } 1763 1764 void FileCheckPatternContext::createLineVariable() { 1765 assert(!LineVariable && "@LINE pseudo numeric variable already created"); 1766 StringRef LineName = "@LINE"; 1767 LineVariable = makeNumericVariable( 1768 LineName, ExpressionFormat(ExpressionFormat::Kind::Unsigned)); 1769 GlobalNumericVariableTable[LineName] = LineVariable; 1770 } 1771 1772 FileCheck::FileCheck(FileCheckRequest Req) 1773 : Req(Req), PatternContext(std::make_unique<FileCheckPatternContext>()), 1774 CheckStrings(std::make_unique<std::vector<FileCheckString>>()) {} 1775 1776 FileCheck::~FileCheck() = default; 1777 1778 bool FileCheck::readCheckFile( 1779 SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 1780 std::pair<unsigned, unsigned> *ImpPatBufferIDRange) { 1781 if (ImpPatBufferIDRange) 1782 ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0; 1783 1784 Error DefineError = 1785 PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM); 1786 if (DefineError) { 1787 logAllUnhandledErrors(std::move(DefineError), errs()); 1788 return true; 1789 } 1790 1791 PatternContext->createLineVariable(); 1792 1793 std::vector<Pattern> ImplicitNegativeChecks; 1794 for (StringRef PatternString : Req.ImplicitCheckNot) { 1795 // 
Create a buffer with fake command line content in order to display the 1796 // command line option responsible for the specific implicit CHECK-NOT. 1797 std::string Prefix = "-implicit-check-not='"; 1798 std::string Suffix = "'"; 1799 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 1800 (Prefix + PatternString + Suffix).str(), "command line"); 1801 1802 StringRef PatternInBuffer = 1803 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 1804 unsigned BufferID = SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 1805 if (ImpPatBufferIDRange) { 1806 if (ImpPatBufferIDRange->first == ImpPatBufferIDRange->second) { 1807 ImpPatBufferIDRange->first = BufferID; 1808 ImpPatBufferIDRange->second = BufferID + 1; 1809 } else { 1810 assert(BufferID == ImpPatBufferIDRange->second && 1811 "expected consecutive source buffer IDs"); 1812 ++ImpPatBufferIDRange->second; 1813 } 1814 } 1815 1816 ImplicitNegativeChecks.push_back( 1817 Pattern(Check::CheckNot, PatternContext.get())); 1818 ImplicitNegativeChecks.back().parsePattern(PatternInBuffer, 1819 "IMPLICIT-CHECK", SM, Req); 1820 } 1821 1822 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 1823 1824 // LineNumber keeps track of the line on which CheckPrefix instances are 1825 // found. 1826 unsigned LineNumber = 1; 1827 1828 bool FoundUsedCheckPrefix = false; 1829 while (1) { 1830 Check::FileCheckType CheckTy; 1831 1832 // See if a prefix occurs in the memory buffer. 
1833 StringRef UsedPrefix; 1834 StringRef AfterSuffix; 1835 std::tie(UsedPrefix, AfterSuffix) = 1836 FindFirstMatchingPrefix(Req, PrefixRE, Buffer, LineNumber, CheckTy); 1837 if (UsedPrefix.empty()) 1838 break; 1839 if (CheckTy != Check::CheckComment) 1840 FoundUsedCheckPrefix = true; 1841 1842 assert(UsedPrefix.data() == Buffer.data() && 1843 "Failed to move Buffer's start forward, or pointed prefix outside " 1844 "of the buffer!"); 1845 assert(AfterSuffix.data() >= Buffer.data() && 1846 AfterSuffix.data() < Buffer.data() + Buffer.size() && 1847 "Parsing after suffix doesn't start inside of buffer!"); 1848 1849 // Location to use for error messages. 1850 const char *UsedPrefixStart = UsedPrefix.data(); 1851 1852 // Skip the buffer to the end of parsed suffix (or just prefix, if no good 1853 // suffix was processed). 1854 Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size()) 1855 : AfterSuffix; 1856 1857 // Complain about useful-looking but unsupported suffixes. 1858 if (CheckTy == Check::CheckBadNot) { 1859 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 1860 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 1861 return true; 1862 } 1863 1864 // Complain about invalid count specification. 1865 if (CheckTy == Check::CheckBadCount) { 1866 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 1867 "invalid count in -COUNT specification on prefix '" + 1868 UsedPrefix + "'"); 1869 return true; 1870 } 1871 1872 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 1873 // leading whitespace. 1874 if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) 1875 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 1876 1877 // Scan ahead to the end of line. 1878 size_t EOL = Buffer.find_first_of("\n\r"); 1879 1880 // Remember the location of the start of the pattern, for diagnostics. 
1881 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 1882 1883 // Extract the pattern from the buffer. 1884 StringRef PatternBuffer = Buffer.substr(0, EOL); 1885 Buffer = Buffer.substr(EOL); 1886 1887 // If this is a comment, we're done. 1888 if (CheckTy == Check::CheckComment) 1889 continue; 1890 1891 // Parse the pattern. 1892 Pattern P(CheckTy, PatternContext.get(), LineNumber); 1893 if (P.parsePattern(PatternBuffer, UsedPrefix, SM, Req)) 1894 return true; 1895 1896 // Verify that CHECK-LABEL lines do not define or use variables 1897 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 1898 SM.PrintMessage( 1899 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 1900 "found '" + UsedPrefix + "-LABEL:'" 1901 " with variable definition or use"); 1902 return true; 1903 } 1904 1905 // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. 1906 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || 1907 CheckTy == Check::CheckEmpty) && 1908 CheckStrings->empty()) { 1909 StringRef Type = CheckTy == Check::CheckNext 1910 ? "NEXT" 1911 : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; 1912 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 1913 SourceMgr::DK_Error, 1914 "found '" + UsedPrefix + "-" + Type + 1915 "' without previous '" + UsedPrefix + ": line"); 1916 return true; 1917 } 1918 1919 // Handle CHECK-DAG/-NOT. 1920 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 1921 DagNotMatches.push_back(P); 1922 continue; 1923 } 1924 1925 // Okay, add the string we captured to the output vector and move on. 1926 CheckStrings->emplace_back(P, UsedPrefix, PatternLoc); 1927 std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); 1928 DagNotMatches = ImplicitNegativeChecks; 1929 } 1930 1931 // When there are no used prefixes we report an error except in the case that 1932 // no prefix is specified explicitly but -implicit-check-not is specified. 
1933 if (!FoundUsedCheckPrefix && 1934 (ImplicitNegativeChecks.empty() || !Req.IsDefaultCheckPrefix)) { 1935 errs() << "error: no check strings found with prefix" 1936 << (Req.CheckPrefixes.size() > 1 ? "es " : " "); 1937 for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) { 1938 if (I != 0) 1939 errs() << ", "; 1940 errs() << "\'" << Req.CheckPrefixes[I] << ":'"; 1941 } 1942 errs() << '\n'; 1943 return true; 1944 } 1945 1946 // Add an EOF pattern for any trailing --implicit-check-not/CHECK-DAG/-NOTs, 1947 // and use the first prefix as a filler for the error message. 1948 if (!DagNotMatches.empty()) { 1949 CheckStrings->emplace_back( 1950 Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1), 1951 *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data())); 1952 std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); 1953 } 1954 1955 return false; 1956 } 1957 1958 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, 1959 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 1960 int MatchedCount, StringRef Buffer, size_t MatchPos, 1961 size_t MatchLen, const FileCheckRequest &Req, 1962 std::vector<FileCheckDiag> *Diags) { 1963 bool PrintDiag = true; 1964 if (ExpectedMatch) { 1965 if (!Req.Verbose) 1966 return; 1967 if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF) 1968 return; 1969 // Due to their verbosity, we don't print verbose diagnostics here if we're 1970 // gathering them for a different rendering, but we always print other 1971 // diagnostics. 1972 PrintDiag = !Diags; 1973 } 1974 FileCheckDiag::MatchType MatchTy = ExpectedMatch 1975 ? 
FileCheckDiag::MatchFoundAndExpected 1976 : FileCheckDiag::MatchFoundButExcluded; 1977 SMRange MatchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(), 1978 Buffer, MatchPos, MatchLen, Diags); 1979 if (Diags) { 1980 Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, Diags); 1981 Pat.printVariableDefs(SM, MatchTy, Diags); 1982 } 1983 if (!PrintDiag) 1984 return; 1985 1986 std::string Message = formatv("{0}: {1} string found in input", 1987 Pat.getCheckTy().getDescription(Prefix), 1988 (ExpectedMatch ? "expected" : "excluded")) 1989 .str(); 1990 if (Pat.getCount() > 1) 1991 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); 1992 1993 SM.PrintMessage( 1994 Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message); 1995 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here", 1996 {MatchRange}); 1997 Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, nullptr); 1998 Pat.printVariableDefs(SM, MatchTy, nullptr); 1999 } 2000 2001 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, 2002 const FileCheckString &CheckStr, int MatchedCount, 2003 StringRef Buffer, size_t MatchPos, size_t MatchLen, 2004 FileCheckRequest &Req, 2005 std::vector<FileCheckDiag> *Diags) { 2006 PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, 2007 MatchedCount, Buffer, MatchPos, MatchLen, Req, Diags); 2008 } 2009 2010 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, 2011 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 2012 int MatchedCount, StringRef Buffer, 2013 bool VerboseVerbose, std::vector<FileCheckDiag> *Diags, 2014 Error MatchErrors) { 2015 assert(MatchErrors && "Called on successful match"); 2016 bool PrintDiag = true; 2017 if (!ExpectedMatch) { 2018 if (!VerboseVerbose) { 2019 consumeError(std::move(MatchErrors)); 2020 return; 2021 } 2022 // Due to their verbosity, we don't print verbose diagnostics here if we're 2023 // gathering them for a different rendering, 
but we always print other 2024 // diagnostics. 2025 PrintDiag = !Diags; 2026 } 2027 2028 // If the current position is at the end of a line, advance to the start of 2029 // the next line. 2030 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); 2031 FileCheckDiag::MatchType MatchTy = ExpectedMatch 2032 ? FileCheckDiag::MatchNoneButExpected 2033 : FileCheckDiag::MatchNoneAndExcluded; 2034 SMRange SearchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(), 2035 Buffer, 0, Buffer.size(), Diags); 2036 if (Diags) 2037 Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, Diags); 2038 if (!PrintDiag) { 2039 consumeError(std::move(MatchErrors)); 2040 return; 2041 } 2042 2043 MatchErrors = handleErrors(std::move(MatchErrors), 2044 [](const ErrorDiagnostic &E) { E.log(errs()); }); 2045 2046 // No problem matching the string per se. 2047 if (!MatchErrors) 2048 return; 2049 consumeError(std::move(MatchErrors)); 2050 2051 // Print "not found" diagnostic. 2052 std::string Message = formatv("{0}: {1} string not found in input", 2053 Pat.getCheckTy().getDescription(Prefix), 2054 (ExpectedMatch ? "expected" : "excluded")) 2055 .str(); 2056 if (Pat.getCount() > 1) 2057 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); 2058 SM.PrintMessage( 2059 Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message); 2060 2061 // Print the "scanning from here" line. 2062 SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here"); 2063 2064 // Allow the pattern to print additional information if desired. 
2065 Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, nullptr); 2066 2067 if (ExpectedMatch) 2068 Pat.printFuzzyMatch(SM, Buffer, Diags); 2069 } 2070 2071 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, 2072 const FileCheckString &CheckStr, int MatchedCount, 2073 StringRef Buffer, bool VerboseVerbose, 2074 std::vector<FileCheckDiag> *Diags, Error MatchErrors) { 2075 PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, 2076 MatchedCount, Buffer, VerboseVerbose, Diags, 2077 std::move(MatchErrors)); 2078 } 2079 2080 /// Counts the number of newlines in the specified range. 2081 static unsigned CountNumNewlinesBetween(StringRef Range, 2082 const char *&FirstNewLine) { 2083 unsigned NumNewLines = 0; 2084 while (1) { 2085 // Scan for newline. 2086 Range = Range.substr(Range.find_first_of("\n\r")); 2087 if (Range.empty()) 2088 return NumNewLines; 2089 2090 ++NumNewLines; 2091 2092 // Handle \n\r and \r\n as a single newline. 2093 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 2094 (Range[0] != Range[1])) 2095 Range = Range.substr(1); 2096 Range = Range.substr(1); 2097 2098 if (NumNewLines == 1) 2099 FirstNewLine = Range.begin(); 2100 } 2101 } 2102 2103 size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer, 2104 bool IsLabelScanMode, size_t &MatchLen, 2105 FileCheckRequest &Req, 2106 std::vector<FileCheckDiag> *Diags) const { 2107 size_t LastPos = 0; 2108 std::vector<const Pattern *> NotStrings; 2109 2110 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 2111 // bounds; we have not processed variable definitions within the bounded block 2112 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 2113 // over the block again (including the last CHECK-LABEL) in normal mode. 2114 if (!IsLabelScanMode) { 2115 // Match "dag strings" (with mixed "not strings" if any). 
2116 LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags); 2117 if (LastPos == StringRef::npos) 2118 return StringRef::npos; 2119 } 2120 2121 // Match itself from the last position after matching CHECK-DAG. 2122 size_t LastMatchEnd = LastPos; 2123 size_t FirstMatchPos = 0; 2124 // Go match the pattern Count times. Majority of patterns only match with 2125 // count 1 though. 2126 assert(Pat.getCount() != 0 && "pattern count can not be zero"); 2127 for (int i = 1; i <= Pat.getCount(); i++) { 2128 StringRef MatchBuffer = Buffer.substr(LastMatchEnd); 2129 size_t CurrentMatchLen; 2130 // get a match at current start point 2131 Expected<size_t> MatchResult = Pat.match(MatchBuffer, CurrentMatchLen, SM); 2132 2133 // report 2134 if (!MatchResult) { 2135 PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags, 2136 MatchResult.takeError()); 2137 return StringRef::npos; 2138 } 2139 size_t MatchPos = *MatchResult; 2140 PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req, 2141 Diags); 2142 if (i == 1) 2143 FirstMatchPos = LastPos + MatchPos; 2144 2145 // move start point after the match 2146 LastMatchEnd += MatchPos + CurrentMatchLen; 2147 } 2148 // Full match len counts from first match pos. 2149 MatchLen = LastMatchEnd - FirstMatchPos; 2150 2151 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 2152 // or CHECK-NOT 2153 if (!IsLabelScanMode) { 2154 size_t MatchPos = FirstMatchPos - LastPos; 2155 StringRef MatchBuffer = Buffer.substr(LastPos); 2156 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 2157 2158 // If this check is a "CHECK-NEXT", verify that the previous match was on 2159 // the previous line (i.e. that there is one newline between them). 
2160 if (CheckNext(SM, SkippedRegion)) { 2161 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, 2162 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, 2163 Diags, Req.Verbose); 2164 return StringRef::npos; 2165 } 2166 2167 // If this check is a "CHECK-SAME", verify that the previous match was on 2168 // the same line (i.e. that there is no newline between them). 2169 if (CheckSame(SM, SkippedRegion)) { 2170 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, 2171 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, 2172 Diags, Req.Verbose); 2173 return StringRef::npos; 2174 } 2175 2176 // If this match had "not strings", verify that they don't exist in the 2177 // skipped region. 2178 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) 2179 return StringRef::npos; 2180 } 2181 2182 return FirstMatchPos; 2183 } 2184 2185 bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 2186 if (Pat.getCheckTy() != Check::CheckNext && 2187 Pat.getCheckTy() != Check::CheckEmpty) 2188 return false; 2189 2190 Twine CheckName = 2191 Prefix + 2192 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT"); 2193 2194 // Count the number of newlines between the previous match and this one. 
2195 const char *FirstNewLine = nullptr; 2196 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 2197 2198 if (NumNewLines == 0) { 2199 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2200 CheckName + ": is on the same line as previous match"); 2201 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2202 "'next' match was here"); 2203 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2204 "previous match ended here"); 2205 return true; 2206 } 2207 2208 if (NumNewLines != 1) { 2209 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2210 CheckName + 2211 ": is not on the line after the previous match"); 2212 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2213 "'next' match was here"); 2214 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2215 "previous match ended here"); 2216 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 2217 "non-matching line after previous match is here"); 2218 return true; 2219 } 2220 2221 return false; 2222 } 2223 2224 bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 2225 if (Pat.getCheckTy() != Check::CheckSame) 2226 return false; 2227 2228 // Count the number of newlines between the previous match and this one. 
2229 const char *FirstNewLine = nullptr; 2230 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 2231 2232 if (NumNewLines != 0) { 2233 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2234 Prefix + 2235 "-SAME: is not on the same line as the previous match"); 2236 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2237 "'next' match was here"); 2238 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2239 "previous match ended here"); 2240 return true; 2241 } 2242 2243 return false; 2244 } 2245 2246 bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 2247 const std::vector<const Pattern *> &NotStrings, 2248 const FileCheckRequest &Req, 2249 std::vector<FileCheckDiag> *Diags) const { 2250 bool DirectiveFail = false; 2251 for (const Pattern *Pat : NotStrings) { 2252 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 2253 2254 size_t MatchLen = 0; 2255 Expected<size_t> MatchResult = Pat->match(Buffer, MatchLen, SM); 2256 2257 if (!MatchResult) { 2258 PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, 2259 Req.VerboseVerbose, Diags, MatchResult.takeError()); 2260 continue; 2261 } 2262 size_t Pos = *MatchResult; 2263 2264 PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen, 2265 Req, Diags); 2266 DirectiveFail = true; 2267 continue; 2268 } 2269 2270 return DirectiveFail; 2271 } 2272 2273 size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 2274 std::vector<const Pattern *> &NotStrings, 2275 const FileCheckRequest &Req, 2276 std::vector<FileCheckDiag> *Diags) const { 2277 if (DagNotStrings.empty()) 2278 return 0; 2279 2280 // The start of the search range. 2281 size_t StartPos = 0; 2282 2283 struct MatchRange { 2284 size_t Pos; 2285 size_t End; 2286 }; 2287 // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match 2288 // ranges are erased from this list once they are no longer in the search 2289 // range. 
2290 std::list<MatchRange> MatchRanges; 2291 2292 // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG 2293 // group, so we don't use a range-based for loop here. 2294 for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end(); 2295 PatItr != PatEnd; ++PatItr) { 2296 const Pattern &Pat = *PatItr; 2297 assert((Pat.getCheckTy() == Check::CheckDAG || 2298 Pat.getCheckTy() == Check::CheckNot) && 2299 "Invalid CHECK-DAG or CHECK-NOT!"); 2300 2301 if (Pat.getCheckTy() == Check::CheckNot) { 2302 NotStrings.push_back(&Pat); 2303 continue; 2304 } 2305 2306 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 2307 2308 // CHECK-DAG always matches from the start. 2309 size_t MatchLen = 0, MatchPos = StartPos; 2310 2311 // Search for a match that doesn't overlap a previous match in this 2312 // CHECK-DAG group. 2313 for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) { 2314 StringRef MatchBuffer = Buffer.substr(MatchPos); 2315 Expected<size_t> MatchResult = Pat.match(MatchBuffer, MatchLen, SM); 2316 // With a group of CHECK-DAGs, a single mismatching means the match on 2317 // that group of CHECK-DAGs fails immediately. 2318 if (!MatchResult) { 2319 PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer, 2320 Req.VerboseVerbose, Diags, MatchResult.takeError()); 2321 return StringRef::npos; 2322 } 2323 size_t MatchPosBuf = *MatchResult; 2324 // Re-calc it as the offset relative to the start of the original string. 2325 MatchPos += MatchPosBuf; 2326 if (Req.VerboseVerbose) 2327 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos, 2328 MatchLen, Req, Diags); 2329 MatchRange M{MatchPos, MatchPos + MatchLen}; 2330 if (Req.AllowDeprecatedDagOverlap) { 2331 // We don't need to track all matches in this mode, so we just maintain 2332 // one match range that encompasses the current CHECK-DAG group's 2333 // matches. 
2334 if (MatchRanges.empty()) 2335 MatchRanges.insert(MatchRanges.end(), M); 2336 else { 2337 auto Block = MatchRanges.begin(); 2338 Block->Pos = std::min(Block->Pos, M.Pos); 2339 Block->End = std::max(Block->End, M.End); 2340 } 2341 break; 2342 } 2343 // Iterate previous matches until overlapping match or insertion point. 2344 bool Overlap = false; 2345 for (; MI != ME; ++MI) { 2346 if (M.Pos < MI->End) { 2347 // !Overlap => New match has no overlap and is before this old match. 2348 // Overlap => New match overlaps this old match. 2349 Overlap = MI->Pos < M.End; 2350 break; 2351 } 2352 } 2353 if (!Overlap) { 2354 // Insert non-overlapping match into list. 2355 MatchRanges.insert(MI, M); 2356 break; 2357 } 2358 if (Req.VerboseVerbose) { 2359 // Due to their verbosity, we don't print verbose diagnostics here if 2360 // we're gathering them for a different rendering, but we always print 2361 // other diagnostics. 2362 if (!Diags) { 2363 SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos); 2364 SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End); 2365 SMRange OldRange(OldStart, OldEnd); 2366 SM.PrintMessage(OldStart, SourceMgr::DK_Note, 2367 "match discarded, overlaps earlier DAG match here", 2368 {OldRange}); 2369 } else { 2370 SMLoc CheckLoc = Diags->rbegin()->CheckLoc; 2371 for (auto I = Diags->rbegin(), E = Diags->rend(); 2372 I != E && I->CheckLoc == CheckLoc; ++I) 2373 I->MatchTy = FileCheckDiag::MatchFoundButDiscarded; 2374 } 2375 } 2376 MatchPos = MI->End; 2377 } 2378 if (!Req.VerboseVerbose) 2379 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos, 2380 MatchLen, Req, Diags); 2381 2382 // Handle the end of a CHECK-DAG group. 2383 if (std::next(PatItr) == PatEnd || 2384 std::next(PatItr)->getCheckTy() == Check::CheckNot) { 2385 if (!NotStrings.empty()) { 2386 // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to 2387 // CHECK-DAG, verify that there are no 'not' strings occurred in that 2388 // region. 
2389 StringRef SkippedRegion = 2390 Buffer.slice(StartPos, MatchRanges.begin()->Pos); 2391 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) 2392 return StringRef::npos; 2393 // Clear "not strings". 2394 NotStrings.clear(); 2395 } 2396 // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the 2397 // end of this CHECK-DAG group's match range. 2398 StartPos = MatchRanges.rbegin()->End; 2399 // Don't waste time checking for (impossible) overlaps before that. 2400 MatchRanges.clear(); 2401 } 2402 } 2403 2404 return StartPos; 2405 } 2406 2407 static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes, 2408 ArrayRef<StringRef> SuppliedPrefixes) { 2409 for (StringRef Prefix : SuppliedPrefixes) { 2410 if (Prefix.empty()) { 2411 errs() << "error: supplied " << Kind << " prefix must not be the empty " 2412 << "string\n"; 2413 return false; 2414 } 2415 static const Regex Validator("^[a-zA-Z0-9_-]*$"); 2416 if (!Validator.match(Prefix)) { 2417 errs() << "error: supplied " << Kind << " prefix must start with a " 2418 << "letter and contain only alphanumeric characters, hyphens, and " 2419 << "underscores: '" << Prefix << "'\n"; 2420 return false; 2421 } 2422 if (!UniquePrefixes.insert(Prefix).second) { 2423 errs() << "error: supplied " << Kind << " prefix must be unique among " 2424 << "check and comment prefixes: '" << Prefix << "'\n"; 2425 return false; 2426 } 2427 } 2428 return true; 2429 } 2430 2431 static const char *DefaultCheckPrefixes[] = {"CHECK"}; 2432 static const char *DefaultCommentPrefixes[] = {"COM", "RUN"}; 2433 2434 bool FileCheck::ValidateCheckPrefixes() { 2435 StringSet<> UniquePrefixes; 2436 // Add default prefixes to catch user-supplied duplicates of them below. 
2437 if (Req.CheckPrefixes.empty()) { 2438 for (const char *Prefix : DefaultCheckPrefixes) 2439 UniquePrefixes.insert(Prefix); 2440 } 2441 if (Req.CommentPrefixes.empty()) { 2442 for (const char *Prefix : DefaultCommentPrefixes) 2443 UniquePrefixes.insert(Prefix); 2444 } 2445 // Do not validate the default prefixes, or diagnostics about duplicates might 2446 // incorrectly indicate that they were supplied by the user. 2447 if (!ValidatePrefixes("check", UniquePrefixes, Req.CheckPrefixes)) 2448 return false; 2449 if (!ValidatePrefixes("comment", UniquePrefixes, Req.CommentPrefixes)) 2450 return false; 2451 return true; 2452 } 2453 2454 Regex FileCheck::buildCheckPrefixRegex() { 2455 if (Req.CheckPrefixes.empty()) { 2456 for (const char *Prefix : DefaultCheckPrefixes) 2457 Req.CheckPrefixes.push_back(Prefix); 2458 Req.IsDefaultCheckPrefix = true; 2459 } 2460 if (Req.CommentPrefixes.empty()) { 2461 for (const char *Prefix : DefaultCommentPrefixes) 2462 Req.CommentPrefixes.push_back(Prefix); 2463 } 2464 2465 // We already validated the contents of CheckPrefixes and CommentPrefixes so 2466 // just concatenate them as alternatives. 2467 SmallString<32> PrefixRegexStr; 2468 for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) { 2469 if (I != 0) 2470 PrefixRegexStr.push_back('|'); 2471 PrefixRegexStr.append(Req.CheckPrefixes[I]); 2472 } 2473 for (StringRef Prefix : Req.CommentPrefixes) { 2474 PrefixRegexStr.push_back('|'); 2475 PrefixRegexStr.append(Prefix); 2476 } 2477 2478 return Regex(PrefixRegexStr); 2479 } 2480 2481 Error FileCheckPatternContext::defineCmdlineVariables( 2482 ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) { 2483 assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() && 2484 "Overriding defined variable with command-line variable definitions"); 2485 2486 if (CmdlineDefines.empty()) 2487 return Error::success(); 2488 2489 // Create a string representing the vector of command-line definitions. 
Each 2490 // definition is on its own line and prefixed with a definition number to 2491 // clarify which definition a given diagnostic corresponds to. 2492 unsigned I = 0; 2493 Error Errs = Error::success(); 2494 std::string CmdlineDefsDiag; 2495 SmallVector<std::pair<size_t, size_t>, 4> CmdlineDefsIndices; 2496 for (StringRef CmdlineDef : CmdlineDefines) { 2497 std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str(); 2498 size_t EqIdx = CmdlineDef.find('='); 2499 if (EqIdx == StringRef::npos) { 2500 CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0)); 2501 continue; 2502 } 2503 // Numeric variable definition. 2504 if (CmdlineDef[0] == '#') { 2505 // Append a copy of the command-line definition adapted to use the same 2506 // format as in the input file to be able to reuse 2507 // parseNumericSubstitutionBlock. 2508 CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str(); 2509 std::string SubstitutionStr = std::string(CmdlineDef); 2510 SubstitutionStr[EqIdx] = ':'; 2511 CmdlineDefsIndices.push_back( 2512 std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size())); 2513 CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str(); 2514 } else { 2515 CmdlineDefsDiag += DefPrefix; 2516 CmdlineDefsIndices.push_back( 2517 std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size())); 2518 CmdlineDefsDiag += (CmdlineDef + "\n").str(); 2519 } 2520 } 2521 2522 // Create a buffer with fake command line content in order to display 2523 // parsing diagnostic with location information and point to the 2524 // global definition with invalid syntax. 
2525 std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer = 2526 MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines"); 2527 StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer(); 2528 SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc()); 2529 2530 for (std::pair<size_t, size_t> CmdlineDefIndices : CmdlineDefsIndices) { 2531 StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first, 2532 CmdlineDefIndices.second); 2533 if (CmdlineDef.empty()) { 2534 Errs = joinErrors( 2535 std::move(Errs), 2536 ErrorDiagnostic::get(SM, CmdlineDef, 2537 "missing equal sign in global definition")); 2538 continue; 2539 } 2540 2541 // Numeric variable definition. 2542 if (CmdlineDef[0] == '#') { 2543 // Now parse the definition both to check that the syntax is correct and 2544 // to create the necessary class instance. 2545 StringRef CmdlineDefExpr = CmdlineDef.substr(1); 2546 Optional<NumericVariable *> DefinedNumericVariable; 2547 Expected<std::unique_ptr<Expression>> ExpressionResult = 2548 Pattern::parseNumericSubstitutionBlock( 2549 CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM); 2550 if (!ExpressionResult) { 2551 Errs = joinErrors(std::move(Errs), ExpressionResult.takeError()); 2552 continue; 2553 } 2554 std::unique_ptr<Expression> Expression = std::move(*ExpressionResult); 2555 // Now evaluate the expression whose value this variable should be set 2556 // to, since the expression of a command-line variable definition should 2557 // only use variables defined earlier on the command-line. If not, this 2558 // is an error and we report it. 2559 Expected<ExpressionValue> Value = Expression->getAST()->eval(); 2560 if (!Value) { 2561 Errs = joinErrors(std::move(Errs), Value.takeError()); 2562 continue; 2563 } 2564 2565 assert(DefinedNumericVariable && "No variable defined"); 2566 (*DefinedNumericVariable)->setValue(*Value); 2567 2568 // Record this variable definition. 
2569 GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] = 2570 *DefinedNumericVariable; 2571 } else { 2572 // String variable definition. 2573 std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('='); 2574 StringRef CmdlineName = CmdlineNameVal.first; 2575 StringRef OrigCmdlineName = CmdlineName; 2576 Expected<Pattern::VariableProperties> ParseVarResult = 2577 Pattern::parseVariable(CmdlineName, SM); 2578 if (!ParseVarResult) { 2579 Errs = joinErrors(std::move(Errs), ParseVarResult.takeError()); 2580 continue; 2581 } 2582 // Check that CmdlineName does not denote a pseudo variable is only 2583 // composed of the parsed numeric variable. This catches cases like 2584 // "FOO+2" in a "FOO+2=10" definition. 2585 if (ParseVarResult->IsPseudo || !CmdlineName.empty()) { 2586 Errs = joinErrors(std::move(Errs), 2587 ErrorDiagnostic::get( 2588 SM, OrigCmdlineName, 2589 "invalid name in string variable definition '" + 2590 OrigCmdlineName + "'")); 2591 continue; 2592 } 2593 StringRef Name = ParseVarResult->Name; 2594 2595 // Detect collisions between string and numeric variables when the former 2596 // is created later than the latter. 2597 if (GlobalNumericVariableTable.find(Name) != 2598 GlobalNumericVariableTable.end()) { 2599 Errs = joinErrors(std::move(Errs), 2600 ErrorDiagnostic::get(SM, Name, 2601 "numeric variable with name '" + 2602 Name + "' already exists")); 2603 continue; 2604 } 2605 GlobalVariableTable.insert(CmdlineNameVal); 2606 // Mark the string variable as defined to detect collisions between 2607 // string and numeric variables in defineCmdlineVariables when the latter 2608 // is created later than the former. We cannot reuse GlobalVariableTable 2609 // for this by populating it with an empty string since we would then 2610 // lose the ability to detect the use of an undefined variable in 2611 // match(). 
2612 DefinedVariableTable[Name] = true; 2613 } 2614 } 2615 2616 return Errs; 2617 } 2618 2619 void FileCheckPatternContext::clearLocalVars() { 2620 SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars; 2621 for (const StringMapEntry<StringRef> &Var : GlobalVariableTable) 2622 if (Var.first()[0] != '$') 2623 LocalPatternVars.push_back(Var.first()); 2624 2625 // Numeric substitution reads the value of a variable directly, not via 2626 // GlobalNumericVariableTable. Therefore, we clear local variables by 2627 // clearing their value which will lead to a numeric substitution failure. We 2628 // also mark the variable for removal from GlobalNumericVariableTable since 2629 // this is what defineCmdlineVariables checks to decide that no global 2630 // variable has been defined. 2631 for (const auto &Var : GlobalNumericVariableTable) 2632 if (Var.first()[0] != '$') { 2633 Var.getValue()->clearValue(); 2634 LocalNumericVars.push_back(Var.first()); 2635 } 2636 2637 for (const auto &Var : LocalPatternVars) 2638 GlobalVariableTable.erase(Var); 2639 for (const auto &Var : LocalNumericVars) 2640 GlobalNumericVariableTable.erase(Var); 2641 } 2642 2643 bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer, 2644 std::vector<FileCheckDiag> *Diags) { 2645 bool ChecksFailed = false; 2646 2647 unsigned i = 0, j = 0, e = CheckStrings->size(); 2648 while (true) { 2649 StringRef CheckRegion; 2650 if (j == e) { 2651 CheckRegion = Buffer; 2652 } else { 2653 const FileCheckString &CheckLabelStr = (*CheckStrings)[j]; 2654 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 2655 ++j; 2656 continue; 2657 } 2658 2659 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 2660 size_t MatchLabelLen = 0; 2661 size_t MatchLabelPos = 2662 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags); 2663 if (MatchLabelPos == StringRef::npos) 2664 // Immediately bail if CHECK-LABEL fails, nothing else we can do. 
2665 return false; 2666 2667 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 2668 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 2669 ++j; 2670 } 2671 2672 // Do not clear the first region as it's the one before the first 2673 // CHECK-LABEL and it would clear variables defined on the command-line 2674 // before they get used. 2675 if (i != 0 && Req.EnableVarScope) 2676 PatternContext->clearLocalVars(); 2677 2678 for (; i != j; ++i) { 2679 const FileCheckString &CheckStr = (*CheckStrings)[i]; 2680 2681 // Check each string within the scanned region, including a second check 2682 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 2683 size_t MatchLen = 0; 2684 size_t MatchPos = 2685 CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags); 2686 2687 if (MatchPos == StringRef::npos) { 2688 ChecksFailed = true; 2689 i = j; 2690 break; 2691 } 2692 2693 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 2694 } 2695 2696 if (j == e) 2697 break; 2698 } 2699 2700 // Success if no checks failed. 2701 return !ChecksFailed; 2702 } 2703