1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // FileCheck does a line-by line check of a file that validates whether it 10 // contains the expected content. This is useful for regression tests etc. 11 // 12 // This file implements most of the API that will be used by the FileCheck utility 13 // as well as various unittests. 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/FileCheck/FileCheck.h" 17 #include "FileCheckImpl.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/Support/CheckedArithmetic.h" 22 #include "llvm/Support/FormatVariadic.h" 23 #include <cstdint> 24 #include <list> 25 #include <set> 26 #include <tuple> 27 #include <utility> 28 29 using namespace llvm; 30 31 StringRef ExpressionFormat::toString() const { 32 switch (Value) { 33 case Kind::NoFormat: 34 return StringRef("<none>"); 35 case Kind::Unsigned: 36 return StringRef("%u"); 37 case Kind::Signed: 38 return StringRef("%d"); 39 case Kind::HexUpper: 40 return StringRef("%X"); 41 case Kind::HexLower: 42 return StringRef("%x"); 43 } 44 llvm_unreachable("unknown expression format"); 45 } 46 47 Expected<std::string> ExpressionFormat::getWildcardRegex() const { 48 auto CreatePrecisionRegex = [this](StringRef S) { 49 return (S + Twine('{') + Twine(Precision) + "}").str(); 50 }; 51 52 switch (Value) { 53 case Kind::Unsigned: 54 if (Precision) 55 return CreatePrecisionRegex("([1-9][0-9]*)?[0-9]"); 56 return std::string("[0-9]+"); 57 case Kind::Signed: 58 if (Precision) 59 return CreatePrecisionRegex("-?([1-9][0-9]*)?[0-9]"); 60 return 
std::string("-?[0-9]+"); 61 case Kind::HexUpper: 62 if (Precision) 63 return CreatePrecisionRegex("([1-9A-F][0-9A-F]*)?[0-9A-F]"); 64 return std::string("[0-9A-F]+"); 65 case Kind::HexLower: 66 if (Precision) 67 return CreatePrecisionRegex("([1-9a-f][0-9a-f]*)?[0-9a-f]"); 68 return std::string("[0-9a-f]+"); 69 default: 70 return createStringError(std::errc::invalid_argument, 71 "trying to match value with invalid format"); 72 } 73 } 74 75 Expected<std::string> 76 ExpressionFormat::getMatchingString(ExpressionValue IntegerValue) const { 77 uint64_t AbsoluteValue; 78 StringRef SignPrefix = IntegerValue.isNegative() ? "-" : ""; 79 80 if (Value == Kind::Signed) { 81 Expected<int64_t> SignedValue = IntegerValue.getSignedValue(); 82 if (!SignedValue) 83 return SignedValue.takeError(); 84 if (*SignedValue < 0) 85 AbsoluteValue = cantFail(IntegerValue.getAbsolute().getUnsignedValue()); 86 else 87 AbsoluteValue = *SignedValue; 88 } else { 89 Expected<uint64_t> UnsignedValue = IntegerValue.getUnsignedValue(); 90 if (!UnsignedValue) 91 return UnsignedValue.takeError(); 92 AbsoluteValue = *UnsignedValue; 93 } 94 95 std::string AbsoluteValueStr; 96 switch (Value) { 97 case Kind::Unsigned: 98 case Kind::Signed: 99 AbsoluteValueStr = utostr(AbsoluteValue); 100 break; 101 case Kind::HexUpper: 102 case Kind::HexLower: 103 AbsoluteValueStr = utohexstr(AbsoluteValue, Value == Kind::HexLower); 104 break; 105 default: 106 return createStringError(std::errc::invalid_argument, 107 "trying to match value with invalid format"); 108 } 109 110 if (Precision > AbsoluteValueStr.size()) { 111 unsigned LeadingZeros = Precision - AbsoluteValueStr.size(); 112 return (Twine(SignPrefix) + std::string(LeadingZeros, '0') + 113 AbsoluteValueStr) 114 .str(); 115 } 116 117 return (Twine(SignPrefix) + AbsoluteValueStr).str(); 118 } 119 120 Expected<ExpressionValue> 121 ExpressionFormat::valueFromStringRepr(StringRef StrVal, 122 const SourceMgr &SM) const { 123 bool ValueIsSigned = Value == Kind::Signed; 
124 // Both the FileCheck utility and library only call this method with a valid 125 // value in StrVal. This is guaranteed by the regex returned by 126 // getWildcardRegex() above. Only underflow and overflow errors can thus 127 // occur. However new uses of this method could be added in the future so 128 // the error message does not make assumptions about StrVal. 129 StringRef IntegerParseErrorStr = "unable to represent numeric value"; 130 if (ValueIsSigned) { 131 int64_t SignedValue; 132 133 if (StrVal.getAsInteger(10, SignedValue)) 134 return ErrorDiagnostic::get(SM, StrVal, IntegerParseErrorStr); 135 136 return ExpressionValue(SignedValue); 137 } 138 139 bool Hex = Value == Kind::HexUpper || Value == Kind::HexLower; 140 uint64_t UnsignedValue; 141 if (StrVal.getAsInteger(Hex ? 16 : 10, UnsignedValue)) 142 return ErrorDiagnostic::get(SM, StrVal, IntegerParseErrorStr); 143 144 return ExpressionValue(UnsignedValue); 145 } 146 147 static int64_t getAsSigned(uint64_t UnsignedValue) { 148 // Use memcpy to reinterpret the bitpattern in Value since casting to 149 // signed is implementation-defined if the unsigned value is too big to be 150 // represented in the signed type and using an union violates type aliasing 151 // rules. 152 int64_t SignedValue; 153 memcpy(&SignedValue, &UnsignedValue, sizeof(SignedValue)); 154 return SignedValue; 155 } 156 157 Expected<int64_t> ExpressionValue::getSignedValue() const { 158 if (Negative) 159 return getAsSigned(Value); 160 161 if (Value > (uint64_t)std::numeric_limits<int64_t>::max()) 162 return make_error<OverflowError>(); 163 164 // Value is in the representable range of int64_t so we can use cast. 
165 return static_cast<int64_t>(Value); 166 } 167 168 Expected<uint64_t> ExpressionValue::getUnsignedValue() const { 169 if (Negative) 170 return make_error<OverflowError>(); 171 172 return Value; 173 } 174 175 ExpressionValue ExpressionValue::getAbsolute() const { 176 if (!Negative) 177 return *this; 178 179 int64_t SignedValue = getAsSigned(Value); 180 int64_t MaxInt64 = std::numeric_limits<int64_t>::max(); 181 // Absolute value can be represented as int64_t. 182 if (SignedValue >= -MaxInt64) 183 return ExpressionValue(-getAsSigned(Value)); 184 185 // -X == -(max int64_t + Rem), negate each component independently. 186 SignedValue += MaxInt64; 187 uint64_t RemainingValueAbsolute = -SignedValue; 188 return ExpressionValue(MaxInt64 + RemainingValueAbsolute); 189 } 190 191 Expected<ExpressionValue> llvm::operator+(const ExpressionValue &LeftOperand, 192 const ExpressionValue &RightOperand) { 193 if (LeftOperand.isNegative() && RightOperand.isNegative()) { 194 int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); 195 int64_t RightValue = cantFail(RightOperand.getSignedValue()); 196 Optional<int64_t> Result = checkedAdd<int64_t>(LeftValue, RightValue); 197 if (!Result) 198 return make_error<OverflowError>(); 199 200 return ExpressionValue(*Result); 201 } 202 203 // (-A) + B == B - A. 204 if (LeftOperand.isNegative()) 205 return RightOperand - LeftOperand.getAbsolute(); 206 207 // A + (-B) == A - B. 208 if (RightOperand.isNegative()) 209 return LeftOperand - RightOperand.getAbsolute(); 210 211 // Both values are positive at this point. 
212 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 213 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 214 Optional<uint64_t> Result = 215 checkedAddUnsigned<uint64_t>(LeftValue, RightValue); 216 if (!Result) 217 return make_error<OverflowError>(); 218 219 return ExpressionValue(*Result); 220 } 221 222 Expected<ExpressionValue> llvm::operator-(const ExpressionValue &LeftOperand, 223 const ExpressionValue &RightOperand) { 224 // Result will be negative and thus might underflow. 225 if (LeftOperand.isNegative() && !RightOperand.isNegative()) { 226 int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); 227 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 228 // Result <= -1 - (max int64_t) which overflows on 1- and 2-complement. 229 if (RightValue > (uint64_t)std::numeric_limits<int64_t>::max()) 230 return make_error<OverflowError>(); 231 Optional<int64_t> Result = 232 checkedSub(LeftValue, static_cast<int64_t>(RightValue)); 233 if (!Result) 234 return make_error<OverflowError>(); 235 236 return ExpressionValue(*Result); 237 } 238 239 // (-A) - (-B) == B - A. 240 if (LeftOperand.isNegative()) 241 return RightOperand.getAbsolute() - LeftOperand.getAbsolute(); 242 243 // A - (-B) == A + B. 244 if (RightOperand.isNegative()) 245 return LeftOperand + RightOperand.getAbsolute(); 246 247 // Both values are positive at this point. 248 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 249 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 250 if (LeftValue >= RightValue) 251 return ExpressionValue(LeftValue - RightValue); 252 else { 253 uint64_t AbsoluteDifference = RightValue - LeftValue; 254 uint64_t MaxInt64 = std::numeric_limits<int64_t>::max(); 255 // Value might underflow. 
256 if (AbsoluteDifference > MaxInt64) { 257 AbsoluteDifference -= MaxInt64; 258 int64_t Result = -MaxInt64; 259 int64_t MinInt64 = std::numeric_limits<int64_t>::min(); 260 // Underflow, tested by: 261 // abs(Result + (max int64_t)) > abs((min int64_t) + (max int64_t)) 262 if (AbsoluteDifference > static_cast<uint64_t>(-(MinInt64 - Result))) 263 return make_error<OverflowError>(); 264 Result -= static_cast<int64_t>(AbsoluteDifference); 265 return ExpressionValue(Result); 266 } 267 268 return ExpressionValue(-static_cast<int64_t>(AbsoluteDifference)); 269 } 270 } 271 272 Expected<ExpressionValue> llvm::operator*(const ExpressionValue &LeftOperand, 273 const ExpressionValue &RightOperand) { 274 // -A * -B == A * B 275 if (LeftOperand.isNegative() && RightOperand.isNegative()) 276 return LeftOperand.getAbsolute() * RightOperand.getAbsolute(); 277 278 // A * -B == -B * A 279 if (RightOperand.isNegative()) 280 return RightOperand * LeftOperand; 281 282 assert(!RightOperand.isNegative() && "Unexpected negative operand!"); 283 284 // Result will be negative and can underflow. 285 if (LeftOperand.isNegative()) { 286 auto Result = LeftOperand.getAbsolute() * RightOperand.getAbsolute(); 287 if (!Result) 288 return Result; 289 290 return ExpressionValue(0) - *Result; 291 } 292 293 // Result will be positive and can overflow. 294 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 295 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 296 Optional<uint64_t> Result = 297 checkedMulUnsigned<uint64_t>(LeftValue, RightValue); 298 if (!Result) 299 return make_error<OverflowError>(); 300 301 return ExpressionValue(*Result); 302 } 303 304 Expected<ExpressionValue> llvm::operator/(const ExpressionValue &LeftOperand, 305 const ExpressionValue &RightOperand) { 306 // -A / -B == A / B 307 if (LeftOperand.isNegative() && RightOperand.isNegative()) 308 return LeftOperand.getAbsolute() / RightOperand.getAbsolute(); 309 310 // Check for divide by zero. 
311 if (RightOperand == ExpressionValue(0)) 312 return make_error<OverflowError>(); 313 314 // Result will be negative and can underflow. 315 if (LeftOperand.isNegative() || RightOperand.isNegative()) 316 return ExpressionValue(0) - 317 cantFail(LeftOperand.getAbsolute() / RightOperand.getAbsolute()); 318 319 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 320 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 321 return ExpressionValue(LeftValue / RightValue); 322 } 323 324 Expected<ExpressionValue> llvm::max(const ExpressionValue &LeftOperand, 325 const ExpressionValue &RightOperand) { 326 if (LeftOperand.isNegative() && RightOperand.isNegative()) { 327 int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); 328 int64_t RightValue = cantFail(RightOperand.getSignedValue()); 329 return ExpressionValue(std::max(LeftValue, RightValue)); 330 } 331 332 if (!LeftOperand.isNegative() && !RightOperand.isNegative()) { 333 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); 334 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); 335 return ExpressionValue(std::max(LeftValue, RightValue)); 336 } 337 338 if (LeftOperand.isNegative()) 339 return RightOperand; 340 341 return LeftOperand; 342 } 343 344 Expected<ExpressionValue> llvm::min(const ExpressionValue &LeftOperand, 345 const ExpressionValue &RightOperand) { 346 if (cantFail(max(LeftOperand, RightOperand)) == LeftOperand) 347 return RightOperand; 348 349 return LeftOperand; 350 } 351 352 Expected<ExpressionValue> NumericVariableUse::eval() const { 353 Optional<ExpressionValue> Value = Variable->getValue(); 354 if (Value) 355 return *Value; 356 357 return make_error<UndefVarError>(getExpressionStr()); 358 } 359 360 Expected<ExpressionValue> BinaryOperation::eval() const { 361 Expected<ExpressionValue> LeftOp = LeftOperand->eval(); 362 Expected<ExpressionValue> RightOp = RightOperand->eval(); 363 364 // Bubble up any error (e.g. 
undefined variables) in the recursive 365 // evaluation. 366 if (!LeftOp || !RightOp) { 367 Error Err = Error::success(); 368 if (!LeftOp) 369 Err = joinErrors(std::move(Err), LeftOp.takeError()); 370 if (!RightOp) 371 Err = joinErrors(std::move(Err), RightOp.takeError()); 372 return std::move(Err); 373 } 374 375 return EvalBinop(*LeftOp, *RightOp); 376 } 377 378 Expected<ExpressionFormat> 379 BinaryOperation::getImplicitFormat(const SourceMgr &SM) const { 380 Expected<ExpressionFormat> LeftFormat = LeftOperand->getImplicitFormat(SM); 381 Expected<ExpressionFormat> RightFormat = RightOperand->getImplicitFormat(SM); 382 if (!LeftFormat || !RightFormat) { 383 Error Err = Error::success(); 384 if (!LeftFormat) 385 Err = joinErrors(std::move(Err), LeftFormat.takeError()); 386 if (!RightFormat) 387 Err = joinErrors(std::move(Err), RightFormat.takeError()); 388 return std::move(Err); 389 } 390 391 if (*LeftFormat != ExpressionFormat::Kind::NoFormat && 392 *RightFormat != ExpressionFormat::Kind::NoFormat && 393 *LeftFormat != *RightFormat) 394 return ErrorDiagnostic::get( 395 SM, getExpressionStr(), 396 "implicit format conflict between '" + LeftOperand->getExpressionStr() + 397 "' (" + LeftFormat->toString() + ") and '" + 398 RightOperand->getExpressionStr() + "' (" + RightFormat->toString() + 399 "), need an explicit format specifier"); 400 401 return *LeftFormat != ExpressionFormat::Kind::NoFormat ? 
*LeftFormat 402 : *RightFormat; 403 } 404 405 Expected<std::string> NumericSubstitution::getResult() const { 406 assert(ExpressionPointer->getAST() != nullptr && 407 "Substituting empty expression"); 408 Expected<ExpressionValue> EvaluatedValue = 409 ExpressionPointer->getAST()->eval(); 410 if (!EvaluatedValue) 411 return EvaluatedValue.takeError(); 412 ExpressionFormat Format = ExpressionPointer->getFormat(); 413 return Format.getMatchingString(*EvaluatedValue); 414 } 415 416 Expected<std::string> StringSubstitution::getResult() const { 417 // Look up the value and escape it so that we can put it into the regex. 418 Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr); 419 if (!VarVal) 420 return VarVal.takeError(); 421 return Regex::escape(*VarVal); 422 } 423 424 bool Pattern::isValidVarNameStart(char C) { return C == '_' || isAlpha(C); } 425 426 Expected<Pattern::VariableProperties> 427 Pattern::parseVariable(StringRef &Str, const SourceMgr &SM) { 428 if (Str.empty()) 429 return ErrorDiagnostic::get(SM, Str, "empty variable name"); 430 431 size_t I = 0; 432 bool IsPseudo = Str[0] == '@'; 433 434 // Global vars start with '$'. 435 if (Str[0] == '$' || IsPseudo) 436 ++I; 437 438 if (!isValidVarNameStart(Str[I++])) 439 return ErrorDiagnostic::get(SM, Str, "invalid variable name"); 440 441 for (size_t E = Str.size(); I != E; ++I) 442 // Variable names are composed of alphanumeric characters and underscores. 443 if (Str[I] != '_' && !isAlnum(Str[I])) 444 break; 445 446 StringRef Name = Str.take_front(I); 447 Str = Str.substr(I); 448 return VariableProperties {Name, IsPseudo}; 449 } 450 451 // StringRef holding all characters considered as horizontal whitespaces by 452 // FileCheck input canonicalization. 453 constexpr StringLiteral SpaceChars = " \t"; 454 455 // Parsing helper function that strips the first character in S and returns it. 
456 static char popFront(StringRef &S) { 457 char C = S.front(); 458 S = S.drop_front(); 459 return C; 460 } 461 462 char OverflowError::ID = 0; 463 char UndefVarError::ID = 0; 464 char ErrorDiagnostic::ID = 0; 465 char NotFoundError::ID = 0; 466 467 Expected<NumericVariable *> Pattern::parseNumericVariableDefinition( 468 StringRef &Expr, FileCheckPatternContext *Context, 469 Optional<size_t> LineNumber, ExpressionFormat ImplicitFormat, 470 const SourceMgr &SM) { 471 Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM); 472 if (!ParseVarResult) 473 return ParseVarResult.takeError(); 474 StringRef Name = ParseVarResult->Name; 475 476 if (ParseVarResult->IsPseudo) 477 return ErrorDiagnostic::get( 478 SM, Name, "definition of pseudo numeric variable unsupported"); 479 480 // Detect collisions between string and numeric variables when the latter 481 // is created later than the former. 482 if (Context->DefinedVariableTable.find(Name) != 483 Context->DefinedVariableTable.end()) 484 return ErrorDiagnostic::get( 485 SM, Name, "string variable with name '" + Name + "' already exists"); 486 487 Expr = Expr.ltrim(SpaceChars); 488 if (!Expr.empty()) 489 return ErrorDiagnostic::get( 490 SM, Expr, "unexpected characters after numeric variable name"); 491 492 NumericVariable *DefinedNumericVariable; 493 auto VarTableIter = Context->GlobalNumericVariableTable.find(Name); 494 if (VarTableIter != Context->GlobalNumericVariableTable.end()) { 495 DefinedNumericVariable = VarTableIter->second; 496 if (DefinedNumericVariable->getImplicitFormat() != ImplicitFormat) 497 return ErrorDiagnostic::get( 498 SM, Expr, "format different from previous variable definition"); 499 } else 500 DefinedNumericVariable = 501 Context->makeNumericVariable(Name, ImplicitFormat, LineNumber); 502 503 return DefinedNumericVariable; 504 } 505 506 Expected<std::unique_ptr<NumericVariableUse>> Pattern::parseNumericVariableUse( 507 StringRef Name, bool IsPseudo, Optional<size_t> LineNumber, 508 
FileCheckPatternContext *Context, const SourceMgr &SM) { 509 if (IsPseudo && !Name.equals("@LINE")) 510 return ErrorDiagnostic::get( 511 SM, Name, "invalid pseudo numeric variable '" + Name + "'"); 512 513 // Numeric variable definitions and uses are parsed in the order in which 514 // they appear in the CHECK patterns. For each definition, the pointer to the 515 // class instance of the corresponding numeric variable definition is stored 516 // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer 517 // we get below is null, it means no such variable was defined before. When 518 // that happens, we create a dummy variable so that parsing can continue. All 519 // uses of undefined variables, whether string or numeric, are then diagnosed 520 // in printSubstitutions() after failing to match. 521 auto VarTableIter = Context->GlobalNumericVariableTable.find(Name); 522 NumericVariable *NumericVariable; 523 if (VarTableIter != Context->GlobalNumericVariableTable.end()) 524 NumericVariable = VarTableIter->second; 525 else { 526 NumericVariable = Context->makeNumericVariable( 527 Name, ExpressionFormat(ExpressionFormat::Kind::Unsigned)); 528 Context->GlobalNumericVariableTable[Name] = NumericVariable; 529 } 530 531 Optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber(); 532 if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber) 533 return ErrorDiagnostic::get( 534 SM, Name, 535 "numeric variable '" + Name + 536 "' defined earlier in the same CHECK directive"); 537 538 return std::make_unique<NumericVariableUse>(Name, NumericVariable); 539 } 540 541 Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand( 542 StringRef &Expr, AllowedOperand AO, bool MaybeInvalidConstraint, 543 Optional<size_t> LineNumber, FileCheckPatternContext *Context, 544 const SourceMgr &SM) { 545 if (Expr.startswith("(")) { 546 if (AO != AllowedOperand::Any) 547 return ErrorDiagnostic::get( 548 SM, Expr, "parenthesized expression not 
permitted here"); 549 return parseParenExpr(Expr, LineNumber, Context, SM); 550 } 551 552 if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) { 553 // Try to parse as a numeric variable use. 554 Expected<Pattern::VariableProperties> ParseVarResult = 555 parseVariable(Expr, SM); 556 if (ParseVarResult) { 557 // Try to parse a function call. 558 if (Expr.ltrim(SpaceChars).startswith("(")) { 559 if (AO != AllowedOperand::Any) 560 return ErrorDiagnostic::get(SM, ParseVarResult->Name, 561 "unexpected function call"); 562 563 return parseCallExpr(Expr, ParseVarResult->Name, LineNumber, Context, 564 SM); 565 } 566 567 return parseNumericVariableUse(ParseVarResult->Name, 568 ParseVarResult->IsPseudo, LineNumber, 569 Context, SM); 570 } 571 572 if (AO == AllowedOperand::LineVar) 573 return ParseVarResult.takeError(); 574 // Ignore the error and retry parsing as a literal. 575 consumeError(ParseVarResult.takeError()); 576 } 577 578 // Otherwise, parse it as a literal. 579 int64_t SignedLiteralValue; 580 uint64_t UnsignedLiteralValue; 581 StringRef SaveExpr = Expr; 582 // Accept both signed and unsigned literal, default to signed literal. 583 if (!Expr.consumeInteger((AO == AllowedOperand::LegacyLiteral) ? 10 : 0, 584 UnsignedLiteralValue)) 585 return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()), 586 UnsignedLiteralValue); 587 Expr = SaveExpr; 588 if (AO == AllowedOperand::Any && !Expr.consumeInteger(0, SignedLiteralValue)) 589 return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()), 590 SignedLiteralValue); 591 592 return ErrorDiagnostic::get( 593 SM, Expr, 594 Twine("invalid ") + 595 (MaybeInvalidConstraint ? 
"matching constraint or " : "") + 596 "operand format"); 597 } 598 599 Expected<std::unique_ptr<ExpressionAST>> 600 Pattern::parseParenExpr(StringRef &Expr, Optional<size_t> LineNumber, 601 FileCheckPatternContext *Context, const SourceMgr &SM) { 602 Expr = Expr.ltrim(SpaceChars); 603 assert(Expr.startswith("(")); 604 605 // Parse right operand. 606 Expr.consume_front("("); 607 Expr = Expr.ltrim(SpaceChars); 608 if (Expr.empty()) 609 return ErrorDiagnostic::get(SM, Expr, "missing operand in expression"); 610 611 // Note: parseNumericOperand handles nested opening parentheses. 612 Expected<std::unique_ptr<ExpressionAST>> SubExprResult = parseNumericOperand( 613 Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber, 614 Context, SM); 615 Expr = Expr.ltrim(SpaceChars); 616 while (SubExprResult && !Expr.empty() && !Expr.startswith(")")) { 617 StringRef OrigExpr = Expr; 618 SubExprResult = parseBinop(OrigExpr, Expr, std::move(*SubExprResult), false, 619 LineNumber, Context, SM); 620 Expr = Expr.ltrim(SpaceChars); 621 } 622 if (!SubExprResult) 623 return SubExprResult; 624 625 if (!Expr.consume_front(")")) { 626 return ErrorDiagnostic::get(SM, Expr, 627 "missing ')' at end of nested expression"); 628 } 629 return SubExprResult; 630 } 631 632 Expected<std::unique_ptr<ExpressionAST>> 633 Pattern::parseBinop(StringRef Expr, StringRef &RemainingExpr, 634 std::unique_ptr<ExpressionAST> LeftOp, 635 bool IsLegacyLineExpr, Optional<size_t> LineNumber, 636 FileCheckPatternContext *Context, const SourceMgr &SM) { 637 RemainingExpr = RemainingExpr.ltrim(SpaceChars); 638 if (RemainingExpr.empty()) 639 return std::move(LeftOp); 640 641 // Check if this is a supported operation and select a function to perform 642 // it. 
643 SMLoc OpLoc = SMLoc::getFromPointer(RemainingExpr.data()); 644 char Operator = popFront(RemainingExpr); 645 binop_eval_t EvalBinop; 646 switch (Operator) { 647 case '+': 648 EvalBinop = operator+; 649 break; 650 case '-': 651 EvalBinop = operator-; 652 break; 653 default: 654 return ErrorDiagnostic::get( 655 SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'"); 656 } 657 658 // Parse right operand. 659 RemainingExpr = RemainingExpr.ltrim(SpaceChars); 660 if (RemainingExpr.empty()) 661 return ErrorDiagnostic::get(SM, RemainingExpr, 662 "missing operand in expression"); 663 // The second operand in a legacy @LINE expression is always a literal. 664 AllowedOperand AO = 665 IsLegacyLineExpr ? AllowedOperand::LegacyLiteral : AllowedOperand::Any; 666 Expected<std::unique_ptr<ExpressionAST>> RightOpResult = 667 parseNumericOperand(RemainingExpr, AO, /*MaybeInvalidConstraint=*/false, 668 LineNumber, Context, SM); 669 if (!RightOpResult) 670 return RightOpResult; 671 672 Expr = Expr.drop_back(RemainingExpr.size()); 673 return std::make_unique<BinaryOperation>(Expr, EvalBinop, std::move(LeftOp), 674 std::move(*RightOpResult)); 675 } 676 677 Expected<std::unique_ptr<ExpressionAST>> 678 Pattern::parseCallExpr(StringRef &Expr, StringRef FuncName, 679 Optional<size_t> LineNumber, 680 FileCheckPatternContext *Context, const SourceMgr &SM) { 681 Expr = Expr.ltrim(SpaceChars); 682 assert(Expr.startswith("(")); 683 684 auto OptFunc = StringSwitch<Optional<binop_eval_t>>(FuncName) 685 .Case("add", operator+) 686 .Case("div", operator/) 687 .Case("max", max) 688 .Case("min", min) 689 .Case("mul", operator*) 690 .Case("sub", operator-) 691 .Default(None); 692 693 if (!OptFunc) 694 return ErrorDiagnostic::get( 695 SM, FuncName, Twine("call to undefined function '") + FuncName + "'"); 696 697 Expr.consume_front("("); 698 Expr = Expr.ltrim(SpaceChars); 699 700 // Parse call arguments, which are comma separated. 
701 SmallVector<std::unique_ptr<ExpressionAST>, 4> Args; 702 while (!Expr.empty() && !Expr.startswith(")")) { 703 if (Expr.startswith(",")) 704 return ErrorDiagnostic::get(SM, Expr, "missing argument"); 705 706 // Parse the argument, which is an arbitary expression. 707 StringRef OuterBinOpExpr = Expr; 708 Expected<std::unique_ptr<ExpressionAST>> Arg = parseNumericOperand( 709 Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber, 710 Context, SM); 711 while (Arg && !Expr.empty()) { 712 Expr = Expr.ltrim(SpaceChars); 713 // Have we reached an argument terminator? 714 if (Expr.startswith(",") || Expr.startswith(")")) 715 break; 716 717 // Arg = Arg <op> <expr> 718 Arg = parseBinop(OuterBinOpExpr, Expr, std::move(*Arg), false, LineNumber, 719 Context, SM); 720 } 721 722 // Prefer an expression error over a generic invalid argument message. 723 if (!Arg) 724 return Arg.takeError(); 725 Args.push_back(std::move(*Arg)); 726 727 // Have we parsed all available arguments? 728 Expr = Expr.ltrim(SpaceChars); 729 if (!Expr.consume_front(",")) 730 break; 731 732 Expr = Expr.ltrim(SpaceChars); 733 if (Expr.startswith(")")) 734 return ErrorDiagnostic::get(SM, Expr, "missing argument"); 735 } 736 737 if (!Expr.consume_front(")")) 738 return ErrorDiagnostic::get(SM, Expr, 739 "missing ')' at end of call expression"); 740 741 const unsigned NumArgs = Args.size(); 742 if (NumArgs == 2) 743 return std::make_unique<BinaryOperation>(Expr, *OptFunc, std::move(Args[0]), 744 std::move(Args[1])); 745 746 // TODO: Support more than binop_eval_t. 
747 return ErrorDiagnostic::get(SM, FuncName, 748 Twine("function '") + FuncName + 749 Twine("' takes 2 arguments but ") + 750 Twine(NumArgs) + " given"); 751 } 752 753 Expected<std::unique_ptr<Expression>> Pattern::parseNumericSubstitutionBlock( 754 StringRef Expr, Optional<NumericVariable *> &DefinedNumericVariable, 755 bool IsLegacyLineExpr, Optional<size_t> LineNumber, 756 FileCheckPatternContext *Context, const SourceMgr &SM) { 757 std::unique_ptr<ExpressionAST> ExpressionASTPointer = nullptr; 758 StringRef DefExpr = StringRef(); 759 DefinedNumericVariable = None; 760 ExpressionFormat ExplicitFormat = ExpressionFormat(); 761 unsigned Precision = 0; 762 763 // Parse format specifier (NOTE: ',' is also an argument seperator). 764 size_t FormatSpecEnd = Expr.find(','); 765 size_t FunctionStart = Expr.find('('); 766 if (FormatSpecEnd != StringRef::npos && FormatSpecEnd < FunctionStart) { 767 StringRef FormatExpr = Expr.take_front(FormatSpecEnd); 768 Expr = Expr.drop_front(FormatSpecEnd + 1); 769 FormatExpr = FormatExpr.trim(SpaceChars); 770 if (!FormatExpr.consume_front("%")) 771 return ErrorDiagnostic::get( 772 SM, FormatExpr, 773 "invalid matching format specification in expression"); 774 775 // Parse precision. 776 if (FormatExpr.consume_front(".")) { 777 if (FormatExpr.consumeInteger(10, Precision)) 778 return ErrorDiagnostic::get(SM, FormatExpr, 779 "invalid precision in format specifier"); 780 } 781 782 if (!FormatExpr.empty()) { 783 // Check for unknown matching format specifier and set matching format in 784 // class instance representing this expression. 
785 SMLoc FmtLoc = SMLoc::getFromPointer(FormatExpr.data()); 786 switch (popFront(FormatExpr)) { 787 case 'u': 788 ExplicitFormat = 789 ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision); 790 break; 791 case 'd': 792 ExplicitFormat = 793 ExpressionFormat(ExpressionFormat::Kind::Signed, Precision); 794 break; 795 case 'x': 796 ExplicitFormat = 797 ExpressionFormat(ExpressionFormat::Kind::HexLower, Precision); 798 break; 799 case 'X': 800 ExplicitFormat = 801 ExpressionFormat(ExpressionFormat::Kind::HexUpper, Precision); 802 break; 803 default: 804 return ErrorDiagnostic::get(SM, FmtLoc, 805 "invalid format specifier in expression"); 806 } 807 } 808 809 FormatExpr = FormatExpr.ltrim(SpaceChars); 810 if (!FormatExpr.empty()) 811 return ErrorDiagnostic::get( 812 SM, FormatExpr, 813 "invalid matching format specification in expression"); 814 } 815 816 // Save variable definition expression if any. 817 size_t DefEnd = Expr.find(':'); 818 if (DefEnd != StringRef::npos) { 819 DefExpr = Expr.substr(0, DefEnd); 820 Expr = Expr.substr(DefEnd + 1); 821 } 822 823 // Parse matching constraint. 824 Expr = Expr.ltrim(SpaceChars); 825 bool HasParsedValidConstraint = false; 826 if (Expr.consume_front("==")) 827 HasParsedValidConstraint = true; 828 829 // Parse the expression itself. 830 Expr = Expr.ltrim(SpaceChars); 831 if (Expr.empty()) { 832 if (HasParsedValidConstraint) 833 return ErrorDiagnostic::get( 834 SM, Expr, "empty numeric expression should not have a constraint"); 835 } else { 836 Expr = Expr.rtrim(SpaceChars); 837 StringRef OuterBinOpExpr = Expr; 838 // The first operand in a legacy @LINE expression is always the @LINE 839 // pseudo variable. 840 AllowedOperand AO = 841 IsLegacyLineExpr ? 
AllowedOperand::LineVar : AllowedOperand::Any; 842 Expected<std::unique_ptr<ExpressionAST>> ParseResult = parseNumericOperand( 843 Expr, AO, !HasParsedValidConstraint, LineNumber, Context, SM); 844 while (ParseResult && !Expr.empty()) { 845 ParseResult = parseBinop(OuterBinOpExpr, Expr, std::move(*ParseResult), 846 IsLegacyLineExpr, LineNumber, Context, SM); 847 // Legacy @LINE expressions only allow 2 operands. 848 if (ParseResult && IsLegacyLineExpr && !Expr.empty()) 849 return ErrorDiagnostic::get( 850 SM, Expr, 851 "unexpected characters at end of expression '" + Expr + "'"); 852 } 853 if (!ParseResult) 854 return ParseResult.takeError(); 855 ExpressionASTPointer = std::move(*ParseResult); 856 } 857 858 // Select format of the expression, i.e. (i) its explicit format, if any, 859 // otherwise (ii) its implicit format, if any, otherwise (iii) the default 860 // format (unsigned). Error out in case of conflicting implicit format 861 // without explicit format. 862 ExpressionFormat Format; 863 if (ExplicitFormat) 864 Format = ExplicitFormat; 865 else if (ExpressionASTPointer) { 866 Expected<ExpressionFormat> ImplicitFormat = 867 ExpressionASTPointer->getImplicitFormat(SM); 868 if (!ImplicitFormat) 869 return ImplicitFormat.takeError(); 870 Format = *ImplicitFormat; 871 } 872 if (!Format) 873 Format = ExpressionFormat(ExpressionFormat::Kind::Unsigned, Precision); 874 875 std::unique_ptr<Expression> ExpressionPointer = 876 std::make_unique<Expression>(std::move(ExpressionASTPointer), Format); 877 878 // Parse the numeric variable definition. 
879 if (DefEnd != StringRef::npos) { 880 DefExpr = DefExpr.ltrim(SpaceChars); 881 Expected<NumericVariable *> ParseResult = parseNumericVariableDefinition( 882 DefExpr, Context, LineNumber, ExpressionPointer->getFormat(), SM); 883 884 if (!ParseResult) 885 return ParseResult.takeError(); 886 DefinedNumericVariable = *ParseResult; 887 } 888 889 return std::move(ExpressionPointer); 890 } 891 892 bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix, 893 SourceMgr &SM, const FileCheckRequest &Req) { 894 bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot; 895 IgnoreCase = Req.IgnoreCase; 896 897 PatternLoc = SMLoc::getFromPointer(PatternStr.data()); 898 899 if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) 900 // Ignore trailing whitespace. 901 while (!PatternStr.empty() && 902 (PatternStr.back() == ' ' || PatternStr.back() == '\t')) 903 PatternStr = PatternStr.substr(0, PatternStr.size() - 1); 904 905 // Check that there is something on the line. 906 if (PatternStr.empty() && CheckTy != Check::CheckEmpty) { 907 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, 908 "found empty check string with prefix '" + Prefix + ":'"); 909 return true; 910 } 911 912 if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) { 913 SM.PrintMessage( 914 PatternLoc, SourceMgr::DK_Error, 915 "found non-empty check string for empty check with prefix '" + Prefix + 916 ":'"); 917 return true; 918 } 919 920 if (CheckTy == Check::CheckEmpty) { 921 RegExStr = "(\n$)"; 922 return false; 923 } 924 925 // If literal check, set fixed string. 926 if (CheckTy.isLiteralMatch()) { 927 FixedStr = PatternStr; 928 return false; 929 } 930 931 // Check to see if this is a fixed string, or if it has regex pieces. 
932 if (!MatchFullLinesHere && 933 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && 934 PatternStr.find("[[") == StringRef::npos))) { 935 FixedStr = PatternStr; 936 return false; 937 } 938 939 if (MatchFullLinesHere) { 940 RegExStr += '^'; 941 if (!Req.NoCanonicalizeWhiteSpace) 942 RegExStr += " *"; 943 } 944 945 // Paren value #0 is for the fully matched string. Any new parenthesized 946 // values add from there. 947 unsigned CurParen = 1; 948 949 // Otherwise, there is at least one regex piece. Build up the regex pattern 950 // by escaping scary characters in fixed strings, building up one big regex. 951 while (!PatternStr.empty()) { 952 // RegEx matches. 953 if (PatternStr.startswith("{{")) { 954 // This is the start of a regex match. Scan for the }}. 955 size_t End = PatternStr.find("}}"); 956 if (End == StringRef::npos) { 957 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 958 SourceMgr::DK_Error, 959 "found start of regex string with no end '}}'"); 960 return true; 961 } 962 963 // Enclose {{}} patterns in parens just like [[]] even though we're not 964 // capturing the result for any purpose. This is required in case the 965 // expression contains an alternation like: CHECK: abc{{x|z}}def. We 966 // want this to turn into: "abc(x|z)def" not "abcx|zdef". 967 RegExStr += '('; 968 ++CurParen; 969 970 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) 971 return true; 972 RegExStr += ')'; 973 974 PatternStr = PatternStr.substr(End + 2); 975 continue; 976 } 977 978 // String and numeric substitution blocks. Pattern substitution blocks come 979 // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some 980 // other regex) and assigns it to the string variable 'foo'. The latter 981 // substitutes foo's value. Numeric substitution blocks recognize the same 982 // form as string ones, but start with a '#' sign after the double 983 // brackets. 
They also accept a combined form which sets a numeric variable 984 // to the evaluation of an expression. Both string and numeric variable 985 // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be 986 // valid, as this helps catch some common errors. 987 if (PatternStr.startswith("[[")) { 988 StringRef UnparsedPatternStr = PatternStr.substr(2); 989 // Find the closing bracket pair ending the match. End is going to be an 990 // offset relative to the beginning of the match string. 991 size_t End = FindRegexVarEnd(UnparsedPatternStr, SM); 992 StringRef MatchStr = UnparsedPatternStr.substr(0, End); 993 bool IsNumBlock = MatchStr.consume_front("#"); 994 995 if (End == StringRef::npos) { 996 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), 997 SourceMgr::DK_Error, 998 "Invalid substitution block, no ]] found"); 999 return true; 1000 } 1001 // Strip the substitution block we are parsing. End points to the start 1002 // of the "]]" closing the expression so account for it in computing the 1003 // index of the first unparsed character. 1004 PatternStr = UnparsedPatternStr.substr(End + 2); 1005 1006 bool IsDefinition = false; 1007 bool SubstNeeded = false; 1008 // Whether the substitution block is a legacy use of @LINE with string 1009 // substitution block syntax. 1010 bool IsLegacyLineExpr = false; 1011 StringRef DefName; 1012 StringRef SubstStr; 1013 std::string MatchRegexp; 1014 size_t SubstInsertIdx = RegExStr.size(); 1015 1016 // Parse string variable or legacy @LINE expression. 1017 if (!IsNumBlock) { 1018 size_t VarEndIdx = MatchStr.find(':'); 1019 size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t"); 1020 if (SpacePos != StringRef::npos) { 1021 SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos), 1022 SourceMgr::DK_Error, "unexpected whitespace"); 1023 return true; 1024 } 1025 1026 // Get the name (e.g. "foo") and verify it is well formed. 
1027 StringRef OrigMatchStr = MatchStr; 1028 Expected<Pattern::VariableProperties> ParseVarResult = 1029 parseVariable(MatchStr, SM); 1030 if (!ParseVarResult) { 1031 logAllUnhandledErrors(ParseVarResult.takeError(), errs()); 1032 return true; 1033 } 1034 StringRef Name = ParseVarResult->Name; 1035 bool IsPseudo = ParseVarResult->IsPseudo; 1036 1037 IsDefinition = (VarEndIdx != StringRef::npos); 1038 SubstNeeded = !IsDefinition; 1039 if (IsDefinition) { 1040 if ((IsPseudo || !MatchStr.consume_front(":"))) { 1041 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), 1042 SourceMgr::DK_Error, 1043 "invalid name in string variable definition"); 1044 return true; 1045 } 1046 1047 // Detect collisions between string and numeric variables when the 1048 // former is created later than the latter. 1049 if (Context->GlobalNumericVariableTable.find(Name) != 1050 Context->GlobalNumericVariableTable.end()) { 1051 SM.PrintMessage( 1052 SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, 1053 "numeric variable with name '" + Name + "' already exists"); 1054 return true; 1055 } 1056 DefName = Name; 1057 MatchRegexp = MatchStr.str(); 1058 } else { 1059 if (IsPseudo) { 1060 MatchStr = OrigMatchStr; 1061 IsLegacyLineExpr = IsNumBlock = true; 1062 } else 1063 SubstStr = Name; 1064 } 1065 } 1066 1067 // Parse numeric substitution block. 
1068 std::unique_ptr<Expression> ExpressionPointer; 1069 Optional<NumericVariable *> DefinedNumericVariable; 1070 if (IsNumBlock) { 1071 Expected<std::unique_ptr<Expression>> ParseResult = 1072 parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable, 1073 IsLegacyLineExpr, LineNumber, Context, 1074 SM); 1075 if (!ParseResult) { 1076 logAllUnhandledErrors(ParseResult.takeError(), errs()); 1077 return true; 1078 } 1079 ExpressionPointer = std::move(*ParseResult); 1080 SubstNeeded = ExpressionPointer->getAST() != nullptr; 1081 if (DefinedNumericVariable) { 1082 IsDefinition = true; 1083 DefName = (*DefinedNumericVariable)->getName(); 1084 } 1085 if (SubstNeeded) 1086 SubstStr = MatchStr; 1087 else { 1088 ExpressionFormat Format = ExpressionPointer->getFormat(); 1089 MatchRegexp = cantFail(Format.getWildcardRegex()); 1090 } 1091 } 1092 1093 // Handle variable definition: [[<def>:(...)]] and [[#(...)<def>:(...)]]. 1094 if (IsDefinition) { 1095 RegExStr += '('; 1096 ++SubstInsertIdx; 1097 1098 if (IsNumBlock) { 1099 NumericVariableMatch NumericVariableDefinition = { 1100 *DefinedNumericVariable, CurParen}; 1101 NumericVariableDefs[DefName] = NumericVariableDefinition; 1102 // This store is done here rather than in match() to allow 1103 // parseNumericVariableUse() to get the pointer to the class instance 1104 // of the right variable definition corresponding to a given numeric 1105 // variable use. 1106 Context->GlobalNumericVariableTable[DefName] = 1107 *DefinedNumericVariable; 1108 } else { 1109 VariableDefs[DefName] = CurParen; 1110 // Mark string variable as defined to detect collisions between 1111 // string and numeric variables in parseNumericVariableUse() and 1112 // defineCmdlineVariables() when the latter is created later than the 1113 // former. We cannot reuse GlobalVariableTable for this by populating 1114 // it with an empty string since we would then lose the ability to 1115 // detect the use of an undefined variable in match(). 
1116 Context->DefinedVariableTable[DefName] = true; 1117 } 1118 1119 ++CurParen; 1120 } 1121 1122 if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM)) 1123 return true; 1124 1125 if (IsDefinition) 1126 RegExStr += ')'; 1127 1128 // Handle substitutions: [[foo]] and [[#<foo expr>]]. 1129 if (SubstNeeded) { 1130 // Handle substitution of string variables that were defined earlier on 1131 // the same line by emitting a backreference. Expressions do not 1132 // support substituting a numeric variable defined on the same line. 1133 if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) { 1134 unsigned CaptureParenGroup = VariableDefs[SubstStr]; 1135 if (CaptureParenGroup < 1 || CaptureParenGroup > 9) { 1136 SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()), 1137 SourceMgr::DK_Error, 1138 "Can't back-reference more than 9 variables"); 1139 return true; 1140 } 1141 AddBackrefToRegEx(CaptureParenGroup); 1142 } else { 1143 // Handle substitution of string variables ([[<var>]]) defined in 1144 // previous CHECK patterns, and substitution of expressions. 1145 Substitution *Substitution = 1146 IsNumBlock 1147 ? Context->makeNumericSubstitution( 1148 SubstStr, std::move(ExpressionPointer), SubstInsertIdx) 1149 : Context->makeStringSubstitution(SubstStr, SubstInsertIdx); 1150 Substitutions.push_back(Substitution); 1151 } 1152 } 1153 } 1154 1155 // Handle fixed string matches. 1156 // Find the end, which is the start of the next regex. 
1157 size_t FixedMatchEnd = PatternStr.find("{{"); 1158 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); 1159 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); 1160 PatternStr = PatternStr.substr(FixedMatchEnd); 1161 } 1162 1163 if (MatchFullLinesHere) { 1164 if (!Req.NoCanonicalizeWhiteSpace) 1165 RegExStr += " *"; 1166 RegExStr += '$'; 1167 } 1168 1169 return false; 1170 } 1171 1172 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { 1173 Regex R(RS); 1174 std::string Error; 1175 if (!R.isValid(Error)) { 1176 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, 1177 "invalid regex: " + Error); 1178 return true; 1179 } 1180 1181 RegExStr += RS.str(); 1182 CurParen += R.getNumMatches(); 1183 return false; 1184 } 1185 1186 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { 1187 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); 1188 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); 1189 RegExStr += Backref; 1190 } 1191 1192 Expected<size_t> Pattern::match(StringRef Buffer, size_t &MatchLen, 1193 const SourceMgr &SM) const { 1194 // If this is the EOF pattern, match it immediately. 1195 if (CheckTy == Check::CheckEOF) { 1196 MatchLen = 0; 1197 return Buffer.size(); 1198 } 1199 1200 // If this is a fixed string pattern, just match it now. 1201 if (!FixedStr.empty()) { 1202 MatchLen = FixedStr.size(); 1203 size_t Pos = 1204 IgnoreCase ? Buffer.find_lower(FixedStr) : Buffer.find(FixedStr); 1205 if (Pos == StringRef::npos) 1206 return make_error<NotFoundError>(); 1207 return Pos; 1208 } 1209 1210 // Regex match. 1211 1212 // If there are substitutions, we need to create a temporary string with the 1213 // actual value. 
1214 StringRef RegExToMatch = RegExStr; 1215 std::string TmpStr; 1216 if (!Substitutions.empty()) { 1217 TmpStr = RegExStr; 1218 if (LineNumber) 1219 Context->LineVariable->setValue(ExpressionValue(*LineNumber)); 1220 1221 size_t InsertOffset = 0; 1222 // Substitute all string variables and expressions whose values are only 1223 // now known. Use of string variables defined on the same line are handled 1224 // by back-references. 1225 for (const auto &Substitution : Substitutions) { 1226 // Substitute and check for failure (e.g. use of undefined variable). 1227 Expected<std::string> Value = Substitution->getResult(); 1228 if (!Value) { 1229 // Convert to an ErrorDiagnostic to get location information. This is 1230 // done here rather than PrintNoMatch since now we know which 1231 // substitution block caused the overflow. 1232 Error Err = 1233 handleErrors(Value.takeError(), [&](const OverflowError &E) { 1234 return ErrorDiagnostic::get(SM, Substitution->getFromString(), 1235 "unable to substitute variable or " 1236 "numeric expression: overflow error"); 1237 }); 1238 return std::move(Err); 1239 } 1240 1241 // Plop it into the regex at the adjusted offset. 1242 TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset, 1243 Value->begin(), Value->end()); 1244 InsertOffset += Value->size(); 1245 } 1246 1247 // Match the newly constructed regex. 1248 RegExToMatch = TmpStr; 1249 } 1250 1251 SmallVector<StringRef, 4> MatchInfo; 1252 unsigned int Flags = Regex::Newline; 1253 if (IgnoreCase) 1254 Flags |= Regex::IgnoreCase; 1255 if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo)) 1256 return make_error<NotFoundError>(); 1257 1258 // Successful regex match. 1259 assert(!MatchInfo.empty() && "Didn't get any match"); 1260 StringRef FullMatch = MatchInfo[0]; 1261 1262 // If this defines any string variables, remember their values. 
1263 for (const auto &VariableDef : VariableDefs) { 1264 assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); 1265 Context->GlobalVariableTable[VariableDef.first] = 1266 MatchInfo[VariableDef.second]; 1267 } 1268 1269 // If this defines any numeric variables, remember their values. 1270 for (const auto &NumericVariableDef : NumericVariableDefs) { 1271 const NumericVariableMatch &NumericVariableMatch = 1272 NumericVariableDef.getValue(); 1273 unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup; 1274 assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error"); 1275 NumericVariable *DefinedNumericVariable = 1276 NumericVariableMatch.DefinedNumericVariable; 1277 1278 StringRef MatchedValue = MatchInfo[CaptureParenGroup]; 1279 ExpressionFormat Format = DefinedNumericVariable->getImplicitFormat(); 1280 Expected<ExpressionValue> Value = 1281 Format.valueFromStringRepr(MatchedValue, SM); 1282 if (!Value) 1283 return Value.takeError(); 1284 DefinedNumericVariable->setValue(*Value, MatchedValue); 1285 } 1286 1287 // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after 1288 // the required preceding newline, which is consumed by the pattern in the 1289 // case of CHECK-EMPTY but not CHECK-NEXT. 1290 size_t MatchStartSkip = CheckTy == Check::CheckEmpty; 1291 MatchLen = FullMatch.size() - MatchStartSkip; 1292 return FullMatch.data() - Buffer.data() + MatchStartSkip; 1293 } 1294 1295 unsigned Pattern::computeMatchDistance(StringRef Buffer) const { 1296 // Just compute the number of matching characters. For regular expressions, we 1297 // just compare against the regex itself and hope for the best. 1298 // 1299 // FIXME: One easy improvement here is have the regex lib generate a single 1300 // example regular expression which matches, and use that as the example 1301 // string. 
1302 StringRef ExampleString(FixedStr); 1303 if (ExampleString.empty()) 1304 ExampleString = RegExStr; 1305 1306 // Only compare up to the first line in the buffer, or the string size. 1307 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); 1308 BufferPrefix = BufferPrefix.split('\n').first; 1309 return BufferPrefix.edit_distance(ExampleString); 1310 } 1311 1312 void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, 1313 SMRange Range, 1314 FileCheckDiag::MatchType MatchTy, 1315 std::vector<FileCheckDiag> *Diags) const { 1316 // Print what we know about substitutions. 1317 if (!Substitutions.empty()) { 1318 for (const auto &Substitution : Substitutions) { 1319 SmallString<256> Msg; 1320 raw_svector_ostream OS(Msg); 1321 Expected<std::string> MatchedValue = Substitution->getResult(); 1322 1323 // Substitution failed or is not known at match time, print the undefined 1324 // variables it uses. 1325 if (!MatchedValue) { 1326 bool UndefSeen = false; 1327 handleAllErrors( 1328 MatchedValue.takeError(), [](const NotFoundError &E) {}, 1329 // Handled in PrintNoMatch(). 1330 [](const ErrorDiagnostic &E) {}, 1331 // Handled in match(). 1332 [](const OverflowError &E) {}, 1333 [&](const UndefVarError &E) { 1334 if (!UndefSeen) { 1335 OS << "uses undefined variable(s):"; 1336 UndefSeen = true; 1337 } 1338 OS << " "; 1339 E.log(OS); 1340 }); 1341 } else { 1342 // Substitution succeeded. Print substituted value. 1343 OS << "with \""; 1344 OS.write_escaped(Substitution->getFromString()) << "\" equal to \""; 1345 OS.write_escaped(*MatchedValue) << "\""; 1346 } 1347 1348 // We report only the start of the match/search range to suggest we are 1349 // reporting the substitutions as set at the start of the match/search. 1350 // Indicating a non-zero-length range might instead seem to imply that the 1351 // substitution matches or was captured from exactly that range. 
1352 if (Diags) 1353 Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, 1354 SMRange(Range.Start, Range.Start), OS.str()); 1355 else 1356 SM.PrintMessage(Range.Start, SourceMgr::DK_Note, OS.str()); 1357 } 1358 } 1359 } 1360 1361 void Pattern::printVariableDefs(const SourceMgr &SM, 1362 FileCheckDiag::MatchType MatchTy, 1363 std::vector<FileCheckDiag> *Diags) const { 1364 if (VariableDefs.empty() && NumericVariableDefs.empty()) 1365 return; 1366 // Build list of variable captures. 1367 struct VarCapture { 1368 StringRef Name; 1369 SMRange Range; 1370 }; 1371 SmallVector<VarCapture, 2> VarCaptures; 1372 for (const auto &VariableDef : VariableDefs) { 1373 VarCapture VC; 1374 VC.Name = VariableDef.first; 1375 StringRef Value = Context->GlobalVariableTable[VC.Name]; 1376 SMLoc Start = SMLoc::getFromPointer(Value.data()); 1377 SMLoc End = SMLoc::getFromPointer(Value.data() + Value.size()); 1378 VC.Range = SMRange(Start, End); 1379 VarCaptures.push_back(VC); 1380 } 1381 for (const auto &VariableDef : NumericVariableDefs) { 1382 VarCapture VC; 1383 VC.Name = VariableDef.getKey(); 1384 StringRef StrValue = VariableDef.getValue() 1385 .DefinedNumericVariable->getStringValue() 1386 .getValue(); 1387 SMLoc Start = SMLoc::getFromPointer(StrValue.data()); 1388 SMLoc End = SMLoc::getFromPointer(StrValue.data() + StrValue.size()); 1389 VC.Range = SMRange(Start, End); 1390 VarCaptures.push_back(VC); 1391 } 1392 // Sort variable captures by the order in which they matched the input. 1393 // Ranges shouldn't be overlapping, so we can just compare the start. 1394 llvm::sort(VarCaptures, [](const VarCapture &A, const VarCapture &B) { 1395 assert(A.Range.Start != B.Range.Start && 1396 "unexpected overlapping variable captures"); 1397 return A.Range.Start.getPointer() < B.Range.Start.getPointer(); 1398 }); 1399 // Create notes for the sorted captures. 
1400 for (const VarCapture &VC : VarCaptures) { 1401 SmallString<256> Msg; 1402 raw_svector_ostream OS(Msg); 1403 OS << "captured var \"" << VC.Name << "\""; 1404 if (Diags) 1405 Diags->emplace_back(SM, CheckTy, getLoc(), MatchTy, VC.Range, OS.str()); 1406 else 1407 SM.PrintMessage(VC.Range.Start, SourceMgr::DK_Note, OS.str(), VC.Range); 1408 } 1409 } 1410 1411 static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy, 1412 const SourceMgr &SM, SMLoc Loc, 1413 Check::FileCheckType CheckTy, 1414 StringRef Buffer, size_t Pos, size_t Len, 1415 std::vector<FileCheckDiag> *Diags, 1416 bool AdjustPrevDiags = false) { 1417 SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos); 1418 SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len); 1419 SMRange Range(Start, End); 1420 if (Diags) { 1421 if (AdjustPrevDiags) { 1422 SMLoc CheckLoc = Diags->rbegin()->CheckLoc; 1423 for (auto I = Diags->rbegin(), E = Diags->rend(); 1424 I != E && I->CheckLoc == CheckLoc; ++I) 1425 I->MatchTy = MatchTy; 1426 } else 1427 Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range); 1428 } 1429 return Range; 1430 } 1431 1432 void Pattern::printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, 1433 std::vector<FileCheckDiag> *Diags) const { 1434 // Attempt to find the closest/best fuzzy match. Usually an error happens 1435 // because some string in the output didn't exactly match. In these cases, we 1436 // would like to show the user a best guess at what "should have" matched, to 1437 // save them having to actually check the input manually. 1438 size_t NumLinesForward = 0; 1439 size_t Best = StringRef::npos; 1440 double BestQuality = 0; 1441 1442 // Use an arbitrary 4k limit on how far we will search. 1443 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { 1444 if (Buffer[i] == '\n') 1445 ++NumLinesForward; 1446 1447 // Patterns have leading whitespace stripped, so skip whitespace when 1448 // looking for something which looks like a pattern. 
1449 if (Buffer[i] == ' ' || Buffer[i] == '\t') 1450 continue; 1451 1452 // Compute the "quality" of this match as an arbitrary combination of the 1453 // match distance and the number of lines skipped to get to this match. 1454 unsigned Distance = computeMatchDistance(Buffer.substr(i)); 1455 double Quality = Distance + (NumLinesForward / 100.); 1456 1457 if (Quality < BestQuality || Best == StringRef::npos) { 1458 Best = i; 1459 BestQuality = Quality; 1460 } 1461 } 1462 1463 // Print the "possible intended match here" line if we found something 1464 // reasonable and not equal to what we showed in the "scanning from here" 1465 // line. 1466 if (Best && Best != StringRef::npos && BestQuality < 50) { 1467 SMRange MatchRange = 1468 ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(), 1469 getCheckTy(), Buffer, Best, 0, Diags); 1470 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, 1471 "possible intended match here"); 1472 1473 // FIXME: If we wanted to be really friendly we would show why the match 1474 // failed, as it can be hard to spot simple one character differences. 1475 } 1476 } 1477 1478 Expected<StringRef> 1479 FileCheckPatternContext::getPatternVarValue(StringRef VarName) { 1480 auto VarIter = GlobalVariableTable.find(VarName); 1481 if (VarIter == GlobalVariableTable.end()) 1482 return make_error<UndefVarError>(VarName); 1483 1484 return VarIter->second; 1485 } 1486 1487 template <class... Types> 1488 NumericVariable *FileCheckPatternContext::makeNumericVariable(Types... 
args) { 1489 NumericVariables.push_back(std::make_unique<NumericVariable>(args...)); 1490 return NumericVariables.back().get(); 1491 } 1492 1493 Substitution * 1494 FileCheckPatternContext::makeStringSubstitution(StringRef VarName, 1495 size_t InsertIdx) { 1496 Substitutions.push_back( 1497 std::make_unique<StringSubstitution>(this, VarName, InsertIdx)); 1498 return Substitutions.back().get(); 1499 } 1500 1501 Substitution *FileCheckPatternContext::makeNumericSubstitution( 1502 StringRef ExpressionStr, std::unique_ptr<Expression> Expression, 1503 size_t InsertIdx) { 1504 Substitutions.push_back(std::make_unique<NumericSubstitution>( 1505 this, ExpressionStr, std::move(Expression), InsertIdx)); 1506 return Substitutions.back().get(); 1507 } 1508 1509 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { 1510 // Offset keeps track of the current offset within the input Str 1511 size_t Offset = 0; 1512 // [...] Nesting depth 1513 size_t BracketDepth = 0; 1514 1515 while (!Str.empty()) { 1516 if (Str.startswith("]]") && BracketDepth == 0) 1517 return Offset; 1518 if (Str[0] == '\\') { 1519 // Backslash escapes the next char within regexes, so skip them both. 1520 Str = Str.substr(2); 1521 Offset += 2; 1522 } else { 1523 switch (Str[0]) { 1524 default: 1525 break; 1526 case '[': 1527 BracketDepth++; 1528 break; 1529 case ']': 1530 if (BracketDepth == 0) { 1531 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), 1532 SourceMgr::DK_Error, 1533 "missing closing \"]\" for regex variable"); 1534 exit(1); 1535 } 1536 BracketDepth--; 1537 break; 1538 } 1539 Str = Str.substr(1); 1540 Offset++; 1541 } 1542 } 1543 1544 return StringRef::npos; 1545 } 1546 1547 StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB, 1548 SmallVectorImpl<char> &OutputBuffer) { 1549 OutputBuffer.reserve(MB.getBufferSize()); 1550 1551 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); 1552 Ptr != End; ++Ptr) { 1553 // Eliminate trailing dosish \r. 
1554 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { 1555 continue; 1556 } 1557 1558 // If current char is not a horizontal whitespace or if horizontal 1559 // whitespace canonicalization is disabled, dump it to output as is. 1560 if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { 1561 OutputBuffer.push_back(*Ptr); 1562 continue; 1563 } 1564 1565 // Otherwise, add one space and advance over neighboring space. 1566 OutputBuffer.push_back(' '); 1567 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) 1568 ++Ptr; 1569 } 1570 1571 // Add a null byte and then return all but that byte. 1572 OutputBuffer.push_back('\0'); 1573 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); 1574 } 1575 1576 FileCheckDiag::FileCheckDiag(const SourceMgr &SM, 1577 const Check::FileCheckType &CheckTy, 1578 SMLoc CheckLoc, MatchType MatchTy, 1579 SMRange InputRange, StringRef Note) 1580 : CheckTy(CheckTy), CheckLoc(CheckLoc), MatchTy(MatchTy), Note(Note) { 1581 auto Start = SM.getLineAndColumn(InputRange.Start); 1582 auto End = SM.getLineAndColumn(InputRange.End); 1583 InputStartLine = Start.first; 1584 InputStartCol = Start.second; 1585 InputEndLine = End.first; 1586 InputEndCol = End.second; 1587 } 1588 1589 static bool IsPartOfWord(char c) { 1590 return (isAlnum(c) || c == '-' || c == '_'); 1591 } 1592 1593 Check::FileCheckType &Check::FileCheckType::setCount(int C) { 1594 assert(Count > 0 && "zero and negative counts are not supported"); 1595 assert((C == 1 || Kind == CheckPlain) && 1596 "count supported only for plain CHECK directives"); 1597 Count = C; 1598 return *this; 1599 } 1600 1601 std::string Check::FileCheckType::getModifiersDescription() const { 1602 if (Modifiers.none()) 1603 return ""; 1604 std::string Ret; 1605 raw_string_ostream OS(Ret); 1606 OS << '{'; 1607 if (isLiteralMatch()) 1608 OS << "LITERAL"; 1609 OS << '}'; 1610 return OS.str(); 1611 } 1612 1613 std::string Check::FileCheckType::getDescription(StringRef Prefix) 
const { 1614 // Append directive modifiers. 1615 auto WithModifiers = [this, Prefix](StringRef Str) -> std::string { 1616 return (Prefix + Str + getModifiersDescription()).str(); 1617 }; 1618 1619 switch (Kind) { 1620 case Check::CheckNone: 1621 return "invalid"; 1622 case Check::CheckPlain: 1623 if (Count > 1) 1624 return WithModifiers("-COUNT"); 1625 return WithModifiers(""); 1626 case Check::CheckNext: 1627 return WithModifiers("-NEXT"); 1628 case Check::CheckSame: 1629 return WithModifiers("-SAME"); 1630 case Check::CheckNot: 1631 return WithModifiers("-NOT"); 1632 case Check::CheckDAG: 1633 return WithModifiers("-DAG"); 1634 case Check::CheckLabel: 1635 return WithModifiers("-LABEL"); 1636 case Check::CheckEmpty: 1637 return WithModifiers("-EMPTY"); 1638 case Check::CheckComment: 1639 return std::string(Prefix); 1640 case Check::CheckEOF: 1641 return "implicit EOF"; 1642 case Check::CheckBadNot: 1643 return "bad NOT"; 1644 case Check::CheckBadCount: 1645 return "bad COUNT"; 1646 } 1647 llvm_unreachable("unknown FileCheckType"); 1648 } 1649 1650 static std::pair<Check::FileCheckType, StringRef> 1651 FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { 1652 if (Buffer.size() <= Prefix.size()) 1653 return {Check::CheckNone, StringRef()}; 1654 1655 StringRef Rest = Buffer.drop_front(Prefix.size()); 1656 // Check for comment. 1657 if (llvm::is_contained(Req.CommentPrefixes, Prefix)) { 1658 if (Rest.consume_front(":")) 1659 return {Check::CheckComment, Rest}; 1660 // Ignore a comment prefix if it has a suffix like "-NOT". 1661 return {Check::CheckNone, StringRef()}; 1662 } 1663 1664 auto ConsumeModifiers = [&](Check::FileCheckType Ret) 1665 -> std::pair<Check::FileCheckType, StringRef> { 1666 if (Rest.consume_front(":")) 1667 return {Ret, Rest}; 1668 if (!Rest.consume_front("{")) 1669 return {Check::CheckNone, StringRef()}; 1670 1671 // Parse the modifiers, speparated by commas. 1672 do { 1673 // Allow whitespace in modifiers list. 
1674 Rest = Rest.ltrim(); 1675 if (Rest.consume_front("LITERAL")) 1676 Ret.setLiteralMatch(); 1677 else 1678 return {Check::CheckNone, Rest}; 1679 // Allow whitespace in modifiers list. 1680 Rest = Rest.ltrim(); 1681 } while (Rest.consume_front(",")); 1682 if (!Rest.consume_front("}:")) 1683 return {Check::CheckNone, Rest}; 1684 return {Ret, Rest}; 1685 }; 1686 1687 // Verify that the prefix is followed by directive modifiers or a colon. 1688 if (Rest.consume_front(":")) 1689 return {Check::CheckPlain, Rest}; 1690 if (Rest.front() == '{') 1691 return ConsumeModifiers(Check::CheckPlain); 1692 1693 if (!Rest.consume_front("-")) 1694 return {Check::CheckNone, StringRef()}; 1695 1696 if (Rest.consume_front("COUNT-")) { 1697 int64_t Count; 1698 if (Rest.consumeInteger(10, Count)) 1699 // Error happened in parsing integer. 1700 return {Check::CheckBadCount, Rest}; 1701 if (Count <= 0 || Count > INT32_MAX) 1702 return {Check::CheckBadCount, Rest}; 1703 if (Rest.front() != ':' && Rest.front() != '{') 1704 return {Check::CheckBadCount, Rest}; 1705 return ConsumeModifiers( 1706 Check::FileCheckType(Check::CheckPlain).setCount(Count)); 1707 } 1708 1709 // You can't combine -NOT with another suffix. 
1710 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || 1711 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || 1712 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") || 1713 Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:")) 1714 return {Check::CheckBadNot, Rest}; 1715 1716 if (Rest.consume_front("NEXT")) 1717 return ConsumeModifiers(Check::CheckNext); 1718 1719 if (Rest.consume_front("SAME")) 1720 return ConsumeModifiers(Check::CheckSame); 1721 1722 if (Rest.consume_front("NOT")) 1723 return ConsumeModifiers(Check::CheckNot); 1724 1725 if (Rest.consume_front("DAG")) 1726 return ConsumeModifiers(Check::CheckDAG); 1727 1728 if (Rest.consume_front("LABEL")) 1729 return ConsumeModifiers(Check::CheckLabel); 1730 1731 if (Rest.consume_front("EMPTY")) 1732 return ConsumeModifiers(Check::CheckEmpty); 1733 1734 return {Check::CheckNone, Rest}; 1735 } 1736 1737 // From the given position, find the next character after the word. 1738 static size_t SkipWord(StringRef Str, size_t Loc) { 1739 while (Loc < Str.size() && IsPartOfWord(Str[Loc])) 1740 ++Loc; 1741 return Loc; 1742 } 1743 1744 /// Searches the buffer for the first prefix in the prefix regular expression. 1745 /// 1746 /// This searches the buffer using the provided regular expression, however it 1747 /// enforces constraints beyond that: 1748 /// 1) The found prefix must not be a suffix of something that looks like 1749 /// a valid prefix. 1750 /// 2) The found prefix must be followed by a valid check type suffix using \c 1751 /// FindCheckType above. 1752 /// 1753 /// \returns a pair of StringRefs into the Buffer, which combines: 1754 /// - the first match of the regular expression to satisfy these two is 1755 /// returned, 1756 /// otherwise an empty StringRef is returned to indicate failure. 
1757 /// - buffer rewound to the location right after parsed suffix, for parsing 1758 /// to continue from 1759 /// 1760 /// If this routine returns a valid prefix, it will also shrink \p Buffer to 1761 /// start at the beginning of the returned prefix, increment \p LineNumber for 1762 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of 1763 /// check found by examining the suffix. 1764 /// 1765 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy 1766 /// is unspecified. 1767 static std::pair<StringRef, StringRef> 1768 FindFirstMatchingPrefix(const FileCheckRequest &Req, Regex &PrefixRE, 1769 StringRef &Buffer, unsigned &LineNumber, 1770 Check::FileCheckType &CheckTy) { 1771 SmallVector<StringRef, 2> Matches; 1772 1773 while (!Buffer.empty()) { 1774 // Find the first (longest) match using the RE. 1775 if (!PrefixRE.match(Buffer, &Matches)) 1776 // No match at all, bail. 1777 return {StringRef(), StringRef()}; 1778 1779 StringRef Prefix = Matches[0]; 1780 Matches.clear(); 1781 1782 assert(Prefix.data() >= Buffer.data() && 1783 Prefix.data() < Buffer.data() + Buffer.size() && 1784 "Prefix doesn't start inside of buffer!"); 1785 size_t Loc = Prefix.data() - Buffer.data(); 1786 StringRef Skipped = Buffer.substr(0, Loc); 1787 Buffer = Buffer.drop_front(Loc); 1788 LineNumber += Skipped.count('\n'); 1789 1790 // Check that the matched prefix isn't a suffix of some other check-like 1791 // word. 1792 // FIXME: This is a very ad-hoc check. it would be better handled in some 1793 // other way. Among other things it seems hard to distinguish between 1794 // intentional and unintentional uses of this feature. 1795 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { 1796 // Now extract the type. 1797 StringRef AfterSuffix; 1798 std::tie(CheckTy, AfterSuffix) = FindCheckType(Req, Buffer, Prefix); 1799 1800 // If we've found a valid check type for this prefix, we're done. 
1801 if (CheckTy != Check::CheckNone) 1802 return {Prefix, AfterSuffix}; 1803 } 1804 1805 // If we didn't successfully find a prefix, we need to skip this invalid 1806 // prefix and continue scanning. We directly skip the prefix that was 1807 // matched and any additional parts of that check-like word. 1808 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); 1809 } 1810 1811 // We ran out of buffer while skipping partial matches so give up. 1812 return {StringRef(), StringRef()}; 1813 } 1814 1815 void FileCheckPatternContext::createLineVariable() { 1816 assert(!LineVariable && "@LINE pseudo numeric variable already created"); 1817 StringRef LineName = "@LINE"; 1818 LineVariable = makeNumericVariable( 1819 LineName, ExpressionFormat(ExpressionFormat::Kind::Unsigned)); 1820 GlobalNumericVariableTable[LineName] = LineVariable; 1821 } 1822 1823 FileCheck::FileCheck(FileCheckRequest Req) 1824 : Req(Req), PatternContext(std::make_unique<FileCheckPatternContext>()), 1825 CheckStrings(std::make_unique<std::vector<FileCheckString>>()) {} 1826 1827 FileCheck::~FileCheck() = default; 1828 1829 bool FileCheck::readCheckFile( 1830 SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, 1831 std::pair<unsigned, unsigned> *ImpPatBufferIDRange) { 1832 if (ImpPatBufferIDRange) 1833 ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0; 1834 1835 Error DefineError = 1836 PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM); 1837 if (DefineError) { 1838 logAllUnhandledErrors(std::move(DefineError), errs()); 1839 return true; 1840 } 1841 1842 PatternContext->createLineVariable(); 1843 1844 std::vector<Pattern> ImplicitNegativeChecks; 1845 for (StringRef PatternString : Req.ImplicitCheckNot) { 1846 // Create a buffer with fake command line content in order to display the 1847 // command line option responsible for the specific implicit CHECK-NOT. 
1848 std::string Prefix = "-implicit-check-not='"; 1849 std::string Suffix = "'"; 1850 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( 1851 (Prefix + PatternString + Suffix).str(), "command line"); 1852 1853 StringRef PatternInBuffer = 1854 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); 1855 unsigned BufferID = SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); 1856 if (ImpPatBufferIDRange) { 1857 if (ImpPatBufferIDRange->first == ImpPatBufferIDRange->second) { 1858 ImpPatBufferIDRange->first = BufferID; 1859 ImpPatBufferIDRange->second = BufferID + 1; 1860 } else { 1861 assert(BufferID == ImpPatBufferIDRange->second && 1862 "expected consecutive source buffer IDs"); 1863 ++ImpPatBufferIDRange->second; 1864 } 1865 } 1866 1867 ImplicitNegativeChecks.push_back( 1868 Pattern(Check::CheckNot, PatternContext.get())); 1869 ImplicitNegativeChecks.back().parsePattern(PatternInBuffer, 1870 "IMPLICIT-CHECK", SM, Req); 1871 } 1872 1873 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; 1874 1875 // LineNumber keeps track of the line on which CheckPrefix instances are 1876 // found. 1877 unsigned LineNumber = 1; 1878 1879 std::set<StringRef> PrefixesNotFound(Req.CheckPrefixes.begin(), 1880 Req.CheckPrefixes.end()); 1881 const size_t DistinctPrefixes = PrefixesNotFound.size(); 1882 while (true) { 1883 Check::FileCheckType CheckTy; 1884 1885 // See if a prefix occurs in the memory buffer. 
1886 StringRef UsedPrefix; 1887 StringRef AfterSuffix; 1888 std::tie(UsedPrefix, AfterSuffix) = 1889 FindFirstMatchingPrefix(Req, PrefixRE, Buffer, LineNumber, CheckTy); 1890 if (UsedPrefix.empty()) 1891 break; 1892 if (CheckTy != Check::CheckComment) 1893 PrefixesNotFound.erase(UsedPrefix); 1894 1895 assert(UsedPrefix.data() == Buffer.data() && 1896 "Failed to move Buffer's start forward, or pointed prefix outside " 1897 "of the buffer!"); 1898 assert(AfterSuffix.data() >= Buffer.data() && 1899 AfterSuffix.data() < Buffer.data() + Buffer.size() && 1900 "Parsing after suffix doesn't start inside of buffer!"); 1901 1902 // Location to use for error messages. 1903 const char *UsedPrefixStart = UsedPrefix.data(); 1904 1905 // Skip the buffer to the end of parsed suffix (or just prefix, if no good 1906 // suffix was processed). 1907 Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size()) 1908 : AfterSuffix; 1909 1910 // Complain about useful-looking but unsupported suffixes. 1911 if (CheckTy == Check::CheckBadNot) { 1912 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 1913 "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); 1914 return true; 1915 } 1916 1917 // Complain about invalid count specification. 1918 if (CheckTy == Check::CheckBadCount) { 1919 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, 1920 "invalid count in -COUNT specification on prefix '" + 1921 UsedPrefix + "'"); 1922 return true; 1923 } 1924 1925 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore 1926 // leading whitespace. 1927 if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) 1928 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); 1929 1930 // Scan ahead to the end of line. 1931 size_t EOL = Buffer.find_first_of("\n\r"); 1932 1933 // Remember the location of the start of the pattern, for diagnostics. 
1934 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); 1935 1936 // Extract the pattern from the buffer. 1937 StringRef PatternBuffer = Buffer.substr(0, EOL); 1938 Buffer = Buffer.substr(EOL); 1939 1940 // If this is a comment, we're done. 1941 if (CheckTy == Check::CheckComment) 1942 continue; 1943 1944 // Parse the pattern. 1945 Pattern P(CheckTy, PatternContext.get(), LineNumber); 1946 if (P.parsePattern(PatternBuffer, UsedPrefix, SM, Req)) 1947 return true; 1948 1949 // Verify that CHECK-LABEL lines do not define or use variables 1950 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { 1951 SM.PrintMessage( 1952 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, 1953 "found '" + UsedPrefix + "-LABEL:'" 1954 " with variable definition or use"); 1955 return true; 1956 } 1957 1958 // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. 1959 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || 1960 CheckTy == Check::CheckEmpty) && 1961 CheckStrings->empty()) { 1962 StringRef Type = CheckTy == Check::CheckNext 1963 ? "NEXT" 1964 : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; 1965 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), 1966 SourceMgr::DK_Error, 1967 "found '" + UsedPrefix + "-" + Type + 1968 "' without previous '" + UsedPrefix + ": line"); 1969 return true; 1970 } 1971 1972 // Handle CHECK-DAG/-NOT. 1973 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { 1974 DagNotMatches.push_back(P); 1975 continue; 1976 } 1977 1978 // Okay, add the string we captured to the output vector and move on. 1979 CheckStrings->emplace_back(P, UsedPrefix, PatternLoc); 1980 std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); 1981 DagNotMatches = ImplicitNegativeChecks; 1982 } 1983 1984 // When there are no used prefixes we report an error except in the case that 1985 // no prefix is specified explicitly but -implicit-check-not is specified. 
1986 const bool NoPrefixesFound = PrefixesNotFound.size() == DistinctPrefixes; 1987 const bool SomePrefixesUnexpectedlyNotUsed = 1988 !Req.AllowUnusedPrefixes && !PrefixesNotFound.empty(); 1989 if ((NoPrefixesFound || SomePrefixesUnexpectedlyNotUsed) && 1990 (ImplicitNegativeChecks.empty() || !Req.IsDefaultCheckPrefix)) { 1991 errs() << "error: no check strings found with prefix" 1992 << (PrefixesNotFound.size() > 1 ? "es " : " "); 1993 bool First = true; 1994 for (StringRef MissingPrefix : PrefixesNotFound) { 1995 if (!First) 1996 errs() << ", "; 1997 errs() << "\'" << MissingPrefix << ":'"; 1998 First = false; 1999 } 2000 errs() << '\n'; 2001 return true; 2002 } 2003 2004 // Add an EOF pattern for any trailing --implicit-check-not/CHECK-DAG/-NOTs, 2005 // and use the first prefix as a filler for the error message. 2006 if (!DagNotMatches.empty()) { 2007 CheckStrings->emplace_back( 2008 Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1), 2009 *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data())); 2010 std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); 2011 } 2012 2013 return false; 2014 } 2015 2016 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, 2017 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 2018 int MatchedCount, StringRef Buffer, size_t MatchPos, 2019 size_t MatchLen, const FileCheckRequest &Req, 2020 std::vector<FileCheckDiag> *Diags) { 2021 bool PrintDiag = true; 2022 if (ExpectedMatch) { 2023 if (!Req.Verbose) 2024 return; 2025 if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF) 2026 return; 2027 // Due to their verbosity, we don't print verbose diagnostics here if we're 2028 // gathering them for a different rendering, but we always print other 2029 // diagnostics. 2030 PrintDiag = !Diags; 2031 } 2032 FileCheckDiag::MatchType MatchTy = ExpectedMatch 2033 ? 
FileCheckDiag::MatchFoundAndExpected 2034 : FileCheckDiag::MatchFoundButExcluded; 2035 SMRange MatchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(), 2036 Buffer, MatchPos, MatchLen, Diags); 2037 if (Diags) { 2038 Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, Diags); 2039 Pat.printVariableDefs(SM, MatchTy, Diags); 2040 } 2041 if (!PrintDiag) 2042 return; 2043 2044 std::string Message = formatv("{0}: {1} string found in input", 2045 Pat.getCheckTy().getDescription(Prefix), 2046 (ExpectedMatch ? "expected" : "excluded")) 2047 .str(); 2048 if (Pat.getCount() > 1) 2049 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); 2050 2051 SM.PrintMessage( 2052 Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message); 2053 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here", 2054 {MatchRange}); 2055 Pat.printSubstitutions(SM, Buffer, MatchRange, MatchTy, nullptr); 2056 Pat.printVariableDefs(SM, MatchTy, nullptr); 2057 } 2058 2059 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, 2060 const FileCheckString &CheckStr, int MatchedCount, 2061 StringRef Buffer, size_t MatchPos, size_t MatchLen, 2062 FileCheckRequest &Req, 2063 std::vector<FileCheckDiag> *Diags) { 2064 PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, 2065 MatchedCount, Buffer, MatchPos, MatchLen, Req, Diags); 2066 } 2067 2068 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, 2069 StringRef Prefix, SMLoc Loc, const Pattern &Pat, 2070 int MatchedCount, StringRef Buffer, 2071 bool VerboseVerbose, std::vector<FileCheckDiag> *Diags, 2072 Error MatchErrors) { 2073 assert(MatchErrors && "Called on successful match"); 2074 bool PrintDiag = true; 2075 if (!ExpectedMatch) { 2076 if (!VerboseVerbose) { 2077 consumeError(std::move(MatchErrors)); 2078 return; 2079 } 2080 // Due to their verbosity, we don't print verbose diagnostics here if we're 2081 // gathering them for a different rendering, 
but we always print other 2082 // diagnostics. 2083 PrintDiag = !Diags; 2084 } 2085 2086 FileCheckDiag::MatchType MatchTy = ExpectedMatch 2087 ? FileCheckDiag::MatchNoneButExpected 2088 : FileCheckDiag::MatchNoneAndExcluded; 2089 SMRange SearchRange = ProcessMatchResult(MatchTy, SM, Loc, Pat.getCheckTy(), 2090 Buffer, 0, Buffer.size(), Diags); 2091 if (Diags) 2092 Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, Diags); 2093 if (!PrintDiag) { 2094 consumeError(std::move(MatchErrors)); 2095 return; 2096 } 2097 2098 MatchErrors = handleErrors(std::move(MatchErrors), 2099 [](const ErrorDiagnostic &E) { E.log(errs()); }); 2100 2101 // No problem matching the string per se. 2102 if (!MatchErrors) 2103 return; 2104 consumeError(std::move(MatchErrors)); 2105 2106 // Print "not found" diagnostic. 2107 std::string Message = formatv("{0}: {1} string not found in input", 2108 Pat.getCheckTy().getDescription(Prefix), 2109 (ExpectedMatch ? "expected" : "excluded")) 2110 .str(); 2111 if (Pat.getCount() > 1) 2112 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); 2113 SM.PrintMessage( 2114 Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message); 2115 2116 // Print the "scanning from here" line. 2117 SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here"); 2118 2119 // Allow the pattern to print additional information if desired. 
2120 Pat.printSubstitutions(SM, Buffer, SearchRange, MatchTy, nullptr); 2121 2122 if (ExpectedMatch) 2123 Pat.printFuzzyMatch(SM, Buffer, Diags); 2124 } 2125 2126 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, 2127 const FileCheckString &CheckStr, int MatchedCount, 2128 StringRef Buffer, bool VerboseVerbose, 2129 std::vector<FileCheckDiag> *Diags, Error MatchErrors) { 2130 PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, 2131 MatchedCount, Buffer, VerboseVerbose, Diags, 2132 std::move(MatchErrors)); 2133 } 2134 2135 /// Counts the number of newlines in the specified range. 2136 static unsigned CountNumNewlinesBetween(StringRef Range, 2137 const char *&FirstNewLine) { 2138 unsigned NumNewLines = 0; 2139 while (1) { 2140 // Scan for newline. 2141 Range = Range.substr(Range.find_first_of("\n\r")); 2142 if (Range.empty()) 2143 return NumNewLines; 2144 2145 ++NumNewLines; 2146 2147 // Handle \n\r and \r\n as a single newline. 2148 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && 2149 (Range[0] != Range[1])) 2150 Range = Range.substr(1); 2151 Range = Range.substr(1); 2152 2153 if (NumNewLines == 1) 2154 FirstNewLine = Range.begin(); 2155 } 2156 } 2157 2158 size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer, 2159 bool IsLabelScanMode, size_t &MatchLen, 2160 FileCheckRequest &Req, 2161 std::vector<FileCheckDiag> *Diags) const { 2162 size_t LastPos = 0; 2163 std::vector<const Pattern *> NotStrings; 2164 2165 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL 2166 // bounds; we have not processed variable definitions within the bounded block 2167 // yet so cannot handle any final CHECK-DAG yet; this is handled when going 2168 // over the block again (including the last CHECK-LABEL) in normal mode. 2169 if (!IsLabelScanMode) { 2170 // Match "dag strings" (with mixed "not strings" if any). 
2171 LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags); 2172 if (LastPos == StringRef::npos) 2173 return StringRef::npos; 2174 } 2175 2176 // Match itself from the last position after matching CHECK-DAG. 2177 size_t LastMatchEnd = LastPos; 2178 size_t FirstMatchPos = 0; 2179 // Go match the pattern Count times. Majority of patterns only match with 2180 // count 1 though. 2181 assert(Pat.getCount() != 0 && "pattern count can not be zero"); 2182 for (int i = 1; i <= Pat.getCount(); i++) { 2183 StringRef MatchBuffer = Buffer.substr(LastMatchEnd); 2184 size_t CurrentMatchLen; 2185 // get a match at current start point 2186 Expected<size_t> MatchResult = Pat.match(MatchBuffer, CurrentMatchLen, SM); 2187 2188 // report 2189 if (!MatchResult) { 2190 PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags, 2191 MatchResult.takeError()); 2192 return StringRef::npos; 2193 } 2194 size_t MatchPos = *MatchResult; 2195 PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req, 2196 Diags); 2197 if (i == 1) 2198 FirstMatchPos = LastPos + MatchPos; 2199 2200 // move start point after the match 2201 LastMatchEnd += MatchPos + CurrentMatchLen; 2202 } 2203 // Full match len counts from first match pos. 2204 MatchLen = LastMatchEnd - FirstMatchPos; 2205 2206 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT 2207 // or CHECK-NOT 2208 if (!IsLabelScanMode) { 2209 size_t MatchPos = FirstMatchPos - LastPos; 2210 StringRef MatchBuffer = Buffer.substr(LastPos); 2211 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); 2212 2213 // If this check is a "CHECK-NEXT", verify that the previous match was on 2214 // the previous line (i.e. that there is one newline between them). 
2215 if (CheckNext(SM, SkippedRegion)) { 2216 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, 2217 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, 2218 Diags, Req.Verbose); 2219 return StringRef::npos; 2220 } 2221 2222 // If this check is a "CHECK-SAME", verify that the previous match was on 2223 // the same line (i.e. that there is no newline between them). 2224 if (CheckSame(SM, SkippedRegion)) { 2225 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, 2226 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, 2227 Diags, Req.Verbose); 2228 return StringRef::npos; 2229 } 2230 2231 // If this match had "not strings", verify that they don't exist in the 2232 // skipped region. 2233 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) 2234 return StringRef::npos; 2235 } 2236 2237 return FirstMatchPos; 2238 } 2239 2240 bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { 2241 if (Pat.getCheckTy() != Check::CheckNext && 2242 Pat.getCheckTy() != Check::CheckEmpty) 2243 return false; 2244 2245 Twine CheckName = 2246 Prefix + 2247 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT"); 2248 2249 // Count the number of newlines between the previous match and this one. 
2250 const char *FirstNewLine = nullptr; 2251 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 2252 2253 if (NumNewLines == 0) { 2254 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2255 CheckName + ": is on the same line as previous match"); 2256 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2257 "'next' match was here"); 2258 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2259 "previous match ended here"); 2260 return true; 2261 } 2262 2263 if (NumNewLines != 1) { 2264 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2265 CheckName + 2266 ": is not on the line after the previous match"); 2267 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2268 "'next' match was here"); 2269 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2270 "previous match ended here"); 2271 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, 2272 "non-matching line after previous match is here"); 2273 return true; 2274 } 2275 2276 return false; 2277 } 2278 2279 bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { 2280 if (Pat.getCheckTy() != Check::CheckSame) 2281 return false; 2282 2283 // Count the number of newlines between the previous match and this one. 
2284 const char *FirstNewLine = nullptr; 2285 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); 2286 2287 if (NumNewLines != 0) { 2288 SM.PrintMessage(Loc, SourceMgr::DK_Error, 2289 Prefix + 2290 "-SAME: is not on the same line as the previous match"); 2291 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, 2292 "'next' match was here"); 2293 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, 2294 "previous match ended here"); 2295 return true; 2296 } 2297 2298 return false; 2299 } 2300 2301 bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, 2302 const std::vector<const Pattern *> &NotStrings, 2303 const FileCheckRequest &Req, 2304 std::vector<FileCheckDiag> *Diags) const { 2305 bool DirectiveFail = false; 2306 for (const Pattern *Pat : NotStrings) { 2307 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); 2308 2309 size_t MatchLen = 0; 2310 Expected<size_t> MatchResult = Pat->match(Buffer, MatchLen, SM); 2311 2312 if (!MatchResult) { 2313 PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, 2314 Req.VerboseVerbose, Diags, MatchResult.takeError()); 2315 continue; 2316 } 2317 size_t Pos = *MatchResult; 2318 2319 PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen, 2320 Req, Diags); 2321 DirectiveFail = true; 2322 } 2323 2324 return DirectiveFail; 2325 } 2326 2327 size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, 2328 std::vector<const Pattern *> &NotStrings, 2329 const FileCheckRequest &Req, 2330 std::vector<FileCheckDiag> *Diags) const { 2331 if (DagNotStrings.empty()) 2332 return 0; 2333 2334 // The start of the search range. 2335 size_t StartPos = 0; 2336 2337 struct MatchRange { 2338 size_t Pos; 2339 size_t End; 2340 }; 2341 // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match 2342 // ranges are erased from this list once they are no longer in the search 2343 // range. 
2344 std::list<MatchRange> MatchRanges; 2345 2346 // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG 2347 // group, so we don't use a range-based for loop here. 2348 for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end(); 2349 PatItr != PatEnd; ++PatItr) { 2350 const Pattern &Pat = *PatItr; 2351 assert((Pat.getCheckTy() == Check::CheckDAG || 2352 Pat.getCheckTy() == Check::CheckNot) && 2353 "Invalid CHECK-DAG or CHECK-NOT!"); 2354 2355 if (Pat.getCheckTy() == Check::CheckNot) { 2356 NotStrings.push_back(&Pat); 2357 continue; 2358 } 2359 2360 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); 2361 2362 // CHECK-DAG always matches from the start. 2363 size_t MatchLen = 0, MatchPos = StartPos; 2364 2365 // Search for a match that doesn't overlap a previous match in this 2366 // CHECK-DAG group. 2367 for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) { 2368 StringRef MatchBuffer = Buffer.substr(MatchPos); 2369 Expected<size_t> MatchResult = Pat.match(MatchBuffer, MatchLen, SM); 2370 // With a group of CHECK-DAGs, a single mismatching means the match on 2371 // that group of CHECK-DAGs fails immediately. 2372 if (!MatchResult) { 2373 PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer, 2374 Req.VerboseVerbose, Diags, MatchResult.takeError()); 2375 return StringRef::npos; 2376 } 2377 size_t MatchPosBuf = *MatchResult; 2378 // Re-calc it as the offset relative to the start of the original string. 2379 MatchPos += MatchPosBuf; 2380 if (Req.VerboseVerbose) 2381 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos, 2382 MatchLen, Req, Diags); 2383 MatchRange M{MatchPos, MatchPos + MatchLen}; 2384 if (Req.AllowDeprecatedDagOverlap) { 2385 // We don't need to track all matches in this mode, so we just maintain 2386 // one match range that encompasses the current CHECK-DAG group's 2387 // matches. 
2388 if (MatchRanges.empty()) 2389 MatchRanges.insert(MatchRanges.end(), M); 2390 else { 2391 auto Block = MatchRanges.begin(); 2392 Block->Pos = std::min(Block->Pos, M.Pos); 2393 Block->End = std::max(Block->End, M.End); 2394 } 2395 break; 2396 } 2397 // Iterate previous matches until overlapping match or insertion point. 2398 bool Overlap = false; 2399 for (; MI != ME; ++MI) { 2400 if (M.Pos < MI->End) { 2401 // !Overlap => New match has no overlap and is before this old match. 2402 // Overlap => New match overlaps this old match. 2403 Overlap = MI->Pos < M.End; 2404 break; 2405 } 2406 } 2407 if (!Overlap) { 2408 // Insert non-overlapping match into list. 2409 MatchRanges.insert(MI, M); 2410 break; 2411 } 2412 if (Req.VerboseVerbose) { 2413 // Due to their verbosity, we don't print verbose diagnostics here if 2414 // we're gathering them for a different rendering, but we always print 2415 // other diagnostics. 2416 if (!Diags) { 2417 SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos); 2418 SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End); 2419 SMRange OldRange(OldStart, OldEnd); 2420 SM.PrintMessage(OldStart, SourceMgr::DK_Note, 2421 "match discarded, overlaps earlier DAG match here", 2422 {OldRange}); 2423 } else { 2424 SMLoc CheckLoc = Diags->rbegin()->CheckLoc; 2425 for (auto I = Diags->rbegin(), E = Diags->rend(); 2426 I != E && I->CheckLoc == CheckLoc; ++I) 2427 I->MatchTy = FileCheckDiag::MatchFoundButDiscarded; 2428 } 2429 } 2430 MatchPos = MI->End; 2431 } 2432 if (!Req.VerboseVerbose) 2433 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos, 2434 MatchLen, Req, Diags); 2435 2436 // Handle the end of a CHECK-DAG group. 2437 if (std::next(PatItr) == PatEnd || 2438 std::next(PatItr)->getCheckTy() == Check::CheckNot) { 2439 if (!NotStrings.empty()) { 2440 // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to 2441 // CHECK-DAG, verify that there are no 'not' strings occurred in that 2442 // region. 
2443 StringRef SkippedRegion = 2444 Buffer.slice(StartPos, MatchRanges.begin()->Pos); 2445 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) 2446 return StringRef::npos; 2447 // Clear "not strings". 2448 NotStrings.clear(); 2449 } 2450 // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the 2451 // end of this CHECK-DAG group's match range. 2452 StartPos = MatchRanges.rbegin()->End; 2453 // Don't waste time checking for (impossible) overlaps before that. 2454 MatchRanges.clear(); 2455 } 2456 } 2457 2458 return StartPos; 2459 } 2460 2461 static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes, 2462 ArrayRef<StringRef> SuppliedPrefixes) { 2463 for (StringRef Prefix : SuppliedPrefixes) { 2464 if (Prefix.empty()) { 2465 errs() << "error: supplied " << Kind << " prefix must not be the empty " 2466 << "string\n"; 2467 return false; 2468 } 2469 static const Regex Validator("^[a-zA-Z0-9_-]*$"); 2470 if (!Validator.match(Prefix)) { 2471 errs() << "error: supplied " << Kind << " prefix must start with a " 2472 << "letter and contain only alphanumeric characters, hyphens, and " 2473 << "underscores: '" << Prefix << "'\n"; 2474 return false; 2475 } 2476 if (!UniquePrefixes.insert(Prefix).second) { 2477 errs() << "error: supplied " << Kind << " prefix must be unique among " 2478 << "check and comment prefixes: '" << Prefix << "'\n"; 2479 return false; 2480 } 2481 } 2482 return true; 2483 } 2484 2485 static const char *DefaultCheckPrefixes[] = {"CHECK"}; 2486 static const char *DefaultCommentPrefixes[] = {"COM", "RUN"}; 2487 2488 bool FileCheck::ValidateCheckPrefixes() { 2489 StringSet<> UniquePrefixes; 2490 // Add default prefixes to catch user-supplied duplicates of them below. 
2491 if (Req.CheckPrefixes.empty()) { 2492 for (const char *Prefix : DefaultCheckPrefixes) 2493 UniquePrefixes.insert(Prefix); 2494 } 2495 if (Req.CommentPrefixes.empty()) { 2496 for (const char *Prefix : DefaultCommentPrefixes) 2497 UniquePrefixes.insert(Prefix); 2498 } 2499 // Do not validate the default prefixes, or diagnostics about duplicates might 2500 // incorrectly indicate that they were supplied by the user. 2501 if (!ValidatePrefixes("check", UniquePrefixes, Req.CheckPrefixes)) 2502 return false; 2503 if (!ValidatePrefixes("comment", UniquePrefixes, Req.CommentPrefixes)) 2504 return false; 2505 return true; 2506 } 2507 2508 Regex FileCheck::buildCheckPrefixRegex() { 2509 if (Req.CheckPrefixes.empty()) { 2510 for (const char *Prefix : DefaultCheckPrefixes) 2511 Req.CheckPrefixes.push_back(Prefix); 2512 Req.IsDefaultCheckPrefix = true; 2513 } 2514 if (Req.CommentPrefixes.empty()) { 2515 for (const char *Prefix : DefaultCommentPrefixes) 2516 Req.CommentPrefixes.push_back(Prefix); 2517 } 2518 2519 // We already validated the contents of CheckPrefixes and CommentPrefixes so 2520 // just concatenate them as alternatives. 2521 SmallString<32> PrefixRegexStr; 2522 for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) { 2523 if (I != 0) 2524 PrefixRegexStr.push_back('|'); 2525 PrefixRegexStr.append(Req.CheckPrefixes[I]); 2526 } 2527 for (StringRef Prefix : Req.CommentPrefixes) { 2528 PrefixRegexStr.push_back('|'); 2529 PrefixRegexStr.append(Prefix); 2530 } 2531 2532 return Regex(PrefixRegexStr); 2533 } 2534 2535 Error FileCheckPatternContext::defineCmdlineVariables( 2536 ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) { 2537 assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() && 2538 "Overriding defined variable with command-line variable definitions"); 2539 2540 if (CmdlineDefines.empty()) 2541 return Error::success(); 2542 2543 // Create a string representing the vector of command-line definitions. 
// Each definition is on its own line and prefixed with a definition number to
  // clarify which definition a given diagnostic corresponds to.
  //
  // CmdlineDefsIndices records, for every command-line definition in order,
  // the (offset, length) of the text to parse inside CmdlineDefsDiag. A length
  // of 0 marks a definition without '=', which is reported further down.
  unsigned I = 0;
  Error Errs = Error::success();
  std::string CmdlineDefsDiag;
  SmallVector<std::pair<size_t, size_t>, 4> CmdlineDefsIndices;
  for (StringRef CmdlineDef : CmdlineDefines) {
    std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str();
    size_t EqIdx = CmdlineDef.find('=');
    if (EqIdx == StringRef::npos) {
      // No '=': nothing is appended to the diagnostic string, only an empty
      // range is recorded so that the error is raised in the second pass.
      CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0));
      continue;
    }
    // Numeric variable definition.
    if (CmdlineDef[0] == '#') {
      // Append a copy of the command-line definition adapted to use the same
      // format as in the input file to be able to reuse
      // parseNumericSubstitutionBlock.
      CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str();
      std::string SubstitutionStr = std::string(CmdlineDef);
      // Turn "#VAR=expr" into "#VAR:expr".
      SubstitutionStr[EqIdx] = ':';
      // The recorded range covers only the adapted text, i.e. what sits
      // between "[[" and "]]" in the diagnostic string.
      CmdlineDefsIndices.push_back(
          std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size()));
      CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str();
    } else {
      // String variable definition: the recorded range is the definition
      // itself, copied unchanged after the prefix.
      CmdlineDefsDiag += DefPrefix;
      CmdlineDefsIndices.push_back(
          std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size()));
      CmdlineDefsDiag += (CmdlineDef + "\n").str();
    }
  }

  // Create a buffer with fake command line content in order to display
  // parsing diagnostic with location information and point to the
  // global definition with invalid syntax.
  std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer =
      MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines");
  // Take the view of the buffer contents before ownership of the buffer is
  // handed over to SM; all ranges below are resolved against this view.
  StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer();
  SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc());

  // Second pass: parse each recorded range. Errors are accumulated in Errs
  // rather than returned immediately, so the remaining definitions are still
  // processed after a failure.
  for (std::pair<size_t, size_t> CmdlineDefIndices : CmdlineDefsIndices) {
    StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first,
                                                     CmdlineDefIndices.second);
    // An empty range was recorded above for definitions lacking '='.
    if (CmdlineDef.empty()) {
      Errs = joinErrors(
          std::move(Errs),
          ErrorDiagnostic::get(SM, CmdlineDef,
                               "missing equal sign in global definition"));
      continue;
    }

    // Numeric variable definition.
    if (CmdlineDef[0] == '#') {
      // Now parse the definition both to check that the syntax is correct and
      // to create the necessary class instance.
      // Skip the leading '#'.
      StringRef CmdlineDefExpr = CmdlineDef.substr(1);
      Optional<NumericVariable *> DefinedNumericVariable;
      Expected<std::unique_ptr<Expression>> ExpressionResult =
          Pattern::parseNumericSubstitutionBlock(
              CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM);
      if (!ExpressionResult) {
        Errs = joinErrors(std::move(Errs), ExpressionResult.takeError());
        continue;
      }
      std::unique_ptr<Expression> Expression = std::move(*ExpressionResult);
      // Now evaluate the expression whose value this variable should be set
      // to, since the expression of a command-line variable definition should
      // only use variables defined earlier on the command-line. If not, this
      // is an error and we report it.
      Expected<ExpressionValue> Value = Expression->getAST()->eval();
      if (!Value) {
        Errs = joinErrors(std::move(Errs), Value.takeError());
        continue;
      }

      assert(DefinedNumericVariable && "No variable defined");
      (*DefinedNumericVariable)->setValue(*Value);

      // Record this variable definition.
      GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] =
          *DefinedNumericVariable;
    } else {
      // String variable definition.
      // Split at the first '=' into (name, value).
      std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('=');
      StringRef CmdlineName = CmdlineNameVal.first;
      // Keep the full name for diagnostics.
      // NOTE(review): presumably parseVariable strips the parsed name off the
      // front of CmdlineName (the emptiness check below relies on that) --
      // confirm against its declaration.
      StringRef OrigCmdlineName = CmdlineName;
      Expected<Pattern::VariableProperties> ParseVarResult =
          Pattern::parseVariable(CmdlineName, SM);
      if (!ParseVarResult) {
        Errs = joinErrors(std::move(Errs), ParseVarResult.takeError());
        continue;
      }
      // Check that CmdlineName does not denote a pseudo variable and is only
      // composed of the parsed variable name. This catches cases like
      // "FOO+2" in a "FOO+2=10" definition.
      if (ParseVarResult->IsPseudo || !CmdlineName.empty()) {
        Errs = joinErrors(std::move(Errs),
                          ErrorDiagnostic::get(
                              SM, OrigCmdlineName,
                              "invalid name in string variable definition '" +
                                  OrigCmdlineName + "'"));
        continue;
      }
      StringRef Name = ParseVarResult->Name;

      // Detect collisions between string and numeric variables when the former
      // is created later than the latter.
      if (GlobalNumericVariableTable.find(Name) !=
          GlobalNumericVariableTable.end()) {
        Errs = joinErrors(std::move(Errs),
                          ErrorDiagnostic::get(SM, Name,
                                               "numeric variable with name '" +
                                                   Name + "' already exists"));
        continue;
      }
      GlobalVariableTable.insert(CmdlineNameVal);
      // Mark the string variable as defined to detect collisions between
      // string and numeric variables in defineCmdlineVariables when the latter
      // is created later than the former. We cannot reuse GlobalVariableTable
      // for this by populating it with an empty string since we would then
      // lose the ability to detect the use of an undefined variable in
      // match().
2666 DefinedVariableTable[Name] = true; 2667 } 2668 } 2669 2670 return Errs; 2671 } 2672 2673 void FileCheckPatternContext::clearLocalVars() { 2674 SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars; 2675 for (const StringMapEntry<StringRef> &Var : GlobalVariableTable) 2676 if (Var.first()[0] != '$') 2677 LocalPatternVars.push_back(Var.first()); 2678 2679 // Numeric substitution reads the value of a variable directly, not via 2680 // GlobalNumericVariableTable. Therefore, we clear local variables by 2681 // clearing their value which will lead to a numeric substitution failure. We 2682 // also mark the variable for removal from GlobalNumericVariableTable since 2683 // this is what defineCmdlineVariables checks to decide that no global 2684 // variable has been defined. 2685 for (const auto &Var : GlobalNumericVariableTable) 2686 if (Var.first()[0] != '$') { 2687 Var.getValue()->clearValue(); 2688 LocalNumericVars.push_back(Var.first()); 2689 } 2690 2691 for (const auto &Var : LocalPatternVars) 2692 GlobalVariableTable.erase(Var); 2693 for (const auto &Var : LocalNumericVars) 2694 GlobalNumericVariableTable.erase(Var); 2695 } 2696 2697 bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer, 2698 std::vector<FileCheckDiag> *Diags) { 2699 bool ChecksFailed = false; 2700 2701 unsigned i = 0, j = 0, e = CheckStrings->size(); 2702 while (true) { 2703 StringRef CheckRegion; 2704 if (j == e) { 2705 CheckRegion = Buffer; 2706 } else { 2707 const FileCheckString &CheckLabelStr = (*CheckStrings)[j]; 2708 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { 2709 ++j; 2710 continue; 2711 } 2712 2713 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG 2714 size_t MatchLabelLen = 0; 2715 size_t MatchLabelPos = 2716 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags); 2717 if (MatchLabelPos == StringRef::npos) 2718 // Immediately bail if CHECK-LABEL fails, nothing else we can do. 
2719 return false; 2720 2721 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); 2722 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); 2723 ++j; 2724 } 2725 2726 // Do not clear the first region as it's the one before the first 2727 // CHECK-LABEL and it would clear variables defined on the command-line 2728 // before they get used. 2729 if (i != 0 && Req.EnableVarScope) 2730 PatternContext->clearLocalVars(); 2731 2732 for (; i != j; ++i) { 2733 const FileCheckString &CheckStr = (*CheckStrings)[i]; 2734 2735 // Check each string within the scanned region, including a second check 2736 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) 2737 size_t MatchLen = 0; 2738 size_t MatchPos = 2739 CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags); 2740 2741 if (MatchPos == StringRef::npos) { 2742 ChecksFailed = true; 2743 i = j; 2744 break; 2745 } 2746 2747 CheckRegion = CheckRegion.substr(MatchPos + MatchLen); 2748 } 2749 2750 if (j == e) 2751 break; 2752 } 2753 2754 // Success if no checks failed. 2755 return !ChecksFailed; 2756 } 2757