1 //===- llvm/unittest/Support/RegexTest.cpp - Regex tests --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Support/Regex.h" 10 #include "llvm/ADT/SmallVector.h" 11 #include "gtest/gtest.h" 12 #include <cstring> 13 14 using namespace llvm; 15 namespace { 16 17 class RegexTest : public ::testing::Test { 18 }; 19 20 TEST_F(RegexTest, Basics) { 21 Regex r1("^[0-9]+$"); 22 EXPECT_TRUE(r1.match("916")); 23 EXPECT_TRUE(r1.match("9")); 24 EXPECT_FALSE(r1.match("9a")); 25 26 SmallVector<StringRef, 1> Matches; 27 Regex r2("[0-9]+"); 28 EXPECT_TRUE(r2.match("aa216b", &Matches)); 29 EXPECT_EQ(1u, Matches.size()); 30 EXPECT_EQ("216", Matches[0].str()); 31 32 Regex r3("[0-9]+([a-f])?:([0-9]+)"); 33 EXPECT_TRUE(r3.match("9a:513b", &Matches)); 34 EXPECT_EQ(3u, Matches.size()); 35 EXPECT_EQ("9a:513", Matches[0].str()); 36 EXPECT_EQ("a", Matches[1].str()); 37 EXPECT_EQ("513", Matches[2].str()); 38 39 EXPECT_TRUE(r3.match("9:513b", &Matches)); 40 EXPECT_EQ(3u, Matches.size()); 41 EXPECT_EQ("9:513", Matches[0].str()); 42 EXPECT_EQ("", Matches[1].str()); 43 EXPECT_EQ("513", Matches[2].str()); 44 45 Regex r4("a[^b]+b"); 46 std::string String="axxb"; 47 String[2] = '\0'; 48 EXPECT_FALSE(r4.match("abb")); 49 EXPECT_TRUE(r4.match(String, &Matches)); 50 EXPECT_EQ(1u, Matches.size()); 51 EXPECT_EQ(String, Matches[0].str()); 52 53 std::string NulPattern="X[0-9]+X([a-f])?:([0-9]+)"; 54 String="YX99a:513b"; 55 NulPattern[7] = '\0'; 56 Regex r5(NulPattern); 57 EXPECT_FALSE(r5.match(String)); 58 EXPECT_FALSE(r5.match("X9")); 59 String[3]='\0'; 60 EXPECT_TRUE(r5.match(String)); 61 } 62 63 TEST_F(RegexTest, Backreferences) { 64 Regex r1("([a-z]+)_\\1"); 65 SmallVector<StringRef, 4> Matches; 66 EXPECT_TRUE(r1.match("abc_abc", &Matches)); 67 EXPECT_EQ(2u, Matches.size()); 68 EXPECT_FALSE(r1.match("abc_ab", &Matches)); 69 70 Regex r2("a([0-9])b\\1c\\1"); 71 EXPECT_TRUE(r2.match("a4b4c4", &Matches)); 72 EXPECT_EQ(2u, Matches.size()); 73 EXPECT_EQ("4", Matches[1].str()); 74 EXPECT_FALSE(r2.match("a2b2c3")); 75 76 Regex r3("a([0-9])([a-z])b\\1\\2"); 77 EXPECT_TRUE(r3.match("a6zb6z", &Matches)); 78 EXPECT_EQ(3u, Matches.size()); 79 EXPECT_EQ("6", Matches[1].str()); 80 EXPECT_EQ("z", Matches[2].str()); 81 EXPECT_FALSE(r3.match("a6zb6y")); 82 EXPECT_FALSE(r3.match("a6zb7z")); 83 84 Regex r4("(abc|xyz|uvw)_\\1"); 85 EXPECT_TRUE(r4.match("abc_abc", &Matches)); 86 EXPECT_EQ(2u, Matches.size()); 87 EXPECT_FALSE(r4.match("abc_ab", &Matches)); 88 EXPECT_FALSE(r4.match("abc_xyz", &Matches)); 89 90 Regex r5("(xyz|abc|uvw)_\\1"); 91 EXPECT_TRUE(r5.match("abc_abc", &Matches)); 92 EXPECT_EQ(2u, Matches.size()); 93 EXPECT_FALSE(r5.match("abc_ab", &Matches)); 94 EXPECT_FALSE(r5.match("abc_xyz", &Matches)); 95 96 Regex r6("(xyz|uvw|abc)_\\1"); 97 EXPECT_TRUE(r6.match("abc_abc", &Matches)); 98 EXPECT_EQ(2u, Matches.size()); 99 EXPECT_FALSE(r6.match("abc_ab", &Matches)); 100 EXPECT_FALSE(r6.match("abc_xyz", &Matches)); 101 } 102 103 TEST_F(RegexTest, Substitution) { 104 std::string Error; 105 106 EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", "a1234ber")); 107 108 // Standard Escapes 109 EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", "a1234ber", &Error)); 110 EXPECT_EQ("", Error); 111 EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", "a1234ber", &Error)); 112 EXPECT_EQ("", Error); 113 EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", "a1234ber", &Error)); 114 EXPECT_EQ("", Error); 115 EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", "a1234ber", &Error)); 116 EXPECT_EQ("", Error); 117 118 EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", "a1234ber", &Error)); 119 EXPECT_EQ(Error, "replacement string contained trailing backslash"); 120 121 // Backreferences 122 EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error)); 123 EXPECT_EQ("", Error); 124 125 EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error)); 126 EXPECT_EQ("", Error); 127 128 EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error)); 129 EXPECT_EQ(Error, "invalid backreference string '100'"); 130 131 EXPECT_EQ("012345", Regex("a([0-9]+).*").sub("0\\g<1>5", "a1234ber", &Error)); 132 EXPECT_EQ("", Error); 133 134 EXPECT_EQ("0a1234ber5", 135 Regex("a([0-9]+).*").sub("0\\g<0>5", "a1234ber", &Error)); 136 EXPECT_EQ("", Error); 137 138 EXPECT_EQ("0A5", Regex("a(.)(.)(.)(.)(.)(.)(.)(.)(.)(.).*") 139 .sub("0\\g<10>5", "a123456789Aber", &Error)); 140 EXPECT_EQ("", Error); 141 142 EXPECT_EQ("0g<-1>5", 143 Regex("a([0-9]+).*").sub("0\\g<-1>5", "a1234ber", &Error)); 144 EXPECT_EQ("", Error); 145 146 EXPECT_EQ("0g<15", Regex("a([0-9]+).*").sub("0\\g<15", "a1234ber", &Error)); 147 EXPECT_EQ("", Error); 148 149 EXPECT_EQ("0g<>15", Regex("a([0-9]+).*").sub("0\\g<>15", "a1234ber", &Error)); 150 EXPECT_EQ("", Error); 151 152 EXPECT_EQ("0g<3e>1", 153 Regex("a([0-9]+).*").sub("0\\g<3e>1", "a1234ber", &Error)); 154 EXPECT_EQ("", Error); 155 156 EXPECT_EQ("aber", Regex("a([0-9]+)b").sub("a\\g<100>b", "a1234ber", &Error)); 157 EXPECT_EQ(Error, "invalid backreference string 'g<100>'"); 158 } 159 160 TEST_F(RegexTest, IsLiteralERE) { 161 EXPECT_TRUE(Regex::isLiteralERE("abc")); 162 EXPECT_FALSE(Regex::isLiteralERE("a(bc)")); 163 EXPECT_FALSE(Regex::isLiteralERE("^abc")); 164 EXPECT_FALSE(Regex::isLiteralERE("abc$")); 165 EXPECT_FALSE(Regex::isLiteralERE("a|bc")); 166 EXPECT_FALSE(Regex::isLiteralERE("abc*")); 167 EXPECT_FALSE(Regex::isLiteralERE("abc+")); 168 EXPECT_FALSE(Regex::isLiteralERE("abc?")); 169 EXPECT_FALSE(Regex::isLiteralERE("abc.")); 170 EXPECT_FALSE(Regex::isLiteralERE("a[bc]")); 171 EXPECT_FALSE(Regex::isLiteralERE("abc\\1")); 172 EXPECT_FALSE(Regex::isLiteralERE("abc{1,2}")); 173 } 174 175 TEST_F(RegexTest, Escape) { 176 EXPECT_EQ("a\\[bc\\]", Regex::escape("a[bc]")); 177 EXPECT_EQ("abc\\{1\\\\,2\\}", Regex::escape("abc{1\\,2}")); 178 } 179 180 TEST_F(RegexTest, IsValid) { 181 std::string Error; 182 EXPECT_FALSE(Regex("(foo").isValid(Error)); 183 EXPECT_EQ("parentheses not balanced", Error); 184 EXPECT_FALSE(Regex("a[b-").isValid(Error)); 185 EXPECT_EQ("invalid character range", Error); 186 } 187 188 TEST_F(RegexTest, MoveConstruct) { 189 Regex r1("^[0-9]+$"); 190 Regex r2(std::move(r1)); 191 EXPECT_TRUE(r2.match("916")); 192 } 193 194 TEST_F(RegexTest, MoveAssign) { 195 Regex r1("^[0-9]+$"); 196 Regex r2("abc"); 197 r2 = std::move(r1); 198 EXPECT_TRUE(r2.match("916")); 199 std::string Error; 200 EXPECT_FALSE(r1.isValid(Error)); 201 } 202 203 TEST_F(RegexTest, NoArgConstructor) { 204 std::string Error; 205 Regex r1; 206 EXPECT_FALSE(r1.isValid(Error)); 207 EXPECT_EQ("invalid regular expression", Error); 208 r1 = Regex("abc"); 209 EXPECT_TRUE(r1.isValid(Error)); 210 } 211 212 TEST_F(RegexTest, MatchInvalid) { 213 Regex r1; 214 std::string Error; 215 EXPECT_FALSE(r1.isValid(Error)); 216 EXPECT_FALSE(r1.match("X")); 217 } 218 219 // https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=3727 220 TEST_F(RegexTest, OssFuzz3727Regression) { 221 // Wrap in a StringRef so the NUL byte doesn't terminate the string 222 Regex r(StringRef("[[[=GS\x00[=][", 10)); 223 std::string Error; 224 EXPECT_FALSE(r.isValid(Error)); 225 } 226 227 } 228 229 TEST_F(RegexTest, NullStringInput) { 230 Regex r("^$"); 231 // String data points to nullptr in default constructor 232 StringRef String; 233 EXPECT_TRUE(r.match(String)); 234 } 235