xref: /llvm-project/llvm/unittests/Support/RegexTest.cpp (revision d1d2932cb1dc24e0c8149f07b75599981ac405a7)
1 //===- llvm/unittests/Support/RegexTest.cpp - Regex tests --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Support/Regex.h"
10 #include "llvm/ADT/SmallVector.h"
11 #include "gtest/gtest.h"
12 #include <cstring>
13 
14 using namespace llvm;
15 namespace {
16 
17 class RegexTest : public ::testing::Test {
18 };
19 
// Covers plain matching, capture-group extraction, and NUL bytes embedded in
// both the subject string and the pattern.
TEST_F(RegexTest, Basics) {
  // Anchored pattern: the whole input has to consist of digits.
  Regex AllDigits("^[0-9]+$");
  EXPECT_TRUE(AllDigits.match("916"));
  EXPECT_TRUE(AllDigits.match("9"));
  EXPECT_FALSE(AllDigits.match("9a"));

  SmallVector<StringRef, 1> Matches;

  // Unanchored pattern: Matches[0] is the matched substring.
  Regex SomeDigits("[0-9]+");
  EXPECT_TRUE(SomeDigits.match("aa216b", &Matches));
  EXPECT_EQ(1u, Matches.size());
  EXPECT_EQ("216", Matches[0].str());

  // Two capture groups, the first of which is optional.
  Regex WithGroups("[0-9]+([a-f])?:([0-9]+)");
  EXPECT_TRUE(WithGroups.match("9a:513b", &Matches));
  EXPECT_EQ(3u, Matches.size());
  EXPECT_EQ("9a:513", Matches[0].str());
  EXPECT_EQ("a", Matches[1].str());
  EXPECT_EQ("513", Matches[2].str());

  // When the optional group is absent, its slot is reported as empty.
  EXPECT_TRUE(WithGroups.match("9:513b", &Matches));
  EXPECT_EQ(3u, Matches.size());
  EXPECT_EQ("9:513", Matches[0].str());
  EXPECT_EQ("", Matches[1].str());
  EXPECT_EQ("513", Matches[2].str());

  // Subject containing a NUL byte: "ax\0b".
  std::string NulSubject = "axxb";
  NulSubject[2] = '\0';
  Regex NotB("a[^b]+b");
  EXPECT_FALSE(NotB.match("abb"));
  EXPECT_TRUE(NotB.match(NulSubject, &Matches));
  EXPECT_EQ(1u, Matches.size());
  EXPECT_EQ(NulSubject, Matches[0].str());

  // Pattern containing a NUL byte: the second 'X' (index 7) becomes '\0'.
  std::string NulPattern = "X[0-9]+X([a-f])?:([0-9]+)";
  NulPattern[7] = '\0';
  Regex WithNul(NulPattern);
  std::string Subject = "YX99a:513b";
  EXPECT_FALSE(WithNul.match(Subject));
  EXPECT_FALSE(WithNul.match("X9"));
  // Put a NUL into the subject where the pattern expects one.
  Subject[3] = '\0';
  EXPECT_TRUE(WithNul.match(Subject));
}
62 
// An empty pattern never matches, not even against the empty string. Other
// regex implementations may behave differently here.
TEST_F(RegexTest, EmptyPattern) {
  Regex Empty("");
  EXPECT_FALSE(Empty.match("123"));
  EXPECT_FALSE(Empty.match(""));
}
70 
// Checks \N backreferences inside patterns: single and multiple groups, and
// groups built from alternations in every ordering.
TEST_F(RegexTest, Backreferences) {
  SmallVector<StringRef, 4> Matches;

  // A single group repeated after an underscore.
  Regex Repeated("([a-z]+)_\\1");
  EXPECT_TRUE(Repeated.match("abc_abc", &Matches));
  EXPECT_EQ(2u, Matches.size());
  EXPECT_FALSE(Repeated.match("abc_ab", &Matches));

  // The same group referenced twice.
  Regex Twice("a([0-9])b\\1c\\1");
  EXPECT_TRUE(Twice.match("a4b4c4", &Matches));
  EXPECT_EQ(2u, Matches.size());
  EXPECT_EQ("4", Matches[1].str());
  EXPECT_FALSE(Twice.match("a2b2c3"));

  // Two distinct groups, each referenced once.
  Regex TwoGroups("a([0-9])([a-z])b\\1\\2");
  EXPECT_TRUE(TwoGroups.match("a6zb6z", &Matches));
  EXPECT_EQ(3u, Matches.size());
  EXPECT_EQ("6", Matches[1].str());
  EXPECT_EQ("z", Matches[2].str());
  EXPECT_FALSE(TwoGroups.match("a6zb6y"));
  EXPECT_FALSE(TwoGroups.match("a6zb7z"));

  // The matching alternative ("abc") appears first, in the middle, and last.
  const char *const AlternationPatterns[] = {
      "(abc|xyz|uvw)_\\1",
      "(xyz|abc|uvw)_\\1",
      "(xyz|uvw|abc)_\\1",
  };
  for (const char *Pattern : AlternationPatterns) {
    Regex Alternation(Pattern);
    EXPECT_TRUE(Alternation.match("abc_abc", &Matches));
    EXPECT_EQ(2u, Matches.size());
    EXPECT_FALSE(Alternation.match("abc_ab", &Matches));
    EXPECT_FALSE(Alternation.match("abc_xyz", &Matches));
  }
}
110 
// Checks Regex::sub(): literal replacements, backslash escapes, \N and \g<N>
// backreferences, and the error strings reported for bad replacements.
TEST_F(RegexTest, Substitution) {
  std::string Error;

  Regex Digits("[0-9]+");
  Regex DigitsInAB("a[0-9]+b");
  Regex GroupInAB("a([0-9]+)b");
  Regex GroupThenRest("a([0-9]+).*");
  Regex TenGroups("a(.)(.)(.)(.)(.)(.)(.)(.)(.)(.).*");

  EXPECT_EQ("aNUMber", Digits.sub("NUM", "a1234ber"));

  // Standard escapes. An unrecognised escape such as \j yields the bare
  // character.
  EXPECT_EQ("a\\ber", Digits.sub("\\\\", "a1234ber", &Error));
  EXPECT_EQ("", Error);
  EXPECT_EQ("a\nber", Digits.sub("\\n", "a1234ber", &Error));
  EXPECT_EQ("", Error);
  EXPECT_EQ("a\tber", Digits.sub("\\t", "a1234ber", &Error));
  EXPECT_EQ("", Error);
  EXPECT_EQ("ajber", Digits.sub("\\j", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  // A lone trailing backslash is reported as an error.
  EXPECT_EQ("aber", Digits.sub("\\", "a1234ber", &Error));
  EXPECT_EQ(Error, "replacement string contained trailing backslash");

  // \N backreferences; \0 refers to the whole match.
  EXPECT_EQ("aa1234bber", DigitsInAB.sub("a\\0b", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  EXPECT_EQ("a1234ber", GroupInAB.sub("a\\1b", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  // A reference to a group that does not exist is an error.
  EXPECT_EQ("aber", DigitsInAB.sub("a\\100b", "a1234ber", &Error));
  EXPECT_EQ(Error, "invalid backreference string '100'");

  // \g<N> backreferences, including a multi-digit group number.
  EXPECT_EQ("012345", GroupThenRest.sub("0\\g<1>5", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  EXPECT_EQ("0a1234ber5", GroupThenRest.sub("0\\g<0>5", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  EXPECT_EQ("0A5", TenGroups.sub("0\\g<10>5", "a123456789Aber", &Error));
  EXPECT_EQ("", Error);

  // Malformed \g<...> sequences are copied through without the backslash and
  // do not set an error.
  EXPECT_EQ("0g<-1>5", GroupThenRest.sub("0\\g<-1>5", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  EXPECT_EQ("0g<15", GroupThenRest.sub("0\\g<15", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  EXPECT_EQ("0g<>15", GroupThenRest.sub("0\\g<>15", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  EXPECT_EQ("0g<3e>1", GroupThenRest.sub("0\\g<3e>1", "a1234ber", &Error));
  EXPECT_EQ("", Error);

  // A well-formed \g<N> naming a missing group is an error.
  EXPECT_EQ("aber", GroupInAB.sub("a\\g<100>b", "a1234ber", &Error));
  EXPECT_EQ(Error, "invalid backreference string 'g<100>'");
}
167 
// Regex::isLiteralERE() accepts a plain string and rejects every input below
// that contains a regex metacharacter.
TEST_F(RegexTest, IsLiteralERE) {
  EXPECT_TRUE(Regex::isLiteralERE("abc"));

  // Each entry contains one construct that makes it non-literal.
  const char *const NonLiteral[] = {
      "a(bc)", "^abc", "abc$",  "a|bc",   "abc*",     "abc+",
      "abc?",  "abc.", "a[bc]", "abc\\1", "abc{1,2}",
  };
  for (const char *Pattern : NonLiteral)
    EXPECT_FALSE(Regex::isLiteralERE(Pattern));
}
182 
// Regex::escape() backslash-escapes brackets, braces, and existing
// backslashes in the inputs below.
TEST_F(RegexTest, Escape) {
  const auto Brackets = Regex::escape("a[bc]");
  EXPECT_EQ("a\\[bc\\]", Brackets);

  const auto BracesAndBackslash = Regex::escape("abc{1\\,2}");
  EXPECT_EQ("abc\\{1\\\\,2\\}", BracesAndBackslash);
}
187 
// Malformed patterns must be reported as invalid with a descriptive message.
TEST_F(RegexTest, IsValid) {
  std::string Error;

  // Unterminated group.
  Regex UnbalancedParen("(foo");
  EXPECT_FALSE(UnbalancedParen.isValid(Error));
  EXPECT_EQ("parentheses not balanced", Error);

  // Unterminated character range inside a bracket expression.
  Regex OpenRange("a[b-");
  EXPECT_FALSE(OpenRange.isValid(Error));
  EXPECT_EQ("invalid character range", Error);
}
195 
// A Regex created by move construction must still match.
TEST_F(RegexTest, MoveConstruct) {
  Regex Source("^[0-9]+$");
  Regex Target(std::move(Source));
  EXPECT_TRUE(Target.match("916"));
}
201 
// Move assignment transfers the compiled pattern to the destination and
// leaves the source reporting itself as invalid.
TEST_F(RegexTest, MoveAssign) {
  Regex Source("^[0-9]+$");
  Regex Target("abc");
  Target = std::move(Source);
  EXPECT_TRUE(Target.match("916"));

  // Deliberately inspect the moved-from object.
  std::string Error;
  EXPECT_FALSE(Source.isValid(Error));
}
210 
// A default-constructed Regex is invalid until a real pattern is assigned.
TEST_F(RegexTest, NoArgConstructor) {
  Regex Pattern;
  std::string Error;
  EXPECT_FALSE(Pattern.isValid(Error));
  EXPECT_EQ("invalid regular expression", Error);

  // Assigning a compiled pattern makes the object valid.
  Pattern = Regex("abc");
  EXPECT_TRUE(Pattern.isValid(Error));
}
219 
// Calling match() on an invalid Regex must return false.
TEST_F(RegexTest, MatchInvalid) {
  std::string Error;
  Regex Invalid;
  EXPECT_FALSE(Invalid.isValid(Error));
  EXPECT_FALSE(Invalid.match("X"));
}
226 
227 // https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=3727
TEST_F(RegexTest,OssFuzz3727Regression)228 TEST_F(RegexTest, OssFuzz3727Regression) {
229   // Wrap in a StringRef so the NUL byte doesn't terminate the string
230   Regex r(StringRef("[[[=GS\x00[=][", 10));
231   std::string Error;
232   EXPECT_FALSE(r.isValid(Error));
233 }
234 
235 }
236 
TEST_F(RegexTest,NullStringInput)237 TEST_F(RegexTest, NullStringInput) {
238   Regex r("^$");
239   // String data points to nullptr in default constructor
240   StringRef String;
241   EXPECT_TRUE(r.match(String));
242 }
243