xref: /netbsd-src/external/gpl3/gcc/dist/libphobos/src/std/regex/internal/tests2.d (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1*b1e83836Smrg // Split-up due to DMD's enormous memory consumption
2*b1e83836Smrg 
3*b1e83836Smrg module std.regex.internal.tests2;
4*b1e83836Smrg 
5*b1e83836Smrg package(std.regex):
6*b1e83836Smrg 
7*b1e83836Smrg import std.conv, std.exception, std.meta, std.range,
8*b1e83836Smrg     std.typecons, std.regex;
9*b1e83836Smrg 
10*b1e83836Smrg import std.uni : Escapables; // characters that need escaping
11*b1e83836Smrg 
// Compile-time regexes for a plain literal and a starred atom, run via bmatch.
@safe unittest
{
    auto literalRe = ctRegex!("abc");
    auto starRe = ctRegex!("ab*c");

    // In both cases the whole input is expected to be the hit.
    assert(bmatch("abc", literalRe).hit == "abc");
    assert(bmatch("abbbbc", starRe).hit == "abbbbc");
}
// Line anchors and word-boundary escapes in compile-time regexes.
@safe unittest
{
    auto anchored = ctRegex!("^abc$");
    assert(bmatch("abc", anchored).hit == "abc");

    auto wordBound = ctRegex!(`\b(a\B[a-z]b)\b`);
    auto groups = match("azb", wordBound).captures;
    // Whole match and the single group are both expected to be "azb".
    assert(array(groups) == ["azb", "azb"]);
}
26*b1e83836Smrg 
// Counted repetition: greedy on a string, lazy on a wstring.
@safe unittest
{
    auto greedyRep = ctRegex!("(?:a{2,4}b{1,3}){1,2}");
    auto lazyRep = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w);

    assert(bmatch("aaabaaaabbb", greedyRep).hit == "aaabaaaabbb");
    assert(bmatch("aaabaaaabbb"w, lazyRep).hit == "aaab"w);
}
34*b1e83836Smrg 
// Lazy `.*?` in compile-time regexes: once with the "sm" flags, once across
// two adjacent tags.
@safe unittest
{
    auto crToLineEnd = ctRegex!(`\r.*?$`,"sm");
    assert(bmatch("abc\r\nxy", crToLineEnd).hit == "\r\nxy");

    auto lazyPacket = ctRegex!("<packet.*?/packet>");
    string twoPackets = "<packet>text</packet><packet>text</packet>";
    // Only the first packet is expected in the hit.
    assert(bmatch(twoPackets, lazyPacket).hit == "<packet>text</packet>");
}
43*b1e83836Smrg 
// Capture groups in compile-time regexes: optional group and repeated group.
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto optionalGroup = ctRegex!("^(a)(b)?(c*)");
    auto firstHit = bmatch("abcc", optionalGroup);
    assert(firstHit);
    auto groups = firstHit.captures;
    assert(groups[1] == "a");
    assert(groups[2] == "b");
    assert(groups[3] == "cc");

    auto repeatedGroup = ctRegex!("q(a|b)*q");
    assert(match("xxqababqyy", repeatedGroup));
    // The repeated group is expected to hold its last iteration ("b").
    assert(equal(bmatch("xxqababqyy", repeatedGroup).captures, ["qababq", "b"]));
}
58*b1e83836Smrg 
// A pattern parsed in a `static` initializer must yield the same IR as the
// same pattern parsed by a run-time call to regex().
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto altRT = regex("a|b|c");
    static altCT = regex("a|b|c");
    assert(equal(altRT.ir, altCT.ir));

    //CTFE parser BUG is triggered by group
    //in the middle of alternation (at least not first and not last)
    static groupCT = regex(`abc|(edf)|xyz`);
    auto groupRT = regex(`abc|(edf)|xyz`);
    assert(equal(groupCT.ir, groupRT.ir));
}
71*b1e83836Smrg 
// Assorted compile-time regex checks: alternation groups, case-insensitive
// counted repetition, optional groups and "gm" line matching.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;

    enum altGroup = ctRegex!"(A|B|C)";
    auto altHit = match("B", altGroup);
    assert(altHit);
    assert(equal(altHit.captures, [ "B", "B"]));

    enum starGroup = ctRegex!"(A|B)*";
    assert(match("BAAA", starGroup));

    enum ciCounted = ctRegex!("a{3,4}","i");
    auto ciHit = match("AaA", ciCounted);
    assert(ciHit);
    assert(ciHit.captures[0] == "AaA");

    enum ciLazy = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
    auto lazyHit = match("aaaabc", ciLazy);
    assert(lazyHit);
    assert(lazyHit.captures[0] == "aaaab");

    auto optGroup = ctRegex!("(a)(b)?(c*)");
    auto optHit = bmatch("abcc", optGroup);
    assert(optHit);
    auto optCaps = optHit.captures;
    assert(optCaps[1] == "a");
    assert(optCaps[2] == "b");
    assert(optCaps[3] == "cc");

    auto lineEnds = ctRegex!(".*$", "gm");
    auto lineHits = match("First\rSecond", lineEnds);
    assert(lineHits);
    assert(equal(map!"a.hit"(lineHits), ["First", "", "Second"]));
}
102*b1e83836Smrg 
// Global ("g") matching, a free-form ("x") pattern, lookbehind and "gm" line
// anchors. The body is a template instantiated once with bmatch and once
// with match.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
//global matching
    void test_body(alias matchFn)()
    {
        // writeln is referenced only by the debug statement at the end of this
        // function, and the module-level imports visible in this file do not
        // include std.stdio. Import it under the same debug identifier so a
        // -debug=std_regex_test build compiles.
        debug(std_regex_test) import std.stdio : writeln;

        string s = "a quick brown fox jumps over a lazy dog";
        auto r1 = regex("\\b[a-z]+\\b","g");
        string[] test;
        foreach (m; matchFn(s, r1))
            test ~= m.hit;
        assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
        // Pattern written with the "x" flag, spread over several lines.
        auto free_reg = regex(`

            abc
            \s+
            "
            (
                    [^"]+
                |   \\ "
            )+
            "
            z
        `, "x");
        // NOTE(review): this call uses match rather than matchFn; kept as in
        // the original.
        auto m = match(`abc  "quoted string with \" inside"z`,free_reg);
        assert(m);
        string mails = " hey@you.com no@spam.net ";
        auto rm = regex(`@(?<=\S+@)\S+`,"g");
        assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"]));
        auto m2 = matchFn("First line\nSecond line",regex(".*$","gm"));
        assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"]));
        auto m2a = matchFn("First line\nSecond line",regex(".+$","gm"));
        assert(equal(map!"a[0]"(m2a), ["First line", "Second line"]));
        auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm"));
        assert(equal(map!"a[0]"(m2b), ["First line", "Second line"]));
        debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!");
    }
    test_body!bmatch();
    test_body!match();
}
144*b1e83836Smrg 
//tests for accumulated std.regex issues and other regressions
// The body is a template instantiated once with bmatch and once with match.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    void test_body(alias matchFn)()
    {
        // writeln is referenced only by the debug statement below, and the
        // module-level imports visible in this file do not include std.stdio.
        // Import it under the same debug identifier so a
        // -debug=std_regex_test build compiles.
        debug(std_regex_test) import std.stdio : writeln;

        // https://issues.dlang.org/show_bug.cgi?id=5857
        //matching goes out of control if ... in (...){x} has .*/.+
        auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
        assert(c[0] == "axxxzayyyyyzd");
        assert(c[1] == "ayyyyyz");
        auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
        assert(c2[0] == "axxxayyyyyd");
        assert(c2[1] == "ayyyyy");
        // https://issues.dlang.org/show_bug.cgi?id=2108
        //greedy vs non-greedy
        auto nogreed = regex("<packet.*?/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit
               == "<packet>text</packet>");
        auto greed =  regex("<packet.*/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit
               == "<packet>text</packet><packet>text</packet>");
        // https://issues.dlang.org/show_bug.cgi?id=4574
        //empty successful match still advances the input
        string[] pres, posts;
        foreach (m; matchFn("abcabc", regex("","g")))
        {
            pres ~= m.pre;
            posts ~= m.post;
            assert(m.hit.empty);

        }
        auto heads = [
            "abcabc",
            "abcab",
            "abca",
            "abc",
            "ab",
            "a",
            ""
        ];
        auto tails = [
            "abcabc",
             "bcabc",
              "cabc",
               "abc",
                "bc",
                 "c",
                  ""
        ];
        // pre grows while post shrinks, so heads is compared in reverse.
        assert(pres == array(retro(heads)));
        assert(posts == tails);
        // https://issues.dlang.org/show_bug.cgi?id=6076
        //regression on .*
        auto re = regex("c.*|d");
        auto m = matchFn("mm", re);
        assert(!m);
        debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!");
        auto rprealloc = regex(`((.){5}.{1,10}){5}`);
        auto arr = array(repeat('0',100));
        auto m2 = matchFn(arr, rprealloc);
        assert(m2);
        // This pattern must parse without throwing.
        assert(collectException(
                regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
                ) is null);
        // NOTE(review): this loop calls match rather than matchFn; kept as in
        // the original.
        foreach (ch; [Escapables])
        {
            assert(match(to!string(ch),regex(`[\`~ch~`]`)));
            assert(!match(to!string(ch),regex(`[^\`~ch~`]`)));
            assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
        }
        // https://issues.dlang.org/show_bug.cgi?id=7718
        string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
        auto reStrCmd = regex (`(".*")|('.*')`, "g");
        assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
                     [`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
    }
    test_body!bmatch();
    test_body!match();
}
226*b1e83836Smrg 
// tests for replace
// Runs the same replace checks for string, wstring and dstring. The body is
// a template instantiated once with bmatch and once with match.
@safe unittest
{
    void test(alias matchFn)()
    {
        import std.uni : toUpper;
        // writeln is referenced only by the debug statement at the end of this
        // function, and the module-level imports visible in this file do not
        // include std.stdio. Import it under the same debug identifier so a
        // -debug=std_regex_test build compiles.
        debug(std_regex_test) import std.stdio : writeln;

        static foreach (v; AliasSeq!(string, wstring, dstring))
        {{
            // Replacement callback: upper-cases the matched text.
            auto baz(Cap)(Cap m)
            if (is(Cap == Captures!(Cap.String)))
            {
                return toUpper(m.hit);
            }
            alias String = v;
            // Without "g" only the first occurrence is replaced.
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c"))
                   == to!String("ack rapacity"));
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c"))
                   == to!String("ack capacity"));
            // $& in the format string stands for the matched text.
            assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]"))
                   == to!String("[n]oon"));
            // $` and $' stand for the text before and after the match.
            assert(std.regex.replace!(matchFn)(
                to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'")
            ) == to!String(": test2 test1 :"));
            auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."),
                    regex(to!String("[ar]"), "g"));
            assert(s == "StRAp A Rocket engine on A chicken.");
        }}
        debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~"  !!!");
    }
    test!(bmatch)();
    test!(match)();
}
260*b1e83836Smrg 
// tests for splitter
@safe unittest
{
    import std.algorithm.comparison : equal;

    // Separator at both ends: empty pieces are expected first and last.
    auto padded = ", abc, de,     fg, hi, ";
    auto paddedPieces = splitter(padded, regex(", *"));
    assert(equal(paddedPieces, ["", "abc", "de", "fg", "hi", ""]));

    // Separator only at the front.
    auto leading = ", abc, de,  fg, hi";
    auto pieces = splitter(leading, regex(", *"));
    auto expected = ["", "abc", "de", "fg", "hi"];

    // Walk the range by hand first, then compare the same range via equal().
    uint idx;
    foreach (piece; pieces)
    {
        assert(expected[idx] == piece);
        ++idx;
    }
    assert(equal(pieces, expected));
}
281*b1e83836Smrg 
// splitter is called on a mutable char[] input; the result is not inspected,
// so this only checks that the call compiles and runs.
@safe unittest
{
    char[] mutableInput = ", abc, de,  fg, hi, ".dup;
    auto separator = regex(", *");
    auto pieces = splitter(mutableInput, separator);
}
287*b1e83836Smrg 
// split() on a regex separator returns every piece, including the empty
// pieces at both ends.
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto input = ", abc, de,  fg, hi, ";
    auto expected = ["", "abc", "de", "fg", "hi", ""];
    auto parts = split(input, regex(", *"));
    assert(equal(parts, expected[]));
}
295*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=7141
// Character classes with an escaped '-' as range start, and with '&' as
// range start.
@safe unittest
{
    string escapedDash = `[a\--b]`;
    foreach (input; ["-", "b"])
        assert(match(input, escapedDash));

    string ampRange = `[&-z]`;
    assert(match("b", ampRange));
}
305*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=7111
// A bare start anchor must match the empty string.
@safe unittest
{
    auto startAnchor = regex("^");
    assert(match("", startAnchor));
}
311*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=7300
// A two-character dstring pattern must not match a one-character input.
@safe unittest
{
    auto result = match("a"d, "aa"d);
    assert(!result);
}
317*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=7551
// ']' directly after the opening '[' is a literal member of the class;
// "[]" alone is rejected.
@safe unittest
{
    auto withBracket = regex("[]abc]*");
    auto firstHit = "]ab".matchFirst(withBracket);
    assert(firstHit.hit == "]ab");

    assertThrown(regex("[]"));

    // Same class with "ab" subtracted via "--".
    auto minusAb = regex("[]abc--ab]*");
    auto secondHit = "]ac".matchFirst(minusAb);
    assert(secondHit.hit == "]");
}
327*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=7674
// "$$" and backslashes in the replacement format string.
@safe unittest
{
    // "$$" in the format yields one literal '$'.
    assert(replace("1234", regex("^"), "$$") == "$1234");

    auto questionMark = regex(r"\?", "g");
    assert(replace("hello?", questionMark, r"\?") == r"hello\?");
    assert(replace("hello?", questionMark, r"\\?") != r"hello\?");
}
335*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=7679
// splitter/split with an `enum` ctRegex, for each string width.
@safe unittest
{
    import std.algorithm.comparison : equal;
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        enum dot = ctRegex!(to!S(r"\."));
        auto input = to!S("a.b");
        auto expected = [to!S("a"), to!S("b")];
        assert(equal(std.regex.splitter(input, dot), expected));
        assert(split(input, dot) == expected);
    }}
}
348*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=8203
// Nothing is asserted here: the test passes if repeated iteration over the
// match range and the second match call complete.
@safe unittest
{
    string data = "
    NAME   = XPAW01_STA:STATION
    NAME   = XPAW01_STA
    ";
    auto namePattern = regex(
       r"^NAME   = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
    auto allMatches = match(data, namePattern);
    // Iterate the same match range twenty times over.
    foreach (pass; 0 .. 20)
    {
        foreach (oneMatch; allMatches) {}
    }
    //a second issue with same symptoms
    auto cyrillicWords = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
    match("аллея Театральная", cyrillicWords);
}
366*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=8637 purity of enforce
// enforce() is applied directly to a match result; the test needs this to
// compile and not throw.
@safe unittest
{
    auto worldRe = regex("world");
    auto found = match("hello world", worldRe);
    enforce(found);
}
373*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=8725
// A multi-line "gx" pattern built in a `static` initializer, used by replace.
@safe unittest
{
    static emphasis = regex( r"\*
                (?!\s+)
                (.*?)
                (?!\s+)
                \*", "gx" );
    string text = "this * is* interesting, *very* interesting";
    auto rendered = replace(text, emphasis, "<i>$1</i>");
    // Only "*very*" is expected to be rewritten.
    assert(rendered == "this * is* interesting, <i>very</i> interesting");
}
386*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=8349
// An `enum` ctRegex built from an `enum` pattern string.
@safe unittest
{
    enum peakPattern = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
    enum peakCompiled = ctRegex!(peakPattern);
    //note that the regex pattern itself is probably bogus
    auto found = match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakCompiled);
    assert(found);
}
395*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=9211
// A starred group followed by a digit group: the starred group is expected
// to keep its last iteration, whether written as \w or [0-9].
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto expected = ["1234", "3", "4"];
    foreach (pattern; [r"^(\w)*(\d)", r"^([0-9])*(\d)"])
    {
        auto found = match("1234", regex(pattern));
        assert(equal(found.front, expected));
    }
}
407*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=9280
// Named groups in a regex built in a `static` initializer are reachable
// both by index and by name.
@safe unittest
{
    string line = "a!b@c";
    static userMask = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$");
    auto found = match(line, userMask);
    assert(found);
    auto groups = found.captures;
    assert(groups[1] == "a");
    assert(groups["nick"] == "a");
}
419*b1e83836Smrg 
420*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=9579
// replace() with a mutable char[] input and a string format.
@safe unittest
{
    char[] mutableInput = ['a', 'b', 'c'];
    string wrapFormat = "($1)";
    auto groupA = regex(`(a)`, "g");
    // used to give a compile error:
    auto replaced = replace(mutableInput, groupA, wrapFormat);
    assert(replaced == "(a)bc");
}
431*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=9634
// Non-capturing group around a plus-quantified atom in ctRegex.
@safe unittest
{
    auto nonCapturing = ctRegex!"(?:a+)";
    auto found = match("aaaa", nonCapturing);
    assert(found.hit == "aaaa");
}
438*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=10798
// Set subtraction ("--") inside a character class in ctRegex.
@safe unittest
{
    auto withoutC = ctRegex!("[abcd--c]*");
    auto found = match("abc", withoutC);
    assert(found);
    // The hit is expected to stop before the subtracted 'c'.
    assert(found.hit == "ab");
}
447*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=10913
// replace!(fun) is instantiated once with a @system callback inside a
// @system lambda, and once with a @safe callback inside a @safe lambda.
@system unittest
{
    @system static string systemCopy(const(char)[] s)
    {
        return s.dup;
    }
    @safe static string safeCopy(const(char)[] s)
    {
        return s.dup;
    }
    () @system {
        replace!((cap) => systemCopy(cap.hit))("blah", regex(`a`));
    }();
    () @safe {
        replace!((cap) => safeCopy(cap.hit))("blah", regex(`a`));
    }();
}
466*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=11262
// UFCS replace() with an `enum` ctRegex, assigning back to the input.
@safe unittest
{
    enum comma = ctRegex!(r",", "g");
    auto text = "This,List";
    text = text.replace(comma, "-");
    assert(text == "This-List");
}
475*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=11775
// A counted repetition whose minimum exceeds its maximum must be rejected.
@safe unittest
{
    auto error = collectException(regex("a{1,0}"));
    assert(error !is null);
}
481*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=11839
// Which names are accepted for named groups.
@safe unittest
{
    import std.algorithm.comparison : equal;

    assert(equal(regex(`(?P<var1>\w+)`).namedCaptures, ["var1"]));
    // A name consisting of a digit only is rejected.
    assert(collectException(regex(`(?P<1>\w+)`)) !is null);
    assert(equal(regex(`(?P<v1>\w+)`).namedCaptures, ["v1"]));
    assert(equal(regex(`(?P<__>\w+)`).namedCaptures, ["__"]));
    assert(equal(regex(`(?P<я>\w+)`).namedCaptures, ["я"]));
}
492*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=12076
// Negative lookbehind in ctRegex; the result is not inspected, so this only
// checks that the pattern compiles and the match call runs.
@safe unittest
{
    auto lookbehindRe = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
    string text = "one two";
    auto unused = match(text, lookbehindRe);
}
500*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=12105
// Lazy vs greedy `.*` in front of a negative lookahead, in ctRegex.
@safe unittest
{
    auto lazyBefore = ctRegex!`.*?(?!a)`;
    auto greedyBefore = ctRegex!`.*(?!a)`;

    assert(matchFirst("aaab", lazyBefore).hit == "aaa");
    assert(matchFirst("aaab", greedyBefore).hit == "aaab");
}
509*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=11784
// Set intersection ("&&") with a negated nested class.
@safe unittest
{
    auto alphabet = "abcdefghijklmnopqrstuvwxyz";
    auto firstHit = matchFirst(alphabet, "[a-z&&[^aeiuo]]");
    // 'a' is excluded, so the first hit is expected to be 'b'.
    assert(firstHit.hit == "b");
}
516*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=12366
// A ctRegex combining nested lookaheads with backreferences.
@safe unittest
{
    auto nested = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);
    auto eightX = match("xxxxxxxx", nested);
    auto fourX = match("xxxx", nested);
    assert(eightX.empty);
    assert(!fourX.empty);
}
524*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=12582
// Indexing captures by a group name the pattern does not define must throw.
@safe unittest
{
    auto namedA = regex(`(?P<a>abc)`);
    auto error = collectException(matchFirst("abc", namedA)["b"]);
    assert(error !is null);
}
531*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=12691
// A starred group with an empty alternative must not match "e@", either as
// a run-time pattern string or as a ctRegex.
@safe unittest
{
    auto viaString = bmatch("e@", "^([a-z]|)*$");
    assert(viaString.empty);

    auto viaCtRegex = bmatch("e@", ctRegex!`^([a-z]|)*$`);
    assert(viaCtRegex.empty);
}
538*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=12713
// This malformed pattern must make regex() throw.
@safe unittest
{
    enum malformed = "[[a-z]([a-z]|(([[a-z])))";
    assertThrown(regex(malformed));
}
544*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=12747
// Each of these patterns uses the backreference \1 and must be rejected.
@safe unittest
{
    foreach (pattern; [`^x(\1)`, `^(x(\1))`, `^((x)(?=\1))`])
    {
        assertThrown(regex(pattern));
    }
}
552*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=13532
// Disabled (version (none)) timing comparison: matchFirst with an `enum`
// ctRegex against matchFirst with an `immutable static` ctRegex.
version (none) // TODO: revisit once we have proper benchmark framework
@safe unittest
{
    import std.datetime.stopwatch : StopWatch, AutoStart;
    import std.math.algebraic : abs;
    import std.conv : to;
    enum re1 = ctRegex!`[0-9][0-9]`;
    immutable static re2 = ctRegex!`[0-9][0-9]`;
    immutable iterations = 1_000_000;
    size_t result1 = 0, result2 = 0;
    auto sw = StopWatch(AutoStart.yes);
    foreach (_; 0 .. iterations)
    {
        result1 += matchFirst("12345678", re1).length;
    }
    // re1 is the enum regex, so this first interval is the enum time. The
    // original stored it under the name staticTime, and the second interval
    // under enumTime, which inverted the ratio computed below.
    const enumTime = sw.peek();
    sw.reset();
    foreach (_; 0 .. iterations)
    {
        result2 += matchFirst("12345678", re2).length;
    }
    // re2 is the static regex.
    const staticTime = sw.peek();
    assert(result1 == result2);
    auto ratio = 1.0 * enumTime.total!"usecs" / staticTime.total!"usecs";
    // Passes when enum is faster, or when the two times differ by less than
    // 75% of the static time.
    // NOTE(review): the original comment said 30% while the code checks
    // 0.75; the coded threshold is kept - confirm which was intended.
    assert(ratio < 1.0 || abs(ratio - 1.0) < 0.75,
        "enum regex to static regex ratio "~to!string(ratio));
}
582*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=14504
// Only instantiates ctRegex for a long pattern of optional 'a's followed by
// mandatory 'a's; the result is never used for matching.
@safe unittest
{
    enum optionalPart = "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?";
    enum mandatoryPart = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    auto compiled = ctRegex!(optionalPart ~ mandatoryPart);
}
589*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=14529
// A case-insensitive character class must match both cases of each letter.
@safe unittest
{
    auto caseInsensitive = regex(r"^[CDF]$", "i");
    foreach (letter; ["C", "c", "D", "d", "F", "f"])
    {
        auto all = matchAll(letter, caseInsensitive);
        assert(all.front.hit == letter);
    }
}
597*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=14615
// When the pattern does not match, replaceFirst returns the input unchanged
// and the *Into variants append the input unchanged to the sink.
@safe unittest
{
    import std.array : appender;
    // The unused std.stdio import was dropped. replaceAllInto is listed here
    // because the body calls it.
    import std.regex : regex, replaceAllInto, replaceFirst, replaceFirstInto;

    auto example = "Hello, world!";
    auto pattern = regex("^Hello, (bug)");  // won't find this one
    auto result = replaceFirst(example, pattern, "$1 Sponge Bob");
    assert(result == "Hello, world!");  // Ok.

    auto sink = appender!string;
    replaceFirstInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!");
    // The sink is not cleared, so the second copy is appended to the first.
    replaceAllInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!Hello, world!");
}
616*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=15573
// With the "x" flag, the class "[c d]" must still find a match in "a b".
@safe unittest
{
    auto classWithSpace = regex("[c d]", "x");
    auto found = matchFirst("a b", classWithSpace);
    assert(found);
}
623*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=15864
// Only builds the regex; nothing is matched or asserted.
@safe unittest
{
    enum hrefPattern = `(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`;
    regex(hrefPattern);
}
629*b1e83836Smrg 
// "(?# ...)" groups in a pattern: "abc" must still match around them, and
// an unterminated one must be rejected.
@safe unittest
{
    auto commented = regex("(?# comment)abc(?# comment2)");
    auto found = matchFirst("abc", commented);
    assert(found);

    assertThrown(regex("(?#..."));
}
636*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=17075
// A `static` ctRegex run against an input of "<title>" followed by 100_000
// '<' characters; no match is expected.
@safe unittest
{
    enum titlePattern = `<title>(.+)</title>`;
    static titleRegex = ctRegex!titlePattern;
    string filler = join(repeat("<", 100_000));
    string input = "<title>" ~ filler;
    auto found = matchFirst(input, titleRegex);
    assert(found.empty);
}
645*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=17212
// With the "x" flag, the spaces around the class in " [a] " must not stop
// it matching "a".
@safe unittest
{
    auto spacedClass = regex(" [a] ", "x");
    auto found = matchFirst("a", spacedClass);
    assert(found);
}
652*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=17157
// For an alternation of four groups, each match is expected to fill only
// the group that matched and leave the other three empty.
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto input = "--a--b--c--d--";
    // One row per match: whole hit followed by groups 1 to 4.
    auto expected = [
        ["a", "a", "", "", ""],
        ["b", "", "b", "", ""],
        ["c", "", "", "c", ""],
        ["d", "", "", "", "d"]
    ];

    auto compileTime = ctRegex!"(a)|(b)|(c)|(d)";
    auto runTime = regex("(a)|(b)|(c)|(d)", "g");
    assert(equal!equal(matchAll(input, compileTime), expected));
    assert(equal!equal(bmatch(input, runTime), expected));
}
669*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=17667
// Malformed patterns, passed as arrays of alternatives, must throw with a
// specific error message.
@safe unittest
{
    import std.algorithm.searching : canFind;
    // Builds regex(arg) and checks that it throws an exception whose message
    // contains msg. line defaults to the call site's line number and is put
    // in front of the failure text.
    void willThrow(T, size_t line = __LINE__)(T arg, string msg)
    {
        auto e = collectException(regex(arg));
        // collectException returns null when nothing was thrown; fail with a
        // readable message instead of dereferencing null below.
        assert(e !is null, to!string(line) ~ ": expected an exception containing \""
            ~ msg ~ "\" but none was thrown");
        assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg);
    }
    willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class");
    willThrow([r"[\", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-\", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"\", r"123"], "invalid escape sequence");
}
685*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=17668
// The pattern "[^]" must throw a RegexException that names the missing
// operand for '^'.
@safe unittest
{
    import std.algorithm.searching : canFind;
    auto e = collectException!RegexException(regex(q"<[^]>"));
    // collectException returns null when nothing was thrown; fail with a
    // readable message instead of dereferencing null below.
    assert(e !is null, "expected a RegexException for the pattern `[^]`");
    assert(e.msg.canFind("no operand for '^'"), e.msg);
}
693*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=17673
// A regex built from several patterns reports which one matched; the
// expected value for the second pattern is 2.
@safe unittest
{
    string input = `<">`;
    string[] alternatives = ["abc", "\"|x"];
    auto combined = regex(alternatives);
    auto found = matchFirst(input, combined);
    assert(found);
    assert(found.whichPattern == 2);
}
704*b1e83836Smrg 
// https://issues.dlang.org/show_bug.cgi?id=18692
// A Captures value is copied and then assigned back; indexing it afterwards
// must still work.
@safe unittest
{
    auto threeEmptyGroups = regex("()()()");
    auto caps = matchFirst("", threeEmptyGroups);
    auto saved = caps;
    caps = saved;
    assert(caps[1] == "");
}
714