// Split-up due to DMD's enormous memory consumption
//
// Second half of the std.regex unittest suite: compile-time (ctRegex) checks,
// flag handling, replace/split helpers and one regression test per reported
// issue (each test is preceded by a link to the issue it guards against).

module std.regex.internal.tests2;

package(std.regex):

import std.conv, std.exception, std.meta, std.range,
    std.typecons, std.regex;

import std.uni : Escapables; // characters that need escaping

// writeln is only needed by the debug(std_regex_test) trace statements below;
// without this import a -debug=std_regex_test build does not compile.
debug(std_regex_test) import std.stdio;

// ctRegex: plain literal and a simple star, via the backtracking matcher
@safe unittest
{
    auto cr = ctRegex!("abc");
    assert(bmatch("abc",cr).hit == "abc");
    auto cr2 = ctRegex!("ab*c");
    assert(bmatch("abbbbc",cr2).hit == "abbbbc");
}

// ctRegex: anchors and word-boundary assertions
@safe unittest
{
    auto cr3 = ctRegex!("^abc$");
    assert(bmatch("abc",cr3).hit == "abc");
    auto cr4 = ctRegex!(`\b(a\B[a-z]b)\b`);
    assert(array(match("azb",cr4).captures) == ["azb", "azb"]);
}

// ctRegex: counted repetitions, greedy and lazy, narrow and wide strings
@safe unittest
{
    auto cr5 = ctRegex!("(?:a{2,4}b{1,3}){1,2}");
    assert(bmatch("aaabaaaabbb", cr5).hit == "aaabaaaabbb");
    auto cr6 = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w);
    assert(bmatch("aaabaaaabbb"w, cr6).hit == "aaab"w);
}

// ctRegex: "s"/"m" flags and lazy quantifier
@safe unittest
{
    auto cr7 = ctRegex!(`\r.*?$`,"sm");
    assert(bmatch("abc\r\nxy", cr7).hit == "\r\nxy");
    auto greed = ctRegex!("<packet.*?/packet>");
    assert(bmatch("<packet>text</packet><packet>text</packet>", greed).hit
            == "<packet>text</packet>");
}

// ctRegex: capture groups, optional group and group under a star
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto cr8 = ctRegex!("^(a)(b)?(c*)");
    auto m8 = bmatch("abcc",cr8);
    assert(m8);
    assert(m8.captures[1] == "a");
    assert(m8.captures[2] == "b");
    assert(m8.captures[3] == "cc");
    auto cr9 = ctRegex!("q(a|b)*q");
    auto m9 = match("xxqababqyy",cr9);
    assert(m9);
    assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"]));
}

// The parser must produce the same IR at compile time (static initializer)
// and at run time.
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto rtr = regex("a|b|c");
    static ctr = regex("a|b|c");
    assert(equal(rtr.ir,ctr.ir));
    //CTFE parser BUG is triggered by group
    //in the middle of alternation (at least not first and not last)
    static testCT = regex(`abc|(edf)|xyz`);
    auto testRT = regex(`abc|(edf)|xyz`);
    assert(equal(testCT.ir,testRT.ir));
}

// ctRegex stored in enums: alternation, case-insensitive counted repetition,
// captures and global multi-line matching
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    enum cx = ctRegex!"(A|B|C)";
    auto mx = match("B",cx);
    assert(mx);
    assert(equal(mx.captures, [ "B", "B"]));
    enum cx2 = ctRegex!"(A|B)*";
    assert(match("BAAA",cx2));

    enum cx3 = ctRegex!("a{3,4}","i");
    auto mx3 = match("AaA",cx3);
    assert(mx3);
    assert(mx3.captures[0] == "AaA");
    enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
    auto mx4 = match("aaaabc", cx4);
    assert(mx4);
    assert(mx4.captures[0] == "aaaab");
    auto cr8 = ctRegex!("(a)(b)?(c*)");
    auto m8 = bmatch("abcc",cr8);
    assert(m8);
    assert(m8.captures[1] == "a");
    assert(m8.captures[2] == "b");
    assert(m8.captures[3] == "cc");
    auto cr9 = ctRegex!(".*$", "gm");
    auto m9 = match("First\rSecond", cr9);
    assert(m9);
    assert(equal(map!"a.hit"(m9), ["First", "", "Second"]));
}

// Flag handling ("g", "x", "m"), run once per matcher entry point
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    //global matching
    void test_body(alias matchFn)()
    {
        string s = "a quick brown fox jumps over a lazy dog";
        auto r1 = regex("\\b[a-z]+\\b","g");
        string[] test;
        foreach (m; matchFn(s, r1))
            test ~= m.hit;
        assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
        // free-form ("x") pattern: whitespace inside the pattern is ignored
        auto free_reg = regex(`

            abc
            \s+
            "
            (
                    [^"]+
                |   \\ "
            )+
            "
            z
        `, "x");
        auto m = match(`abc "quoted string with \" inside"z`,free_reg);
        assert(m);
        string mails = " hey@you.com no@spam.net ";
        auto rm = regex(`@(?<=\S+@)\S+`,"g");
        assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"]));
        auto m2 = matchFn("First line\nSecond line",regex(".*$","gm"));
        assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"]));
        auto m2a = matchFn("First line\nSecond line",regex(".+$","gm"));
        assert(equal(map!"a[0]"(m2a), ["First line", "Second line"]));
        auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm"));
        assert(equal(map!"a[0]"(m2b), ["First line", "Second line"]));
        debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!");
    }
    test_body!bmatch();
    test_body!match();
}

//tests for accumulated std.regex issues and other regressions
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    void test_body(alias matchFn)()
    {
        // https://issues.dlang.org/show_bug.cgi?id=5857
        //matching goes out of control if ... in (...){x} has .*/.+
        auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
        assert(c[0] == "axxxzayyyyyzd");
        assert(c[1] == "ayyyyyz");
        auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
        assert(c2[0] == "axxxayyyyyd");
        assert(c2[1] == "ayyyyy");
        // https://issues.dlang.org/show_bug.cgi?id=2108
        //greedy vs non-greedy
        auto nogreed = regex("<packet.*?/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit
               == "<packet>text</packet>");
        auto greed = regex("<packet.*/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit
               == "<packet>text</packet><packet>text</packet>");
        // https://issues.dlang.org/show_bug.cgi?id=4574
        //empty successful match still advances the input
        string[] pres, posts, hits;
        foreach (m; matchFn("abcabc", regex("","g")))
        {
            pres ~= m.pre;
            posts ~= m.post;
            assert(m.hit.empty);

        }
        auto heads = [
            "abcabc",
            "abcab",
            "abca",
            "abc",
            "ab",
            "a",
            ""
        ];
        auto tails = [
            "abcabc",
            "bcabc",
            "cabc",
            "abc",
            "bc",
            "c",
            ""
        ];
        assert(pres == array(retro(heads)));
        assert(posts == tails);
        // https://issues.dlang.org/show_bug.cgi?id=6076
        //regression on .*
        auto re = regex("c.*|d");
        auto m = matchFn("mm", re);
        assert(!m);
        debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!");
        auto rprealloc = regex(`((.){5}.{1,10}){5}`);
        auto arr = array(repeat('0',100));
        auto m2 = matchFn(arr, rprealloc);
        assert(m2);
        assert(collectException(
                regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
                ) is null);
        // every escapable character must be usable, escaped, inside a character class
        foreach (ch; [Escapables])
        {
            assert(match(to!string(ch),regex(`[\`~ch~`]`)));
            assert(!match(to!string(ch),regex(`[^\`~ch~`]`)));
            assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
        }
        // https://issues.dlang.org/show_bug.cgi?id=7718
        string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
        auto reStrCmd = regex (`(".*")|('.*')`, "g");
        assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
                     [`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
    }
    test_body!bmatch();
    test_body!match();
}

// tests for replace
@safe unittest
{
    void test(alias matchFn)()
    {
        import std.uni : toUpper;

        static foreach (i, v; AliasSeq!(string, wstring, dstring))
        {{
            // replacement callback: upper-cases whatever was matched
            auto baz(Cap)(Cap m)
            if (is(Cap == Captures!(Cap.String)))
            {
                return toUpper(m.hit);
            }
            alias String = v;
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c"))
                   == to!String("ack rapacity"));
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c"))
                   == to!String("ack capacity"));
            assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]"))
                   == to!String("[n]oon"));
            assert(std.regex.replace!(matchFn)(
                to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'")
            ) == to!String(": test2 test1 :"));
            auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."),
                    regex(to!String("[ar]"), "g"));
            assert(s == "StRAp A Rocket engine on A chicken.");
        }}
        debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~" !!!");
    }
    test!(bmatch)();
    test!(match)();
}

// tests for splitter
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto s1 = ", abc, de, fg, hi, ";
    auto sp1 = splitter(s1, regex(", *"));
    auto w1 = ["", "abc", "de", "fg", "hi", ""];
    assert(equal(sp1, w1));

    auto s2 = ", abc, de, fg, hi";
    auto sp2 = splitter(s2, regex(", *"));
    auto w2 = ["", "abc", "de", "fg", "hi"];

    uint cnt;
    foreach (e; sp2)
    {
        assert(w2[cnt++] == e);
    }
    assert(equal(sp2, w2));
}

// splitter must also accept a mutable char[] input (compile check only)
@safe unittest
{
    char[] s1 = ", abc, de, fg, hi, ".dup;
    auto sp2 = splitter(s1, regex(", *"));
}

// split (eager counterpart of splitter)
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto s1 = ", abc, de, fg, hi, ";
    auto w1 = ["", "abc", "de", "fg", "hi", ""];
    assert(equal(split(s1, regex(", *")), w1[]));
}

// https://issues.dlang.org/show_bug.cgi?id=7141
@safe unittest
{
    string pattern = `[a\--b]`;
    assert(match("-", pattern));
    assert(match("b", pattern));
    string pattern2 = `[&-z]`;
    assert(match("b", pattern2));
}

// https://issues.dlang.org/show_bug.cgi?id=7111
@safe unittest
{
    assert(match("", regex("^")));
}

// https://issues.dlang.org/show_bug.cgi?id=7300
@safe unittest
{
    assert(!match("a"d, "aa"d));
}

// https://issues.dlang.org/show_bug.cgi?id=7551
@safe unittest
{
    auto r = regex("[]abc]*");
    assert("]ab".matchFirst(r).hit == "]ab");
    assertThrown(regex("[]"));
    auto r2 = regex("[]abc--ab]*");
    assert("]ac".matchFirst(r2).hit == "]");
}

// https://issues.dlang.org/show_bug.cgi?id=7674
@safe unittest
{
    assert("1234".replace(regex("^"), "$$") == "$1234");
    assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?");
    assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?");
}

// https://issues.dlang.org/show_bug.cgi?id=7679
@safe unittest
{
    import std.algorithm.comparison : equal;
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        enum re = ctRegex!(to!S(r"\."));
        auto str = to!S("a.b");
        assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")]));
        assert(split(str, re) == [to!S("a"), to!S("b")]);
    }}
}

// https://issues.dlang.org/show_bug.cgi?id=8203
// No assertions: the test passes if iterating the matches terminates.
@safe unittest
{
    string data = "
    NAME = XPAW01_STA:STATION
    NAME = XPAW01_STA
    ";
    auto uniFileOld = data;
    auto r = regex(
       r"^NAME = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
    auto uniCapturesNew = match(uniFileOld, r);
    for (int i = 0; i < 20; i++)
        foreach (matchNew; uniCapturesNew) {}
    //a second issue with same symptoms
    auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
    match("аллея Театральная", r2);
}

// https://issues.dlang.org/show_bug.cgi?id=8637 purity of enforce
@safe unittest
{
    auto m = match("hello world", regex("world"));
    enforce(m);
}

// https://issues.dlang.org/show_bug.cgi?id=8725
@safe unittest
{
    static italic = regex( r"\*
                (?!\s+)
                (.*?)
                (?!\s+)
                \*", "gx" );
    string input = "this * is* interesting, *very* interesting";
    assert(replace(input, italic, "<i>$1</i>") ==
           "this * is* interesting, <i>very</i> interesting");
}

// https://issues.dlang.org/show_bug.cgi?id=8349
@safe unittest
{
    enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
    enum peakRegex = ctRegex!(peakRegexStr);
    //note that the regex pattern itself is probably bogus
    assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex));
}

// https://issues.dlang.org/show_bug.cgi?id=9211
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto rx_1 = regex(r"^(\w)*(\d)");
    auto m = match("1234", rx_1);
    assert(equal(m.front, ["1234", "3", "4"]));
    auto rx_2 = regex(r"^([0-9])*(\d)");
    auto m2 = match("1234", rx_2);
    assert(equal(m2.front, ["1234", "3", "4"]));
}

// https://issues.dlang.org/show_bug.cgi?id=9280
@safe unittest
{
    string tomatch = "a!b@c";
    static r = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$");
    auto nm = match(tomatch, r);
    assert(nm);
    auto c = nm.captures;
    assert(c[1] == "a");
    assert(c["nick"] == "a");
}


// https://issues.dlang.org/show_bug.cgi?id=9579
@safe unittest
{
    char[] input = ['a', 'b', 'c'];
    string format = "($1)";
    // used to give a compile error:
    auto re = regex(`(a)`, "g");
    auto r = replace(input, re, format);
    assert(r == "(a)bc");
}

// https://issues.dlang.org/show_bug.cgi?id=9634
@safe unittest
{
    auto re = ctRegex!"(?:a+)";
    assert(match("aaaa", re).hit == "aaaa");
}

// https://issues.dlang.org/show_bug.cgi?id=10798
@safe unittest
{
    auto cr = ctRegex!("[abcd--c]*");
    auto m = "abc".match(cr);
    assert(m);
    assert(m.hit == "ab");
}

// https://issues.dlang.org/show_bug.cgi?id=10913
// replace with a callback must compile for both a @system and a @safe callback.
@system unittest
{
    @system static string foo(const(char)[] s)
    {
        return s.dup;
    }
    @safe static string bar(const(char)[] s)
    {
        return s.dup;
    }
    () @system {
        replace!((a) => foo(a.hit))("blah", regex(`a`));
    }();
    () @safe {
        replace!((a) => bar(a.hit))("blah", regex(`a`));
    }();
}

// https://issues.dlang.org/show_bug.cgi?id=11262
@safe unittest
{
    enum reg = ctRegex!(r",", "g");
    auto str = "This,List";
    str = str.replace(reg, "-");
    assert(str == "This-List");
}

// https://issues.dlang.org/show_bug.cgi?id=11775
@safe unittest
{
    assert(collectException(regex("a{1,0}")));
}

// https://issues.dlang.org/show_bug.cgi?id=11839
@safe unittest
{
    import std.algorithm.comparison : equal;
    assert(regex(`(?P<var1>\w+)`).namedCaptures.equal(["var1"]));
    assert(collectException(regex(`(?P<1>\w+)`)));
    assert(regex(`(?P<v1>\w+)`).namedCaptures.equal(["v1"]));
    assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"]));
    assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"]));
}

// https://issues.dlang.org/show_bug.cgi?id=12076
// No assertions: the test passes if the pattern compiles and matching returns.
@safe unittest
{
    auto RE = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
    string s = "one two";
    auto m = match(s, RE);
}

// https://issues.dlang.org/show_bug.cgi?id=12105
@safe unittest
{
    auto r = ctRegex!`.*?(?!a)`;
    assert("aaab".matchFirst(r).hit == "aaa");
    auto r2 = ctRegex!`.*(?!a)`;
    assert("aaab".matchFirst(r2).hit == "aaab");
}

// https://issues.dlang.org/show_bug.cgi?id=11784
@safe unittest
{
    assert("abcdefghijklmnopqrstuvwxyz"
        .matchFirst("[a-z&&[^aeiuo]]").hit == "b");
}

// https://issues.dlang.org/show_bug.cgi?id=12366
@safe unittest
{
    auto re = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);
    assert("xxxxxxxx".match(re).empty);
    assert(!"xxxx".match(re).empty);
}

// https://issues.dlang.org/show_bug.cgi?id=12582
@safe unittest
{
    auto r = regex(`(?P<a>abc)`);
    assert(collectException("abc".matchFirst(r)["b"]));
}

// https://issues.dlang.org/show_bug.cgi?id=12691
@safe unittest
{
    assert(bmatch("e@", "^([a-z]|)*$").empty);
    assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty);
}

// https://issues.dlang.org/show_bug.cgi?id=12713
@safe unittest
{
    assertThrown(regex("[[a-z]([a-z]|(([[a-z])))"));
}

// https://issues.dlang.org/show_bug.cgi?id=12747
@safe unittest
{
    assertThrown(regex(`^x(\1)`));
    assertThrown(regex(`^(x(\1))`));
    assertThrown(regex(`^((x)(?=\1))`));
}

// https://issues.dlang.org/show_bug.cgi?id=13532
version (none) // TODO: revisit once we have proper benchmark framework
@safe unittest
{
    import std.datetime.stopwatch : StopWatch, AutoStart;
    import std.math.algebraic : abs;
    import std.conv : to;
    enum re1 = ctRegex!`[0-9][0-9]`;
    immutable static re2 = ctRegex!`[0-9][0-9]`;
    immutable iterations = 1_000_000;
    size_t result1 = 0, result2 = 0;
    auto sw = StopWatch(AutoStart.yes);
    // first loop times the enum regex (re1)
    foreach (_; 0 .. iterations)
    {
        result1 += matchFirst("12345678", re1).length;
    }
    const enumTime = sw.peek();
    sw.reset();
    // second loop times the immutable static regex (re2)
    foreach (_; 0 .. iterations)
    {
        result2 += matchFirst("12345678", re2).length;
    }
    const staticTime = sw.peek();
    assert(result1 == result2);
    auto ratio = 1.0 * enumTime.total!"usecs" / staticTime.total!"usecs";
    // enum is faster or the difference is less than 75%
    assert(ratio < 1.0 || abs(ratio - 1.0) < 0.75,
        "enum regex to static regex ratio "~to!string(ratio));
}

// https://issues.dlang.org/show_bug.cgi?id=14504
// No assertions: the test passes if the pattern can be compiled by ctRegex.
@safe unittest
{
    auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" ~
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
}

// https://issues.dlang.org/show_bug.cgi?id=14529
@safe unittest
{
    auto ctPat2 = regex(r"^[CDF]$", "i");
    foreach (v; ["C", "c", "D", "d", "F", "f"])
        assert(matchAll(v, ctPat2).front.hit == v);
}

// https://issues.dlang.org/show_bug.cgi?id=14615
@safe unittest
{
    import std.array : appender;
    import std.regex : replaceFirst, replaceFirstInto, regex;

    auto example = "Hello, world!";
    auto pattern = regex("^Hello, (bug)");  // won't find this one
    auto result = replaceFirst(example, pattern, "$1 Sponge Bob");
    assert(result == "Hello, world!");  // Ok.

    auto sink = appender!string;
    replaceFirstInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!");
    replaceAllInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!Hello, world!");
}

// https://issues.dlang.org/show_bug.cgi?id=15573
@safe unittest
{
    auto rx = regex("[c d]", "x");
    assert("a b".matchFirst(rx));
}

// https://issues.dlang.org/show_bug.cgi?id=15864
@safe unittest
{
    regex(`(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`);
}

// (?# ...) comment groups: accepted when closed, rejected when unterminated
@safe unittest
{
    auto r = regex("(?# comment)abc(?# comment2)");
    assert("abc".matchFirst(r));
    assertThrown(regex("(?#..."));
}

// https://issues.dlang.org/show_bug.cgi?id=17075
@safe unittest
{
    enum titlePattern = `<title>(.+)</title>`;
    static titleRegex = ctRegex!titlePattern;
    string input = "<title>" ~ "<".repeat(100_000).join;
    assert(input.matchFirst(titleRegex).empty);
}

// https://issues.dlang.org/show_bug.cgi?id=17212
@safe unittest
{
    auto r = regex(" [a] ", "x");
    assert("a".matchFirst(r));
}

// https://issues.dlang.org/show_bug.cgi?id=17157
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto ctr = ctRegex!"(a)|(b)|(c)|(d)";
    auto r = regex("(a)|(b)|(c)|(d)", "g");
    auto s = "--a--b--c--d--";
    auto outcomes = [
        ["a", "a", "", "", ""],
        ["b", "", "b", "", ""],
        ["c", "", "", "c", ""],
        ["d", "", "", "", "d"]
    ];
    assert(equal!equal(s.matchAll(ctr), outcomes));
    assert(equal!equal(s.bmatch(r), outcomes));
}

// https://issues.dlang.org/show_bug.cgi?id=17667
@safe unittest
{
    import std.algorithm.searching : canFind;
    // Asserts that compiling `arg` throws and that the message contains `msg`;
    // `line` defaults to the call site so failures point at the offending case.
    void willThrow(T, size_t line = __LINE__)(T arg, string msg)
    {
        auto e = collectException(regex(arg));
        // collectException yields null when nothing was thrown: report that
        // as a test failure instead of dereferencing null below.
        assert(e !is null, to!string(line) ~ ": expected an exception for " ~ to!string(arg));
        assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg);
    }
    willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class");
    willThrow([r"[\", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-\", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"\", r"123"], "invalid escape sequence");
}

// https://issues.dlang.org/show_bug.cgi?id=17668
@safe unittest
{
    import std.algorithm.searching;
    auto e = collectException!RegexException(regex(q"<[^]>"));
    // null means the pattern was accepted; fail cleanly rather than crash on e.msg
    assert(e !is null, "expected a RegexException for the pattern [^]");
    assert(e.msg.canFind("no operand for '^'"), e.msg);
}

// https://issues.dlang.org/show_bug.cgi?id=17673
@safe unittest
{
    string str = `<">`;
    string[] regexps = ["abc", "\"|x"];
    auto regexp = regex(regexps);
    auto c = matchFirst(str, regexp);
    assert(c);
    assert(c.whichPattern == 2);
}

// https://issues.dlang.org/show_bug.cgi?id=18692
@safe unittest
{
    auto rx = regex("()()()");
    auto ma = "".matchFirst(rx);
    auto ma2 = ma;
    ma = ma2;
    assert(ma[1] == "");
}