xref: /netbsd-src/external/gpl3/gcc/dist/libphobos/src/std/regex/package.d (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /++
2   $(LINK2 https://en.wikipedia.org/wiki/Regular_expression, Regular expressions)
3   are a commonly used method of pattern matching
4   on strings, with $(I regex) being a catchy word for a pattern in this domain
5   specific language. Typical problems usually solved by regular expressions
6   include validation of user input and the ubiquitous find $(AMP) replace
7   in text processing utilities.
8 
9 $(SCRIPT inhibitQuickIndex = 1;)
10 $(DIVC quickindex,
11 $(BOOKTABLE,
12 $(TR $(TH Category) $(TH Functions))
13 $(TR $(TD Matching) $(TD
14         $(LREF bmatch)
15         $(LREF match)
16         $(LREF matchAll)
17         $(LREF matchFirst)
18 ))
19 $(TR $(TD Building) $(TD
20         $(LREF ctRegex)
21         $(LREF escaper)
22         $(LREF regex)
23 ))
24 $(TR $(TD Replace) $(TD
25         $(LREF replace)
26         $(LREF replaceAll)
27         $(LREF replaceAllInto)
28         $(LREF replaceFirst)
29         $(LREF replaceFirstInto)
30 ))
31 $(TR $(TD Split) $(TD
32         $(LREF split)
33         $(LREF splitter)
34 ))
35 $(TR $(TD Objects) $(TD
36         $(LREF Captures)
37         $(LREF Regex)
38         $(LREF RegexException)
39         $(LREF RegexMatch)
40         $(LREF Splitter)
41         $(LREF StaticRegex)
42 ))
43 ))
44 
45   $(SECTION Synopsis)
46   ---
47   import std.regex;
48   import std.stdio;
49   void main()
50   {
51       // Print out all possible dd/mm/yy(yy) dates found in user input.
52       auto r = regex(r"\b[0-9][0-9]?/[0-9][0-9]?/[0-9][0-9](?:[0-9][0-9])?\b");
53       foreach (line; stdin.byLine)
54       {
55         // matchAll() returns a range that can be iterated
56         // to get all subsequent matches.
57         foreach (c; matchAll(line, r))
58             writeln(c.hit);
59       }
60   }
61   ...
62 
63   // Create a static regex at compile-time, which contains fast native code.
64   auto ctr = ctRegex!(`^.*/([^/]+)/?$`);
65 
66   // It works just like a normal regex:
67   auto c2 = matchFirst("foo/bar", ctr);   // First match found here, if any
68   assert(!c2.empty);   // Be sure to check if there is a match before examining contents!
69   assert(c2[1] == "bar");   // Captures is a range of submatches: 0 = full match.
70 
71   ...
72   // multi-pattern regex
  auto multi = regex([`(\d+),\d+`, `([a-z]+):(\d+)`]);
74   auto m = "abc:43 12,34".matchAll(multi);
75   assert(m.front.whichPattern == 2);
76   assert(m.front[1] == "abc");
77   assert(m.front[2] == "43");
78   m.popFront();
79   assert(m.front.whichPattern == 1);
80   assert(m.front[1] == "12");
81   ...
82 
83   // The result of `matchAll/matchFirst` is directly testable with `if/assert/while`,
84   // e.g. test if a string consists of letters only:
85   assert(matchFirst("LettersOnly", `^\p{L}+$`));
86 
87   // And we can take advantage of the ability to define a variable in the $(LINK2 https://dlang.org/spec/statement.html#IfCondition `IfCondition`):
88   if (const auto captures = matchFirst("At l34st one digit, but maybe more...", `((\d)(\d*))`))
89   {
90       assert(captures[2] == "3");
91       assert(captures[3] == "4");
92       assert(captures[1] == "34");
93   }
94   ---
95 
96   $(SECTION Syntax and general information)
97   The general usage guideline is to keep regex complexity on the side of simplicity,
98   as its capabilities reside in purely character-level manipulation.
99   As such it's ill-suited for tasks involving higher level invariants
100   like matching an integer number $(U bounded) in an [a,b] interval.
  Checks of this sort are better addressed by additional post-processing.
102 
103   The basic syntax shouldn't surprise experienced users of regular expressions.
104   For an introduction to `std.regex` see a
105   $(HTTP dlang.org/regular-expression.html, short tour) of the module API
106   and its abilities.
107 
108   There are other web resources on regular expressions to help newcomers,
109   and a good $(HTTP www.regular-expressions.info, reference with tutorial)
110   can easily be found.
111 
112   This library uses a remarkably common ECMAScript syntax flavor
113   with the following extensions:
114   $(UL
115     $(LI Named subexpressions, with Python syntax. )
    $(LI Unicode properties such as Scripts, Blocks and common binary properties e.g. Alphabetic, White_Space, Hex_Digit etc.)
    $(LI Arbitrary length and complexity lookbehind, including lookahead in lookbehind and vice versa.)
118   )
119 
120   $(REG_START Pattern syntax )
121   $(I std.regex operates on codepoint level,
122     'character' in this table denotes a single Unicode codepoint.)
123   $(REG_TABLE
124     $(REG_TITLE Pattern element, Semantics )
125     $(REG_TITLE Atoms, Match single characters )
126     $(REG_ROW any character except [{|*+?()^$, Matches the character itself. )
127     $(REG_ROW ., In single line mode matches any character.
128       Otherwise it matches any character except '\n' and '\r'. )
129     $(REG_ROW [class], Matches a single character
130       that belongs to this character class. )
131     $(REG_ROW [^class], Matches a single character that
132       does $(U not) belong to this character class.)
133     $(REG_ROW \cC, Matches the control character corresponding to letter C)
134     $(REG_ROW \xXX, Matches a character with hexadecimal value of XX. )
135     $(REG_ROW \uXXXX, Matches a character  with hexadecimal value of XXXX. )
136     $(REG_ROW \U00YYYYYY, Matches a character with hexadecimal value of YYYYYY. )
137     $(REG_ROW \f, Matches a formfeed character. )
138     $(REG_ROW \n, Matches a linefeed character. )
139     $(REG_ROW \r, Matches a carriage return character. )
140     $(REG_ROW \t, Matches a tab character. )
141     $(REG_ROW \v, Matches a vertical tab character. )
142     $(REG_ROW \d, Matches any Unicode digit. )
143     $(REG_ROW \D, Matches any character except Unicode digits. )
144     $(REG_ROW \w, Matches any word character (note: this includes numbers).)
145     $(REG_ROW \W, Matches any non-word character.)
146     $(REG_ROW \s, Matches whitespace, same as \p{White_Space}.)
147     $(REG_ROW \S, Matches any character except those recognized as $(I \s ). )
148     $(REG_ROW \\\\, Matches \ character. )
149     $(REG_ROW \c where c is one of [|*+?(), Matches the character c itself. )
150     $(REG_ROW \p{PropertyName}, Matches a character that belongs
151         to the Unicode PropertyName set.
152       Single letter abbreviations can be used without surrounding {,}. )
153     $(REG_ROW  \P{PropertyName}, Matches a character that does not belong
154         to the Unicode PropertyName set.
155       Single letter abbreviations can be used without surrounding {,}. )
156     $(REG_ROW \p{InBasicLatin}, Matches any character that is part of
157           the BasicLatin Unicode $(U block).)
158     $(REG_ROW \P{InBasicLatin}, Matches any character except ones in
159           the BasicLatin Unicode $(U block).)
160     $(REG_ROW \p{Cyrillic}, Matches any character that is part of
161         Cyrillic $(U script).)
162     $(REG_ROW \P{Cyrillic}, Matches any character except ones in
163         Cyrillic $(U script).)
164     $(REG_TITLE Quantifiers, Specify repetition of other elements)
165     $(REG_ROW *, Matches previous character/subexpression 0 or more times.
166       Greedy version - tries as many times as possible.)
167     $(REG_ROW *?, Matches previous character/subexpression 0 or more times.
168       Lazy version  - stops as early as possible.)
169     $(REG_ROW +, Matches previous character/subexpression 1 or more times.
170       Greedy version - tries as many times as possible.)
171     $(REG_ROW +?, Matches previous character/subexpression 1 or more times.
172       Lazy version  - stops as early as possible.)
173     $(REG_ROW ?, Matches previous character/subexpression 0 or 1 time.
174       Greedy version - tries as many times as possible.)
175     $(REG_ROW ??, Matches previous character/subexpression 0 or 1 time.
176       Lazy version  - stops as early as possible.)
177     $(REG_ROW {n}, Matches previous character/subexpression exactly n times. )
178     $(REG_ROW {n$(COMMA)}, Matches previous character/subexpression n times or more.
179       Greedy version - tries as many times as possible. )
180     $(REG_ROW {n$(COMMA)}?, Matches previous character/subexpression n times or more.
181       Lazy version - stops as early as possible.)
182     $(REG_ROW {n$(COMMA)m}, Matches previous character/subexpression n to m times.
183       Greedy version - tries as many times as possible, but no more than m times. )
184     $(REG_ROW {n$(COMMA)m}?, Matches previous character/subexpression n to m times.
      Lazy version - stops as early as possible, but no less than n times.)
186     $(REG_TITLE Other, Subexpressions $(AMP) alternations )
187     $(REG_ROW (regex),  Matches subexpression regex,
188       saving matched portion of text for later retrieval. )
189     $(REG_ROW (?#comment), An inline comment that is ignored while matching.)
190     $(REG_ROW (?:regex), Matches subexpression regex,
191       $(U not) saving matched portion of text. Useful to speed up matching. )
192     $(REG_ROW A|B, Matches subexpression A, or failing that, matches B. )
193     $(REG_ROW (?P$(LT)name$(GT)regex), Matches named subexpression
194         regex labeling it with name 'name'.
195         When referring to a matched portion of text,
196         names work like aliases in addition to direct numbers.
197      )
198     $(REG_TITLE Assertions, Match position rather than character )
    $(REG_ROW ^, Matches at the beginning of input or line (in multiline mode).)
200     $(REG_ROW $, Matches at the end of input or line (in multiline mode). )
201     $(REG_ROW \b, Matches at word boundary. )
202     $(REG_ROW \B, Matches when $(U not) at word boundary. )
203     $(REG_ROW (?=regex), Zero-width lookahead assertion.
204         Matches at a point where the subexpression
205         regex could be matched starting from the current position.
206       )
207     $(REG_ROW (?!regex), Zero-width negative lookahead assertion.
208         Matches at a point where the subexpression
209         regex could $(U not) be matched starting from the current position.
210       )
211     $(REG_ROW (?<=regex), Zero-width lookbehind assertion. Matches at a point
212         where the subexpression regex could be matched ending
213         at the current position (matching goes backwards).
214       )
215     $(REG_ROW  (?<!regex), Zero-width negative lookbehind assertion.
216       Matches at a point where the subexpression regex could $(U not)
217       be matched ending at the current position (matching goes backwards).
218      )
219   )
220 
221   $(REG_START Character classes )
222   $(REG_TABLE
223     $(REG_TITLE Pattern element, Semantics )
224     $(REG_ROW Any atom, Has the same meaning as outside of a character class,
225       except for ] which must be written as \\])
226     $(REG_ROW a-z, Includes characters a, b, c, ..., z. )
227     $(REG_ROW [a||b]$(COMMA) [a--b]$(COMMA) [a~~b]$(COMMA) [a$(AMP)$(AMP)b],
228      Where a, b are arbitrary classes, means union, set difference,
229      symmetric set difference, and intersection respectively.
230      $(I Any sequence of character class elements implicitly forms a union.) )
231   )
232 
233   $(REG_START Regex flags )
234   $(REG_TABLE
235     $(REG_TITLE Flag, Semantics )
236     $(REG_ROW g, Global regex, repeat over the whole input. )
237     $(REG_ROW i, Case insensitive matching. )
238     $(REG_ROW m, Multi-line mode, match ^, $ on start and end line separators
239        as well as start and end of input.)
240     $(REG_ROW s, Single-line mode, makes . match '\n' and '\r' as well. )
241     $(REG_ROW x, Free-form syntax, ignores whitespace in pattern,
242       useful for formatting complex regular expressions. )
243   )
244 
245   $(SECTION Unicode support)
246 
247   This library provides full Level 1 support* according to
248     $(HTTP unicode.org/reports/tr18/, UTS 18). Specifically:
249   $(UL
250     $(LI 1.1 Hex notation via any of \uxxxx, \U00YYYYYY, \xZZ.)
251     $(LI 1.2 Unicode properties.)
252     $(LI 1.3 Character classes with set operations.)
253     $(LI 1.4 Word boundaries use the full set of "word" characters.)
254     $(LI 1.5 Using simple casefolding to match case
255         insensitively across the full range of codepoints.)
256     $(LI 1.6 Respecting line breaks as any of
257         \u000A | \u000B | \u000C | \u000D | \u0085 | \u2028 | \u2029 | \u000D\u000A.)
258     $(LI 1.7 Operating on codepoint level.)
259   )
260   *With exception of point 1.1.1, as of yet, normalization of input
261     is expected to be enforced by user.
262 
263     $(SECTION Replace format string)
264 
265     A set of functions in this module that do the substitution rely
266     on a simple format to guide the process. In particular the table below
267     applies to the `format` argument of
268     $(LREF replaceFirst) and $(LREF replaceAll).
269 
270     The format string can reference parts of match using the following notation.
271     $(REG_TABLE
272         $(REG_TITLE Format specifier, Replaced by )
273         $(REG_ROW $(DOLLAR)$(AMP), the whole match. )
274         $(REG_ROW $(DOLLAR)$(BACKTICK), part of input $(I preceding) the match. )
275         $(REG_ROW $', part of input $(I following) the match. )
276         $(REG_ROW $$, '$' character. )
277         $(REG_ROW \c $(COMMA) where c is any character, the character c itself. )
278         $(REG_ROW \\\\, '\\' character. )
279         $(REG_ROW $(DOLLAR)1 .. $(DOLLAR)99, submatch number 1 to 99 respectively. )
280     )
281 
282   $(SECTION Slicing and zero memory allocations orientation)
283 
284   All matches returned by pattern matching functionality in this library
285     are slices of the original input. The notable exception is the `replace`
286     family of functions  that generate a new string from the input.
287 
288     In cases where producing the replacement is the ultimate goal
289     $(LREF replaceFirstInto) and $(LREF replaceAllInto) could come in handy
290     as functions that  avoid allocations even for replacement.
291 
292     Copyright: Copyright Dmitry Olshansky, 2011-
293 
294   License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
295 
296   Authors: Dmitry Olshansky,
297 
298     API and utility constructs are modeled after the original `std.regex`
299   by Walter Bright and Andrei Alexandrescu.
300 
301   Source: $(PHOBOSSRC std/regex/package.d)
302 
303 Macros:
304     REG_ROW = $(TR $(TD $(I $1 )) $(TD $+) )
305     REG_TITLE = $(TR $(TD $(B $1)) $(TD $(B $2)) )
306     REG_TABLE = <table border="1" cellspacing="0" cellpadding="5" > $0 </table>
307     REG_START = <h3><div align="center"> $0 </div></h3>
308     SECTION = <h3><a id="$1" href="#$1" class="anchor">$0</a></h3>
309     S_LINK = <a href="#$1">$+</a>
310  +/
311 module std.regex;
312 
313 import std.range.primitives, std.traits;
314 import std.regex.internal.ir;
315 import std.typecons : Flag, Yes, No;
316 
/++
    A `Regex` object stores a regular expression pattern in compiled form.

    Instances are obtained by calling `regex`. Keeping such an object around
    is the intended way to cache and store frequently used regular expressions.

    Example:

    Check whether the object holds a compiled pattern at all.
    ---
    Regex!char r;
    assert(r.empty);
    r = regex(""); // Note: "" is a valid regex pattern.
    assert(!r.empty);
    ---

    Get a range of all the named captures in the regex.
    ---
    import std.range;
    import std.algorithm;

    auto re = regex(`(?P<name>\w+) = (?P<var>\d+)`);
    auto nc = re.namedCaptures;
    static assert(isRandomAccessRange!(typeof(nc)));
    assert(!nc.empty);
    assert(nc.length == 2);
    assert(nc.equal(["name", "var"]));
    assert(nc[0] == "name");
    assert(nc[1..$].equal(["var"]));
    ---
+/
public alias Regex(Char) = std.regex.internal.ir.Regex!Char;
350 
/++
    `StaticRegex` used to denote a `Regex` object that contains D code
    specially generated at compile-time to speed up matching.

    It is no longer used as a separate type and is kept only as an alias
    to $(LREF Regex) for backwards compatibility.
+/
public alias StaticRegex = Regex;
358 
/++
    Compile a regular expression pattern for later execution.

    Returns: `Regex` object that works on inputs having
    the same character width as `pattern`.

    Params:
    pattern = A single regular expression to match.
    patterns = An array of regular expression strings; it must contain
        at least one element.
        The resulting `Regex` object will match any expression;
        use $(LREF whichPattern) to know which.
    flags = The _attributes (g, i, m, s and x accepted)

    Throws: `RegexException` if there were any errors during compilation.
+/
@trusted public auto regex(S : C[], C)(const S[] patterns, const(char)[] flags="")
if (isSomeString!(S))
{
    import std.array : appender;
    import std.functional : memoize;
    enum cacheSize = 8; //TODO: invent nice interface to control regex caching

    const(C)[] combined;
    if (patterns.length <= 1)
    {
        // A lone pattern is compiled as is. Element 0 is indexed
        // unconditionally, hence the non-empty requirement on `patterns`.
        combined = patterns[0];
    }
    else
    {
        // Glue the alternatives together as
        //   (?:p0\M0)\M0|(?:p1\M1)\M1|...
        // where Mi is a per-pattern marker code point counted from privateUseStart.
        auto sink = appender!S();
        foreach (idx, alternative; patterns)
        {
            auto marker = cast(dchar)(privateUseStart + idx);
            if (idx > 0)
                sink.put("|");
            sink.put("(?:");
            sink.put(alternative);
            // terminator for the pattern
            // to detect if the pattern unexpectedly ends
            sink.put("\\");
            sink.put(marker);
            sink.put(")");
            // another one to return correct whichPattern
            // for all of potential alternatives in this pattern
            sink.put("\\");
            sink.put(marker);
        }
        combined = sink.data;
    }

    // During CTFE compile directly; at run time go through the memoized
    // compiler so that recently used patterns are not rebuilt.
    if (__ctfe)
        return regexImpl(combined, flags);
    return memoize!(regexImpl!S, cacheSize)(combined, flags);
}
408 
///ditto
@trusted public auto regex(S)(S pattern, const(char)[] flags="")
if (isSomeString!(S))
{
    // Wrap the single pattern in a one-element array and defer to the
    // multi-pattern overload.
    auto single = [pattern];
    return regex(single, flags);
}
415 
///
@system unittest
{
    // Runs the multi-pattern example for one string type `S`.
    void test(S)()
    {
        // multi-pattern regex example
        S[] arr = [`([a-z]+):(\d+)`, `(\d+),\d+`];
        auto multi = regex(arr); // multi regex
        S str = "abc:43 12,34";
        auto m = str.matchAll(multi);
        // "abc:43" is found by the first pattern of `arr`
        assert(m.front.whichPattern == 1);
        assert(m.front[1] == "abc");
        assert(m.front[2] == "43");
        m.popFront();
        // "12,34" is found by the second pattern of `arr`
        assert(m.front.whichPattern == 2);
        assert(m.front[1] == "12");
    }

    import std.meta : AliasSeq;
    // Instantiate for every string width, each in mutable/const/immutable form.
    static foreach (C; AliasSeq!(string, wstring, dstring))
        // Test with const array of patterns - see https://issues.dlang.org/show_bug.cgi?id=20301
        static foreach (S; AliasSeq!(C, const C, immutable C))
            test!S();
}
440 
// The string form of a compiled regex must stay compact and must mention
// both the pattern and the flags it was built with.
@system unittest
{
    import std.conv : to;
    import std.string : indexOf;

    immutable pattern = "s+";
    auto regexString = to!string(regex(pattern, "U"));
    assert(regexString.length <= pattern.length + 100, "String representation shouldn't be unreasonably bloated.");
    assert(indexOf(regexString, "s+") >= 0, "String representation should include pattern.");
    assert(indexOf(regexString, 'U') >= 0, "String representation should include flags.");
}
452 
// Uncached worker behind `regex`: feeds `pattern` and `flags` to the parser
// and returns the program it produced.
public auto regexImpl(S)(const S pattern, const(char)[] flags="")
if (isSomeString!(typeof(pattern)))
{
    import std.regex.internal.parser : CodeGen, Parser;

    // The parser is instantiated on the unqualified string type.
    alias PatternString = Unqual!(typeof(pattern));
    auto compiler = Parser!(PatternString, CodeGen)(pattern, flags);
    auto program = compiler.program;
    return program;
}
461 
462 
// Thin wrapper around a pointer to an immutable `Regex!Char`.
// `ctRegexImpl` exposes an instance of it as `wrapper`.
private struct CTRegexWrapper(Char)
{
    // the wrapped regex
    private immutable(Regex!Char)* re;

    // allow code that expects mutable Regex to still work
    // we stay "logically const"
    // (the cast drops `immutable` from the pointee, which is why this is @trusted)
    @property @trusted ref getRe() const { return *cast(Regex!Char*) re; }
    // makes the wrapper implicitly usable as the `Regex!Char` returned by getRe
    alias getRe this;
}
472 
// Implementation detail of `ctRegex`: builds a static immutable regex from
// `pattern`, obtains matcher source text for it from `ctGenRegExCode`, mixes
// that text into `func`, and exposes the final regex through `wrapper`.
template ctRegexImpl(alias pattern, string flags=[])
{
    import std.regex.internal.backtracking, std.regex.internal.parser;
    // the pattern compiled through the ordinary `regex` entry point
    static immutable r = cast(immutable) regex(pattern, flags);
    alias Char = BasicElementOf!(typeof(pattern));
    // D source text that becomes the body of `func` below
    enum source = ctGenRegExCode(r);
    @trusted pure bool func(BacktrackingMatcher!Char matcher)
    {
        // compile with -debug=std_regex_ctr to print the generated source
        debug(std_regex_ctr) pragma(msg, source);
        cast(void) matcher;
        mixin(source);
    }
    // `r` rebound to a factory that is parameterized with `func`
    static immutable staticRe =
        cast(immutable) r.withFactory(new CtfeFactory!(BacktrackingMatcher, Char, func));
    // the value handed out by `ctRegex`
    enum wrapper = CTRegexWrapper!Char(&staticRe);
}
489 
// A `ctRegex` value must be accepted where a `StaticRegex!char` parameter
// is expected.
@safe pure unittest
{
    // test compat for logical const workaround
    static void test(StaticRegex!char)
    {
    }
    enum re = ctRegex!``;
    test(re);
}
499 
// A variable holding a `ctRegex` value can be matched with and reassigned
// to a different `ctRegex`.
@safe pure unittest
{
    auto re = ctRegex!`foo`;
    assert(matchFirst("foo", re));

    // test reassignment
    re = ctRegex!`bar`;
    assert(matchFirst("bar", re));
    assert(!matchFirst("bar", ctRegex!`foo`));
}
510 
/++
    Compile regular expression using CTFE
    and generate optimized native machine code for matching it.

    Returns: `StaticRegex` object for faster matching. The returned value
    converts implicitly to $(LREF Regex), of which `StaticRegex` is an alias.

    Params:
    pattern = Regular expression
    flags = The _attributes (g, i, m, s and x accepted)
+/
public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).wrapper;
522 
// True when `RegEx` can serve as the regex for an input of type `R`: ignoring
// qualifiers it is `Regex` or `StaticRegex` over the basic element type of
// `R`, or it converts implicitly to a const `Regex` over that element type.
enum isRegexFor(RegEx, R) = is(immutable RegEx == immutable Regex!(BasicElementOf!R))
     || is(RegEx : const(Regex!(BasicElementOf!R)))
     || is(immutable RegEx == immutable StaticRegex!(BasicElementOf!R));
526 
527 
/++
    `Captures` object contains submatches captured during a call
    to `match` or iteration over `RegexMatch` range.

    First element of range is the whole match.
+/
@trusted public struct Captures(R)
if (isSomeString!R)
{//@trusted because of union inside
    alias DataIndex = size_t;
    alias String = R;
    alias Store = SmallFixedArray!(Group!DataIndex, 3);
private:
    import std.conv : text;
    Store matches;              // begin/end positions per group; matches[0] is the whole match
    const(NamedGroup)[] _names; // table used to resolve named groups to indices
    R _input;                   // the input; every returned match is a slice of it
    int _nMatch;                // 0 when nothing matched, otherwise the value reported by whichPattern
    uint _f, _b;                // current range window [_f, _b) over `matches`

    // Prepares storage for `n` groups over `input`; no match is recorded yet.
    this(R input, uint n, const(NamedGroup)[] named)
    {
        _input = input;
        _names = named;
        matches = Store(n);
        _b = n;
        _f = 0;
    }

    // Same as above, taking input, group names and group count from `rmatch`.
    this(ref RegexMatch!R rmatch)
    {
        _input = rmatch._input;
        _names = rmatch._engine.pattern.dict;
        immutable n = rmatch._engine.pattern.ngroup;
        matches = Store(n);
        _b = n;
        _f = 0;
    }

    // Slice of the input covered by group `index` (absolute index, not
    // relative to `_f`), or null when that group's entry tests false.
    inout(R) getMatch(size_t index) inout
    {
        auto m = &matches[index];
        return *m ? _input[m.begin .. m.end] : null;
    }

public:
    ///Slice of input prior to the match. The whole input if there was no match.
    @property R pre()
    {
        return _nMatch == 0 ? _input[] : _input[0 .. matches[0].begin];
    }

    ///Slice of input immediately after the match. The whole input if there was no match.
    @property R post()
    {
        return _nMatch == 0 ? _input[] : _input[matches[0].end .. $];
    }

    ///Slice of matched portion of input.
    @property R hit()
    {
        assert(_nMatch, "attempted to get hit of an empty match");
        return _input[matches[0].begin .. matches[0].end];
    }

    ///Range interface.
    @property R front()
    {
        assert(_nMatch, "attempted to get front of an empty match");
        return getMatch(_f);
    }

    ///ditto
    @property R back()
    {
        assert(_nMatch, "attempted to get back of an empty match");
        return getMatch(_b - 1);
    }

    ///ditto
    void popFront()
    {
        assert(!empty);
        ++_f;
    }

    ///ditto
    void popBack()
    {
        assert(!empty);
        --_b;
    }

    ///ditto
    @property bool empty() const { return _nMatch == 0 || _f >= _b; }

    ///ditto
    inout(R) opIndex()(size_t i) inout
    {
        // `i` is relative to the current front of the range
        assert(_f + i < _b,text("requested submatch number ", i," is out of range"));
        return getMatch(_f + i);
    }

    /++
        Explicit cast to bool.
        Useful as a shorthand for !(x.empty) in if and assert statements.

        ---
        import std.regex;

        assert(!matchFirst("nothing", "something"));
        ---
    +/

    @safe bool opCast(T:bool)() const nothrow { return _nMatch != 0; }

    /++
        Number of the pattern that matched, counting from 1 for the first pattern.
        Returns 0 on no match.
    +/

    @safe @property int whichPattern() const nothrow { return _nMatch; }

    ///
    @system unittest
    {
        import std.regex;
        assert(matchFirst("abc", "[0-9]+", "[a-z]+").whichPattern == 2);
    }

    /++
        Lookup named submatch.

        ---
        import std.regex;
        import std.range;

        auto c = matchFirst("a = 42;", regex(`(?P<var>\w+)\s*=\s*(?P<value>\d+);`));
        assert(c["var"] == "a");
        assert(c["value"] == "42");
        popFrontN(c, 2);
        //named groups are unaffected by range primitives
        assert(c["var"] =="a");
        assert(c.front == "42");
        ---
    +/
    R opIndex(String)(String i) /*const*/ //@@@BUG@@@
        if (isSomeString!String)
    {
        // the looked-up index is absolute, so popFront/popBack do not affect it
        size_t index = lookupNamedGroup(_names, i);
        return getMatch(index);
    }

    ///Number of matches in this object.
    @property size_t length() const { return _nMatch == 0 ? 0 : _b - _f;  }

    ///A hook for compatibility with original std.regex.
    @property ref captures(){ return this; }
}
687 
///
@system unittest
{
    import std.range.primitives : popFrontN;

    auto c = matchFirst("@abc#", regex(`(\w)(\w)(\w)`));
    assert(c.pre == "@"); // Part of input preceding match
    assert(c.post == "#"); // Immediately after match
    assert(c.hit == c[0] && c.hit == "abc"); // The whole match
    assert(c[2] == "b");
    assert(c.front == "abc");
    c.popFront(); // drops the whole match, leaving the three groups
    assert(c.front == "a");
    assert(c.back == "c");
    c.popBack();
    assert(c.back == "b");
    popFrontN(c, 2);
    assert(c.empty);

    // A failed match converts to false.
    assert(!matchFirst("nothing", "something"));

    // Captures that are not matched will be null.
    c = matchFirst("ac", regex(`a(b)?c`));
    assert(c);
    assert(!c[1]);
}
714 
// Assigning a match result to an existing `Captures` and casting the
// assignment expression to bool must work inside a larger condition.
@system unittest
{
    Captures!string c;
    string s = "abc";
    assert(cast(bool)(c = matchFirst(s, regex("d")))
        || cast(bool)(c = matchFirst(s, regex("a"))));
}
722 
// https://issues.dlang.org/show_bug.cgi?id=19979
// Groups that matched an empty string must be non-null with zero length,
// while a group that did not take part in the match must be null.
@system unittest
{
    auto c = matchFirst("bad", regex(`(^)(not )?bad($)`));
    assert(c[0] && c[0].length == "bad".length);
    assert(c[1] && !c[1].length); // (^) matched, but is empty
    assert(!c[2]);                // (not )? did not participate
    assert(c[3] && !c[3].length); // ($) matched, but is empty
}
732 
/++
    A regex engine state, as returned by `match` family of functions.

    Effectively it's a forward range of Captures!R, produced
    by lazily searching for matches in a given input.
+/
@trusted public struct RegexMatch(R)
if (isSomeString!R)
{
    import std.typecons : Rebindable;
private:
    alias Char = BasicElementOf!R;
    Matcher!Char _engine; // matching engine; its reference count is managed through `_factory`
    Rebindable!(const MatcherFactory!Char) _factory; // creates, duplicates and ref-counts engines
    R _input;
    Captures!R _captures; // captures of the current match, i.e. what `front` returns

    // Creates an engine for `prog` over `input` and immediately searches
    // for the first match.
    this(RegEx)(R input, RegEx prog)
    {
        // NOTE(review): `enforce` does not appear to be used in this constructor.
        import std.exception : enforce;
        _input = input;
        // use the regex's own factory when it has one, the default one otherwise
        if (prog.factory is null) _factory = defaultFactory!Char(prog);
        else _factory = prog.factory;
        _engine = _factory.create(prog, input);
        assert(_engine.refCount == 1);
        _captures = Captures!R(this);
        // run the engine, letting it fill the captures' group storage
        _captures.matches.mutate((slice) pure { _captures._nMatch = _engine.match(slice); });
    }

public:
    // Copies share the engine; each copy holds one reference to it.
    this(this)
    {
        if (_engine) _factory.incRef(_engine);
    }

    // Releases this object's reference to the engine.
    ~this()
    {
        if (_engine) _factory.decRef(_engine);
    }

    ///Shorthands for front.pre, front.post, front.hit.
    @property R pre()
    {
        return _captures.pre;
    }

    ///ditto
    @property R post()
    {
        return _captures.post;
    }

    ///ditto
    @property R hit()
    {
        return _captures.hit;
    }

    /++
        Functionality for processing subsequent matches of global regexes via range interface:
        ---
        import std.regex;
        auto m = matchAll("Hello, world!", regex(`\w+`));
        assert(m.front.hit == "Hello");
        m.popFront();
        assert(m.front.hit == "world");
        m.popFront();
        assert(m.empty);
        ---
    +/
    @property inout(Captures!R) front() inout
    {
        return _captures;
    }

    ///ditto
    void popFront()
    {
        // NOTE(review): `enforce` does not appear to be used in this function.
        import std.exception : enforce;
        // CoW - if refCount is not 1, we are aliased by somebody else
        if (_engine.refCount != 1)
        {
            // we create a new engine & abandon this reference
            auto old = _engine;
            _engine = _factory.dup(old, _input);
            _factory.decRef(old);
        }
        // search for the next match, overwriting the current captures
        _captures.matches.mutate((slice) { _captures._nMatch = _engine.match(slice); });
    }

    ///ditto
    auto save(){ return this; }

    ///Test if this match object is empty.
    @property bool empty() const { return _captures._nMatch == 0; }

    ///Same as !(x.empty), provided for convenience in conditional statements.
    T opCast(T:bool)(){ return !empty; }

    /// Same as .front, provided for compatibility with original std.regex.
    @property inout(Captures!R) captures() inout { return _captures; }
}
835 
// Matches `prog` against `input` once and returns the resulting captures.
// A single engine is kept in static storage and re-armed as long as
// consecutive calls use the same pattern text and flags.
private auto matchOnceImpl(RegEx, R)(R input, const auto ref RegEx prog) @trusted
{
    alias Char = BasicElementOf!R;
    // Identifies the regex the cached engine was built for.
    static struct Key
    {
        immutable(Char)[] pattern;
        uint flags;
    }
    static Key cachedFor = Key("", -1);
    static Matcher!Char cachedEngine;

    auto factory = prog.factory !is null ? prog.factory : defaultFactory!Char(prog);
    auto wanted = Key(prog.pattern, prog.flags);

    Matcher!Char matcher;
    if (cachedFor != wanted)
    {
        // Cache miss: build a fresh engine and make it the cached one.
        matcher = factory.create(prog, input);
        if (cachedEngine) factory.decRef(cachedEngine); // destroy cached engine *after* building a new one
        cachedEngine = matcher;
        cachedFor = wanted;
    }
    else
    {
        // Cache hit: reuse the stored engine on the new input.
        matcher = cachedEngine;
        matcher.rearm(input);
    }

    auto result = Captures!R(input, prog.ngroup, prog.dict);
    result.matches.mutate((slice) pure { result._nMatch = matcher.match(slice); });
    return result;
}
865 
// matchOnce is a @safe, pure front for matchOnceImpl. Presenting it as pure is
// acceptable because the static mutable variables of matchOnceImpl only serve as
// a cache for the key and the character matcher. The wrapping is done with
// static nested functions, which avoids delegates and GC.
private @safe auto matchOnce(RegEx, R)(R input, const auto ref RegEx prog) pure
{
    // plain forwarding function whose address can be taken
    static auto forward(R input, const ref RegEx prog)
    {
        return matchOnceImpl(input, prog);
    }

    // calls `forward` through a function pointer that is assumed to be pure
    static @trusted auto forwardAsPure(R input, const ref RegEx prog)
    {
        return assumePureFunction(&forward)(input, prog);
    }

    return forwardAsPure(input, prog);
}
884 
// Builds a RegexMatch over `input` using `re` with the global option added to its flags.
private auto matchMany(RegEx, R)(R input, auto ref RegEx re) @safe
{
    alias Matches = RegexMatch!R;
    return Matches(input, re.withFlags(re.flags | RegexOption.global));
}
889 
@system unittest
{
    // sanity checks for new API
    auto pattern = regex("abc");
    assert(!matchOnce("abc", pattern).empty);
    assert(matchOnce("abc", pattern)[0] == "abc");
}
897 
// https://issues.dlang.org/show_bug.cgi?id=18135
@system unittest
{
    // a struct wrapping a RegexMatch must be assignable and comparable
    static struct Wrapper { RegexMatch!string m; }
    Wrapper w;
    w = Wrapper();
    assert(w == w);
}
906 
// True when `fun` can be called with a single `Captures!R` argument.
private enum isReplaceFunctor(alias fun, R) =
    __traits(compiles, (Captures!R caps) { fun(caps); });
909 
// the lowest level - writes the text before the match, the replacement and
// the text after the match into the sink; without a match `input` is copied
private @trusted void replaceCapturesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T captures)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    if (!captures.empty)
    {
        sink.put(captures.pre);
        // a hack to get around bogus errors, should be simply output(captures, sink)
        // "is a nested function and cannot be accessed from"
        static if (isReplaceFunctor!(output, R))
        {
            sink.put(output(captures)); //"mutator" type of function
        }
        else
        {
            output(captures, sink); //"output" type of function
        }
        sink.put(captures.post);
    }
    else
    {
        // nothing matched
        sink.put(input);
    }
}
929 
// same as replaceCapturesInto, but for a whole range of captures
private void replaceMatchesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T matches)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    // index into `input` just past the most recently replaced match
    size_t consumed = 0;
    foreach (cap; matches)
    {
        // text between the previous match and this one
        sink.put(cap.pre[consumed .. $]);
        // same hack, see replaceCapturesInto
        static if (isReplaceFunctor!(output, R))
        {
            sink.put(output(cap)); //"mutator" type of function
        }
        else
        {
            output(cap, sink); //"output" type of function
        }
        consumed = cap.pre.length + cap.hit.length;
    }
    // whatever follows the last match
    sink.put(input[consumed .. $]);
}
948 
// a general skeleton of replaceFirst: returns `input` itself when nothing
// matches, otherwise a newly built string with the first match replaced
private R replaceFirstWith(alias output, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;
    auto firstMatch = matchFirst(input, re);
    if (!firstMatch.empty)
    {
        auto buffer = appender!(R)();
        replaceCapturesInto!output(buffer, input, firstMatch);
        return buffer.data;
    }
    return input;
}
961 
// the replaceAll counterpart of replaceFirstWith
// the method parameter allows old API to ride on the back of the new one
private R replaceAllWith(alias output,
        alias method=matchAll, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;
    auto found = method(input, re); //inout(C)[] fails
    if (!found.empty)
    {
        auto buffer = appender!(R)();
        replaceMatchesInto!output(buffer, input, found);
        return buffer.data;
    }
    // no match at all: hand back the input itself
    return input;
}
976 
977 
/++
    Start matching `input` to regex pattern `re`,
    using Thompson NFA matching scheme.

    The use of this function is $(RED discouraged) - use either of
    $(LREF matchAll) or $(LREF matchFirst).

    Delegating the kind of operation to the "g" flag is soon to be phased
    out, along with the ability to choose the exact matching scheme. The
    choice of matching scheme to use depends highly on the pattern kind and
    can be done automatically on a case by case basis.

    Returns: a `RegexMatch` object holding engine state after first match.
+/
public auto match(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx,R))
{
    alias Input = Unqual!(typeof(input));
    return RegexMatch!Input(input, re);
}
999 
///ditto
public auto match(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // the pattern is given as text: compile it first
    alias Input = Unqual!(typeof(input));
    return RegexMatch!Input(input, regex(re));
}
1006 
/++
    Find the first (leftmost) slice of the `input` that
    matches the pattern `re`. This function picks the most suitable
    regular expression engine depending on the pattern properties.

    The `re` parameter can be one of three types:
    $(UL
      $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
      $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
        compiled bytecode. )
      $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
        compiled native machine code. )
    )

    Returns:
    $(LREF Captures) containing the extent of a match together with all submatches
    if there was a match, otherwise an empty $(LREF Captures) object.
+/
public auto matchFirst(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    return input.matchOnce(re);
}
1030 
///ditto
public auto matchFirst(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // compile the textual pattern, then match once
    return input.matchOnce(regex(re));
}
1037 
///ditto
public auto matchFirst(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    // several textual patterns are compiled into a single regex
    return input.matchOnce(regex(re));
}
1044 
/++
    Initiate a search for all non-overlapping matches to the pattern `re`
    in the given `input`. The result is a lazy range of matches generated
    as they are encountered in the input going left to right.

    This function picks the most suitable regular expression engine
    depending on the pattern properties.

    The `re` parameter can be one of three types:
    $(UL
      $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
      $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
        compiled bytecode. )
      $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
        compiled native machine code. )
    )

    Returns:
    $(LREF RegexMatch) object that represents matcher state
    after the first match was found or an empty one if not present.
+/
public auto matchAll(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    return input.matchMany(re);
}
1071 
///ditto
public auto matchAll(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // compile the textual pattern, then start the search
    return input.matchMany(regex(re));
}
1078 
///ditto
public auto matchAll(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    // several textual patterns are compiled into a single regex
    return input.matchMany(regex(re));
}
1085 
// another set of tests just to cover the new API
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    import std.conv : to;

    static foreach (String; AliasSeq!(string, wstring, const(dchar)[]))
    {{
        // pattern given as a plain string
        auto text1 = "blah-bleh".to!String();
        auto pattern1 = "bl[ae]h".to!String();
        auto first1 = matchFirst(text1, pattern1);
        assert(first1.equal(["blah".to!String()]));
        auto all1 = matchAll(text1, pattern1);
        assert(all1.equal!((a,b) => a.equal(b))
            ([["blah".to!String()], ["bleh".to!String()]]));

        // regex built from several patterns
        auto text2 = "1/03/12 - 3/03/12".to!String();
        auto pattern2 = regex([r"(\d+)/(\d+)/(\d+)".to!String(), "abc".to!String]);
        auto first2 = matchFirst(text2, pattern2);
        assert(first2.equal(["1/03/12", "1", "03", "12"].map!(to!String)()));
        auto all2 = matchAll(text2, pattern2);
        assert(all2.front.equal(first2));
        all2.popFront();
        assert(all2.front.equal(["3/03/12", "3", "03", "12"].map!(to!String)()));
        first2.popFrontN(3);
        assert(first2.equal(["12".to!String()]));

        // compile-time regex with named groups
        auto staticPattern = ctRegex!(`(?P<Quot>\d+)/(?P<Denom>\d+)`.to!String());
        auto text3 = "2 + 34/56 - 6/1".to!String();
        auto first3 = matchFirst(text3, staticPattern);
        assert(first3.equal(["34/56", "34", "56"].map!(to!String)()));
        assert(first3["Quot"] == "34".to!String());
        assert(first3["Denom"] == "56".to!String());

        auto all3 = matchAll(text3, staticPattern);
        assert(all3.front.equal(first3));
        all3.popFront();
        assert(all3.front.equal(["6/1", "6", "1"].map!(to!String)()));
    }}
}
1127 
/++
    Start matching of `input` to regex pattern `re`,
    using traditional $(LINK2 https://en.wikipedia.org/wiki/Backtracking,
    backtracking) matching scheme.

    The use of this function is $(RED discouraged) - use either of
    $(LREF matchAll) or $(LREF matchFirst).

    Delegating the kind of operation to the "g" flag is soon to be phased
    out, along with the ability to choose the exact matching scheme. The
    choice of matching scheme to use depends highly on the pattern kind and
    can be done automatically on a case by case basis.

    Returns: a `RegexMatch` object holding engine
    state after first match.

+/
public auto bmatch(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    alias Input = Unqual!(typeof(input));
    return RegexMatch!Input(input, re);
}
1151 
///ditto
public auto bmatch(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // the pattern is given as text: compile it first
    alias Input = Unqual!(typeof(input));
    return RegexMatch!Input(input, regex(re));
}
1158 
// Produces a replacement string from `format`, using `captures` for substitution,
// and writes it piece by piece to `sink`.
//
// Substitutions handled after a '$':
//   $N       - submatch number N (decimal digits)
//   ${name}  - named submatch; the name may consist of ASCII letters, digits
//              and underscores
//   $&       - the whole match, captures[0]
//   $`       - captures.pre
//   $'       - captures.post
//   $$       - a single '$'
// Any other character after '$' is copied as ordinary text while the '$'
// itself is dropped.
//
// Params:
//   format = the replacement format
//   captures = the match to take substitutions from
//   sink = output range receiving the result
//   ignoreBadSubs = when true, a submatch number that is out of range is
//                   skipped instead of being reported as an error
//
// Throws: via `enforce` on an out-of-range submatch number (unless
// `ignoreBadSubs`), on a malformed or empty `${...}` and on a format that
// ends with a lone '$'.
package void replaceFmt(R, Capt, OutR)
    (R format, Capt captures, OutR sink, bool ignoreBadSubs = false)
if (isOutputRange!(OutR, ElementEncodingType!R[]) &&
    isOutputRange!(OutR, ElementEncodingType!(Capt.String)[]))
{
    import std.algorithm.searching : find;
    import std.ascii : isAlphaNum, isDigit;
    import std.conv : text, parse;
    import std.exception : enforce;
    enum State { Normal, Dollar }
    auto state = State.Normal;
    size_t offset;
L_Replace_Loop:
    while (!format.empty)
        final switch (state)
        {
        case State.Normal:
            // copy plain text up to the next '$' (code units, no decoding)
            for (offset = 0; offset < format.length; offset++)
            {
                if (format[offset] == '$')
                {
                    state = State.Dollar;
                    sink.put(format[0 .. offset]);
                    format = format[offset+1 .. $];//no decoding here either
                    continue L_Replace_Loop;
                }
            }
            // no '$' left: the rest is plain text
            sink.put(format[0 .. offset]);
            format = format[offset .. $];
            break;
        case State.Dollar:
            if (isDigit(format[0]))
            {
                // parse consumes the digits from the front of format
                uint digit = parse!uint(format);
                enforce(ignoreBadSubs || digit < captures.length, text("invalid submatch number ", digit));
                if (digit < captures.length)
                    sink.put(captures[digit]);
            }
            else if (format[0] == '{')
            {
                // the name runs up to the first character that is not a
                // letter, a digit or an underscore; that character must be '}'
                auto x = find!(a => !(isAlphaNum(a) || a == '_'))(format[1..$]);
                enforce(!x.empty && x[0] == '}', "no matching '}' in replacement format");
                auto name = format[1 .. $ - x.length];
                format = x[1..$];
                enforce(!name.empty, "invalid name in ${...} replacement format");
                sink.put(captures[name]);
            }
            else if (format[0] == '&')
            {
                sink.put(captures[0]);
                format = format[1 .. $];
            }
            else if (format[0] == '`')
            {
                sink.put(captures.pre);
                format = format[1 .. $];
            }
            else if (format[0] == '\'')
            {
                sink.put(captures.post);
                format = format[1 .. $];
            }
            else if (format[0] == '$')
            {
                sink.put(format[0 .. 1]);
                format = format[1 .. $];
            }
            state = State.Normal;
            break;
        }
    // still in Dollar state here means the format ended right after a '$'
    enforce(state == State.Normal, "invalid format string in regex replace");
}
1232 
/++
    Construct a new string from `input` by replacing the first match with
    a string generated from it according to the `format` specifier.

    To replace all matches use $(LREF replaceAll).

    Params:
    input = string to search
    re = compiled regular expression to use
    format = _format string to generate replacements from,
    see $(S_LINK Replace _format string, the _format string).

    Returns:
    A string of the same type with the first match (if any) replaced.
    If no match is found returns the input string itself.
+/
public R replaceFirst(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // the callback expands `format` for the match and writes into the sink
    return replaceFirstWith!(
        (caps, dest) => replaceFmt(format, caps, dest)
    )(input, re);
}
1254 
///
@system unittest
{
    // only the first "n" is wrapped in brackets
    auto replaced = replaceFirst("noon", regex("n"), "[$&]");
    assert(replaced == "[n]oon");
}
1260 
/++
    This is a general replacement tool that constructs a new string by replacing
    matches of pattern `re` in the `input`. Unlike the other overload
    there is no format string; instead captures are passed to
    a user-defined functor `fun` that returns a new string
    to use as replacement.

    This version replaces the first match in `input`,
    see $(LREF replaceAll) to replace all of the matches.

    Returns:
    A new string of the same type as `input` with the first match
    replaced by the return value of `fun`. If no match is found
    returns the `input` itself.
+/
public R replaceFirst(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // the callback writes whatever `fun` returns for the match into the sink
    return replaceFirstWith!(
        (caps, dest) => dest.put(fun(caps))
    )(input, re);
}
1281 
///
@system unittest
{
    import std.conv : to;
    string ranking = "#21 out of 46";
    // increment the first number found
    string bumped = replaceFirst!(cap => to!string(to!int(cap.hit)+1))
        (ranking, regex(`[0-9]+`));
    assert(bumped == "#22 out of 46");
}
1291 
/++
    A variation on $(LREF replaceFirst) that instead of allocating a new string
    on each call outputs the result piece-wise to the `sink`. In particular
    this enables efficient construction of a final output incrementally.

    Like in the $(LREF replaceFirst) family of functions there is an overload
    for the substitution guided by the `format` string
    and one with a user-defined callback.
+/
public @trusted void replaceFirstInto(Sink, R, C, RegEx)
        (ref Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    auto firstMatch = matchFirst(input, re);
    replaceCapturesInto!(
        (caps, dest) => replaceFmt(format, caps, dest)
    )(sink, input, firstMatch);
}
1309 
///ditto
public @trusted void replaceFirstInto(alias fun, Sink, R, RegEx)
    (Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    auto firstMatch = matchFirst(input, re);
    replaceCapturesInto!fun(sink, input, firstMatch);
}
1317 
///
@system unittest
{
    import std.array;
    string line1 = "first message\n";
    string line2 = "second message\n";
    auto output = appender!string();
    replaceFirstInto(output, line1, regex(`([a-z]+) message`), "$1");
    //equivalent of the above with user-defined callback
    replaceFirstInto!(cap=>cap[1])(output, line2, regex(`([a-z]+) message`));
    assert(output.data == "first\nsecond\n");
}
1330 
//examples for replaceFirst
@system unittest
{
    import std.conv;
    string ranking = "#21 out of 46";
    string bumped = replaceFirst!(cap => to!string(to!int(cap.hit)+1))
        (ranking, regex(`[0-9]+`));
    assert(bumped == "#22 out of 46");
    import std.array;
    string line1 = "first message\n";
    string line2 = "second message\n";
    auto output = appender!string();
    replaceFirstInto(output, line1, regex(`([a-z]+) message`), "$1");
    //equivalent of the above with user-defined callback
    replaceFirstInto!(cap=>cap[1])(output, line2, regex(`([a-z]+) message`));
    assert(output.data == "first\nsecond\n");
}
1348 
/++
    Construct a new string from `input` by replacing all of the
    fragments that match a pattern `re` with a string generated
    from the match according to the `format` specifier.

    To replace only the first match use $(LREF replaceFirst).

    Params:
    input = string to search
    re = compiled regular expression to use
    format = _format string to generate replacements from,
    see $(S_LINK Replace _format string, the _format string).

    Returns:
    A string of the same type as `input` with all
    of the matches (if any) replaced.
    If no match is found returns the input string itself.
+/
public @trusted R replaceAll(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // the callback expands `format` for each match and writes into the sink
    return replaceAllWith!(
        (caps, dest) => replaceFmt(format, caps, dest)
    )(input, re);
}
1372 
///
@system unittest
{
    // insert comma as thousands delimiter
    auto thousands = regex(r"(?<=\d)(?=(\d\d\d)+\b)","g");
    auto grouped = replaceAll("12000 + 42100 = 54100", thousands, ",");
    assert(grouped == "12,000 + 42,100 = 54,100");
}
1380 
/++
    This is a general replacement tool that constructs a new string by replacing
    matches of pattern `re` in the `input`. Unlike the other overload
    there is no format string; instead captures are passed to
    a user-defined functor `fun` that returns a new string
    to use as replacement.

    This version replaces all of the matches found in `input`,
    see $(LREF replaceFirst) to replace the first match only.

    Returns:
    A new string of the same type as `input` with all matches
    replaced by return values of `fun`. If no matches are found
    returns the `input` itself.

    Params:
    input = string to search
    re = compiled regular expression
    fun = delegate to use
+/
public @trusted R replaceAll(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // the callback writes whatever `fun` returns for a match into the sink
    return replaceAllWith!(
        (caps, dest) => dest.put(fun(caps))
    )(input, re);
}
1406 
///
@system unittest
{
    string shout(Captures!(string) m)
    {
        import std.string : toUpper;
        return toUpper(m.hit);
    }
    // Capitalize the letters 'a' and 'r':
    auto result = replaceAll!(shout)("Strap a rocket engine on a chicken.",
            regex("[ar]"));
    assert(result == "StRAp A Rocket engine on A chicken.");
}
1420 
/++
    A variation on $(LREF replaceAll) that instead of allocating a new string
    on each call outputs the result piece-wise to the `sink`. In particular
    this enables efficient construction of a final output incrementally.

    As with $(LREF replaceAll) there are 2 overloads - one with a format string,
    the other one with a user-defined functor.
+/
public @trusted void replaceAllInto(Sink, R, C, RegEx)
        (Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    auto found = matchAll(input, re);
    replaceMatchesInto!(
        (caps, dest) => replaceFmt(format, caps, dest)
    )(sink, input, found);
}
1437 
///ditto
public @trusted void replaceAllInto(alias fun, Sink, R, RegEx)
        (Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    auto found = matchAll(input, re);
    replaceMatchesInto!fun(sink, input, found);
}
1445 
///
@system unittest
{
    // insert comma as thousands delimiter in fifty randomly produced big numbers
    import std.array, std.conv, std.random, std.range;
    static re = regex(`(?<=\d)(?=(\d\d\d)+\b)`, "g");
    auto buffer = appender!(char [])();
    enum ulong min = 10UL ^^ 10, max = 10UL ^^ 19;
    foreach (round; 0 .. 50)
    {
        buffer.clear();
        replaceAllInto(buffer, text(uniform(min, max)), re, ",");
        // walking back from the end, every fourth character must be a comma
        foreach (pos; iota(buffer.data.length - 4, 0, -4))
            assert(buffer.data[pos] == ',');
    }
}
1462 
// exercise all of the replace APIs
@system unittest
{
    import std.array : appender;
    import std.conv;
    // try and check first/all simple substitution
    static foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
    {{
        S input1 = "curt trial".to!S();
        S input2 = "round dome".to!S();
        S first1 = "court trial".to!S();
        S first2 = "hound dome".to!S();
        S all1 = "court trial".to!S();
        S all2 = "hound home".to!S();
        auto pattern1 = regex("curt".to!S());
        auto pattern2 = regex("[dr]o".to!S());

        // format string overloads
        assert(replaceFirst(input1, pattern1, "court") == first1);
        assert(replaceFirst(input2, pattern2, "ho") == first2);
        assert(replaceAll(input1, pattern1, "court") == all1);
        assert(replaceAll(input2, pattern2, "ho") == all2);

        // functor overloads
        auto viaFunFirst = replaceFirst!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(input1, pattern1);
        assert(viaFunFirst == first1);
        assert(replaceFirst!(cap => "ho".to!S())(input2, pattern2) == first2);
        auto viaFunAll = replaceAll!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(input1, pattern1);
        assert(viaFunAll == all1);
        assert(replaceAll!(cap => "ho".to!S())(input2, pattern2) == all2);

        // sink overloads append to what is already there
        auto output = appender!S();
        replaceFirstInto(output, input1, pattern1, "court");
        assert(output.data == first1);
        replaceFirstInto(output, input2, pattern2, "ho");
        assert(output.data == first1~first2);
        replaceAllInto(output, input1, pattern1, "court");
        assert(output.data == first1~first2~all1);
        replaceAllInto(output, input2, pattern2, "ho");
        assert(output.data == first1~first2~all1~all2);
    }}
}
1503 
/++
    Old API for replacement, operation depends on flags of pattern `re`.
    With "g" flag it performs the equivalent of $(LREF replaceAll) otherwise it
    works the same as $(LREF replaceFirst).

    The matches to replace are produced by calling `scheme` with `input`
    and `re`; it defaults to $(LREF match).

    The use of this function is $(RED discouraged), please use $(LREF replaceAll)
    or $(LREF replaceFirst) explicitly.
+/
public R replace(alias scheme = match, R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // forward the caller's choice of matching function rather than always
    // using `match`
    return replaceAllWith!((m, sink) => replaceFmt(format, m, sink), scheme)(input, re);
}
1517 
///ditto
public R replace(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // old API: matches come from `match`, replacements from `fun`
    alias viaMatch = replaceAllWith!(fun, match);
    return viaMatch(input, re);
}
1524 
/**
Splits a string `r` using a regular expression `pat` as a separator.

Params:
    keepSeparators = flag to specify if the matches should be in the resulting range
    r = the string to split
    pat = the pattern to split on
Returns:
    A lazy range of strings
*/
public struct Splitter(Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, alias RegEx = Regex)
if (isSomeString!Range && isRegexFor!(RegEx, Range))
{
private:
    Range _input;
    // index into _input where the current element starts; a value past
    // _input.length marks the range as exhausted (see `empty`)
    size_t _offset;
    alias Rx = typeof(match(Range.init,RegEx.init));
    // the current separator match
    Rx _match;

    // with kept separators the range alternates between text pieces and
    // separators; true while the current element is a separator
    static if (keepSeparators) bool onMatch = false;

    @trusted this(Range input, RegEx separator)
    {//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
        _input = input;
        const re = separator.withFlags(separator.flags | RegexOption.global);
        if (!_input.empty)
        {
            _match = Rx(_input, re);

            // nothing precedes the first match, so move on from the
            // would-be empty first piece straight away
            static if (keepSeparators)
            {
                if (_match.pre.empty)
                    popFront();
            }
        }
        else
        {
            //there is nothing to match at all, make _offset > 0
            _offset = 1;
        }
    }

public:
    auto ref opSlice()
    {
        return this.save;
    }

    ///Forward range primitives.
    @property Range front()
    {
        import std.algorithm.comparison : min;

        assert(!empty && _offset <= _match.pre.length
                && _match.pre.length <= _input.length);

        static if (keepSeparators)
        {
            // currently positioned on a separator: that is the element
            if (onMatch)
                return _match.hit();
        }
        // text from the current offset up to where the current match begins
        return _input[_offset .. min($, _match.pre.length)];
    }

    ///ditto
    @property bool empty()
    {
        immutable len = _input.length;
        static if (keepSeparators)
            return _offset >= len;
        else
            return _offset > len;
    }

    ///ditto
    void popFront()
    {
        assert(!empty);
        if (_match.empty)
        {
            //No more separators, work is done here
            _offset = _input.length + 1;
            return;
        }

        static if (keepSeparators)
        {
            if (onMatch)
            {
                // leaving a separator: step over it and look for the next one
                _offset += _match.hit.length;
                _match.popFront();
            }
            else
            {
                // leaving a text piece: continue where the current match begins
                _offset = _match.pre.length;
            }

            onMatch = !onMatch;
        }
        else
        {
            //skip past the separator
            _offset = _match.pre.length + _match.hit.length;
            _match.popFront();
        }
    }

    ///ditto
    @property auto save()
    {
        return this;
    }
}
1642 
/// ditto
public Splitter!(keepSeparators, Range, RegEx) splitter(
    Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)(Range r, RegEx pat)
if (
    is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
{
    // construct the declared return type directly
    return typeof(return)(r, pat);
}
1651 
///
@system unittest
{
    import std.algorithm.comparison : equal;
    auto text = ", abc, de,  fg, hi, ";
    // leading and trailing separators yield empty strings at both ends
    auto pieces = splitter(text, regex(", *"));
    assert(equal(pieces, ["", "abc", "de", "fg", "hi", ""]));
}
1660 
/// Split on a pattern, but keep the matches in the resulting range
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.typecons : Yes;

    auto separators = regex(`([\.,])`);

    auto dateParts = "2003.04.05".splitter!(Yes.keepSeparators)(separators);
    assert(dateParts.equal(["2003", ".", "04", ".", "05"]));

    // a separator at the very start comes first, with no empty string before it
    auto listParts = ",1,2,3".splitter!(Yes.keepSeparators)(separators);
    assert(listParts.equal([",", "1", ",", "2", ",", "3"]));
}
1677 
///An eager version of `splitter` that creates an array with split slices of `input`.
public @trusted String[] split(String, RegEx)(String input, RegEx rx)
if (isSomeString!String  && isRegexFor!(RegEx, String))
{
    import std.array : appender;
    auto pieces = appender!(String[])();
    // run the lazy splitter to the end, collecting every piece
    foreach (piece; splitter(input, rx))
    {
        pieces.put(piece);
    }
    return pieces.data;
}
1688 
1689 ///Exception object thrown in case of errors during regex compilation.
1690 public alias RegexException = std.regex.internal.ir.RegexException;
1691 
/++
  A range that lazily produces a string output escaped
  to be used inside of a regular expression.
+/
auto escaper(Range)(Range r)
{
    import std.algorithm.searching : find;
    static immutable escapables = [Escapables];
    static struct Escaper // template to deduce attributes
    {
        Range r;
        // true while a backslash still has to be produced in front of r.front
        bool escaped;

        @property ElementType!Range front()
        {
            if (!escaped)
                return r.front;
            return '\\';
        }

        @property bool empty()
        {
            return r.empty;
        }

        void popFront()
        {
            if (escaped)
            {
                // the backslash is out; the escaped character itself is next
                escaped = false;
                return;
            }
            r.popFront();
            // does the new front need a backslash before it?
            if (!r.empty && !escapables.find(r.front).empty)
                escaped = true;
        }

        @property auto save()
        {
            return Escaper(r.save, escaped);
        }
    }

    // decide up front whether the very first character needs escaping
    bool firstNeedsEscape = !r.empty && !escapables.find(r.front).empty;
    return Escaper(r, firstNeedsEscape);
}
1730 
///
@system unittest
{
    import std.algorithm.comparison;
    import std.regex;
    string raw = `This is {unfriendly} to *regex*`;
    // braces and asterisks get a backslash in front
    assert(raw.escaper.equal(`This is \{unfriendly\} to \*regex\*`));
}
1739 
@system unittest
{
    import std.algorithm.comparison;
    import std.conv;
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
      // a string consisting of a single escapable character
      auto s = "^".to!S;
      assert(s.escaper.equal(`\^`));
      // the empty string, converted to the string type under test
      auto s2 = "".to!S;
      assert(s2.escaper.equal(""));
    }}
}
1752 
@system unittest
{
    // greedy `a?` takes the 'a', lazy `a??` does not
    auto greedy = "ab".matchFirst(regex(`a?b?`));
    assert(greedy.hit == "ab");
    auto lazy_ = "ab".matchFirst(regex(`a??b?`));
    assert(lazy_.hit == "");
}
1758