xref: /netbsd-src/external/gpl3/gcc/dist/libphobos/src/std/regex/internal/generator.d (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /*
2     Generators - components that generate strings for a given regex pattern.
3 
4     For the moment undocumented, and is subject to change.
5 */
6 module std.regex.internal.generator;
7 
8 /*
9     Useful utility for self-testing, an infinite range of string samples
10     that _have_ to match given compiled regex.
11     Caveats: supports only a simple subset of bytecode.
12 */
// Infinite input range of strings produced by walking the bytecode of a
// compiled regex and emitting text for each instruction it understands.
// `Char` is the code unit type of both the pattern and the produced samples.
@trusted private struct SampleGenerator(Char)
{
    import std.array : appender, Appender;
    import std.format.write : formattedWrite;
    import std.random : Xorshift;
    import std.regex.internal.ir : Regex, IR, IRL;
    import std.utf : isValidDchar, byChar;

    Regex!Char re;          // compiled pattern whose bytecode is walked
    // Buffer for the current sample. Its element type must follow `Char`
    // (not a fixed `char`), otherwise `front` cannot return `Char[]`.
    Appender!(Char[]) app;
    uint limit, seed;       // soft length limit; seed the generator was created with
    Xorshift gen;           // source of randomness for every choice made in compose()

    //generator for pattern r, with soft maximum of threshold elements
    //and a given random seed
    this(ref Regex!Char r, uint threshold, uint randomSeed)
    {
        re = r;
        limit = threshold;
        seed = randomSeed;
        // Actually seed the engine; a default-initialised engine would yield
        // the same sequence regardless of randomSeed.
        gen.seed(randomSeed);
        app = appender!(Char[])();
        compose();
    }

    // Returns the next random number reduced modulo x, i.e. a value in [0, x).
    // x must be non-zero.
    uint rand(uint x)
    {
        assert(x != 0, "SampleGenerator.rand: modulus must be non-zero");
        uint r = gen.front % x;
        gen.popFront();
        return r;
    }

    // Appends one freshly generated sample to `app` by interpreting re.ir
    // from instruction 0. Stops at the first instruction that is not handled
    // below (anchors, lookarounds and anything else fall to `default`).
    void compose()
    {
        // pc          - index of the current instruction in re.ir
        // counter     - the single repetition counter shared by all counted repeats
        // dataLenOld  - output length seen at the previous visit of an
        //               infinite-loop end; used to detect loops that add nothing
        uint pc = 0, counter = 0, dataLenOld = uint.max;
        for (;;)
        {
            switch (re.ir[pc].code)
            {
                case IR.Char:
                    formattedWrite(app,"%s", cast(dchar) re.ir[pc].data);
                    pc += IRL!(IR.Char);
                    break;
                case IR.OrChar:
                    // pick one of the `sequence` alternatives that start at pc
                    uint len = re.ir[pc].sequence;
                    formattedWrite(app, "%s", cast(dchar) re.ir[pc + rand(len)].data);
                    pc += len;
                    break;
                case IR.CodepointSet:
                case IR.Trie:
                    // pick a random interval of the set, then the code point
                    // a + y with y drawn from [0, b - a)
                    auto set = re.charsets[re.ir[pc].data];
                    auto x = rand(cast(uint) set.byInterval.length);
                    auto y = rand(set.byInterval[x].b - set.byInterval[x].a);
                    formattedWrite(app, "%s", cast(dchar)(set.byInterval[x].a+y));
                    pc += IRL!(IR.CodepointSet);
                    break;
                case IR.Any:
                    // redraw until the value is a valid dchar other than '\r' or '\n'
                    uint x;
                    do
                    {
                        x = rand(0x11_000);
                    }while (x == '\r' || x == '\n' || !isValidDchar(x));
                    formattedWrite(app, "%s", cast(dchar) x);
                    pc += IRL!(IR.Any);
                    break;
                case IR.GotoEndOr:
                    // end of the chosen alternative: jump to the closing OrEnd
                    pc += IRL!(IR.GotoEndOr)+re.ir[pc].data;
                    assert(re.ir[pc].code == IR.OrEnd);
                    goto case;
                case IR.OrEnd:
                    pc += IRL!(IR.OrEnd);
                    break;
                case IR.OrStart:
                    pc += IRL!(IR.OrStart);
                    goto case;
                case IR.Option:
                    uint next = pc + re.ir[pc].data + IRL!(IR.Option);
                    uint nOpt = 0;
                    //queue next Option
                    while (re.ir[next].code == IR.Option)
                    {
                        nOpt++;
                        next += re.ir[next].data + IRL!(IR.Option);
                    }
                    nOpt++;
                    // nOpt now counts all chained Options; choose one of them
                    nOpt = rand(nOpt);
                    for (;nOpt; nOpt--)
                    {
                        pc += re.ir[pc].data + IRL!(IR.Option);
                    }
                    assert(re.ir[pc].code == IR.Option);
                    pc += IRL!(IR.Option);
                    break;
                case IR.RepeatStart:case IR.RepeatQStart:
                    // skip ahead to the matching end, which drives the loop
                    pc += IRL!(IR.RepeatStart)+re.ir[pc].data;
                    goto case IR.RepeatEnd;
                case IR.RepeatEnd:
                case IR.RepeatQEnd:
                    uint len = re.ir[pc].data;
                    uint step = re.ir[pc+2].raw;
                    uint min = re.ir[pc+3].raw;
                    if (counter < min)
                    {
                        // mandatory iteration: go back to the loop body
                        counter += step;
                        pc -= len;
                        break;
                    }
                    uint max = re.ir[pc+4].raw;
                    if (counter < max)
                    {
                        // optional iteration: repeat with probability 2/3
                        // while the sample is still below the soft limit
                        if (app.data.length < limit && rand(3) > 0)
                        {
                            pc -= len;
                            counter += step;
                        }
                        else
                        {
                            counter = counter%step;
                            pc += IRL!(IR.RepeatEnd);
                        }
                    }
                    else
                    {
                        counter = counter%step;
                        pc += IRL!(IR.RepeatEnd);
                    }
                    break;
                case IR.InfiniteStart, IR.InfiniteBloomStart, IR.InfiniteQStart:
                    pc += re.ir[pc].data + IRL!(IR.InfiniteStart);
                    goto case IR.InfiniteEnd; //both Q and non-Q
                case IR.InfiniteEnd, IR.InfiniteBloomEnd, IR.InfiniteQEnd:
                    uint len = re.ir[pc].data;
                    if (app.data.length == dataLenOld)
                    {
                        // the last pass through the body produced no output:
                        // leave the loop instead of spinning forever
                        pc += IRL!(IR.InfiniteEnd);
                        break;
                    }
                    dataLenOld = cast(uint) app.data.length;
                    if (app.data.length < limit && rand(3) > 0)
                        pc = pc - len;
                    else
                        pc = pc + re.ir[pc].length;
                    break;
                case IR.GroupStart, IR.GroupEnd:
                    pc += IRL!(IR.GroupStart);
                    break;
                case IR.Bol, IR.Wordboundary, IR.Notwordboundary:
                case IR.LookaheadStart, IR.NeglookaheadStart, IR.LookbehindStart, IR.NeglookbehindStart:
                default:
                    return;
            }
        }
    }

    // Current sample; the returned slice refers to the internal buffer,
    // which popFront() reuses.
    @property Char[] front()
    {
        return app.data;
    }

    // The range never runs out of samples.
    enum empty = false;

    // Discards the current sample and generates the next one.
    void popFront()
    {
        app.shrinkTo(0);
        compose();
    }
}
177 
@system unittest
{
    import std.range, std.regex;

    // Self-check: each of the first 1000 generated samples has to match
    // the very pattern it was generated from.
    enum sampleCount = 1_000;
    auto pattern = regex(`P[a-z]{3,}q`);
    auto samples = SampleGenerator!char(pattern, 20, 3141592);
    static assert(isInputRange!(typeof(samples)));

    //@@@BUG@@@ somehow the UFCS form samples.take(sampleCount) doesn't work
    foreach (sample; take(samples, sampleCount))
    {
        assert(sample.match(pattern));
    }
}
188