/*
Generators - components that generate strings for a given regex pattern.

Currently undocumented and subject to change.
*/
6 module std.regex.internal.generator;
7
/*
A utility for self-testing: an infinite range of string samples,
each of which must match the given compiled regex.
Caveat: only a simple subset of the bytecode is supported.
*/
@trusted private struct SampleGenerator(Char)
{
    import std.array : appender, Appender;
    import std.format.write : formattedWrite;
    import std.random : Xorshift;
    import std.regex.internal.ir : Regex, IR, IRL;
    import std.utf : isValidDchar, byChar;

    // Compiled pattern whose bytecode (re.ir) is walked by compose().
    Regex!Char re;
    // Buffer holding the current sample. It is typed on Char so that it agrees
    // with appender!(Char[]) in the constructor and with front's return type;
    // a fixed char[] buffer only compiles when Char is char.
    Appender!(Char[]) app;
    // limit: soft cap on the sample length, consulted only when deciding
    //        whether to take another optional loop iteration.
    // seed:  the seed this generator was constructed with.
    uint limit, seed;
    // Source of the random choices made while composing a sample.
    Xorshift gen;

    //generator for pattern r, with soft maximum of threshold elements
    //and a given random seed
    this(ref Regex!Char r, uint threshold, uint randomSeed)
    {
        re = r;
        limit = threshold;
        seed = randomSeed;
        // Actually seed the engine: without this the stored seed is never
        // used and every generator produces the same sequence.
        gen = Xorshift(randomSeed);
        app = appender!(Char[])();
        compose();
    }

    // Returns a pseudo-random number in [0, x) and advances the engine.
    // x must be non-zero (it is used as a modulus).
    uint rand(uint x)
    {
        assert(x != 0, "rand: upper bound must be non-zero");
        uint r = gen.front % x;
        gen.popFront();
        return r;
    }

    // Walks the bytecode from the start, appending one matching sample to app.
    // Stops at the first instruction that is not handled below.
    void compose()
    {
        // pc:         index of the current instruction in re.ir
        // counter:    shared repetition counter for counted repeats
        // dataLenOld: sample length at the previous visit to an infinite-loop
        //             end, used to detect iterations that appended nothing
        uint pc = 0, counter = 0, dataLenOld = uint.max;
        for (;;)
        {
            switch (re.ir[pc].code)
            {
            case IR.Char:
                formattedWrite(app, "%s", cast(dchar) re.ir[pc].data);
                pc += IRL!(IR.Char);
                break;
            case IR.OrChar:
                // pick one of the `len` alternative characters in the sequence
                uint len = re.ir[pc].sequence;
                formattedWrite(app, "%s", cast(dchar) re.ir[pc + rand(len)].data);
                pc += len;
                break;
            case IR.CodepointSet:
            case IR.Trie:
                // pick a random interval of the set, then a random offset in it
                auto set = re.charsets[re.ir[pc].data];
                auto x = rand(cast(uint) set.byInterval.length);
                auto y = rand(set.byInterval[x].b - set.byInterval[x].a);
                formattedWrite(app, "%s", cast(dchar)(set.byInterval[x].a + y));
                pc += IRL!(IR.CodepointSet);
                break;
            case IR.Any:
                // NOTE(review): 0x11_000 limits candidates to below U+11000;
                // 0x11_0000 (the whole code point space) may have been
                // intended - confirm before changing.
                uint x;
                do
                {
                    x = rand(0x11_000);
                } while (x == '\r' || x == '\n' || !isValidDchar(x));
                formattedWrite(app, "%s", cast(dchar) x);
                pc += IRL!(IR.Any);
                break;
            case IR.GotoEndOr:
                // end of the chosen alternative: jump to the closing OrEnd
                pc += IRL!(IR.GotoEndOr) + re.ir[pc].data;
                assert(re.ir[pc].code == IR.OrEnd);
                goto case;
            case IR.OrEnd:
                pc += IRL!(IR.OrEnd);
                break;
            case IR.OrStart:
                pc += IRL!(IR.OrStart);
                goto case;
            case IR.Option:
                uint next = pc + re.ir[pc].data + IRL!(IR.Option);
                uint nOpt = 0;
                //queue next Option
                while (re.ir[next].code == IR.Option)
                {
                    nOpt++;
                    next += re.ir[next].data + IRL!(IR.Option);
                }
                nOpt++;
                // choose one alternative at random and skip the ones before it
                nOpt = rand(nOpt);
                for (; nOpt; nOpt--)
                {
                    pc += re.ir[pc].data + IRL!(IR.Option);
                }
                assert(re.ir[pc].code == IR.Option);
                pc += IRL!(IR.Option);
                break;
            case IR.RepeatStart: case IR.RepeatQStart:
                pc += IRL!(IR.RepeatStart) + re.ir[pc].data;
                goto case IR.RepeatEnd;
            case IR.RepeatEnd:
            case IR.RepeatQEnd:
                // operands read from the instruction: body length in .data,
                // then step, min and max at offsets 2, 3 and 4
                uint len = re.ir[pc].data;
                uint step = re.ir[pc+2].raw;
                uint min = re.ir[pc+3].raw;
                if (counter < min)
                {
                    // mandatory iteration: jump back to the body
                    counter += step;
                    pc -= len;
                    break;
                }
                uint max = re.ir[pc+4].raw;
                if (counter < max)
                {
                    // optional iteration: taken at random while under the limit
                    if (app.data.length < limit && rand(3) > 0)
                    {
                        pc -= len;
                        counter += step;
                    }
                    else
                    {
                        counter = counter % step;
                        pc += IRL!(IR.RepeatEnd);
                    }
                }
                else
                {
                    counter = counter % step;
                    pc += IRL!(IR.RepeatEnd);
                }
                break;
            case IR.InfiniteStart, IR.InfiniteBloomStart, IR.InfiniteQStart:
                pc += re.ir[pc].data + IRL!(IR.InfiniteStart);
                goto case IR.InfiniteEnd; //both Q and non-Q
            case IR.InfiniteEnd, IR.InfiniteBloomEnd, IR.InfiniteQEnd:
                uint len = re.ir[pc].data;
                // nothing was appended since the last visit: leave the loop
                if (app.data.length == dataLenOld)
                {
                    pc += IRL!(IR.InfiniteEnd);
                    break;
                }
                dataLenOld = cast(uint) app.data.length;
                if (app.data.length < limit && rand(3) > 0)
                    pc = pc - len;
                else
                    pc = pc + re.ir[pc].length;
                break;
            case IR.GroupStart, IR.GroupEnd:
                pc += IRL!(IR.GroupStart);
                break;
            case IR.Bol, IR.Wordboundary, IR.Notwordboundary:
            case IR.LookaheadStart, IR.NeglookaheadStart, IR.LookbehindStart, IR.NeglookbehindStart:
            default:
                // unsupported (or terminating) instruction: the sample is done
                return;
            }
        }
    }

    // The current sample. The buffer is reset and refilled by popFront(), so
    // copy the slice if it has to outlive the next popFront().
    @property Char[] front()
    {
        return app.data;
    }

    // The range never ends.
    enum empty = false;

    // Discards the current sample and composes a new one.
    void popFront()
    {
        app.shrinkTo(0);
        compose();
    }
}
177
@system unittest
{
    import std.range, std.regex;

    // Self-check: each generated sample must be matched by the pattern it was
    // generated from.
    auto pattern = regex(`P[a-z]{3,}q`);
    auto samples = SampleGenerator!char(pattern, 20, 3141592);

    // The generator has to be usable as an input range.
    static assert(isInputRange!(typeof(samples)));

    //@@@BUG@@@ somehow samples.take(1_000) doesn't work
    foreach (sample; take(samples, 1_000))
    {
        assert(sample.match(pattern));
    }
}
188