xref: /openbsd-src/gnu/usr.bin/perl/regnodes.h (revision b0f539e9923c93d213bbde92bfd6b7a67cb6927c)
1 /* -*- buffer-read-only: t -*-
2    !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!!
3    This file is built by regen/regcomp.pl from regcomp.sym.
4    Any changes made here will be lost!
5  */
6 
7 /* Regops and State definitions */
8 
9 #define REGNODE_MAX           	97	/* highest regop number (PSEUDO) */
10 #define REGMATCH_STATE_MAX    	139	/* highest state number (KEEPS_next_fail, i.e. REGNODE_MAX + 42) */
11 
12 #define	END                   	0	/* 0000 End of program. */
13 #define	SUCCEED               	1	/* 0x01 Return from a subroutine, basically. */
14 #define	SBOL                  	2	/* 0x02 Match "" at beginning of line: /^/, /\A/ */
15 #define	BOL                   	2	/* 0x02 type alias */
16 #define	MBOL                  	3	/* 0x03 Same, assuming multiline: /^/m */
17 #define	SEOL                  	4	/* 0x04 Match "" at end of line: /$/ */
18 #define	EOL                   	4	/* 0x04 type alias */
19 #define	MEOL                  	5	/* 0x05 Same, assuming multiline: /$/m */
20 #define	EOS                   	6	/* 0x06 Match "" at end of string: /\z/ */
21 #define	GPOS                  	7	/* 0x07 Matches where last m//g left off. */
22 #define	BOUND                 	8	/* 0x08 Like BOUNDA for non-utf8, otherwise match "" between any Unicode \w\W or \W\w */
23 #define	BOUNDL                	9	/* 0x09 Like BOUND/BOUNDU, but \w and \W are defined by current locale */
24 #define	BOUNDU                	10	/* 0x0a Match "" at any boundary of a given type using Unicode rules */
25 #define	BOUNDA                	11	/* 0x0b Match "" at any boundary between \w\W or \W\w, where \w is [_a-zA-Z0-9] */
26 #define	NBOUND                	12	/* 0x0c Like NBOUNDA for non-utf8, otherwise match "" between any Unicode \w\w or \W\W */
27 #define	NBOUNDL               	13	/* 0x0d Like NBOUND/NBOUNDU, but \w and \W are defined by current locale */
28 #define	NBOUNDU               	14	/* 0x0e Match "" at any non-boundary of a given type using Unicode rules */
29 #define	NBOUNDA               	15	/* 0x0f Match "" between any \w\w or \W\W, where \w is [_a-zA-Z0-9] */
30 #define	REG_ANY               	16	/* 0x10 Match any one character (except newline). */
31 #define	SANY                  	17	/* 0x11 Match any one character. */
32 #define	ANYOF                 	18	/* 0x12 Match character in (or not in) this class, single char match only */
33 #define	ANYOFD                	19	/* 0x13 Like ANYOF, but /d is in effect */
34 #define	ANYOFL                	20	/* 0x14 Like ANYOF, but /l is in effect */
35 #define	ANYOFM                	21	/* 0x15 Like ANYOF, but matches an invariant byte as determined by the mask and arg */
36 #define	POSIXD                	22	/* 0x16 Some [[:class:]] under /d; the FLAGS field gives which one */
37 #define	POSIXL                	23	/* 0x17 Some [[:class:]] under /l; the FLAGS field gives which one */
38 #define	POSIXU                	24	/* 0x18 Some [[:class:]] under /u; the FLAGS field gives which one */
39 #define	POSIXA                	25	/* 0x19 Some [[:class:]] under /a; the FLAGS field gives which one */
40 #define	NPOSIXD               	26	/* 0x1a complement of POSIXD, [[:^class:]] */
41 #define	NPOSIXL               	27	/* 0x1b complement of POSIXL, [[:^class:]] */
42 #define	NPOSIXU               	28	/* 0x1c complement of POSIXU, [[:^class:]] */
43 #define	NPOSIXA               	29	/* 0x1d complement of POSIXA, [[:^class:]] */
44 #define	ASCII                 	30	/* 0x1e [[:ascii:]] */
45 #define	NASCII                	31	/* 0x1f [[:^ascii:]] */
46 #define	CLUMP                 	32	/* 0x20 Match any extended grapheme cluster sequence */
47 #define	BRANCH                	33	/* 0x21 Match this alternative, or the next... */
48 #define	EXACT                 	34	/* 0x22 Match this string (preceded by length). */
49 #define	EXACTL                	35	/* 0x23 Like EXACT, but /l is in effect (used so locale-related warnings can be checked for). */
50 #define	EXACTF                	36	/* 0x24 Match this non-UTF-8 string (not guaranteed to be folded) using /id rules (w/len). */
51 #define	EXACTFL               	37	/* 0x25 Match this string (not guaranteed to be folded) using /il rules (w/len). */
52 #define	EXACTFU               	38	/* 0x26 Match this string (folded iff in UTF-8, length in folding doesn't change if not in UTF-8) using /iu rules (w/len). */
53 #define	EXACTFAA              	39	/* 0x27 Match this string (not guaranteed to be folded) using /iaa rules (w/len). */
54 #define	EXACTFU_SS            	40	/* 0x28 Match this string (folded iff in UTF-8, length in folding may change even if not in UTF-8) using /iu rules (w/len). */
55 #define	EXACTFLU8             	41	/* 0x29 Rare circumstances: like EXACTFU, but is under /l, UTF-8, folded, and everything in it is above 255. */
56 #define	EXACTFAA_NO_TRIE      	42	/* 0x2a Match this string (which is not trie-able; not guaranteed to be folded) using /iaa rules (w/len). */
57 #define	NOTHING               	43	/* 0x2b Match empty string. */
58 #define	TAIL                  	44	/* 0x2c Match empty string. Can jump here from outside. */
59 #define	STAR                  	45	/* 0x2d Match this (simple) thing 0 or more times. */
60 #define	PLUS                  	46	/* 0x2e Match this (simple) thing 1 or more times. */
61 #define	CURLY                 	47	/* 0x2f Match this simple thing {n,m} times. */
62 #define	CURLYN                	48	/* 0x30 Capture next-after-this simple thing */
63 #define	CURLYM                	49	/* 0x31 Capture this medium-complex thing {n,m} times. */
64 #define	CURLYX                	50	/* 0x32 Match this complex thing {n,m} times. */
65 #define	WHILEM                	51	/* 0x33 Do curly processing and see if rest matches. */
66 #define	OPEN                  	52	/* 0x34 Mark this point in input as start of #n. */
67 #define	CLOSE                 	53	/* 0x35 Close corresponding OPEN of #n. */
68 #define	SROPEN                	54	/* 0x36 Same as OPEN, but for script run */
69 #define	SRCLOSE               	55	/* 0x37 Close preceding SROPEN */
70 #define	REF                   	56	/* 0x38 Match some already matched string */
71 #define	REFF                  	57	/* 0x39 Match already matched string, folded using native charset rules for non-utf8 */
72 #define	REFFL                 	58	/* 0x3a Match already matched string, folded in loc. */
73 #define	REFFU                 	59	/* 0x3b Match already matched string, folded using unicode rules for non-utf8 */
74 #define	REFFA                 	60	/* 0x3c Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
75 #define	NREF                  	61	/* 0x3d Match some already matched string */
76 #define	NREFF                 	62	/* 0x3e Match already matched string, folded using native charset rules for non-utf8 */
77 #define	NREFFL                	63	/* 0x3f Match already matched string, folded in loc. */
78 #define	NREFFU                	64	/* 0x40 Match already matched string, folded using unicode rules for non-utf8 */
79 #define	NREFFA                	65	/* 0x41 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
80 #define	LONGJMP               	66	/* 0x42 Jump far away. */
81 #define	BRANCHJ               	67	/* 0x43 BRANCH with long offset. */
82 #define	IFMATCH               	68	/* 0x44 Succeeds if the following matches. */
83 #define	UNLESSM               	69	/* 0x45 Fails if the following matches. */
84 #define	SUSPEND               	70	/* 0x46 "Independent" sub-RE. */
85 #define	IFTHEN                	71	/* 0x47 Switch, should be preceded by switcher. */
86 #define	GROUPP                	72	/* 0x48 Whether the group matched. */
87 #define	EVAL                  	73	/* 0x49 Execute some Perl code. */
88 #define	MINMOD                	74	/* 0x4a Next operator is not greedy. */
89 #define	LOGICAL               	75	/* 0x4b Next opcode should set the flag only. */
90 #define	RENUM                 	76	/* 0x4c Group with independently numbered parens. */
91 #define	TRIE                  	77	/* 0x4d Match many EXACT(F[ALU]?)? at once. flags==type */
92 #define	TRIEC                 	78	/* 0x4e Same as TRIE, but with embedded charclass data */
93 #define	AHOCORASICK           	79	/* 0x4f Aho Corasick stclass. flags==type */
94 #define	AHOCORASICKC          	80	/* 0x50 Same as AHOCORASICK, but with embedded charclass data */
95 #define	GOSUB                 	81	/* 0x51 recurse to paren arg1 at (signed) ofs arg2 */
96 #define	NGROUPP               	82	/* 0x52 Whether the group matched. */
97 #define	INSUBP                	83	/* 0x53 Whether we are in a specific recurse. */
98 #define	DEFINEP               	84	/* 0x54 Never execute directly. */
99 #define	ENDLIKE               	85	/* 0x55 Used only for the type field of verbs */
100 #define	OPFAIL                	86	/* 0x56 Same as (?!), but with verb arg */
101 #define	ACCEPT                	87	/* 0x57 Accepts the current matched string, with verb arg */
102 #define	VERB                  	88	/* 0x58 Used only for the type field of verbs */
103 #define	PRUNE                 	89	/* 0x59 Pattern fails at this startpoint if no-backtracking through this */
104 #define	MARKPOINT             	90	/* 0x5a Push the current location for rollback by cut. */
105 #define	SKIP                  	91	/* 0x5b On failure skip forward (to the mark) before retrying */
106 #define	COMMIT                	92	/* 0x5c Pattern fails outright if backtracking through this */
107 #define	CUTGROUP              	93	/* 0x5d On failure go to the next alternation in the group */
108 #define	KEEPS                 	94	/* 0x5e $& begins here. */
109 #define	LNBREAK               	95	/* 0x5f generic newline pattern */
110 #define	OPTIMIZED             	96	/* 0x60 Placeholder for dump. */
111 #define	PSEUDO                	97	/* 0x61 Pseudo opcode for internal use. */
112 	/* ------------ States ------------- */
113 #define	TRIE_next             	(REGNODE_MAX + 1)	/* state for TRIE */
114 #define	TRIE_next_fail        	(REGNODE_MAX + 2)	/* state for TRIE */
115 #define	EVAL_B                	(REGNODE_MAX + 3)	/* state for EVAL */
116 #define	EVAL_B_fail           	(REGNODE_MAX + 4)	/* state for EVAL */
117 #define	EVAL_postponed_AB     	(REGNODE_MAX + 5)	/* state for EVAL */
118 #define	EVAL_postponed_AB_fail	(REGNODE_MAX + 6)	/* state for EVAL */
119 #define	CURLYX_end            	(REGNODE_MAX + 7)	/* state for CURLYX */
120 #define	CURLYX_end_fail       	(REGNODE_MAX + 8)	/* state for CURLYX */
121 #define	WHILEM_A_pre          	(REGNODE_MAX + 9)	/* state for WHILEM */
122 #define	WHILEM_A_pre_fail     	(REGNODE_MAX + 10)	/* state for WHILEM */
123 #define	WHILEM_A_min          	(REGNODE_MAX + 11)	/* state for WHILEM */
124 #define	WHILEM_A_min_fail     	(REGNODE_MAX + 12)	/* state for WHILEM */
125 #define	WHILEM_A_max          	(REGNODE_MAX + 13)	/* state for WHILEM */
126 #define	WHILEM_A_max_fail     	(REGNODE_MAX + 14)	/* state for WHILEM */
127 #define	WHILEM_B_min          	(REGNODE_MAX + 15)	/* state for WHILEM */
128 #define	WHILEM_B_min_fail     	(REGNODE_MAX + 16)	/* state for WHILEM */
129 #define	WHILEM_B_max          	(REGNODE_MAX + 17)	/* state for WHILEM */
130 #define	WHILEM_B_max_fail     	(REGNODE_MAX + 18)	/* state for WHILEM */
131 #define	BRANCH_next           	(REGNODE_MAX + 19)	/* state for BRANCH */
132 #define	BRANCH_next_fail      	(REGNODE_MAX + 20)	/* state for BRANCH */
133 #define	CURLYM_A              	(REGNODE_MAX + 21)	/* state for CURLYM */
134 #define	CURLYM_A_fail         	(REGNODE_MAX + 22)	/* state for CURLYM */
135 #define	CURLYM_B              	(REGNODE_MAX + 23)	/* state for CURLYM */
136 #define	CURLYM_B_fail         	(REGNODE_MAX + 24)	/* state for CURLYM */
137 #define	IFMATCH_A             	(REGNODE_MAX + 25)	/* state for IFMATCH */
138 #define	IFMATCH_A_fail        	(REGNODE_MAX + 26)	/* state for IFMATCH */
139 #define	CURLY_B_min_known     	(REGNODE_MAX + 27)	/* state for CURLY */
140 #define	CURLY_B_min_known_fail	(REGNODE_MAX + 28)	/* state for CURLY */
141 #define	CURLY_B_min           	(REGNODE_MAX + 29)	/* state for CURLY */
142 #define	CURLY_B_min_fail      	(REGNODE_MAX + 30)	/* state for CURLY */
143 #define	CURLY_B_max           	(REGNODE_MAX + 31)	/* state for CURLY */
144 #define	CURLY_B_max_fail      	(REGNODE_MAX + 32)	/* state for CURLY */
145 #define	COMMIT_next           	(REGNODE_MAX + 33)	/* state for COMMIT */
146 #define	COMMIT_next_fail      	(REGNODE_MAX + 34)	/* state for COMMIT */
147 #define	MARKPOINT_next        	(REGNODE_MAX + 35)	/* state for MARKPOINT */
148 #define	MARKPOINT_next_fail   	(REGNODE_MAX + 36)	/* state for MARKPOINT */
149 #define	SKIP_next             	(REGNODE_MAX + 37)	/* state for SKIP */
150 #define	SKIP_next_fail        	(REGNODE_MAX + 38)	/* state for SKIP */
151 #define	CUTGROUP_next         	(REGNODE_MAX + 39)	/* state for CUTGROUP */
152 #define	CUTGROUP_next_fail    	(REGNODE_MAX + 40)	/* state for CUTGROUP */
153 #define	KEEPS_next            	(REGNODE_MAX + 41)	/* state for KEEPS */
154 #define	KEEPS_next_fail       	(REGNODE_MAX + 42)	/* state for KEEPS */
155 
156 /* PL_regkind[] - What type of regop or state is this; one entry per regop, then one per state, in opcode-number order. */
157 
158 #ifndef DOINIT
159 EXTCONST U8 PL_regkind[];
160 #else
161 EXTCONST U8 PL_regkind[] = {
162 	END,      	/* END                    */
163 	END,      	/* SUCCEED                */
164 	BOL,      	/* SBOL                   */
165 	BOL,      	/* MBOL                   */
166 	EOL,      	/* SEOL                   */
167 	EOL,      	/* MEOL                   */
168 	EOL,      	/* EOS                    */
169 	GPOS,     	/* GPOS                   */
170 	BOUND,    	/* BOUND                  */
171 	BOUND,    	/* BOUNDL                 */
172 	BOUND,    	/* BOUNDU                 */
173 	BOUND,    	/* BOUNDA                 */
174 	NBOUND,   	/* NBOUND                 */
175 	NBOUND,   	/* NBOUNDL                */
176 	NBOUND,   	/* NBOUNDU                */
177 	NBOUND,   	/* NBOUNDA                */
178 	REG_ANY,  	/* REG_ANY                */
179 	REG_ANY,  	/* SANY                   */
180 	ANYOF,    	/* ANYOF                  */
181 	ANYOF,    	/* ANYOFD                 */
182 	ANYOF,    	/* ANYOFL                 */
183 	ANYOFM,   	/* ANYOFM                 */
184 	POSIXD,   	/* POSIXD                 */
185 	POSIXD,   	/* POSIXL                 */
186 	POSIXD,   	/* POSIXU                 */
187 	POSIXD,   	/* POSIXA                 */
188 	NPOSIXD,  	/* NPOSIXD                */
189 	NPOSIXD,  	/* NPOSIXL                */
190 	NPOSIXD,  	/* NPOSIXU                */
191 	NPOSIXD,  	/* NPOSIXA                */
192 	ASCII,    	/* ASCII                  */
193 	ASCII,    	/* NASCII                 */
194 	CLUMP,    	/* CLUMP                  */
195 	BRANCH,   	/* BRANCH                 */
196 	EXACT,    	/* EXACT                  */
197 	EXACT,    	/* EXACTL                 */
198 	EXACT,    	/* EXACTF                 */
199 	EXACT,    	/* EXACTFL                */
200 	EXACT,    	/* EXACTFU                */
201 	EXACT,    	/* EXACTFAA               */
202 	EXACT,    	/* EXACTFU_SS             */
203 	EXACT,    	/* EXACTFLU8              */
204 	EXACT,    	/* EXACTFAA_NO_TRIE       */
205 	NOTHING,  	/* NOTHING                */
206 	NOTHING,  	/* TAIL                   */
207 	STAR,     	/* STAR                   */
208 	PLUS,     	/* PLUS                   */
209 	CURLY,    	/* CURLY                  */
210 	CURLY,    	/* CURLYN                 */
211 	CURLY,    	/* CURLYM                 */
212 	CURLY,    	/* CURLYX                 */
213 	WHILEM,   	/* WHILEM                 */
214 	OPEN,     	/* OPEN                   */
215 	CLOSE,    	/* CLOSE                  */
216 	SROPEN,   	/* SROPEN                 */
217 	SRCLOSE,  	/* SRCLOSE                */
218 	REF,      	/* REF                    */
219 	REF,      	/* REFF                   */
220 	REF,      	/* REFFL                  */
221 	REF,      	/* REFFU                  */
222 	REF,      	/* REFFA                  */
223 	REF,      	/* NREF                   */
224 	REF,      	/* NREFF                  */
225 	REF,      	/* NREFFL                 */
226 	REF,      	/* NREFFU                 */
227 	REF,      	/* NREFFA                 */
228 	LONGJMP,  	/* LONGJMP                */
229 	BRANCHJ,  	/* BRANCHJ                */
230 	BRANCHJ,  	/* IFMATCH                */
231 	BRANCHJ,  	/* UNLESSM                */
232 	BRANCHJ,  	/* SUSPEND                */
233 	BRANCHJ,  	/* IFTHEN                 */
234 	GROUPP,   	/* GROUPP                 */
235 	EVAL,     	/* EVAL                   */
236 	MINMOD,   	/* MINMOD                 */
237 	LOGICAL,  	/* LOGICAL                */
238 	BRANCHJ,  	/* RENUM                  */
239 	TRIE,     	/* TRIE                   */
240 	TRIE,     	/* TRIEC                  */
241 	TRIE,     	/* AHOCORASICK            */
242 	TRIE,     	/* AHOCORASICKC           */
243 	GOSUB,    	/* GOSUB                  */
244 	NGROUPP,  	/* NGROUPP                */
245 	INSUBP,   	/* INSUBP                 */
246 	DEFINEP,  	/* DEFINEP                */
247 	ENDLIKE,  	/* ENDLIKE                */
248 	ENDLIKE,  	/* OPFAIL                 */
249 	ENDLIKE,  	/* ACCEPT                 */
250 	VERB,     	/* VERB                   */
251 	VERB,     	/* PRUNE                  */
252 	VERB,     	/* MARKPOINT              */
253 	VERB,     	/* SKIP                   */
254 	VERB,     	/* COMMIT                 */
255 	VERB,     	/* CUTGROUP               */
256 	KEEPS,    	/* KEEPS                  */
257 	LNBREAK,  	/* LNBREAK                */
258 	NOTHING,  	/* OPTIMIZED              */
259 	PSEUDO,   	/* PSEUDO                 */
260 	/* ------------ States ------------- */
261 	TRIE,     	/* TRIE_next              */
262 	TRIE,     	/* TRIE_next_fail         */
263 	EVAL,     	/* EVAL_B                 */
264 	EVAL,     	/* EVAL_B_fail            */
265 	EVAL,     	/* EVAL_postponed_AB      */
266 	EVAL,     	/* EVAL_postponed_AB_fail */
267 	CURLYX,   	/* CURLYX_end             */
268 	CURLYX,   	/* CURLYX_end_fail        */
269 	WHILEM,   	/* WHILEM_A_pre           */
270 	WHILEM,   	/* WHILEM_A_pre_fail      */
271 	WHILEM,   	/* WHILEM_A_min           */
272 	WHILEM,   	/* WHILEM_A_min_fail      */
273 	WHILEM,   	/* WHILEM_A_max           */
274 	WHILEM,   	/* WHILEM_A_max_fail      */
275 	WHILEM,   	/* WHILEM_B_min           */
276 	WHILEM,   	/* WHILEM_B_min_fail      */
277 	WHILEM,   	/* WHILEM_B_max           */
278 	WHILEM,   	/* WHILEM_B_max_fail      */
279 	BRANCH,   	/* BRANCH_next            */
280 	BRANCH,   	/* BRANCH_next_fail       */
281 	CURLYM,   	/* CURLYM_A               */
282 	CURLYM,   	/* CURLYM_A_fail          */
283 	CURLYM,   	/* CURLYM_B               */
284 	CURLYM,   	/* CURLYM_B_fail          */
285 	IFMATCH,  	/* IFMATCH_A              */
286 	IFMATCH,  	/* IFMATCH_A_fail         */
287 	CURLY,    	/* CURLY_B_min_known      */
288 	CURLY,    	/* CURLY_B_min_known_fail */
289 	CURLY,    	/* CURLY_B_min            */
290 	CURLY,    	/* CURLY_B_min_fail       */
291 	CURLY,    	/* CURLY_B_max            */
292 	CURLY,    	/* CURLY_B_max_fail       */
293 	COMMIT,   	/* COMMIT_next            */
294 	COMMIT,   	/* COMMIT_next_fail       */
295 	MARKPOINT,	/* MARKPOINT_next         */
296 	MARKPOINT,	/* MARKPOINT_next_fail    */
297 	SKIP,     	/* SKIP_next              */
298 	SKIP,     	/* SKIP_next_fail         */
299 	CUTGROUP, 	/* CUTGROUP_next          */
300 	CUTGROUP, 	/* CUTGROUP_next_fail     */
301 	KEEPS,    	/* KEEPS_next             */
302 	KEEPS,    	/* KEEPS_next_fail        */
303 };
304 #endif
305 
306 #ifdef REG_COMP_C
307 
308 /* regarglen[] - How large is the argument part of the node (in regnodes); one entry per regop (END through PSEUDO), none for states */
309 
310 static const U8 regarglen[] = {
311 	0,                                   	/* END          */
312 	0,                                   	/* SUCCEED      */
313 	0,                                   	/* SBOL         */
314 	0,                                   	/* MBOL         */
315 	0,                                   	/* SEOL         */
316 	0,                                   	/* MEOL         */
317 	0,                                   	/* EOS          */
318 	0,                                   	/* GPOS         */
319 	0,                                   	/* BOUND        */
320 	0,                                   	/* BOUNDL       */
321 	0,                                   	/* BOUNDU       */
322 	0,                                   	/* BOUNDA       */
323 	0,                                   	/* NBOUND       */
324 	0,                                   	/* NBOUNDL      */
325 	0,                                   	/* NBOUNDU      */
326 	0,                                   	/* NBOUNDA      */
327 	0,                                   	/* REG_ANY      */
328 	0,                                   	/* SANY         */
329 	EXTRA_SIZE(struct regnode_1),        	/* ANYOF        */
330 	EXTRA_SIZE(struct regnode_1),        	/* ANYOFD       */
331 	EXTRA_SIZE(struct regnode_1),        	/* ANYOFL       */
332 	EXTRA_SIZE(struct regnode_1),        	/* ANYOFM       */
333 	0,                                   	/* POSIXD       */
334 	0,                                   	/* POSIXL       */
335 	0,                                   	/* POSIXU       */
336 	0,                                   	/* POSIXA       */
337 	0,                                   	/* NPOSIXD      */
338 	0,                                   	/* NPOSIXL      */
339 	0,                                   	/* NPOSIXU      */
340 	0,                                   	/* NPOSIXA      */
341 	0,                                   	/* ASCII        */
342 	0,                                   	/* NASCII       */
343 	0,                                   	/* CLUMP        */
344 	0,                                   	/* BRANCH       */
345 	0,                                   	/* EXACT        */
346 	0,                                   	/* EXACTL       */
347 	0,                                   	/* EXACTF       */
348 	0,                                   	/* EXACTFL      */
349 	0,                                   	/* EXACTFU      */
350 	0,                                   	/* EXACTFAA     */
351 	0,                                   	/* EXACTFU_SS   */
352 	0,                                   	/* EXACTFLU8    */
353 	0,                                   	/* EXACTFAA_NO_TRIE */
354 	0,                                   	/* NOTHING      */
355 	0,                                   	/* TAIL         */
356 	0,                                   	/* STAR         */
357 	0,                                   	/* PLUS         */
358 	EXTRA_SIZE(struct regnode_2),        	/* CURLY        */
359 	EXTRA_SIZE(struct regnode_2),        	/* CURLYN       */
360 	EXTRA_SIZE(struct regnode_2),        	/* CURLYM       */
361 	EXTRA_SIZE(struct regnode_2),        	/* CURLYX       */
362 	0,                                   	/* WHILEM       */
363 	EXTRA_SIZE(struct regnode_1),        	/* OPEN         */
364 	EXTRA_SIZE(struct regnode_1),        	/* CLOSE        */
365 	0,                                   	/* SROPEN       */
366 	0,                                   	/* SRCLOSE      */
367 	EXTRA_SIZE(struct regnode_1),        	/* REF          */
368 	EXTRA_SIZE(struct regnode_1),        	/* REFF         */
369 	EXTRA_SIZE(struct regnode_1),        	/* REFFL        */
370 	EXTRA_SIZE(struct regnode_1),        	/* REFFU        */
371 	EXTRA_SIZE(struct regnode_1),        	/* REFFA        */
372 	EXTRA_SIZE(struct regnode_1),        	/* NREF         */
373 	EXTRA_SIZE(struct regnode_1),        	/* NREFF        */
374 	EXTRA_SIZE(struct regnode_1),        	/* NREFFL       */
375 	EXTRA_SIZE(struct regnode_1),        	/* NREFFU       */
376 	EXTRA_SIZE(struct regnode_1),        	/* NREFFA       */
377 	EXTRA_SIZE(struct regnode_1),        	/* LONGJMP      */
378 	EXTRA_SIZE(struct regnode_1),        	/* BRANCHJ      */
379 	EXTRA_SIZE(struct regnode_1),        	/* IFMATCH      */
380 	EXTRA_SIZE(struct regnode_1),        	/* UNLESSM      */
381 	EXTRA_SIZE(struct regnode_1),        	/* SUSPEND      */
382 	EXTRA_SIZE(struct regnode_1),        	/* IFTHEN       */
383 	EXTRA_SIZE(struct regnode_1),        	/* GROUPP       */
384 	EXTRA_SIZE(struct regnode_2L),       	/* EVAL         */
385 	0,                                   	/* MINMOD       */
386 	0,                                   	/* LOGICAL      */
387 	EXTRA_SIZE(struct regnode_1),        	/* RENUM        */
388 	EXTRA_SIZE(struct regnode_1),        	/* TRIE         */
389 	EXTRA_SIZE(struct regnode_charclass),	/* TRIEC        */
390 	EXTRA_SIZE(struct regnode_1),        	/* AHOCORASICK  */
391 	EXTRA_SIZE(struct regnode_charclass),	/* AHOCORASICKC */
392 	EXTRA_SIZE(struct regnode_2L),       	/* GOSUB        */
393 	EXTRA_SIZE(struct regnode_1),        	/* NGROUPP      */
394 	EXTRA_SIZE(struct regnode_1),        	/* INSUBP       */
395 	EXTRA_SIZE(struct regnode_1),        	/* DEFINEP      */
396 	0,                                   	/* ENDLIKE      */
397 	EXTRA_SIZE(struct regnode_1),        	/* OPFAIL       */
398 	EXTRA_SIZE(struct regnode_2L),       	/* ACCEPT       */
399 	EXTRA_SIZE(struct regnode_1),        	/* VERB         */
400 	EXTRA_SIZE(struct regnode_1),        	/* PRUNE        */
401 	EXTRA_SIZE(struct regnode_1),        	/* MARKPOINT    */
402 	EXTRA_SIZE(struct regnode_1),        	/* SKIP         */
403 	EXTRA_SIZE(struct regnode_1),        	/* COMMIT       */
404 	EXTRA_SIZE(struct regnode_1),        	/* CUTGROUP     */
405 	0,                                   	/* KEEPS        */
406 	0,                                   	/* LNBREAK      */
407 	0,                                   	/* OPTIMIZED    */
408 	0,                                   	/* PSEUDO       */
409 };
410 
411 /* reg_off_by_arg[] - Which argument holds the offset to the next node; one entry per regop (END through PSEUDO), none for states */
412 
413 static const char reg_off_by_arg[] = {
414 	0,	/* END          */
415 	0,	/* SUCCEED      */
416 	0,	/* SBOL         */
417 	0,	/* MBOL         */
418 	0,	/* SEOL         */
419 	0,	/* MEOL         */
420 	0,	/* EOS          */
421 	0,	/* GPOS         */
422 	0,	/* BOUND        */
423 	0,	/* BOUNDL       */
424 	0,	/* BOUNDU       */
425 	0,	/* BOUNDA       */
426 	0,	/* NBOUND       */
427 	0,	/* NBOUNDL      */
428 	0,	/* NBOUNDU      */
429 	0,	/* NBOUNDA      */
430 	0,	/* REG_ANY      */
431 	0,	/* SANY         */
432 	0,	/* ANYOF        */
433 	0,	/* ANYOFD       */
434 	0,	/* ANYOFL       */
435 	0,	/* ANYOFM       */
436 	0,	/* POSIXD       */
437 	0,	/* POSIXL       */
438 	0,	/* POSIXU       */
439 	0,	/* POSIXA       */
440 	0,	/* NPOSIXD      */
441 	0,	/* NPOSIXL      */
442 	0,	/* NPOSIXU      */
443 	0,	/* NPOSIXA      */
444 	0,	/* ASCII        */
445 	0,	/* NASCII       */
446 	0,	/* CLUMP        */
447 	0,	/* BRANCH       */
448 	0,	/* EXACT        */
449 	0,	/* EXACTL       */
450 	0,	/* EXACTF       */
451 	0,	/* EXACTFL      */
452 	0,	/* EXACTFU      */
453 	0,	/* EXACTFAA     */
454 	0,	/* EXACTFU_SS   */
455 	0,	/* EXACTFLU8    */
456 	0,	/* EXACTFAA_NO_TRIE */
457 	0,	/* NOTHING      */
458 	0,	/* TAIL         */
459 	0,	/* STAR         */
460 	0,	/* PLUS         */
461 	0,	/* CURLY        */
462 	0,	/* CURLYN       */
463 	0,	/* CURLYM       */
464 	0,	/* CURLYX       */
465 	0,	/* WHILEM       */
466 	0,	/* OPEN         */
467 	0,	/* CLOSE        */
468 	0,	/* SROPEN       */
469 	0,	/* SRCLOSE      */
470 	0,	/* REF          */
471 	0,	/* REFF         */
472 	0,	/* REFFL        */
473 	0,	/* REFFU        */
474 	0,	/* REFFA        */
475 	0,	/* NREF         */
476 	0,	/* NREFF        */
477 	0,	/* NREFFL       */
478 	0,	/* NREFFU       */
479 	0,	/* NREFFA       */
480 	1,	/* LONGJMP      */
481 	1,	/* BRANCHJ      */
482 	2,	/* IFMATCH      */
483 	2,	/* UNLESSM      */
484 	1,	/* SUSPEND      */
485 	1,	/* IFTHEN       */
486 	0,	/* GROUPP       */
487 	0,	/* EVAL         */
488 	0,	/* MINMOD       */
489 	0,	/* LOGICAL      */
490 	1,	/* RENUM        */
491 	0,	/* TRIE         */
492 	0,	/* TRIEC        */
493 	0,	/* AHOCORASICK  */
494 	0,	/* AHOCORASICKC */
495 	0,	/* GOSUB        */
496 	0,	/* NGROUPP      */
497 	0,	/* INSUBP       */
498 	0,	/* DEFINEP      */
499 	0,	/* ENDLIKE      */
500 	0,	/* OPFAIL       */
501 	0,	/* ACCEPT       */
502 	0,	/* VERB         */
503 	0,	/* PRUNE        */
504 	0,	/* MARKPOINT    */
505 	0,	/* SKIP         */
506 	0,	/* COMMIT       */
507 	0,	/* CUTGROUP     */
508 	0,	/* KEEPS        */
509 	0,	/* LNBREAK      */
510 	0,	/* OPTIMIZED    */
511 	0,	/* PSEUDO       */
512 };
513 
514 #endif /* REG_COMP_C */
515 
516 
517 /* PL_reg_name[] - Opcode/state names in string form, for debugging */
518 
519 #ifndef DOINIT
520 EXTCONST char * PL_reg_name[];
521 #else
522 EXTCONST char * const PL_reg_name[] = {
523 	"END",                   	/* 0000 */
524 	"SUCCEED",               	/* 0x01 */
525 	"SBOL",                  	/* 0x02 */
526 	"MBOL",                  	/* 0x03 */
527 	"SEOL",                  	/* 0x04 */
528 	"MEOL",                  	/* 0x05 */
529 	"EOS",                   	/* 0x06 */
530 	"GPOS",                  	/* 0x07 */
531 	"BOUND",                 	/* 0x08 */
532 	"BOUNDL",                	/* 0x09 */
533 	"BOUNDU",                	/* 0x0a */
534 	"BOUNDA",                	/* 0x0b */
535 	"NBOUND",                	/* 0x0c */
536 	"NBOUNDL",               	/* 0x0d */
537 	"NBOUNDU",               	/* 0x0e */
538 	"NBOUNDA",               	/* 0x0f */
539 	"REG_ANY",               	/* 0x10 */
540 	"SANY",                  	/* 0x11 */
541 	"ANYOF",                 	/* 0x12 */
542 	"ANYOFD",                	/* 0x13 */
543 	"ANYOFL",                	/* 0x14 */
544 	"ANYOFM",                	/* 0x15 */
545 	"POSIXD",                	/* 0x16 */
546 	"POSIXL",                	/* 0x17 */
547 	"POSIXU",                	/* 0x18 */
548 	"POSIXA",                	/* 0x19 */
549 	"NPOSIXD",               	/* 0x1a */
550 	"NPOSIXL",               	/* 0x1b */
551 	"NPOSIXU",               	/* 0x1c */
552 	"NPOSIXA",               	/* 0x1d */
553 	"ASCII",                 	/* 0x1e */
554 	"NASCII",                	/* 0x1f */
555 	"CLUMP",                 	/* 0x20 */
556 	"BRANCH",                	/* 0x21 */
557 	"EXACT",                 	/* 0x22 */
558 	"EXACTL",                	/* 0x23 */
559 	"EXACTF",                	/* 0x24 */
560 	"EXACTFL",               	/* 0x25 */
561 	"EXACTFU",               	/* 0x26 */
562 	"EXACTFAA",              	/* 0x27 */
563 	"EXACTFU_SS",            	/* 0x28 */
564 	"EXACTFLU8",             	/* 0x29 */
565 	"EXACTFAA_NO_TRIE",      	/* 0x2a */
566 	"NOTHING",               	/* 0x2b */
567 	"TAIL",                  	/* 0x2c */
568 	"STAR",                  	/* 0x2d */
569 	"PLUS",                  	/* 0x2e */
570 	"CURLY",                 	/* 0x2f */
571 	"CURLYN",                	/* 0x30 */
572 	"CURLYM",                	/* 0x31 */
573 	"CURLYX",                	/* 0x32 */
574 	"WHILEM",                	/* 0x33 */
575 	"OPEN",                  	/* 0x34 */
576 	"CLOSE",                 	/* 0x35 */
577 	"SROPEN",                	/* 0x36 */
578 	"SRCLOSE",               	/* 0x37 */
579 	"REF",                   	/* 0x38 */
580 	"REFF",                  	/* 0x39 */
581 	"REFFL",                 	/* 0x3a */
582 	"REFFU",                 	/* 0x3b */
583 	"REFFA",                 	/* 0x3c */
584 	"NREF",                  	/* 0x3d */
585 	"NREFF",                 	/* 0x3e */
586 	"NREFFL",                	/* 0x3f */
587 	"NREFFU",                	/* 0x40 */
588 	"NREFFA",                	/* 0x41 */
589 	"LONGJMP",               	/* 0x42 */
590 	"BRANCHJ",               	/* 0x43 */
591 	"IFMATCH",               	/* 0x44 */
592 	"UNLESSM",               	/* 0x45 */
593 	"SUSPEND",               	/* 0x46 */
594 	"IFTHEN",                	/* 0x47 */
595 	"GROUPP",                	/* 0x48 */
596 	"EVAL",                  	/* 0x49 */
597 	"MINMOD",                	/* 0x4a */
598 	"LOGICAL",               	/* 0x4b */
599 	"RENUM",                 	/* 0x4c */
600 	"TRIE",                  	/* 0x4d */
601 	"TRIEC",                 	/* 0x4e */
602 	"AHOCORASICK",           	/* 0x4f */
603 	"AHOCORASICKC",          	/* 0x50 */
604 	"GOSUB",                 	/* 0x51 */
605 	"NGROUPP",               	/* 0x52 */
606 	"INSUBP",                	/* 0x53 */
607 	"DEFINEP",               	/* 0x54 */
608 	"ENDLIKE",               	/* 0x55 */
609 	"OPFAIL",                	/* 0x56 */
610 	"ACCEPT",                	/* 0x57 */
611 	"VERB",                  	/* 0x58 */
612 	"PRUNE",                 	/* 0x59 */
613 	"MARKPOINT",             	/* 0x5a */
614 	"SKIP",                  	/* 0x5b */
615 	"COMMIT",                	/* 0x5c */
616 	"CUTGROUP",              	/* 0x5d */
617 	"KEEPS",                 	/* 0x5e */
618 	"LNBREAK",               	/* 0x5f */
619 	"OPTIMIZED",             	/* 0x60 */
620 	"PSEUDO",                	/* 0x61 */
621 	/* ------------ States ------------- */
622 	"TRIE_next",             	/* REGNODE_MAX +0x01 */
623 	"TRIE_next_fail",        	/* REGNODE_MAX +0x02 */
624 	"EVAL_B",                	/* REGNODE_MAX +0x03 */
625 	"EVAL_B_fail",           	/* REGNODE_MAX +0x04 */
626 	"EVAL_postponed_AB",     	/* REGNODE_MAX +0x05 */
627 	"EVAL_postponed_AB_fail",	/* REGNODE_MAX +0x06 */
628 	"CURLYX_end",            	/* REGNODE_MAX +0x07 */
629 	"CURLYX_end_fail",       	/* REGNODE_MAX +0x08 */
630 	"WHILEM_A_pre",          	/* REGNODE_MAX +0x09 */
631 	"WHILEM_A_pre_fail",     	/* REGNODE_MAX +0x0a */
632 	"WHILEM_A_min",          	/* REGNODE_MAX +0x0b */
633 	"WHILEM_A_min_fail",     	/* REGNODE_MAX +0x0c */
634 	"WHILEM_A_max",          	/* REGNODE_MAX +0x0d */
635 	"WHILEM_A_max_fail",     	/* REGNODE_MAX +0x0e */
636 	"WHILEM_B_min",          	/* REGNODE_MAX +0x0f */
637 	"WHILEM_B_min_fail",     	/* REGNODE_MAX +0x10 */
638 	"WHILEM_B_max",          	/* REGNODE_MAX +0x11 */
639 	"WHILEM_B_max_fail",     	/* REGNODE_MAX +0x12 */
640 	"BRANCH_next",           	/* REGNODE_MAX +0x13 */
641 	"BRANCH_next_fail",      	/* REGNODE_MAX +0x14 */
642 	"CURLYM_A",              	/* REGNODE_MAX +0x15 */
643 	"CURLYM_A_fail",         	/* REGNODE_MAX +0x16 */
644 	"CURLYM_B",              	/* REGNODE_MAX +0x17 */
645 	"CURLYM_B_fail",         	/* REGNODE_MAX +0x18 */
646 	"IFMATCH_A",             	/* REGNODE_MAX +0x19 */
647 	"IFMATCH_A_fail",        	/* REGNODE_MAX +0x1a */
648 	"CURLY_B_min_known",     	/* REGNODE_MAX +0x1b */
649 	"CURLY_B_min_known_fail",	/* REGNODE_MAX +0x1c */
650 	"CURLY_B_min",           	/* REGNODE_MAX +0x1d */
651 	"CURLY_B_min_fail",      	/* REGNODE_MAX +0x1e */
652 	"CURLY_B_max",           	/* REGNODE_MAX +0x1f */
653 	"CURLY_B_max_fail",      	/* REGNODE_MAX +0x20 */
654 	"COMMIT_next",           	/* REGNODE_MAX +0x21 */
655 	"COMMIT_next_fail",      	/* REGNODE_MAX +0x22 */
656 	"MARKPOINT_next",        	/* REGNODE_MAX +0x23 */
657 	"MARKPOINT_next_fail",   	/* REGNODE_MAX +0x24 */
658 	"SKIP_next",             	/* REGNODE_MAX +0x25 */
659 	"SKIP_next_fail",        	/* REGNODE_MAX +0x26 */
660 	"CUTGROUP_next",         	/* REGNODE_MAX +0x27 */
661 	"CUTGROUP_next_fail",    	/* REGNODE_MAX +0x28 */
662 	"KEEPS_next",            	/* REGNODE_MAX +0x29 */
663 	"KEEPS_next_fail",       	/* REGNODE_MAX +0x2a */
664 };
665 #endif /* DOINIT */
666 
667 /* PL_reg_extflags_name[] - Names of the extflags bits in string form, for debugging */
668 
669 #ifndef DOINIT
/* DOINIT not defined: only declare the table, without an initializer.
 * NOTE(review): this declaration omits the `const` after the `*` that the
 * definition in the #else branch carries -- confirm the mismatch is intended. */
670 EXTCONST char * PL_reg_extflags_name[];
671 #else
/* DOINIT defined: emit the table itself.  Going by the per-entry comments the
 * entries are in ascending bit order, so entry i names bit (1 << i); there are
 * 32 entries, covering 0x00000001 through 0x80000000. */
672 EXTCONST char * const PL_reg_extflags_name[] = {
673 	/* Bits in extflags defined: 11111111111111110000111111111111 */
674 	"MULTILINE",        /* 0x00000001 */
675 	"SINGLELINE",       /* 0x00000002 */
676 	"FOLD",             /* 0x00000004 */
677 	"EXTENDED",         /* 0x00000008 */
678 	"EXTENDED_MORE",    /* 0x00000010 */
679 	"NOCAPTURE",        /* 0x00000020 */
680 	"KEEPCOPY",         /* 0x00000040 */
	/* The next three entries are the individual bits of the multi-bit field
	 * the comments call "CHARSET" (mask 0x00000380). */
681 	"CHARSET0",         /* 0x00000080 : "CHARSET" - 0x00000380 */
682 	"CHARSET1",         /* 0x00000100 : "CHARSET" - 0x00000380 */
683 	"CHARSET2",         /* 0x00000200 : "CHARSET" - 0x00000380 */
684 	"STRICT",           /* 0x00000400 */
685 	"SPLIT",            /* 0x00000800 */
	/* Placeholders for bits 12-15, which are the zeros in the "Bits in
	 * extflags defined" mask above; with them in place the following entries
	 * stay at index == bit number. */
686 	"UNUSED_BIT_12",    /* 0x00001000 */
687 	"UNUSED_BIT_13",    /* 0x00002000 */
688 	"UNUSED_BIT_14",    /* 0x00004000 */
689 	"UNUSED_BIT_15",    /* 0x00008000 */
690 	"NO_INPLACE_SUBST", /* 0x00010000 */
691 	"EVAL_SEEN",        /* 0x00020000 */
692 	"UNBOUNDED_QUANTIFIER_SEEN",/* 0x00040000 */
693 	"CHECK_ALL",        /* 0x00080000 */
694 	"MATCH_UTF8",       /* 0x00100000 */
695 	"USE_INTUIT_NOML",  /* 0x00200000 */
696 	"USE_INTUIT_ML",    /* 0x00400000 */
697 	"INTUIT_TAIL",      /* 0x00800000 */
698 	"IS_ANCHORED",      /* 0x01000000 */
699 	"COPY_DONE",        /* 0x02000000 */
700 	"TAINTED_SEEN",     /* 0x04000000 */
701 	"TAINTED",          /* 0x08000000 */
702 	"START_ONLY",       /* 0x10000000 */
703 	"SKIPWHITE",        /* 0x20000000 */
704 	"WHITE",            /* 0x40000000 */
705 	"NULL",             /* 0x80000000 */
706 };
707 #endif /* DOINIT */
708 
709 #ifdef DEBUGGING
/* Only defined when DEBUGGING is defined.  The value equals the number of
 * entries in the PL_reg_extflags_name[] initializer (32 names). */
710 #  define REG_EXTFLAGS_NAME_SIZE 32
711 #endif
712 
713 /* PL_reg_intflags_name[] - Names of the PREGf_* flag bits (intflags) in string form, for debugging */
714 
715 #ifndef DOINIT
/* DOINIT not defined: only declare the table, without an initializer.
 * NOTE(review): this declaration omits the `const` after the `*` that the
 * definition in the #else branch carries -- confirm the mismatch is intended. */
716 EXTCONST char * PL_reg_intflags_name[];
717 #else
/* DOINIT defined: emit the table itself.  Each entry's comment gives the flag
 * value and the PREGf_ macro the string stands for; entries are listed in
 * ascending flag value. */
718 EXTCONST char * const PL_reg_intflags_name[] = {
719 	"SKIP",                       /* 0x00000001 - PREGf_SKIP */
720 	"IMPLICIT",                   /* 0x00000002 - PREGf_IMPLICIT -  Converted .* to ^.*  */
721 	"NAUGHTY",                    /* 0x00000004 - PREGf_NAUGHTY -  how exponential is this pattern?  */
722 	"VERBARG_SEEN",               /* 0x00000008 - PREGf_VERBARG_SEEN */
723 	"CUTGROUP_SEEN",              /* 0x00000010 - PREGf_CUTGROUP_SEEN */
724 	"USE_RE_EVAL",                /* 0x00000020 - PREGf_USE_RE_EVAL -  compiled with "use re 'eval'"  */
725 	"NOSCAN",                     /* 0x00000040 - PREGf_NOSCAN */
	/* NOTE(review): there is no entry for 0x00000080, so from here on the
	 * array index is one less than the bit number (GPOS_SEEN is entry 7 but
	 * bit 8).  Any code that indexes this table by bit number would attach
	 * the wrong name to these flags -- verify against the table's users. */
726 	"GPOS_SEEN",                  /* 0x00000100 - PREGf_GPOS_SEEN */
727 	"GPOS_FLOAT",                 /* 0x00000200 - PREGf_GPOS_FLOAT */
728 	"ANCH_MBOL",                  /* 0x00000400 - PREGf_ANCH_MBOL */
729 	"ANCH_SBOL",                  /* 0x00000800 - PREGf_ANCH_SBOL */
730 	"ANCH_GPOS",                  /* 0x00001000 - PREGf_ANCH_GPOS */
731 	"RECURSE_SEEN",               /* 0x00002000 - PREGf_RECURSE_SEEN */
732 };
733 #endif /* DOINIT */
734 
735 #ifdef DEBUGGING
/* Only defined when DEBUGGING is defined.  The value equals the number of
 * entries in the PL_reg_intflags_name[] initializer (13 names).  Note that
 * this is the entry count, not the bit width: the highest flag listed in that
 * table is 0x00002000, i.e. bit 13. */
736 #  define REG_INTFLAGS_NAME_SIZE 13
737 #endif
738 
739 /* The following have no fixed length. U8 so we can do strchr() on it. */
/* REGNODE_VARIES(node): tests the bit for `node` in PL_varies_bitmask[],
 * using byte (node >> 3) and bit (node & 7).  The result is the masked bit
 * itself (non-zero when set), not a normalized 0/1.  `node` is expanded twice
 * and the index is not range-checked. */
740 #define REGNODE_VARIES(node) (PL_varies_bitmask[(node) >> 3] & (1 << ((node) & 7)))
741 
742 #ifndef DOINIT
/* DOINIT not defined: declaration only. */
743 EXTCONST U8 PL_varies[] __attribute__deprecated__;
744 #else
/* DOINIT defined: list of the opcodes described above as having no fixed
 * length, ended by a 0 byte.  The array is tagged __attribute__deprecated__;
 * REGNODE_VARIES() reads PL_varies_bitmask[] rather than this list. */
745 EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
746     CLUMP, BRANCH, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM, REF,
747     REFF, REFFL, REFFU, REFFA, NREF, NREFF, NREFFL, NREFFU, NREFFA,
748     BRANCHJ, SUSPEND, IFTHEN,
749     0
750 };
751 #endif /* DOINIT */
752 
753 #ifndef DOINIT
/* DOINIT not defined: declaration only. */
754 EXTCONST U8 PL_varies_bitmask[];
755 #else
/* DOINIT defined: 13-byte bitmap consulted by REGNODE_VARIES(); opcode n is
 * bit (n & 7) of byte (n >> 3).  Using the opcode numbers given in the
 * PL_reg_name[] comments, the bits set are 0x20-0x21 (CLUMP, BRANCH),
 * 0x2d-0x33 (STAR .. WHILEM), 0x38-0x41 (REF .. NREFFA), 0x43 (BRANCHJ) and
 * 0x46-0x47 (SUSPEND, IFTHEN) -- the same opcodes PL_varies[] lists. */
756 EXTCONST U8 PL_varies_bitmask[] = {
757     0x00, 0x00, 0x00, 0x00, 0x03, 0xE0, 0x0F, 0xFF, 0xCB, 0x00, 0x00, 0x00, 0x00
758 };
759 #endif /* DOINIT */
760 
761 /* The following always have a length of 1. U8 so we can do strchr() on it. */
762 /* (Note that length 1 means "one character" under UTF8, not "one octet".) */
/* REGNODE_SIMPLE(node): tests the bit for `node` in PL_simple_bitmask[],
 * using byte (node >> 3) and bit (node & 7).  The result is the masked bit
 * itself (non-zero when set), not a normalized 0/1.  `node` is expanded twice
 * and the index is not range-checked. */
763 #define REGNODE_SIMPLE(node) (PL_simple_bitmask[(node) >> 3] & (1 << ((node) & 7)))
764 
765 #ifndef DOINIT
/* DOINIT not defined: declaration only. */
766 EXTCONST U8 PL_simple[] __attribute__deprecated__;
767 #else
/* DOINIT defined: list of the opcodes described above as always having
 * length 1, ended by a 0 byte.  The array is tagged
 * __attribute__deprecated__; REGNODE_SIMPLE() reads PL_simple_bitmask[]
 * rather than this list. */
768 EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
769     REG_ANY, SANY, ANYOF, ANYOFD, ANYOFL, ANYOFM, POSIXD, POSIXL, POSIXU,
770     POSIXA, NPOSIXD, NPOSIXL, NPOSIXU, NPOSIXA, ASCII, NASCII,
771     0
772 };
773 #endif /* DOINIT */
774 
775 #ifndef DOINIT
/* DOINIT not defined: declaration only. */
776 EXTCONST U8 PL_simple_bitmask[];
777 #else
/* DOINIT defined: 13-byte bitmap consulted by REGNODE_SIMPLE(); opcode n is
 * bit (n & 7) of byte (n >> 3).  Bytes 2 and 3 are 0xFF, so exactly opcodes
 * 0x10-0x1f are flagged.  That is 16 opcodes, the same count as the names in
 * PL_simple[], whose last entry NASCII is 0x1f per the PL_reg_name[]
 * comments. */
778 EXTCONST U8 PL_simple_bitmask[] = {
779     0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
780 };
781 #endif /* DOINIT */
782 
783 /* ex: set ro: */
784