xref: /netbsd-src/external/bsd/libpcap/dist/scanner.l (revision 8bda04910f1f2c366c51d4cbd40df19f5b57f1dd)
1 /*	$NetBSD: scanner.l,v 1.14 2024/09/02 15:33:38 christos Exp $	*/
2 
3 %top {
4 /* Must come first for _LARGE_FILE_API on AIX. */
5 #include <config.h>
6 
7 /*
8  * Must come first to avoid warnings on Windows.
9  *
10  * Flex-generated scanners may only include <inttypes.h> if __STDC_VERSION__
11  * is defined with a value >= 199901, meaning "full C99", and MSVC may not
12  * define it with that value, because it isn't 100% C99-compliant, even
13  * though it has an <inttypes.h> capable of defining everything the Flex
14  * scanner needs.
15  *
16  * We, however, will include it if we know we have an MSVC version that has
17  * it; this means that we may define the INTn_MAX and UINTn_MAX values in
18  * scanner.c, and then include <stdint.h>, which may define them differently
19  * (same value, but different string of characters), causing compiler warnings.
20  *
21  * If we include it here, and they're defined, that'll prevent scanner.c
22  * from defining them.  So we include <pcap/pcap-inttypes.h>, to get
23  * <inttypes.h> if we have it.
24  */
25 #include <pcap/pcap-inttypes.h>
26 
27 /*
28  * grammar.h requires gencode.h and sometimes breaks in a polluted namespace
29  * (see ftmacros.h), so include it early.
30  */
31 #include "gencode.h"
32 #include "grammar.h"
33 
34 #include "diag-control.h"
35 
36 /*
37  * Convert string to 32-bit unsigned integer; the string starts at
38  * string and is string_len bytes long.
39  *
40  * On success, sets *val to the value and returns 1.
41  * On failure, sets the BPF error string and returns 0.
42  *
43  * Also used in gencode.c
44  */
45 typedef enum {
46 	STOULEN_OK,
47 	STOULEN_NOT_HEX_NUMBER,
48 	STOULEN_NOT_OCTAL_NUMBER,
49 	STOULEN_NOT_DECIMAL_NUMBER,
50 	STOULEN_ERROR
51 } stoulen_ret;
52 
53 stoulen_ret stoulen(const char *string, size_t stringlen, bpf_u_int32 *val,
54     compiler_state_t *cstate);
55 }
56 
57 /*
58  * We want a reentrant scanner.
59  */
60 %option reentrant
61 
62 /*
63  * And we need to pass the compiler state to the scanner.
64  */
65 %option extra-type="compiler_state_t *"
66 
67 /*
68  * We don't use input, so don't generate code for it.
69  */
70 %option noinput
71 
72 /*
73  * We don't use unput, so don't generate code for it.
74  */
75 %option nounput
76 
77 /*
78  * We don't read from the terminal.
79  */
80 %option never-interactive
81 
82 /*
83  * We want to stop processing when we get to the end of the input.
84  */
85 %option noyywrap
86 
87 /*
88  * We want to generate code that can be used by a reentrant parser
89  * generated by Bison or Berkeley YACC.
90  */
91 %option bison-bridge
92 
93 %{
94 /*
95  * Copyright (c) 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
96  *	The Regents of the University of California.  All rights reserved.
97  *
98  * Redistribution and use in source and binary forms, with or without
99  * modification, are permitted provided that: (1) source code distributions
100  * retain the above copyright notice and this paragraph in its entirety, (2)
101  * distributions including binary code include the above copyright notice and
102  * this paragraph in its entirety in the documentation or other materials
103  * provided with the distribution, and (3) all advertising materials mentioning
104  * features or use of this software display the following acknowledgement:
105  * ``This product includes software developed by the University of California,
106  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
107  * the University nor the names of its contributors may be used to endorse
108  * or promote products derived from this software without specific prior
109  * written permission.
110  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
111  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
112  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
113  */
114 #include <sys/cdefs.h>
115 __RCSID("$NetBSD: scanner.l,v 1.14 2024/09/02 15:33:38 christos Exp $");
116 
117 #include <string.h>
118 
119 #include "pcap-int.h"
120 
121 /*
122  * Earlier versions of Flex don't declare these, so we declare them
123  * ourselves to squelch warnings.
124  */
125 int pcap_get_column(yyscan_t);
126 void pcap_set_column(int, yyscan_t);
127 
128 #ifdef INET6
129 
130 #ifdef _WIN32
131 #include <winsock2.h>
132 #include <ws2tcpip.h>
133 /*
134  * To quote the MSDN page for getaddrinfo() at
135  *
136  *    https://msdn.microsoft.com/en-us/library/windows/desktop/ms738520(v=vs.85).aspx
137  *
138  * "Support for getaddrinfo on Windows 2000 and older versions
139  * The getaddrinfo function was added to the Ws2_32.dll on Windows XP and
140  * later. To execute an application that uses this function on earlier
141  * versions of Windows, then you need to include the Ws2tcpip.h and
142  * Wspiapi.h files. When the Wspiapi.h include file is added, the
143  * getaddrinfo function is defined to the WspiapiGetAddrInfo inline
144  * function in the Wspiapi.h file. At runtime, the WspiapiGetAddrInfo
145  * function is implemented in such a way that if the Ws2_32.dll or the
146  * Wship6.dll (the file containing getaddrinfo in the IPv6 Technology
147  * Preview for Windows 2000) does not include getaddrinfo, then a
148  * version of getaddrinfo is implemented inline based on code in the
149  * Wspiapi.h header file. This inline code will be used on older Windows
150  * platforms that do not natively support the getaddrinfo function."
151  *
152  * We use getaddrinfo(), so we include Wspiapi.h here.
153  */
154 #include <wspiapi.h>
155 #else /* _WIN32 */
156 #include <sys/socket.h>	/* for "struct sockaddr" in "struct addrinfo" */
157 #include <netdb.h>	/* for "struct addrinfo" */
158 #endif /* _WIN32 */
159 
160 /* Workaround for AIX 4.3 */
161 #if !defined(AI_NUMERICHOST)
162 #define AI_NUMERICHOST 0x04
163 #endif
164 
165 #endif /*INET6*/
166 
167 #include <pcap/namedb.h>
168 #include "grammar.h"
169 
170 #ifdef HAVE_OS_PROTO_H
171 #include "os-proto.h"
172 #endif
173 
174 static int stou(const char *, YYSTYPE *, compiler_state_t *);
175 
176 /*
177  * Disable diagnostics in the code generated by Flex.
178  */
179 DIAG_OFF_FLEX
180 
181 %}
182 
183 N		([0-9]+|(0X|0x)[0-9A-Fa-f]+)
184 B		([0-9A-Fa-f][0-9A-Fa-f]?)
185 B2		([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])
186 W		([0-9A-Fa-f][0-9A-Fa-f]?[0-9A-Fa-f]?[0-9A-Fa-f]?)
187 
188 %a 18400
189 %o 21500
190 %e 7600
191 %k 4550
192 %p 27600
193 %n 2000
194 
195 V680		{W}:{W}:{W}:{W}:{W}:{W}:{W}:{W}
196 
197 V670		::{W}:{W}:{W}:{W}:{W}:{W}:{W}
198 V671		{W}::{W}:{W}:{W}:{W}:{W}:{W}
199 V672		{W}:{W}::{W}:{W}:{W}:{W}:{W}
200 V673		{W}:{W}:{W}::{W}:{W}:{W}:{W}
201 V674		{W}:{W}:{W}:{W}::{W}:{W}:{W}
202 V675		{W}:{W}:{W}:{W}:{W}::{W}:{W}
203 V676		{W}:{W}:{W}:{W}:{W}:{W}::{W}
204 V677		{W}:{W}:{W}:{W}:{W}:{W}:{W}::
205 
206 V660		::{W}:{W}:{W}:{W}:{W}:{W}
207 V661		{W}::{W}:{W}:{W}:{W}:{W}
208 V662		{W}:{W}::{W}:{W}:{W}:{W}
209 V663		{W}:{W}:{W}::{W}:{W}:{W}
210 V664		{W}:{W}:{W}:{W}::{W}:{W}
211 V665		{W}:{W}:{W}:{W}:{W}::{W}
212 V666		{W}:{W}:{W}:{W}:{W}:{W}::
213 
214 V650		::{W}:{W}:{W}:{W}:{W}
215 V651		{W}::{W}:{W}:{W}:{W}
216 V652		{W}:{W}::{W}:{W}:{W}
217 V653		{W}:{W}:{W}::{W}:{W}
218 V654		{W}:{W}:{W}:{W}::{W}
219 V655		{W}:{W}:{W}:{W}:{W}::
220 
221 V640		::{W}:{W}:{W}:{W}
222 V641		{W}::{W}:{W}:{W}
223 V642		{W}:{W}::{W}:{W}
224 V643		{W}:{W}:{W}::{W}
225 V644		{W}:{W}:{W}:{W}::
226 
227 V630		::{W}:{W}:{W}
228 V631		{W}::{W}:{W}
229 V632		{W}:{W}::{W}
230 V633		{W}:{W}:{W}::
231 
232 V620		::{W}:{W}
233 V621		{W}::{W}
234 V622		{W}:{W}::
235 
236 V610		::{W}
237 V611		{W}::
238 
239 V600		::
240 
241 V6604		{W}:{W}:{W}:{W}:{W}:{W}:{N}\.{N}\.{N}\.{N}
242 
243 V6504		::{W}:{W}:{W}:{W}:{W}:{N}\.{N}\.{N}\.{N}
244 V6514		{W}::{W}:{W}:{W}:{W}:{N}\.{N}\.{N}\.{N}
245 V6524		{W}:{W}::{W}:{W}:{W}:{N}\.{N}\.{N}\.{N}
246 V6534		{W}:{W}:{W}::{W}:{W}:{N}\.{N}\.{N}\.{N}
247 V6544		{W}:{W}:{W}:{W}::{W}:{N}\.{N}\.{N}\.{N}
248 V6554		{W}:{W}:{W}:{W}:{W}::{N}\.{N}\.{N}\.{N}
249 
250 V6404		::{W}:{W}:{W}:{W}:{N}\.{N}\.{N}\.{N}
251 V6414		{W}::{W}:{W}:{W}:{N}\.{N}\.{N}\.{N}
252 V6424		{W}:{W}::{W}:{W}:{N}\.{N}\.{N}\.{N}
253 V6434		{W}:{W}:{W}::{W}:{N}\.{N}\.{N}\.{N}
254 V6444		{W}:{W}:{W}:{W}::{N}\.{N}\.{N}\.{N}
255 
256 V6304		::{W}:{W}:{W}:{N}\.{N}\.{N}\.{N}
257 V6314		{W}::{W}:{W}:{N}\.{N}\.{N}\.{N}
258 V6324		{W}:{W}::{W}:{N}\.{N}\.{N}\.{N}
259 V6334		{W}:{W}:{W}::{N}\.{N}\.{N}\.{N}
260 
261 V6204		::{W}:{W}:{N}\.{N}\.{N}\.{N}
262 V6214		{W}::{W}:{N}\.{N}\.{N}\.{N}
263 V6224		{W}:{W}::{N}\.{N}\.{N}\.{N}
264 
265 V6104		::{W}:{N}\.{N}\.{N}\.{N}
266 V6114		{W}::{N}\.{N}\.{N}\.{N}
267 
268 V6004		::{N}\.{N}\.{N}\.{N}
269 
270 
271 V6		({V680}|{V670}|{V671}|{V672}|{V673}|{V674}|{V675}|{V676}|{V677}|{V660}|{V661}|{V662}|{V663}|{V664}|{V665}|{V666}|{V650}|{V651}|{V652}|{V653}|{V654}|{V655}|{V640}|{V641}|{V642}|{V643}|{V644}|{V630}|{V631}|{V632}|{V633}|{V620}|{V621}|{V622}|{V610}|{V611}|{V600}|{V6604}|{V6504}|{V6514}|{V6524}|{V6534}|{V6544}|{V6554}|{V6404}|{V6414}|{V6424}|{V6434}|{V6444}|{V6304}|{V6314}|{V6324}|{V6334}|{V6204}|{V6214}|{V6224}|{V6104}|{V6114}|{V6004})
272 
273 MAC		({B}:{B}:{B}:{B}:{B}:{B}|{B}\-{B}\-{B}\-{B}\-{B}\-{B}|{B}\.{B}\.{B}\.{B}\.{B}\.{B}|{B2}\.{B2}\.{B2}|{B2}{3})
274 
275 
276 
277 %option nounput
278 %option noinput
279 
280 %%
281 dst		return DST;
282 src		return SRC;
283 
284 link|ether|ppp|slip  return LINK;
285 fddi|tr|wlan	return LINK;
286 arp		return ARP;
287 rarp		return RARP;
288 ip		return IP;
289 sctp		return SCTP;
290 tcp		return TCP;
291 udp		return UDP;
292 icmp		return ICMP;
293 igmp		return IGMP;
294 igrp		return IGRP;
295 pim		return PIM;
296 vrrp		return VRRP;
297 carp		return CARP;
298 radio		return RADIO;
299 
300 ip6		return IPV6;
301 icmp6		return ICMPV6;
302 ah		return AH;
303 esp		return ESP;
304 
305 atalk		return ATALK;
306 aarp		return AARP;
307 decnet		return DECNET;
308 lat		return LAT;
309 sca		return SCA;
310 moprc		return MOPRC;
311 mopdl		return MOPDL;
312 
313 iso		return ISO;
314 esis		return ESIS;
315 es-is		return ESIS;
316 isis		return ISIS;
317 is-is		return ISIS;
318 l1              return L1;
319 l2              return L2;
320 iih             return IIH;
321 lsp             return LSP;
322 snp             return SNP;
323 csnp            return CSNP;
324 psnp            return PSNP;
325 
326 clnp		return CLNP;
327 
328 stp		return STP;
329 
330 ipx		return IPX;
331 
332 netbeui		return NETBEUI;
333 
334 host		return HOST;
335 net		return NET;
336 mask		return NETMASK;
337 port		return PORT;
338 portrange	return PORTRANGE;
339 proto		return PROTO;
340 protochain	return PROTOCHAIN;
341 
342 gateway		return GATEWAY;
343 
344 type		return TYPE;
345 subtype		return SUBTYPE;
346 direction|dir	return DIR;
347 address1|addr1	return ADDR1;
348 address2|addr2	return ADDR2;
349 address3|addr3	return ADDR3;
350 address4|addr4	return ADDR4;
351 ra		return RA;
352 ta		return TA;
353 
354 less		return LESS;
355 greater		return GREATER;
356 byte		return CBYTE;
357 broadcast	return TK_BROADCAST;
358 multicast	return TK_MULTICAST;
359 
360 and|"&&"	return AND;
361 or|"||"		return OR;
362 not		return '!';
363 
364 len|length	return LEN;
365 inbound		return INBOUND;
366 outbound	return OUTBOUND;
367 
368 ifindex		return IFINDEX;
369 
370 vlan		return VLAN;
371 mpls		return MPLS;
372 pppoed		return PPPOED;
373 pppoes		return PPPOES;
374 geneve		return GENEVE;
375 
376 lane		return LANE;
377 llc		return LLC;
378 metac		return METAC;
379 bcc		return BCC;
380 oam		return OAM;
381 oamf4		return OAMF4;
382 oamf4ec		return OAMF4EC;
383 oamf4sc		return OAMF4SC;
384 sc		return SC;
385 ilmic		return ILMIC;
386 vpi		return VPI;
387 vci		return VCI;
388 connectmsg	return CONNECTMSG;
389 metaconnect	return METACONNECT;
390 
391 on|ifname	return PF_IFNAME;
392 rset|ruleset	return PF_RSET;
393 rnr|rulenum	return PF_RNR;
394 srnr|subrulenum	return PF_SRNR;
395 reason		return PF_REASON;
396 action		return PF_ACTION;
397 
398 fisu		return FISU;
399 lssu		return LSSU;
400 lsu		return LSSU;
401 msu		return MSU;
402 hfisu		return HFISU;
403 hlssu		return HLSSU;
404 hmsu		return HMSU;
405 sio		return SIO;
406 opc		return OPC;
407 dpc		return DPC;
408 sls		return SLS;
409 hsio		return HSIO;
410 hopc		return HOPC;
411 hdpc		return HDPC;
412 hsls		return HSLS;
413 
414 [ \r\n\t]		;
415 [+\-*/%:\[\]!<>()&|\^=]	return yytext[0];
416 ">="			return GEQ;
417 "<="			return LEQ;
418 "!="			return NEQ;
419 "=="			return '=';
420 "<<"			return LSH;
421 ">>"			return RSH;
422 ${B}			{ yylval->s = sdup(yyextra, yytext); return AID; }
423 {MAC}			{ yylval->s = sdup(yyextra, yytext); return EID; }
424 {N}			{ return stou(yytext, yylval, yyextra); }
425 ({N}\.{N})|({N}\.{N}\.{N})|({N}\.{N}\.{N}\.{N})	{
426 			yylval->s = sdup(yyextra, (char *)yytext); return HID; }
{V6}			{
			  /*
			   * Text shaped like an IPv6 address.  The pattern is
			   * only a rough syntactic filter, so when IPv6
			   * support is compiled in the text is handed to
			   * getaddrinfo() for validation.  HID6 is returned
			   * in every case; yylval->s is a copy of the text
			   * if it was accepted and NULL (with the BPF error
			   * string set) if it wasn't.
			   */
#ifdef INET6
			  struct addrinfo query;
			  struct addrinfo *answer;

			  /* Numeric IPv6 addresses only; no name lookups. */
			  memset(&query, 0, sizeof(query));
			  query.ai_family = AF_INET6;
			  query.ai_flags = AI_NUMERICHOST;
			  if (getaddrinfo(yytext, NULL, &query, &answer) == 0) {
				/* Only validity matters, not the result. */
				freeaddrinfo(answer);
				yylval->s = sdup(yyextra, (char *)yytext);
			  } else {
				bpf_set_error(yyextra, "bogus IPv6 address %s", yytext);
				yylval->s = NULL;
			  }
#else /* INET6 */
			  /* Built without IPv6 support: always an error. */
			  bpf_set_error(yyextra, "IPv6 address %s not supported", yytext);
			  yylval->s = NULL;
#endif /* INET6 */
			  return HID6;
			}
446 {B}:+({B}:+)+		{ bpf_set_error(yyextra, "bogus ethernet address %s", yytext); yylval->s = NULL; return EID; }
447 icmptype		{ yylval->h = 0; return NUM; }
448 icmpcode		{ yylval->h = 1; return NUM; }
449 icmp-echoreply		{ yylval->h = 0; return NUM; }
450 icmp-unreach		{ yylval->h = 3; return NUM; }
451 icmp-sourcequench	{ yylval->h = 4; return NUM; }
452 icmp-redirect		{ yylval->h = 5; return NUM; }
453 icmp-echo		{ yylval->h = 8; return NUM; }
454 icmp-routeradvert	{ yylval->h = 9; return NUM; }
455 icmp-routersolicit	{ yylval->h = 10; return NUM; }
456 icmp-timxceed		{ yylval->h = 11; return NUM; }
457 icmp-paramprob		{ yylval->h = 12; return NUM; }
458 icmp-tstamp		{ yylval->h = 13; return NUM; }
459 icmp-tstampreply	{ yylval->h = 14; return NUM; }
460 icmp-ireq		{ yylval->h = 15; return NUM; }
461 icmp-ireqreply		{ yylval->h = 16; return NUM; }
462 icmp-maskreq		{ yylval->h = 17; return NUM; }
463 icmp-maskreply		{ yylval->h = 18; return NUM; }
464 
465 icmp6type       { yylval->h = 0; return NUM; }
466 icmp6code       { yylval->h = 1; return NUM; }
467 
468 icmp6-destinationunreach	{ yylval->h = 1; return NUM; }
469 icmp6-packettoobig		{ yylval->h = 2; return NUM; }
470 icmp6-timeexceeded		{ yylval->h = 3; return NUM; }
471 icmp6-parameterproblem		{ yylval->h = 4; return NUM; }
472 icmp6-echo      { yylval->h = 128; return NUM; }
473 icmp6-echoreply { yylval->h = 129; return NUM; }
474 icmp6-multicastlistenerquery    { yylval->h = 130; return NUM; }
475 icmp6-multicastlistenerreportv1 { yylval->h = 131; return NUM; }
476 icmp6-multicastlistenerdone     { yylval->h = 132; return NUM; }
477 icmp6-routersolicit   { yylval->h = 133; return NUM; }
478 icmp6-routeradvert    { yylval->h = 134; return NUM; }
479 icmp6-neighborsolicit { yylval->h = 135; return NUM; }
480 icmp6-neighboradvert  { yylval->h = 136; return NUM; }
481 icmp6-redirect    { yylval->h = 137; return NUM; }
482 icmp6-routerrenum { yylval->h = 138; return NUM; }
483 icmp6-nodeinformationquery      { yylval->h = 139; return NUM; }
484 icmp6-nodeinformationresponse   { yylval->h = 140; return NUM; }
485 icmp6-ineighbordiscoverysolicit { yylval->h = 141; return NUM; }
486 icmp6-ineighbordiscoveryadvert  { yylval->h = 142; return NUM; }
487 icmp6-multicastlistenerreportv2 { yylval->h = 143; return NUM; }
488 icmp6-homeagentdiscoveryrequest { yylval->h = 144; return NUM; }
489 icmp6-homeagentdiscoveryreply   { yylval->h = 145; return NUM; }
490 icmp6-mobileprefixsolicit       { yylval->h = 146; return NUM; }
491 icmp6-mobileprefixadvert        { yylval->h = 147; return NUM; }
492 icmp6-certpathsolicit           { yylval->h = 148; return NUM; }
493 icmp6-certpathadvert            { yylval->h = 149; return NUM; }
494 icmp6-multicastrouteradvert     { yylval->h = 151; return NUM; }
495 icmp6-multicastroutersolicit    { yylval->h = 152; return NUM; }
496 icmp6-multicastrouterterm       { yylval->h = 153; return NUM; }
497 
498 tcpflags		{ yylval->h = 13; return NUM; }
499 tcp-fin			{ yylval->h = 0x01; return NUM; }
500 tcp-syn			{ yylval->h = 0x02; return NUM; }
501 tcp-rst			{ yylval->h = 0x04; return NUM; }
502 tcp-push		{ yylval->h = 0x08; return NUM; }
503 tcp-ack			{ yylval->h = 0x10; return NUM; }
504 tcp-urg			{ yylval->h = 0x20; return NUM; }
505 tcp-ece			{ yylval->h = 0x40; return NUM; }
506 tcp-cwr			{ yylval->h = 0x80; return NUM; }
507 [A-Za-z0-9]([-_.A-Za-z0-9]*[.A-Za-z0-9])? {
508 			 yylval->s = sdup(yyextra, (char *)yytext); return ID; }
509 "\\"[^ !()\n\t]+	{ yylval->s = sdup(yyextra, (char *)yytext + 1); return ID; }
510 .			{ return LEX_ERROR; }
511 %%
512 
513 /*
514  * Turn diagnostics back on, so we check the code that we've written.
515  */
516 DIAG_ON_FLEX
517 
518 stoulen_ret
519 stoulen(const char *string, size_t string_len, bpf_u_int32 *val,
520     compiler_state_t *cstate)
521 {
522 	bpf_u_int32 n = 0;
523 	unsigned int digit;
524 	const char *s = string;
525 
526 	/*
527 	 * string is guaranteed either to be a string of decimal digits
528 	 * or 0[xX] followed by a string of hex digits.
529 	 */
530 	if (string_len >= 1 && *s == '0') {
531 		if (string_len >= 2  && (s[1] == 'x' || s[1] == 'X')) {
532 			/*
533 			 * Begins with 0x or 0X, so hex.
534 			 * Guaranteed to be all hex digits following the
535 			 * prefix, so anything that's not 0-9 or a-f is
536 			 * A-F.
537 			 */
538 			s += 2;	/* skip the prefix */
539 			string_len -= 2;
540 			while (string_len != 0) {
541 				digit = *s++;
542 				string_len--;
543 				if (digit >= '0' && digit <= '9')
544 					digit = digit - '0';
545 				else if (digit >= 'a' && digit <= 'f')
546 					digit = digit - 'a' + 10;
547 				else if (digit >= 'A' && digit <= 'F')
548 					digit = digit - 'A' + 10;
549 				else {
550 					/*
551 					 * Not a valid hex number.
552 					 * Don't treat this as an error,
553 					 * in case the caller wants to
554 					 * interpret it as something else.
555 					 */
556 					return STOULEN_NOT_HEX_NUMBER;
557 				}
558 
559 				/*
560 				 * Check for overflow.
561 				 */
562 				if (n > 0xFFFFFFFU) {
563 					/*
564 					 * We have more than 28 bits of
565 					 * number, and are about to
566 					 * add 4 more; that won't fit
567 					 * in 32 bits.
568 					 */
569 					bpf_set_error(cstate,
570 					    "number %.*s overflows 32 bits",
571 					   (int)string_len, string);
572 					return STOULEN_ERROR;
573 				}
574 				n = (n << 4) + digit;
575 			}
576 		} else {
577 			/*
578 			 * Begins with 0, but not 0x or 0X, so octal.
579 			 * Guaranteed to be all *decimal* digits following
580 			 * the prefix, so we need to catch 8 and 9 and
581 			 * report an error.
582 			 */
583 			s += 1;
584 			string_len -= 1;
585 			while (string_len != 0) {
586 				digit = *s++;
587 				string_len--;
588 				if (digit >= '0' && digit <= '7')
589 					digit = digit - '0';
590 				else {
591 					/*
592 					 * Not a valid octal number.
593 					 * Don't treat this as an error,
594 					 * in case the caller wants to
595 					 * interpret it as something else.
596 					 */
597 					return STOULEN_NOT_OCTAL_NUMBER;
598 				}
599 				if (n > 03777777777U) {
600 					/*
601 					 * We have more than 29 bits of
602 					 * number, and are about to add
603 					 * 3 more; that won't fit in
604 					 * 32 bits.
605 					 */
606 					bpf_set_error(cstate,
607 					    "number %.*s overflows 32 bits",
608 					   (int)string_len, string);
609 					return STOULEN_ERROR;
610 				}
611 				n = (n << 3) + digit;
612 			}
613 		}
614 	} else {
615 		/*
616 		 * Decimal.
617 		 */
618 		while (string_len != 0) {
619 			digit = *s++;
620 			string_len--;
621 			if (digit >= '0' && digit <= '9')
622 				digit = digit - '0';
623 			else {
624 				/*
625 				 * Not a valid decimal number.
626 				 * Don't treat this as an error,
627 				 * in case the caller wants to
628 				 * interpret it as something else.
629 				 */
630 				return STOULEN_NOT_DECIMAL_NUMBER;
631 			}
632 #define CUTOFF_DEC	(0xFFFFFFFFU / 10U)
633 #define CUTLIM_DEC	(0xFFFFFFFFU % 10U)
634 			if (n > CUTOFF_DEC ||
635 			    (n == CUTOFF_DEC && digit > CUTLIM_DEC)) {
636 				/*
637 				 * Adding that digit will result in a
638 				 * number that won't fit in 32 bits.
639 				 */
640 				bpf_set_error(cstate,
641 				    "number %.*s overflows 32 bits",
642 				   (int)string_len, string);
643 				return STOULEN_ERROR;
644 			}
645 			n = (n * 10) + digit;
646 		}
647 	}
648 
649 	*val = n;
650 	return STOULEN_OK;
651 }
652 
653 /*
654  * Convert string to 32-bit unsigned integer.  Just like atoi(), but checks for
655  * preceding 0x or 0 and uses hex or octal instead of decimal.
656  *
657  * On success, sets yylval->h to the value and returns NUM.
658  * On failure, sets the BPF error string and returns LEX_ERROR, to force
659  * the parse to stop.
660  */
661 static int
662 stou(const char *yytext_arg, YYSTYPE *yylval_arg, compiler_state_t *yyextra_arg)
663 {
664 	stoulen_ret ret;
665 
666 	ret = stoulen(yytext_arg, strlen(yytext_arg), &yylval_arg->h,
667 	    yyextra_arg);
668 	switch (ret) {
669 
670 	case STOULEN_OK:
671 		return NUM;
672 
673 	case STOULEN_NOT_OCTAL_NUMBER:
674 		bpf_set_error(yyextra_arg, "number %s contains non-octal digit",
675 		    yytext_arg);
676 		return LEX_ERROR;
677 
678 	case STOULEN_NOT_HEX_NUMBER:
679 		bpf_set_error(yyextra_arg, "number %s contains non-hex digit",
680 		    yytext_arg);
681 		return LEX_ERROR;
682 
683 	case STOULEN_NOT_DECIMAL_NUMBER:
684 		bpf_set_error(yyextra_arg, "number %s contains non-decimal digit",
685 		    yytext_arg);
686 		return LEX_ERROR;
687 
688 	case STOULEN_ERROR:
689 		/* Error already set. */
690 		return LEX_ERROR;
691 
692 	default:
693 		/* Should not happen */
694 		bpf_set_error(yyextra_arg, "stoulen returned %d - this should not happen", ret);
695 		return LEX_ERROR;
696 	}
697 }
698