xref: /netbsd-src/usr.bin/xlint/lint1/scan.l (revision a8016b51bcf5204ff836e46c2432dc27e5b12586)
1 %{
2 /* $NetBSD: scan.l,v 1.143 2024/12/08 17:12:01 rillig Exp $ */
3 
4 /*
5  * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
6  * Copyright (c) 1994, 1995 Jochen Pohl
7  * All Rights Reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by Jochen Pohl for
20  *	The NetBSD Project.
21  * 4. The name of the author may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #if defined(__RCSID)
38 __RCSID("$NetBSD: scan.l,v 1.143 2024/12/08 17:12:01 rillig Exp $");
39 #endif
40 
41 #include "lint1.h"
42 #include "cgram.h"
43 
44 %}
45 
46 
47 HEX	[0-9A-Fa-f]
48 EXP	([eE][+-]?[0-9]+)
49 PEXP	(p[+-]?[0-9A-Fa-f]+)
50 FSUF	([fFlL]?[i]?)
51 
52 punctuator_1	[\[\](){}.]|->
53 punctuator_2	{punctuator_1}|\+\+|--|[&*+\-~!]
54 punctuator_3	{punctuator_2}|\/|%|<<|>>|<|>|<=|>=|==|!=|\^|\||&&|\|\|
55 punctuator_4	{punctuator_3}|\?|:|::|;|\.\.\.
56 punctuator_5	{punctuator_4}|=|\*=|\/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=
57 punctuator_6	{punctuator_5}|,|#|##
58 punctuator_7	{punctuator_6}|<:|:>|<%|%>|%:|%:%:
59 punctuator	{punctuator_7}|@
60 
61 %pointer
62 %option nounput
63 
64 %x preprocessing
65 
66 %%
67 
68 [_A-Za-z][_A-Za-z0-9]*		return lex_name(yytext, yyleng);
69 0[bB][01]+[lLuU]*		return lex_integer_constant(yytext, yyleng, 2);
70 0[0-7]*[lLuU]*			return lex_integer_constant(yytext, yyleng, 8);
71 [1-9][0-9]*[lLuU]*		return lex_integer_constant(yytext, yyleng, 10);
72 0[xX]{HEX}+[lLuU]*		return lex_integer_constant(yytext, yyleng, 16);
73 [0-9]+\.[0-9]*{EXP}?{FSUF}	|
74 [0-9]+{EXP}{FSUF}		|
75 0[xX]{HEX}+\.{HEX}*{PEXP}{FSUF}	|
76 0[xX]{HEX}+{PEXP}{FSUF}		|
77 \.[0-9]+{EXP}?{FSUF}		return lex_floating_constant(yytext, yyleng);
78 "="				return T_ASSIGN;
79 "*="				return lex_operator(T_OPASSIGN, MULASS);
80 "/="				return lex_operator(T_OPASSIGN, DIVASS);
81 "%="				return lex_operator(T_OPASSIGN, MODASS);
82 "+="				return lex_operator(T_OPASSIGN, ADDASS);
83 "-="				return lex_operator(T_OPASSIGN, SUBASS);
84 "<<="				return lex_operator(T_OPASSIGN, SHLASS);
85 ">>="				return lex_operator(T_OPASSIGN, SHRASS);
86 "&="				return lex_operator(T_OPASSIGN, ANDASS);
87 "^="				return lex_operator(T_OPASSIGN, XORASS);
88 "|="				return lex_operator(T_OPASSIGN, ORASS);
89 "||"				return T_LOGOR;
90 "&&"				return T_LOGAND;
91 "|"				return T_BITOR;
92 "&"				return T_AMPER;
93 "^"				return T_BITXOR;
94 "=="				return lex_operator(T_EQUALITY, EQ);
95 "!="				return lex_operator(T_EQUALITY, NE);
96 "<"				return lex_operator(T_RELATIONAL, LT);
97 ">"				return lex_operator(T_RELATIONAL, GT);
98 "<="				return lex_operator(T_RELATIONAL, LE);
99 ">="				return lex_operator(T_RELATIONAL, GE);
100 "<<"				return lex_operator(T_SHIFT, SHL);
101 ">>"				return lex_operator(T_SHIFT, SHR);
102 "++"				return yylval.y_inc = true, T_INCDEC;
103 "--"				return yylval.y_inc = false, T_INCDEC;
104 "->"				return T_ARROW;
105 "."				return T_POINT;
106 "+"				return lex_operator(T_ADDITIVE, PLUS);
107 "-"				return lex_operator(T_ADDITIVE, MINUS);
108 "*"				return T_ASTERISK;
109 "/"				return lex_operator(T_MULTIPLICATIVE, DIV);
110 "%"				return lex_operator(T_MULTIPLICATIVE, MOD);
111 "!"				return T_LOGNOT;
112 "~"				return T_COMPLEMENT;
113 "\""				return lex_string();
114 "L\""				return lex_wide_string();
115 ";"				return T_SEMI;
116 "{"				return T_LBRACE;
117 "}"				return T_RBRACE;
118 ","				return T_COMMA;
119 ":"				return T_COLON;
120 "?"				return T_QUEST;
121 "["				return T_LBRACK;
122 "]"				return T_RBRACK;
123 "("				return T_LPAREN;
124 ")"				return T_RPAREN;
125 "..."				return T_ELLIPSIS;
126 "::"				return T_DCOLON;
127 "'"				return lex_character_constant();
128 "L'"				return lex_wide_character_constant();
129 \n				lex_next_line();
130 \t|" "|\f|\v			;
131 "/*"				lex_comment();
132 "//"				lex_slash_slash_comment();
133 
134 ^#				{
135 					BEGIN preprocessing;
136 					lex_pp_begin();
137 				}
138 <preprocessing>[_A-Za-z][_A-Za-z0-9]*	lex_pp_identifier(yytext);
139 <preprocessing>\.?[0-9]('?[_A-Za-z0-9]|[EePp][-+][0-9]+|\.)*	lex_pp_number(yytext);
140 <preprocessing>\'		lex_pp_character_constant();
141 <preprocessing>\"		lex_pp_string_literal();
142 <preprocessing>{punctuator}	lex_pp_punctuator(yytext);
143 <preprocessing>\/\*		lex_pp_comment();
144 <preprocessing>[ \f\t\v]+	lex_pp_whitespace();
145 <preprocessing>.		lex_unknown_character(yytext[0]);
146 <preprocessing>\n		{
147 					lex_pp_end();
148 					lex_next_line();
149 					BEGIN INITIAL;
150 				}
151 
152 .				lex_unknown_character(yytext[0]);
153 
154 %%
155 
/*
 * Several of the rules above match only the opening characters of a token:
 * the quote of a character constant or string literal, or the start of a
 * comment.  The rest of such a token is consumed byte by byte through this
 * function, which forwards to input() of the scanner.
 */
int
lex_input(void)
{
	int byte = input();

	return byte;
}
166 }
167