xref: /netbsd-src/usr.bin/indent/lexi.c (revision dd8677cc23866d4c6046e3ece028d7d834d2fd0e)
1 /*	$NetBSD: lexi.c,v 1.242 2023/12/03 21:44:42 rillig Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-4-Clause
5  *
6  * Copyright (c) 1985 Sun Microsystems, Inc.
7  * Copyright (c) 1980, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: lexi.c,v 1.242 2023/12/03 21:44:42 rillig Exp $");
42 
43 #include <stdlib.h>
44 #include <string.h>
45 
46 #include "indent.h"
47 
48 /* must be sorted alphabetically, is used in binary search */
49 static const struct keyword {
50 	const char name[12];
51 	lexer_symbol lsym;
52 } keywords[] = {
53 	{"_Bool", lsym_type},
54 	{"_Complex", lsym_modifier},
55 	{"_Imaginary", lsym_modifier},
56 	{"auto", lsym_modifier},
57 	{"bool", lsym_type},
58 	{"break", lsym_word},
59 	{"case", lsym_case},
60 	{"char", lsym_type},
61 	{"complex", lsym_modifier},
62 	{"const", lsym_modifier},
63 	{"continue", lsym_word},
64 	{"default", lsym_default},
65 	{"do", lsym_do},
66 	{"double", lsym_type},
67 	{"else", lsym_else},
68 	{"enum", lsym_tag},
69 	{"extern", lsym_modifier},
70 	{"float", lsym_type},
71 	{"for", lsym_for},
72 	{"goto", lsym_word},
73 	{"if", lsym_if},
74 	{"imaginary", lsym_modifier},
75 	{"inline", lsym_modifier},
76 	{"int", lsym_type},
77 	{"long", lsym_type},
78 	{"offsetof", lsym_offsetof},
79 	{"register", lsym_modifier},
80 	{"restrict", lsym_word},
81 	{"return", lsym_return},
82 	{"short", lsym_type},
83 	{"signed", lsym_type},
84 	{"sizeof", lsym_sizeof},
85 	{"static", lsym_modifier},
86 	{"struct", lsym_tag},
87 	{"switch", lsym_switch},
88 	{"typedef", lsym_typedef},
89 	{"union", lsym_tag},
90 	{"unsigned", lsym_type},
91 	{"void", lsym_type},
92 	{"volatile", lsym_modifier},
93 	{"while", lsym_while}
94 };
95 
/*
 * The type names that were added using register_typename; is_typename looks
 * up the current token in this list.
 */
static struct {
	const char **items;	/* the names, kept sorted in strcmp order */
	unsigned int len;	/* number of names stored in items */
	unsigned int cap;	/* number of names that items has room for */
} typenames;
101 
102 /*-
103  * The transition table below was rewritten by hand from lx's output, given
104  * the following definitions. lx is Katherine Flavel's lexer generator.
105  *
106  * O  = /[0-7]/;        D  = /[0-9]/;          NZ = /[1-9]/;
107  * H  = /[a-f0-9]/i;    B  = /[0-1]/;          HP = /0x/i;
108  * BP = /0b/i;          E  = /e[+\-]?/i D+;    P  = /p[+\-]?/i D+;
109  * FS = /[fl]/i;        IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?;
110  *
111  * D+           E  FS? -> $float;
112  * D*    "." D+ E? FS? -> $float;
113  * D+    "."    E? FS? -> $float;    HP H+           IS? -> $int;
114  * HP H+        P  FS? -> $float;    NZ D*           IS? -> $int;
115  * HP H* "." H+ P  FS? -> $float;    "0" O*          IS? -> $int;
116  * HP H+ "."    P  FS  -> $float;    BP B+           IS? -> $int;
117  */
118 /* INDENT OFF */
/*
 * The state machine that lex_number uses for scanning a number.
 *
 * The first index is the character class from lex_number_row, the second
 * index is the current state, from 'A' to 'Z'. Each entry names the state to
 * switch to after reading a character of that class; a blank means that the
 * character cannot continue the number. The lowercase entries 'f' and 'i'
 * end the scan right after the character has been added to the token.
 *
 * Row 0 is not used for transitions; for each state, it tells the type of a
 * number that ends in that state: f = floating, i = integer, u = unknown.
 */
static const unsigned char lex_number_state[][26] = {
	/*                examples:
	                                 00
	         s                      0xx
	         t                    00xaa
	         a     11       101100xxa..
	         r   11ee0001101lbuuxx.a.pp
	         t.01.e+008bLuxll0Ll.aa.p+0
	states:  ABCDEFGHIJKLMNOPQRSTUVWXYZ */
	[0] =   "uuiifuufiuuiiuiiiiiuiuuuuu",	/* (other) */
	[1] =   "CEIDEHHHIJQ  U  Q  VUVVZZZ",	/* 0 */
	[2] =   "DEIDEHHHIJQ  U  Q  VUVVZZZ",	/* 1 */
	[3] =   "DEIDEHHHIJ   U     VUVVZZZ",	/* 2 3 4 5 6 7 */
	[4] =   "DEJDEHHHJJ   U     VUVVZZZ",	/* 8 9 */
	[5] =   "             U     VUVV   ",	/* A a C c D d */
	[6] =   "  K          U     VUVV   ",	/* B b */
	[7] =   "  FFF   FF   U     VUVV   ",	/* E e */
	[8] =   "    f  f     U     VUVV  f",	/* F f */
	[9] =   "  LLf  fL  PR   Li  L    f",	/* L */
	[10] =  "  OOf  fO   S P O i O    f",	/* l */
	[11] =  "                    FFX   ",	/* P p */
	[12] =  "  MM    M  i  iiM   M     ",	/* U u */
	[13] =  "  N                       ",	/* X x */
	[14] =  "     G                 Y  ",	/* + - */
	[15] =  "B EE    EE   T      W     ",	/* . */
	/*       ABCDEFGHIJKLMNOPQRSTUVWXYZ */
};
146 /* INDENT ON */
147 
/*
 * Maps a character to its row in lex_number_state. Characters that are not
 * listed here map to 0, which lex_number takes as "cannot be part of a
 * number". The array ends at 'x', the largest character listed.
 */
static const unsigned char lex_number_row[] = {
	/* row 1: the digit 0 */
	['0'] = 1,
	/* row 2: the digit 1 */
	['1'] = 2,
	/* row 3: the remaining octal digits */
	['2'] = 3,
	['3'] = 3,
	['4'] = 3,
	['5'] = 3,
	['6'] = 3,
	['7'] = 3,
	/* row 4: the remaining decimal digits */
	['8'] = 4,
	['9'] = 4,
	/* row 5: the letters A, C and D in either case */
	['A'] = 5,
	['a'] = 5,
	['C'] = 5,
	['c'] = 5,
	['D'] = 5,
	['d'] = 5,
	/* row 6: the letter B in either case */
	['B'] = 6,
	['b'] = 6,
	/* row 7: the letter E in either case */
	['E'] = 7,
	['e'] = 7,
	/* row 8: the letter F in either case */
	['F'] = 8,
	['f'] = 8,
	/* rows 9 and 10: uppercase and lowercase L are kept apart */
	['L'] = 9,
	['l'] = 10,
	/* row 11: the letter P in either case */
	['P'] = 11,
	['p'] = 11,
	/* row 12: the letter U in either case */
	['U'] = 12,
	['u'] = 12,
	/* row 13: the letter X in either case */
	['X'] = 13,
	['x'] = 13,
	/* row 14: the signs */
	['+'] = 14,
	['-'] = 14,
	/* row 15: the period */
	['.'] = 15,
};
165 
166 
/*
 * Tells whether ch may begin an identifier: a letter according to
 * ch_isalpha, an underscore or a dollar sign.
 */
static bool
is_identifier_start(char ch)
{
	if (ch == '_' || ch == '$')
		return true;
	return ch_isalpha(ch);
}
172 
/*
 * Tells whether ch may occur inside an identifier: a letter or digit
 * according to ch_isalnum, an underscore or a dollar sign.
 */
static bool
is_identifier_part(char ch)
{
	if (ch == '_' || ch == '$')
		return true;
	return ch_isalnum(ch);
}
178 
/* Adds a single character to the global token buffer. */
static void
token_add_char(char ch)
{
	buf_add_char(&token, ch);
}
184 
185 static bool
skip_line_continuation(void)186 skip_line_continuation(void)
187 {
188 	if (in.p[0] == '\\' && in.p[1] == '\n') {
189 		in.p++;
190 		inp_skip();
191 		in.token_end_line++;
192 		return true;
193 	}
194 	return false;
195 }
196 
197 static void
lex_number(void)198 lex_number(void)
199 {
200 	for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) {
201 		unsigned char ch = (unsigned char)*in.p;
202 		if (skip_line_continuation())
203 			continue;
204 		if (ch >= array_length(lex_number_row)
205 		    || lex_number_row[ch] == 0)
206 			break;
207 
208 		unsigned char row = lex_number_row[ch];
209 		if (lex_number_state[row][s - 'A'] == ' ') {
210 			// lex_number_state[0][s - 'A'] now indicates the type:
211 			// f = floating, i = integer, u = unknown
212 			return;
213 		}
214 
215 		s = lex_number_state[row][s - 'A'];
216 		token_add_char(inp_next());
217 	}
218 }
219 
220 static void
lex_word(void)221 lex_word(void)
222 {
223 	for (;;) {
224 		if (is_identifier_part(*in.p))
225 			token_add_char(*in.p++);
226 		else if (skip_line_continuation())
227 			continue;
228 		else
229 			return;
230 	}
231 }
232 
233 static void
lex_char_or_string(void)234 lex_char_or_string(void)
235 {
236 	for (char delim = token.s[token.len - 1];;) {
237 		if (*in.p == '\n') {
238 			diag(1, "Unterminated literal");
239 			return;
240 		}
241 
242 		token_add_char(*in.p++);
243 		if (token.s[token.len - 1] == delim)
244 			return;
245 
246 		if (token.s[token.len - 1] == '\\') {
247 			if (*in.p == '\n')
248 				in.token_end_line++;
249 			token_add_char(inp_next());
250 		}
251 	}
252 }
253 
254 /* Guess whether the current token is a declared type. */
255 static bool
probably_typename(void)256 probably_typename(void)
257 {
258 	if (ps.prev_lsym == lsym_modifier)
259 		return true;
260 	if (ps.in_init)
261 		return false;
262 	if (ps.in_stmt_or_decl)	/* XXX: this condition looks incorrect */
263 		return false;
264 	if (ps.prev_lsym == lsym_semicolon
265 	    || ps.prev_lsym == lsym_lbrace
266 	    || ps.prev_lsym == lsym_rbrace) {
267 		if (in.p[0] == '*' && in.p[1] != '=')
268 			return true;
269 		/* XXX: is_identifier_start */
270 		if (ch_isalpha(in.p[0]))
271 			return true;
272 	}
273 	return false;
274 }
275 
276 static int
bsearch_typenames(const char * key)277 bsearch_typenames(const char *key)
278 {
279 	const char **arr = typenames.items;
280 	unsigned lo = 0;
281 	unsigned hi = typenames.len;
282 
283 	while (lo < hi) {
284 		unsigned mid = (lo + hi) / 2;
285 		int cmp = strcmp(arr[mid], key);
286 		if (cmp < 0)
287 			lo = mid + 1;
288 		else if (cmp > 0)
289 			hi = mid;
290 		else
291 			return (int)mid;
292 	}
293 	return -1 - (int)lo;
294 }
295 
296 static bool
is_typename(void)297 is_typename(void)
298 {
299 	if (ps.prev_lsym == lsym_tag)
300 		return true;
301 	if (opt.auto_typedefs &&
302 	    token.len >= 2 && memcmp(token.s + token.len - 2, "_t", 2) == 0)
303 		return true;
304 
305 	return bsearch_typenames(token.s) >= 0;
306 }
307 
308 void
register_typename(const char * name)309 register_typename(const char *name)
310 {
311 	if (typenames.len >= typenames.cap) {
312 		typenames.cap = 16 + 2 * typenames.cap;
313 		typenames.items = nonnull(realloc(typenames.items,
314 			sizeof(typenames.items[0]) * typenames.cap));
315 	}
316 
317 	int pos = bsearch_typenames(name);
318 	if (pos >= 0)
319 		return;		/* already in the list */
320 
321 	pos = -1 - pos;
322 	memmove(typenames.items + pos + 1, typenames.items + pos,
323 	    sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos));
324 	typenames.items[pos] = nonnull(strdup(name));
325 }
326 
327 static int
cmp_keyword_by_name(const void * key,const void * elem)328 cmp_keyword_by_name(const void *key, const void *elem)
329 {
330 	return strcmp(key, ((const struct keyword *)elem)->name);
331 }
332 
/*
 * Looking at the '(' that p points to, guess whether this starts a function
 * definition (true) or a function declaration (false). Only the rest of the
 * current line is examined.
 */
static bool
probably_function_definition(const char *p)
{
	// TODO: Don't look at characters in comments, see lsym_funcname.c.
	int depth = 0;

	while (*p != '\n') {
		if (*p == '(') {
			depth++;
		} else if (*p == ')' && --depth == 0) {
			/* Skip words such as '__dead' or '__unused'. */
			do
				p++;
			while (*p != '\n'
			    && (ch_isspace(*p) || is_identifier_part(*p)));

			/* 'func(...);' or 'double abs(), pi;' */
			if (*p == ';' || *p == ',')
				return false;

			/* 'func(...)' at the end of the line, 'func(...) {' */
			if (*p != '(')
				return true;

			/*
			 * 'func(...) __attribute__((...))' or
			 * 'func(...) __printflike(...)'
			 */
			depth++;
		}

		if (depth == 1 && p[0] == '*' && p[1] == ',')
			return false;
		p++;
	}

	/*
	 * Incomplete function declarations still end up here and are wrongly
	 * treated as function definitions, which adds a newline before the
	 * function name. To reduce these cases, it may be worth looking for
	 * parameter names, as these are often omitted in declarations and
	 * only included in definitions. Or just extend the lookahead beyond
	 * the current line of input, until the next '{'.
	 */
	return true;
}
380 
381 static lexer_symbol
lexi_alnum(void)382 lexi_alnum(void)
383 {
384 	if (ch_isdigit(in.p[0]) ||
385 	    (in.p[0] == '.' && ch_isdigit(in.p[1]))) {
386 		lex_number();
387 	} else if (is_identifier_start(in.p[0])) {
388 		lex_word();
389 
390 		if (token.len == 1 && token.s[0] == 'L' &&
391 		    (in.p[0] == '"' || in.p[0] == '\'')) {
392 			token_add_char(*in.p++);
393 			lex_char_or_string();
394 			ps.next_unary = false;
395 			return lsym_word;
396 		}
397 	} else
398 		return lsym_eof;	/* just as a placeholder */
399 
400 	while (ch_isblank(*in.p))
401 		in.p++;
402 
403 	ps.next_unary = ps.prev_lsym == lsym_tag
404 	    || ps.prev_lsym == lsym_typedef
405 	    || (ps.prev_lsym == lsym_modifier && *in.p == '*');
406 
407 	if (ps.prev_lsym == lsym_tag && ps.paren.len == 0)
408 		return lsym_type;
409 	if (ps.spaced_expr_psym == psym_for_exprs
410 	    && ps.prev_lsym == lsym_lparen && ps.paren.len == 1
411 	    && *in.p == '*') {
412 		ps.next_unary = true;
413 		return lsym_type;
414 	}
415 
416 	token_add_char('\0');	// Terminate in non-debug mode as well.
417 	token.len--;
418 	const struct keyword *kw = bsearch(token.s, keywords,
419 	    array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name);
420 	lexer_symbol lsym = lsym_word;
421 	if (kw != NULL) {
422 		lsym = kw->lsym;
423 		ps.next_unary = true;
424 		if (lsym == lsym_tag || lsym == lsym_type)
425 			goto found_typename;
426 		return lsym;
427 	}
428 
429 	if (is_typename()) {
430 		lsym = lsym_type;
431 		ps.next_unary = true;
432 found_typename:
433 		if (ps.prev_lsym != lsym_period
434 		    && ps.prev_lsym != lsym_unary_op) {
435 			if (lsym == lsym_tag)
436 				return lsym_tag;
437 			if (ps.paren.len == 0)
438 				return lsym_type;
439 		}
440 	}
441 
442 	const char *p = in.p;
443 	if (*p == ')')
444 		p++;
445 	if (*p == '(' && ps.psyms.len < 3 && ps.ind_level == 0 &&
446 	    !ps.in_func_def_params && !ps.in_init) {
447 
448 		bool maybe_function_definition = *in.p == ')'
449 		    ? ps.paren.len == 1 && ps.prev_lsym != lsym_unary_op
450 		    : ps.paren.len == 0;
451 		if (maybe_function_definition
452 		    && probably_function_definition(p)) {
453 			ps.line_has_func_def = true;
454 			if (ps.in_decl)
455 				ps.in_func_def_params = true;
456 			return lsym_funcname;
457 		}
458 
459 	} else if (ps.paren.len == 0 && probably_typename()) {
460 		ps.next_unary = true;
461 		return lsym_type;
462 	}
463 
464 	return lsym;
465 }
466 
467 static void
check_parenthesized_function_definition(void)468 check_parenthesized_function_definition(void)
469 {
470 	const char *p = in.p;
471 	while (ch_isblank(*p))
472 		p++;
473 	if (is_identifier_start(*p))
474 		while (is_identifier_part(*p))
475 			p++;
476 	while (ch_isblank(*p))
477 		p++;
478 	if (*p == ')') {
479 		p++;
480 		while (ch_isblank(*p))
481 			p++;
482 		if (*p == '(' && probably_function_definition(p))
483 			ps.line_has_func_def = true;
484 	}
485 }
486 
487 static bool
is_asterisk_unary(void)488 is_asterisk_unary(void)
489 {
490 	const char *p = in.p;
491 	while (*p == '*' || ch_isblank(*p))
492 		p++;
493 	if (*p == ')')
494 		return true;
495 	if (ps.next_unary || ps.in_func_def_params)
496 		return true;
497 	if (ps.prev_lsym == lsym_word ||
498 	    ps.prev_lsym == lsym_rparen ||
499 	    ps.prev_lsym == lsym_rbracket)
500 		return false;
501 	return ps.in_decl && ps.paren.len > 0;
502 }
503 
504 static bool
probably_in_function_definition(void)505 probably_in_function_definition(void)
506 {
507 	for (const char *p = in.p; *p != '\n';) {
508 		if (ch_isspace(*p))
509 			p++;
510 		else if (is_identifier_start(*p)) {
511 			p++;
512 			while (is_identifier_part(*p))
513 				p++;
514 		} else
515 			return *p == '(';
516 	}
517 	return false;
518 }
519 
520 static void
lex_asterisk_unary(void)521 lex_asterisk_unary(void)
522 {
523 	while (*in.p == '*' || ch_isspace(*in.p)) {
524 		if (*in.p == '*')
525 			token_add_char('*');
526 		if (*in.p == '\n')
527 			in.token_end_line++;
528 		inp_skip();
529 	}
530 
531 	if (ps.in_decl && probably_in_function_definition())
532 		ps.line_has_func_def = true;
533 }
534 
/*
 * Advances *pp over any blanks and then tries to match the string s.
 *
 * If s follows, *pp is moved behind it and the result is true. Otherwise
 * the result is false, and *pp stays behind the blanks that were skipped.
 */
static bool
skip(const char **pp, const char *s)
{
	const char *cur = *pp;

	while (ch_isblank(*cur))
		cur++;
	*pp = cur;

	const size_t len = strlen(s);
	if (strncmp(cur, s, len) != 0)
		return false;

	*pp = cur + len;
	return true;
}
547 
548 static void
lex_indent_comment(void)549 lex_indent_comment(void)
550 {
551 	const char *p = in.line.s;
552 	if (skip(&p, "/*") && skip(&p, "INDENT")) {
553 		enum indent_enabled enabled;
554 		if (skip(&p, "ON") || *p == '*')
555 			enabled = indent_last_off_line;
556 		else if (skip(&p, "OFF"))
557 			enabled = indent_off;
558 		else
559 			return;
560 		if (skip(&p, "*/\n")) {
561 			if (lab.len > 0 || code.len > 0 || com.len > 0)
562 				output_line();
563 			indent_enabled = enabled;
564 		}
565 	}
566 }
567 
568 /* Reads the next token, placing it in the global variable "token". */
569 lexer_symbol
lexi(void)570 lexi(void)
571 {
572 	buf_clear(&token);
573 
574 	for (;;) {
575 		if (ch_isblank(*in.p))
576 			in.p++;
577 		else if (skip_line_continuation())
578 			continue;
579 		else
580 			break;
581 	}
582 	in.token_start_line = in.token_end_line;
583 
584 	lexer_symbol alnum_lsym = lexi_alnum();
585 	if (alnum_lsym != lsym_eof)
586 		return alnum_lsym;
587 
588 	/* Scan a non-alphanumeric token */
589 
590 	token_add_char(inp_next());
591 
592 	lexer_symbol lsym;
593 	bool next_unary;
594 
595 	switch (token.s[token.len - 1]) {
596 
597 	case '#':
598 		lsym = lsym_preprocessing;
599 		next_unary = ps.next_unary;
600 		break;
601 
602 	case '\n':
603 		/* if data has been exhausted, the '\n' is a dummy. */
604 		lsym = had_eof ? lsym_eof : lsym_newline;
605 		next_unary = ps.next_unary;
606 		break;
607 
608 	/* INDENT OFF */
609 	case ')':	lsym = lsym_rparen;	next_unary = false;	break;
610 	case '[':	lsym = lsym_lbracket;	next_unary = true;	break;
611 	case ']':	lsym = lsym_rbracket;	next_unary = false;	break;
612 	case '{':	lsym = lsym_lbrace;	next_unary = true;	break;
613 	case '}':	lsym = lsym_rbrace;	next_unary = true;	break;
614 	case '.':	lsym = lsym_period;	next_unary = false;	break;
615 	case '?':	lsym = lsym_question;	next_unary = true;	break;
616 	case ',':	lsym = lsym_comma;	next_unary = true;	break;
617 	case ';':	lsym = lsym_semicolon;	next_unary = true;	break;
618 	/* INDENT ON */
619 
620 	case '(':
621 		if (in.p == in.line.s + 1)
622 			check_parenthesized_function_definition();
623 		lsym = lsym_lparen;
624 		next_unary = true;
625 		break;
626 
627 	case '+':
628 	case '-':
629 		lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
630 		next_unary = true;
631 
632 		/* '++' or '--' */
633 		if (*in.p == token.s[token.len - 1]) {
634 			token_add_char(*in.p++);
635 			if (ps.prev_lsym == lsym_word ||
636 			    ps.prev_lsym == lsym_rparen ||
637 			    ps.prev_lsym == lsym_rbracket) {
638 				lsym = ps.next_unary
639 				    ? lsym_unary_op : lsym_postfix_op;
640 				next_unary = false;
641 			}
642 
643 		} else if (*in.p == '=') {	/* '+=' or '-=' */
644 			token_add_char(*in.p++);
645 
646 		} else if (*in.p == '>') {	/* '->' */
647 			token_add_char(*in.p++);
648 			lsym = lsym_unary_op;
649 			next_unary = false;
650 			ps.want_blank = false;
651 		}
652 		break;
653 
654 	case ':':
655 		lsym = ps.quest_level > 0
656 		    ? (ps.quest_level--, lsym_question_colon)
657 		    : ps.in_var_decl ? lsym_other_colon : lsym_label_colon;
658 		next_unary = true;
659 		break;
660 
661 	case '*':
662 		if (*in.p == '=') {
663 			token_add_char(*in.p++);
664 			lsym = lsym_binary_op;
665 		} else if (is_asterisk_unary()) {
666 			lex_asterisk_unary();
667 			lsym = lsym_unary_op;
668 		} else
669 			lsym = lsym_binary_op;
670 		next_unary = true;
671 		break;
672 
673 	case '=':
674 		if (ps.in_var_decl)
675 			ps.in_init = true;
676 		if (*in.p == '=')
677 			token_add_char(*in.p++);
678 		lsym = lsym_binary_op;
679 		next_unary = true;
680 		break;
681 
682 	case '>':
683 	case '<':
684 	case '!':		/* ops like <, <<, <=, !=, etc. */
685 		if (*in.p == '>' || *in.p == '<' || *in.p == '=')
686 			token_add_char(*in.p++);
687 		if (*in.p == '=')
688 			token_add_char(*in.p++);
689 		lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
690 		next_unary = true;
691 		break;
692 
693 	case '\'':
694 	case '"':
695 		lex_char_or_string();
696 		lsym = lsym_word;
697 		next_unary = false;
698 		break;
699 
700 	default:
701 		if (token.s[token.len - 1] == '/'
702 		    && (*in.p == '*' || *in.p == '/')) {
703 			enum indent_enabled prev = indent_enabled;
704 			lex_indent_comment();
705 			if (prev == indent_on && indent_enabled == indent_off)
706 				buf_clear(&out.indent_off_text);
707 			token_add_char(*in.p++);
708 			lsym = lsym_comment;
709 			next_unary = ps.next_unary;
710 			break;
711 		}
712 
713 		/* punctuation like '%', '&&', '/', '^', '||', '~' */
714 		lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
715 		if (*in.p == token.s[token.len - 1])
716 			token_add_char(*in.p++), lsym = lsym_binary_op;
717 		if (*in.p == '=')
718 			token_add_char(*in.p++), lsym = lsym_binary_op;
719 
720 		next_unary = true;
721 	}
722 
723 	ps.next_unary = next_unary;
724 
725 	return lsym;
726 }
727