xref: /plan9-contrib/sys/src/cmd/cpp/macro.c (revision 9a747e4fd48b9f4522c70c07e8f882a15030f964)
1 #include <u.h>
2 #include <libc.h>
3 #include <stdio.h>
4 #include "cpp.h"
5 
6 /*
7  * do a macro definition.  tp points to the name being defined in the line
8  */
9 void
10 dodefine(Tokenrow *trp)
11 {
12 	Token *tp;
13 	Nlist *np;
14 	Tokenrow *def, *args;
15 
16 	tp = trp->tp+1;
17 	if (tp>=trp->lp || tp->type!=NAME) {
18 		error(ERROR, "#defined token is not a name");
19 		return;
20 	}
21 	np = lookup(tp, 1);
22 	if (np->flag&ISUNCHANGE) {
23 		error(ERROR, "#defined token %t can't be redefined", tp);
24 		return;
25 	}
26 	/* collect arguments */
27 	tp += 1;
28 	args = NULL;
29 	if (tp<trp->lp && tp->type==LP && tp->wslen==0) {
30 		/* macro with args */
31 		int narg = 0;
32 		tp += 1;
33 		args = new(Tokenrow);
34 		maketokenrow(2, args);
35 		if (tp->type!=RP) {
36 			int err = 0;
37 			for (;;) {
38 				Token *atp;
39 				if (tp->type!=NAME) {
40 					err++;
41 					break;
42 				}
43 				if (narg>=args->max)
44 					growtokenrow(args);
45 				for (atp=args->bp; atp<args->lp; atp++)
46 					if (atp->len==tp->len
47 					 && strncmp((char*)atp->t, (char*)tp->t, tp->len)==0)
48 						error(ERROR, "Duplicate macro argument");
49 				*args->lp++ = *tp;
50 				narg++;
51 				tp += 1;
52 				if (tp->type==RP)
53 					break;
54 				if (tp->type!=COMMA) {
55 					err++;
56 					break;
57 				}
58 				tp += 1;
59 			}
60 			if (err) {
61 				error(ERROR, "Syntax error in macro parameters");
62 				return;
63 			}
64 		}
65 		tp += 1;
66 	}
67 	trp->tp = tp;
68 	if (((trp->lp)-1)->type==NL)
69 		trp->lp -= 1;
70 	def = normtokenrow(trp);
71 	if (np->flag&ISDEFINED) {
72 		if (comparetokens(def, np->vp)
73 		 || (np->ap==NULL) != (args==NULL)
74 		 || np->ap && comparetokens(args, np->ap))
75 			error(ERROR, "Macro redefinition of %t", trp->bp+2);
76 	}
77 	if (args) {
78 		Tokenrow *tap;
79 		tap = normtokenrow(args);
80 		dofree(args->bp);
81 		args = tap;
82 	}
83 	np->ap = args;
84 	np->vp = def;
85 	np->flag |= ISDEFINED;
86 }
87 
88 /*
89  * Definition received via -D or -U
90  */
91 void
92 doadefine(Tokenrow *trp, int type)
93 {
94 	Nlist *np;
95 	static unsigned char one[] = "1";
96 	static Token onetoken[1] = {{ NUMBER, 0, 0, 0, 1, one }};
97 	static Tokenrow onetr = { onetoken, onetoken, onetoken+1, 1 };
98 
99 	trp->tp = trp->bp;
100 	if (type=='U') {
101 		if (trp->lp-trp->tp != 2 || trp->tp->type!=NAME)
102 			goto syntax;
103 		if ((np = lookup(trp->tp, 0)) == NULL)
104 			return;
105 		np->flag &= ~ISDEFINED;
106 		return;
107 	}
108 	if (trp->tp >= trp->lp || trp->tp->type!=NAME)
109 		goto syntax;
110 	np = lookup(trp->tp, 1);
111 	np->flag |= ISDEFINED;
112 	trp->tp += 1;
113 	if (trp->tp >= trp->lp || trp->tp->type==END) {
114 		np->vp = &onetr;
115 		return;
116 	}
117 	if (trp->tp->type!=ASGN)
118 		goto syntax;
119 	trp->tp += 1;
120 	if ((trp->lp-1)->type == END)
121 		trp->lp -= 1;
122 	np->vp = normtokenrow(trp);
123 	return;
124 syntax:
125 	error(FATAL, "Illegal -D or -U argument %r", trp);
126 }
127 
128 /*
129  * Do macro expansion in a row of tokens.
130  * Flag is NULL if more input can be gathered.
131  */
132 void
133 expandrow(Tokenrow *trp, char *flag)
134 {
135 	Token *tp;
136 	Nlist *np;
137 
138 	if (flag)
139 		setsource(flag, -1, "");
140 	for (tp = trp->tp; tp<trp->lp; ) {
141 		if (tp->type!=NAME
142 		 || quicklook(tp->t[0], tp->len>1?tp->t[1]:0)==0
143 		 || (np = lookup(tp, 0))==NULL
144 		 || (np->flag&(ISDEFINED|ISMAC))==0
145 		 || tp->hideset && checkhideset(tp->hideset, np)) {
146 			tp++;
147 			continue;
148 		}
149 		trp->tp = tp;
150 		if (np->val==KDEFINED) {
151 			tp->type = DEFINED;
152 			if ((tp+1)<trp->lp && (tp+1)->type==NAME)
153 				(tp+1)->type = NAME1;
154 			else if ((tp+3)<trp->lp && (tp+1)->type==LP
155 			 && (tp+2)->type==NAME && (tp+3)->type==RP)
156 				(tp+2)->type = NAME1;
157 			else
158 				error(ERROR, "Incorrect syntax for `defined'");
159 			tp++;
160 			continue;
161 		}
162 		if (np->flag&ISMAC)
163 			builtin(trp, np->val);
164 		else {
165 			expand(trp, np);
166 		}
167 		tp = trp->tp;
168 	}
169 	if (flag)
170 		unsetsource();
171 }
172 
173 /*
174  * Expand the macro whose name is np, at token trp->tp, in the tokenrow.
175  * Return trp->tp at the first token next to be expanded
176  * (ordinarily the beginning of the expansion)
177  */
178 void
179 expand(Tokenrow *trp, Nlist *np)
180 {
181 	Tokenrow ntr;
182 	int ntokc, narg, i;
183 	Token *tp;
184 	Tokenrow *atr[NARG+1];
185 	int hs;
186 
187 	copytokenrow(&ntr, np->vp);		/* copy macro value */
188 	if (np->ap==NULL)			/* parameterless */
189 		ntokc = 1;
190 	else {
191 		ntokc = gatherargs(trp, atr, &narg);
192 		if (narg<0) {			/* not actually a call (no '(') */
193 /* error(WARNING, "%d %r\n", narg, trp); */
194 			/* gatherargs has already pushed trp->tr to the next token */
195 			return;
196 		}
197 		if (narg != rowlen(np->ap)) {
198 			error(ERROR, "Disagreement in number of macro arguments");
199 			trp->tp->hideset = newhideset(trp->tp->hideset, np);
200 			trp->tp += ntokc;
201 			return;
202 		}
203 		substargs(np, &ntr, atr);	/* put args into replacement */
204 		for (i=0; i<narg; i++) {
205 			dofree(atr[i]->bp);
206 			dofree(atr[i]);
207 		}
208 	}
209 	doconcat(&ntr);				/* execute ## operators */
210 	hs = newhideset(trp->tp->hideset, np);
211 	for (tp=ntr.bp; tp<ntr.lp; tp++) {	/* distribute hidesets */
212 		if (tp->type==NAME) {
213 			if (tp->hideset==0)
214 				tp->hideset = hs;
215 			else
216 				tp->hideset = unionhideset(tp->hideset, hs);
217 		}
218 	}
219 	ntr.tp = ntr.bp;
220 	insertrow(trp, ntokc, &ntr);
221 	trp->tp -= rowlen(&ntr);
222 	dofree(ntr.bp);
223 	return;
224 }
225 
226 /*
227  * Gather an arglist, starting in trp with tp pointing at the macro name.
228  * Return total number of tokens passed, stash number of args found.
229  * trp->tp is not changed relative to the tokenrow.
230  */
231 int
232 gatherargs(Tokenrow *trp, Tokenrow **atr, int *narg)
233 {
234 	int parens = 1;
235 	int ntok = 0;
236 	Token *bp, *lp;
237 	Tokenrow ttr;
238 	int ntokp;
239 	int needspace;
240 
241 	*narg = -1;			/* means that there is no macro call */
242 	/* look for the ( */
243 	for (;;) {
244 		trp->tp++;
245 		ntok++;
246 		if (trp->tp >= trp->lp) {
247 			gettokens(trp, 0);
248 			if ((trp->lp-1)->type==END) {
249 /* error(WARNING, "reach END\n"); */
250 				trp->lp -= 1;
251 				if (*narg>=0)
252 					trp->tp -= ntok;
253 				return ntok;
254 			}
255 		}
256 		if (trp->tp->type==LP)
257 			break;
258 		if (trp->tp->type!=NL)
259 			return ntok;
260 	}
261 	*narg = 0;
262 	ntok++;
263 	ntokp = ntok;
264 	trp->tp++;
265 	/* search for the terminating ), possibly extending the row */
266 	needspace = 0;
267 	while (parens>0) {
268 		if (trp->tp >= trp->lp)
269 			gettokens(trp, 0);
270 		if (needspace) {
271 			needspace = 0;
272 			makespace(trp);
273 		}
274 		if (trp->tp->type==END) {
275 			trp->lp -= 1;
276 			trp->tp -= ntok;
277 			error(ERROR, "EOF in macro arglist");
278 			return ntok;
279 		}
280 		if (trp->tp->type==NL) {
281 			trp->tp += 1;
282 			adjustrow(trp, -1);
283 			trp->tp -= 1;
284 			makespace(trp);
285 			needspace = 1;
286 			continue;
287 		}
288 		if (trp->tp->type==LP)
289 			parens++;
290 		else if (trp->tp->type==RP)
291 			parens--;
292 		trp->tp++;
293 		ntok++;
294 	}
295 	trp->tp -= ntok;
296 	/* Now trp->tp won't move underneath us */
297 	lp = bp = trp->tp+ntokp;
298 	for (; parens>=0; lp++) {
299 		if (lp->type == LP) {
300 			parens++;
301 			continue;
302 		}
303 		if (lp->type==RP)
304 			parens--;
305 		if (lp->type==DSHARP)
306 			lp->type = DSHARP1;	/* ## not special in arg */
307 		if (lp->type==COMMA && parens==0 || parens<0 && (lp-1)->type!=LP) {
308 			if (*narg>=NARG-1)
309 				error(FATAL, "Sorry, too many macro arguments");
310 			ttr.bp = ttr.tp = bp;
311 			ttr.lp = lp;
312 			atr[(*narg)++] = normtokenrow(&ttr);
313 			bp = lp+1;
314 		}
315 	}
316 	return ntok;
317 }
318 
319 /*
320  * substitute the argument list into the replacement string
321  *  This would be simple except for ## and #
322  */
323 void
324 substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
325 {
326 	Tokenrow tatr;
327 	Token *tp;
328 	int ntok, argno;
329 
330 	for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
331 		if (rtr->tp->type==SHARP) {	/* string operator */
332 			tp = rtr->tp;
333 			rtr->tp += 1;
334 			if ((argno = lookuparg(np, rtr->tp))<0) {
335 				error(ERROR, "# not followed by macro parameter");
336 				continue;
337 			}
338 			ntok = 1 + (rtr->tp - tp);
339 			rtr->tp = tp;
340 			insertrow(rtr, ntok, stringify(atr[argno]));
341 			continue;
342 		}
343 		if (rtr->tp->type==NAME
344 		 && (argno = lookuparg(np, rtr->tp)) >= 0) {
345 			if (rtr->tp < rtr->bp)
346 				error(ERROR, "access out of bounds");
347 			if ((rtr->tp+1)->type==DSHARP
348 			 || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
349 				insertrow(rtr, 1, atr[argno]);
350 			else {
351 				copytokenrow(&tatr, atr[argno]);
352 				expandrow(&tatr, "<macro>");
353 				insertrow(rtr, 1, &tatr);
354 				dofree(tatr.bp);
355 			}
356 			continue;
357 		}
358 		rtr->tp++;
359 	}
360 }
361 
362 /*
363  * Evaluate the ## operators in a tokenrow
364  */
365 void
366 doconcat(Tokenrow *trp)
367 {
368 	Token *ltp, *ntp;
369 	Tokenrow ntr;
370 	int len;
371 
372 	for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
373 		if (trp->tp->type==DSHARP1)
374 			trp->tp->type = DSHARP;
375 		else if (trp->tp->type==DSHARP) {
376 			char tt[128];
377 			ltp = trp->tp-1;
378 			ntp = trp->tp+1;
379 			if (ltp<trp->bp || ntp>=trp->lp) {
380 				error(ERROR, "## occurs at border of replacement");
381 				continue;
382 			}
383 			len = ltp->len + ntp->len;
384 			strncpy((char*)tt, (char*)ltp->t, ltp->len);
385 			strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
386 			tt[len] = '\0';
387 			setsource("<##>", -1, tt);
388 			maketokenrow(3, &ntr);
389 			gettokens(&ntr, 1);
390 			unsetsource();
391 			if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
392 				error(WARNING, "Bad token %r produced by ##", &ntr);
393 			ntr.lp = ntr.bp+1;
394 			trp->tp = ltp;
395 			makespace(&ntr);
396 			insertrow(trp, (ntp-ltp)+1, &ntr);
397 			dofree(ntr.bp);
398 			trp->tp--;
399 		}
400 	}
401 }
402 
403 /*
404  * tp is a potential parameter name of macro mac;
405  * look it up in mac's arglist, and if found, return the
406  * corresponding index in the argname array.  Return -1 if not found.
407  */
408 int
409 lookuparg(Nlist *mac, Token *tp)
410 {
411 	Token *ap;
412 
413 	if (tp->type!=NAME || mac->ap==NULL)
414 		return -1;
415 	for (ap=mac->ap->bp; ap<mac->ap->lp; ap++) {
416 		if (ap->len==tp->len && strncmp((char*)ap->t,(char*)tp->t,ap->len)==0)
417 			return ap - mac->ap->bp;
418 	}
419 	return -1;
420 }
421 
422 /*
423  * Return a quoted version of the tokenrow (from # arg)
424  */
425 #define	STRLEN	512
426 Tokenrow *
427 stringify(Tokenrow *vp)
428 {
429 	static Token t = { STRING };
430 	static Tokenrow tr = { &t, &t, &t+1, 1 };
431 	Token *tp;
432 	uchar s[STRLEN];
433 	uchar *sp = s, *cp;
434 	int i, instring;
435 
436 	*sp++ = '"';
437 	for (tp = vp->bp; tp < vp->lp; tp++) {
438 		instring = tp->type==STRING || tp->type==CCON;
439 		if (sp+2*tp->len >= &s[STRLEN-10]) {
440 			error(ERROR, "Stringified macro arg is too long");
441 			break;
442 		}
443 		if (tp->wslen && (tp->flag&XPWS)==0)
444 			*sp++ = ' ';
445 		for (i=0, cp=tp->t; i<tp->len; i++) {
446 			if (instring && (*cp=='"' || *cp=='\\'))
447 				*sp++ = '\\';
448 			*sp++ = *cp++;
449 		}
450 	}
451 	*sp++ = '"';
452 	*sp = '\0';
453 	sp = s;
454 	t.len = strlen((char*)sp);
455 	t.t = newstring(sp, t.len, 0);
456 	return &tr;
457 }
458 
459 /*
460  * expand a builtin name
461  */
462 void
463 builtin(Tokenrow *trp, int biname)
464 {
465 	char *op;
466 	Token *tp;
467 	Source *s;
468 
469 	tp = trp->tp;
470 	trp->tp++;
471 	/* need to find the real source */
472 	s = cursource;
473 	while (s && s->fd==-1)
474 		s = s->next;
475 	if (s==NULL)
476 		s = cursource;
477 	/* most are strings */
478 	tp->type = STRING;
479 	if (tp->wslen) {
480 		*outp++ = ' ';
481 		tp->wslen = 1;
482 	}
483 	op = outp;
484 	*op++ = '"';
485 	switch (biname) {
486 
487 	case KLINENO:
488 		tp->type = NUMBER;
489 		op = outnum(op-1, s->line);
490 		break;
491 
492 	case KFILE:
493 		strcpy(op, s->filename);
494 		op += strlen(s->filename);
495 		break;
496 
497 	case KDATE:
498 		strncpy(op, curtime+4, 7);
499 		strncpy(op+7, curtime+24, 4); /* Plan 9 asctime disobeys standard */
500 		op += 11;
501 		break;
502 
503 	case KTIME:
504 		strncpy(op, curtime+11, 8);
505 		op += 8;
506 		break;
507 
508 	default:
509 		error(ERROR, "cpp botch: unknown internal macro");
510 		return;
511 	}
512 	if (tp->type==STRING)
513 		*op++ = '"';
514 	tp->t = (uchar*)outp;
515 	tp->len = op - outp;
516 	outp = op;
517 }
518