/* xref: /csrg-svn/local/toolchest/ksh/sh/word.c (revision 35175) */
/* @(#)word.c	1.1 */
/*
 * UNIX shell
 *
 * S. R. Bourne
 * Rewritten by David Korn
 * AT&T Bell Laboratories
 *
 */

11*35175Smarc #include	"flags.h"
12*35175Smarc #include	"defs.h"
13*35175Smarc #include	"io.h"
14*35175Smarc #include	"history.h"
15*35175Smarc #include	"stak.h"
16*35175Smarc #include	"sym.h"
17*35175Smarc #include	"shtype.h"
18*35175Smarc #include	"brkincr.h"
19*35175Smarc #include	"name.h"
20*35175Smarc #include	"builtins.h"
21*35175Smarc #ifdef JOBS
22*35175Smarc #ifdef BSD
23*35175Smarc #include	<signal.h>
24*35175Smarc static jmp_buf readerr;
25*35175Smarc static int nreadc;
26*35175Smarc #endif	/* BSD */
27*35175Smarc #endif	/* JOBS */
28*35175Smarc 
29*35175Smarc 
30*35175Smarc 
31*35175Smarc static int letflg = 0;
32*35175Smarc static FILEBLK a_fb;
33*35175Smarc static FILE a_fd;
34*35175Smarc 
35*35175Smarc /* This module defines the following routines */
36*35175Smarc char	*match_paren();
37*35175Smarc int	nextc();
38*35175Smarc int	readc();
39*35175Smarc int	word();
40*35175Smarc 
41*35175Smarc /* This module references these external routines */
42*35175Smarc extern void	arg_clear();
43*35175Smarc extern void	chktrap();
44*35175Smarc extern void	chkpr();
45*35175Smarc extern void	exitsh();
46*35175Smarc extern NAMPTR	findnod();
47*35175Smarc extern long	lseek();
48*35175Smarc extern char	*movstr();
49*35175Smarc extern void	synbad();
50*35175Smarc extern char	*tilde();
51*35175Smarc extern char	*valup();
52*35175Smarc 
53*35175Smarc /* ========	character handling for command lines	========*/
54*35175Smarc 
55*35175Smarc /*
56*35175Smarc  * Get the next word and put it on the top of the stak
57*35175Smarc  * Determine the type of word and set wdnum and wdset accordingly
58*35175Smarc  * Returns the token type
59*35175Smarc  */
60*35175Smarc 
word()61*35175Smarc word()
62*35175Smarc {
63*35175Smarc 	register int c;
64*35175Smarc 	register int d;
65*35175Smarc 	register char *argp;
66*35175Smarc 	register char *tildp;
67*35175Smarc 	char not_alias;
68*35175Smarc 	char chk_keywd;
69*35175Smarc 	int 	alpha = 0;
70*35175Smarc 	wdnum=0;
71*35175Smarc 	/* condition needed to check for keywords, name=value */
72*35175Smarc 	chk_keywd = reserv!=0 || (wdset&KEYFLG);
73*35175Smarc 	wdset &= ~KEYFLG;
74*35175Smarc 	wdarg = (ARGPTR)locstak();
75*35175Smarc 	argp = wdarg->argval;
76*35175Smarc 	if(letflg)
77*35175Smarc 	{
78*35175Smarc 		letflg = 0;
79*35175Smarc 		*argp++ =(DQUOTE);
80*35175Smarc 		argp = match_paren(argp, LPAREN, RPAREN, 1);
81*35175Smarc 		*(argp-1)=(DQUOTE);
82*35175Smarc 		c = nextc();
83*35175Smarc 		wdval = 0;
84*35175Smarc 		if(c != ')')
85*35175Smarc 		{
86*35175Smarc 			stakbot = wdarg->argval;
87*35175Smarc 			cpystak(let_syntax);
88*35175Smarc 			synbad();
89*35175Smarc 		}
90*35175Smarc 		endstak(--argp);
91*35175Smarc 		return(0);
92*35175Smarc 	}
93*35175Smarc 	tildp = NULL;
94*35175Smarc 	while(1)
95*35175Smarc 	{
96*35175Smarc 		while((c=nextc(), isspace(c)));
97*35175Smarc 		if(c==COMCHAR)
98*35175Smarc 		{
99*35175Smarc 			while((c=readc()) != NL && c != ENDOF);
100*35175Smarc 			peekc=c;
101*35175Smarc 		}
102*35175Smarc 		else	 /* out of comment - white isspace loop */
103*35175Smarc 			break;
104*35175Smarc 	}
105*35175Smarc 	if(c=='~')
106*35175Smarc 		tildp = argp;
107*35175Smarc 	not_alias = (aliflg==0);
108*35175Smarc 	if(!ismeta(c))
109*35175Smarc 	{
110*35175Smarc 		do
111*35175Smarc 		{
112*35175Smarc 			if(c==LITERAL)
113*35175Smarc 				argp = match_paren(argp,c,c,0);
114*35175Smarc 			else
115*35175Smarc 			{
116*35175Smarc 				if(argp==wdarg->argval&&chk_keywd&&isalpha(c))
117*35175Smarc 				{
118*35175Smarc 					alpha++;
119*35175Smarc 				}
120*35175Smarc 				*argp++=(c);
121*35175Smarc 				if(c == ESCAPE)
122*35175Smarc 					*argp++ = readc();
123*35175Smarc 				if(alpha)
124*35175Smarc 				{
125*35175Smarc 					if(c == '[')
126*35175Smarc 					{
127*35175Smarc 						argp = match_paren(argp,'[',']',-1);
128*35175Smarc 					}
129*35175Smarc 					else if(c=='=')
130*35175Smarc 					{
131*35175Smarc 						wdset |= KEYFLG;
132*35175Smarc 						tildp = argp;
133*35175Smarc 						alpha = 0;
134*35175Smarc 					}
135*35175Smarc 					else if(!isalnum(c))
136*35175Smarc 						alpha = 0;
137*35175Smarc 				}
138*35175Smarc 				if(qotchar(c))
139*35175Smarc 				{
140*35175Smarc 					argp = match_paren(argp,c,c,0);
141*35175Smarc 				}
142*35175Smarc 			}
143*35175Smarc 			d = c;
144*35175Smarc 			c = nextc();
145*35175Smarc 			if(d==DOLLAR && c ==LPAREN)
146*35175Smarc 			{
147*35175Smarc 				subflag++;
148*35175Smarc 				*argp++ = c;
149*35175Smarc 				argp = match_paren(argp, LPAREN, RPAREN, 0);
150*35175Smarc 				c = nextc();
151*35175Smarc 			}
152*35175Smarc 			else if(tildp!=NULL &&  (c == '/'  || c==':' || ismeta(c)))
153*35175Smarc 			{
154*35175Smarc 				/* check for tilde expansion */
155*35175Smarc 				register char *dir;
156*35175Smarc 				*argp = 0;
157*35175Smarc 				staktop = argp;
158*35175Smarc 				dir=tilde(tildp);
159*35175Smarc 				/* This check needed if tilde() uses malloc() */
160*35175Smarc #ifndef INT16
161*35175Smarc 				if(stakbot != (STKPTR)wdarg)
162*35175Smarc 				{
163*35175Smarc 					tildp += ((char*)staktop-argp);
164*35175Smarc 					argp = staktop;
165*35175Smarc 					wdarg = (ARGPTR)stakbot;
166*35175Smarc 				}
167*35175Smarc #endif /*INT16 */
168*35175Smarc 				if(dir)
169*35175Smarc 				{
170*35175Smarc 					argp=tildp;
171*35175Smarc 					argp = movstr(dir,argp);
172*35175Smarc 				}
173*35175Smarc 				else
174*35175Smarc 					tildp = NULL;
175*35175Smarc 			}
176*35175Smarc 			if(c==':' && (wdset&KEYFLG))
177*35175Smarc 				tildp = argp+1;
178*35175Smarc 		}
179*35175Smarc 		while(!ismeta(c));
180*35175Smarc 		argp=endstak(argp);
181*35175Smarc 		peekn=c|MARK;
182*35175Smarc 		if(((ARGPTR) argp)->argval[1]==0 &&
183*35175Smarc 			(d=((ARGPTR) argp)->argval[0], isdigit(d)) && (c=='>' || c=='<'))
184*35175Smarc 		{
185*35175Smarc 			word();
186*35175Smarc 			wdnum=d-'0';
187*35175Smarc 		}
188*35175Smarc 		else
189*35175Smarc 		{
190*35175Smarc 			/*check for reserved words and aliases */
191*35175Smarc 			wdval = (reserv!=0?syslook(((ARGPTR)argp)->argval,reserved):0);
192*35175Smarc 			/* for unity database software, allow select to be aliased */
193*35175Smarc 			if((reserv!=0 && (wdval==0||wdval==SELSYM)) || (wdset&S_FLAG))
194*35175Smarc 			{
195*35175Smarc 				/* check for aliases */
196*35175Smarc 				NAMPTR np;
197*35175Smarc 				char *alp = ((ARGPTR)argp)->argval;
198*35175Smarc 				if(not_alias && (wdset&(E_FLAG|KEYFLG))==0 &&
199*35175Smarc 					(np=findnod(alp,alias,CHK_FOR)))
200*35175Smarc 				{
201*35175Smarc 					if(attest(np,T_FLAG)==0 && (alp=valup(np)))
202*35175Smarc 					{
203*35175Smarc 						wdval = 0;
204*35175Smarc 						c = standin->flin;
205*35175Smarc 						push(&a_fb);
206*35175Smarc 						estabf(alp,&a_fd);
207*35175Smarc 						a_fb.flin = c;
208*35175Smarc 						aliflg = peekn;
209*35175Smarc 						peekn = 0;
210*35175Smarc 						wdset |= KEYFLG;
211*35175Smarc 						c = word();
212*35175Smarc 						return(c);
213*35175Smarc 					}
214*35175Smarc 				}
215*35175Smarc 			}
216*35175Smarc 		}
217*35175Smarc 	}
218*35175Smarc 	else if(dipchar(c))
219*35175Smarc 	{
220*35175Smarc 		if((d=nextc())==c)
221*35175Smarc 		{
222*35175Smarc 			wdval = c|SYMREP;
223*35175Smarc 			if(c=='<')
224*35175Smarc 			{
225*35175Smarc 				if((d=nextc())=='-')
226*35175Smarc 					wdnum |= IOSTRIP;
227*35175Smarc 				else
228*35175Smarc 					 peekn = d|MARK;
229*35175Smarc 			}
230*35175Smarc 			/* arithmetic evaluation ((expr)) */
231*35175Smarc 			else if(c == LPAREN && reserv != 0)
232*35175Smarc 			{
233*35175Smarc 				wdval = 0;
234*35175Smarc 				letflg = 1;
235*35175Smarc 				argp = endstak(movstr(blet,argp));
236*35175Smarc 			}
237*35175Smarc 		}
238*35175Smarc 		else if(c=='|' && d=='&')
239*35175Smarc 			wdval = COOPSYM;
240*35175Smarc #ifdef DEVFD
241*35175Smarc 		else if(d==LPAREN && (c=='<'||c == '>'))
242*35175Smarc 			wdval = (c=='>'?OPROC:IPROC);
243*35175Smarc #endif	/* DEVFD */
244*35175Smarc 		else
245*35175Smarc 		{
246*35175Smarc 			peekn = d|MARK;
247*35175Smarc 			wdval = c;
248*35175Smarc 		}
249*35175Smarc 	}
250*35175Smarc 	else
251*35175Smarc 	{
252*35175Smarc 		if((wdval=c)==ENDOF)
253*35175Smarc 		{
254*35175Smarc 			wdval=EOFSYM;
255*35175Smarc 		}
256*35175Smarc 		if(iopend && eolchar(c))
257*35175Smarc 		{
258*35175Smarc 			copy(iopend);
259*35175Smarc 			iopend=0;
260*35175Smarc 		}
261*35175Smarc 	}
262*35175Smarc 	reserv=0;
263*35175Smarc 	return(wdval);
264*35175Smarc }
265*35175Smarc 
266*35175Smarc /*
267*35175Smarc  * skip until matching <closed>
268*35175Smarc  * if flag > 0, then newlines and spaces are removed
269*35175Smarc  * if flag < 0, then each newline cause syntax errors
270*35175Smarc  */
271*35175Smarc 
match_paren(argp,open,close,flag)272*35175Smarc char *match_paren(argp,open,close,flag)
273*35175Smarc register char *argp;
274*35175Smarc register int open;
275*35175Smarc {
276*35175Smarc 	register int c;
277*35175Smarc 	register int count = 1;
278*35175Smarc 	register int quoted = 0;
279*35175Smarc 	int was_dollar=0;
280*35175Smarc 	char *oldargp = argp;
281*35175Smarc 	int line = standin->flin;
282*35175Smarc 	while(count)
283*35175Smarc 	{
284*35175Smarc 		/* check for unmatched <open> */
285*35175Smarc 		if((c=(open==LITERAL?readc():nextc()))==0)
286*35175Smarc 		{
287*35175Smarc 			/* eof before matching quote */
288*35175Smarc 			/* This keeps old shell scripts running */
289*35175Smarc 			if(fileno(input) == F_STRING)
290*35175Smarc 				break;
291*35175Smarc 			standin->flin = line;
292*35175Smarc 			wdval = open|EOFSYM;
293*35175Smarc 			synbad();
294*35175Smarc 		}
295*35175Smarc 		if(c == NL)
296*35175Smarc 		{
297*35175Smarc 			if(flag<0)
298*35175Smarc 				break;
299*35175Smarc 			chkpr(0);
300*35175Smarc 			if(flag)
301*35175Smarc 				continue;
302*35175Smarc 		}
303*35175Smarc 		else if(c == close)
304*35175Smarc 		{
305*35175Smarc 			if(!quoted)
306*35175Smarc 				count--;
307*35175Smarc 		}
308*35175Smarc 		else if(c == open && !quoted)
309*35175Smarc 			count++;
310*35175Smarc 		if(flag<=0 || c != SP )
311*35175Smarc 		{
312*35175Smarc 			if(open==LITERAL)
313*35175Smarc 				*argp++ = ESCAPE;
314*35175Smarc 			if(argp >= (char*)brkend)
315*35175Smarc 				setbrk(BRKINCR);
316*35175Smarc 			*argp++ = c;
317*35175Smarc 			if(open==LITERAL)
318*35175Smarc 				continue;
319*35175Smarc 		}
320*35175Smarc 		if(!quoted && flag==0)
321*35175Smarc 		{
322*35175Smarc 			/* check for nested '', "", and `` within $() */
323*35175Smarc 			if(open!=close)
324*35175Smarc 			{
325*35175Smarc 				if(c==LITERAL)
326*35175Smarc 					argp--;
327*35175Smarc 				else if(!qotchar(c))
328*35175Smarc 					goto skip;
329*35175Smarc 				argp = match_paren(argp,c,c,0);
330*35175Smarc 			}
331*35175Smarc 			/* check for $() within '', "", and `` */
332*35175Smarc 			else if(was_dollar && c==LPAREN)
333*35175Smarc 			{
334*35175Smarc 				argp = match_paren(argp,LPAREN,RPAREN,0);
335*35175Smarc 			}
336*35175Smarc 		skip:
337*35175Smarc 			was_dollar = (c==DOLLAR);
338*35175Smarc 		}
339*35175Smarc 		if(c == ESCAPE)
340*35175Smarc 			quoted = 1 - quoted;
341*35175Smarc 		else
342*35175Smarc 			quoted = 0;
343*35175Smarc 	}
344*35175Smarc 	if(open==LITERAL)
345*35175Smarc 	{
346*35175Smarc 		argp -= 2;
347*35175Smarc 		if(argp==oldargp)
348*35175Smarc 		{
349*35175Smarc 			/* handle null argument specially */
350*35175Smarc 			*argp++ = '"';
351*35175Smarc 			*argp++ = '"';
352*35175Smarc 		}
353*35175Smarc 	}
354*35175Smarc 	return(argp);
355*35175Smarc }
356*35175Smarc 
357*35175Smarc /*
358*35175Smarc  * If quote is equal to zero then
359*35175Smarc  * this routine returns the next input character but strips shell
360*35175Smarc  * line continuations and issues prompts at end of line
361*35175Smarc  * Otherwise this routine is the same as readc()
362*35175Smarc  */
363*35175Smarc 
nextc()364*35175Smarc nextc()
365*35175Smarc {
366*35175Smarc 	register int c, d;
367*35175Smarc 	static int oldd;
368*35175Smarc retry:
369*35175Smarc 	d = readc();
370*35175Smarc 	if(d==ESCAPE && oldd!=ESCAPE)
371*35175Smarc 	{
372*35175Smarc 		if((c=readc())==NL)
373*35175Smarc 		{
374*35175Smarc 			chkpr(0);
375*35175Smarc 			goto retry;
376*35175Smarc 		}
377*35175Smarc 		peekc = c|MARK;
378*35175Smarc 	}
379*35175Smarc 	oldd = d;
380*35175Smarc 	return(d);
381*35175Smarc }
382*35175Smarc 
readc()383*35175Smarc readc()
384*35175Smarc {
385*35175Smarc 	register int c;
386*35175Smarc 	register SHFILE	f = standin;
387*35175Smarc 	register FILE *fd = input;
388*35175Smarc 	int maxtry = 20;
389*35175Smarc 	if(staktop >= brkend)
390*35175Smarc 		setbrk(BRKINCR);
391*35175Smarc 	if(peekn)
392*35175Smarc 	{
393*35175Smarc 		c = peekn&~MARK;
394*35175Smarc 		peekn = 0;
395*35175Smarc 		return(c);
396*35175Smarc 	}
397*35175Smarc 	if(peekc)
398*35175Smarc 	{
399*35175Smarc 		c = peekc&~MARK;
400*35175Smarc 		peekc = 0;
401*35175Smarc 		return(c);
402*35175Smarc 	}
403*35175Smarc retry:
404*35175Smarc #ifdef JOBS
405*35175Smarc #ifdef BSD
406*35175Smarc 	if(states&READC)
407*35175Smarc 		nreadc++;
408*35175Smarc 	else
409*35175Smarc 	{
410*35175Smarc 		nreadc = 1;
411*35175Smarc 		states |= READC;
412*35175Smarc 	}
413*35175Smarc 	/* this is needed to implement Bourne shell semantics of traps */
414*35175Smarc 	/* reads automatically restart with jobs library */
415*35175Smarc 	if(fd->_cnt==0 && setjmp(readerr))
416*35175Smarc 		goto trapfound;
417*35175Smarc #endif	/* BSD */
418*35175Smarc #endif	/* JOBS */
419*35175Smarc 	if((c=getc(fd)) != EOF)
420*35175Smarc 	{
421*35175Smarc 		if(c==0)
422*35175Smarc 		{
423*35175Smarc 			if(f->feval && estabf(*f->feval++,fd)==0)
424*35175Smarc 				c = SP;
425*35175Smarc 			/* treat the NULL byte as eof for TMPIO */
426*35175Smarc 			else if(fileno(fd) == TMPIO)
427*35175Smarc 			{
428*35175Smarc 				setbuf(fd,NIL);
429*35175Smarc 				lseek(TMPIO,0L,0);
430*35175Smarc 			}
431*35175Smarc 			/* skip over null bytes in files */
432*35175Smarc 			else if(fileno(fd) !=  F_STRING)
433*35175Smarc 				goto retry;
434*35175Smarc 			else if(aliflg)
435*35175Smarc 			{
436*35175Smarc 				c = (aliflg&~MARK);
437*35175Smarc 				aliflg = 0;
438*35175Smarc 				wdset |= S_FLAG;
439*35175Smarc 				pop(1);
440*35175Smarc 			}
441*35175Smarc 			else
442*35175Smarc 			/* end-of-string is end-of-file */
443*35175Smarc 			{
444*35175Smarc 				f->feval = 0;
445*35175Smarc 				estabf(nullstr,fd);
446*35175Smarc 				fd->_flag |= _IOEOF;
447*35175Smarc 			}
448*35175Smarc 		}
449*35175Smarc 		if((f->fstak==0  || (states&FIXFLG)) && c != 0)
450*35175Smarc 		{
451*35175Smarc 			if((states&READPR) && aliflg==0)
452*35175Smarc 				 putc(c,output);
453*35175Smarc 			if((states&(FIXFLG)) && fileno(fd)!=F_STRING)
454*35175Smarc 				putc(c,fc_fix->fixfd);
455*35175Smarc 		}
456*35175Smarc 		if(c==NL)
457*35175Smarc 			f->flin++;
458*35175Smarc 	}
459*35175Smarc 	else if(feof(fd))
460*35175Smarc 	{
461*35175Smarc 		fd->_flag |= _IOEOF;
462*35175Smarc 		c = ENDOF;
463*35175Smarc 	}
464*35175Smarc 	else
465*35175Smarc 	{
466*35175Smarc 		clearerr(fd);
467*35175Smarc 		if(trapnote&SIGSET)
468*35175Smarc 		{
469*35175Smarc 			newline();
470*35175Smarc 			exitsh(SIGFAIL);
471*35175Smarc 		}
472*35175Smarc 		else if((trapnote&TRAPSET) && (states&RWAIT))
473*35175Smarc 		{
474*35175Smarc 		trapfound:
475*35175Smarc 			newline();
476*35175Smarc 			chktrap();
477*35175Smarc 			arg_clear();
478*35175Smarc 		}
479*35175Smarc 		else if(--maxtry > 0)
480*35175Smarc 			goto retry;
481*35175Smarc 		else
482*35175Smarc 			fd->_flag |= _IOERR;
483*35175Smarc 		c = ENDOF;
484*35175Smarc 	}
485*35175Smarc #ifdef JOBS
486*35175Smarc #ifdef BSD
487*35175Smarc 	if(--nreadc <=0)
488*35175Smarc 		states &= ~READC;
489*35175Smarc #endif	/* BSD */
490*35175Smarc #endif	/* JOBS */
491*35175Smarc 	return(c);
492*35175Smarc }
493*35175Smarc 
#ifdef JOBS
#ifdef BSD
/*
 * This routine is here because signals behave differently with sigset
 *
 * It clears the error state of the input stream and then mirrors the
 * error handling of readc(): with SIGSET pending it prints a newline,
 * clears trapnote and leaves through exitsh(SIGFAIL); with a trap
 * pending while RWAIT is set it jumps to the readerr context armed by
 * readc().  In any other case it simply returns.
 * NOTE(review): presumably called from the signal handling code while a
 * read is in progress - confirm against the caller.
 */

interrupt()
{
	register FILE *fd = input;
	clearerr(fd);
	if(trapnote&SIGSET)
	{
		newline();
		trapnote = 0;
		exitsh(SIGFAIL);
	}
	else if((trapnote&TRAPSET) && (states&RWAIT))
		longjmp(readerr,1);
}

#endif	/* BSD */
#endif	/* JOBS */
