/* xref: /inferno-os/lib9/tokenize.c (revision 7ded4a527bdfd0e8b3a9049955f2af89e5f039ee) */
#include "lib9.h"

/* separators used by tokenize: space, tab, carriage return, newline */
static char qsep[] = " \t\r\n";
537da2899SCharles.Forsyth static char*
qtoken(char * s,char * sep)6*7ded4a52SCharles Forsyth qtoken(char *s, char *sep)
737da2899SCharles.Forsyth {
837da2899SCharles.Forsyth 	int quoting;
937da2899SCharles.Forsyth 	char *t;
1037da2899SCharles.Forsyth 
1137da2899SCharles.Forsyth 	quoting = 0;
1237da2899SCharles.Forsyth 	t = s;	/* s is output string, t is input string */
13*7ded4a52SCharles Forsyth 	while(*t!='\0' && (quoting || utfrune(sep, *t)==nil)){
1437da2899SCharles.Forsyth 		if(*t != '\''){
1537da2899SCharles.Forsyth 			*s++ = *t++;
1637da2899SCharles.Forsyth 			continue;
1737da2899SCharles.Forsyth 		}
1837da2899SCharles.Forsyth 		/* *t is a quote */
1937da2899SCharles.Forsyth 		if(!quoting){
2037da2899SCharles.Forsyth 			quoting = 1;
2137da2899SCharles.Forsyth 			t++;
2237da2899SCharles.Forsyth 			continue;
2337da2899SCharles.Forsyth 		}
2437da2899SCharles.Forsyth 		/* quoting and we're on a quote */
2537da2899SCharles.Forsyth 		if(t[1] != '\''){
2637da2899SCharles.Forsyth 			/* end of quoted section; absorb closing quote */
2737da2899SCharles.Forsyth 			t++;
2837da2899SCharles.Forsyth 			quoting = 0;
2937da2899SCharles.Forsyth 			continue;
3037da2899SCharles.Forsyth 		}
3137da2899SCharles.Forsyth 		/* doubled quote; fold one quote into two */
3237da2899SCharles.Forsyth 		t++;
3337da2899SCharles.Forsyth 		*s++ = *t++;
3437da2899SCharles.Forsyth 	}
3537da2899SCharles.Forsyth 	if(*s != '\0'){
3637da2899SCharles.Forsyth 		*s = '\0';
3737da2899SCharles.Forsyth 		if(t == s)
3837da2899SCharles.Forsyth 			t++;
3937da2899SCharles.Forsyth 	}
4037da2899SCharles.Forsyth 	return t;
4137da2899SCharles.Forsyth }
43*7ded4a52SCharles Forsyth static char*
etoken(char * t,char * sep)44*7ded4a52SCharles Forsyth etoken(char *t, char *sep)
45*7ded4a52SCharles Forsyth {
46*7ded4a52SCharles Forsyth 	int quoting;
47*7ded4a52SCharles Forsyth 
48*7ded4a52SCharles Forsyth 	/* move to end of next token */
49*7ded4a52SCharles Forsyth 	quoting = 0;
50*7ded4a52SCharles Forsyth 	while(*t!='\0' && (quoting || utfrune(sep, *t)==nil)){
51*7ded4a52SCharles Forsyth 		if(*t != '\''){
52*7ded4a52SCharles Forsyth 			t++;
53*7ded4a52SCharles Forsyth 			continue;
54*7ded4a52SCharles Forsyth 		}
55*7ded4a52SCharles Forsyth 		/* *t is a quote */
56*7ded4a52SCharles Forsyth 		if(!quoting){
57*7ded4a52SCharles Forsyth 			quoting = 1;
58*7ded4a52SCharles Forsyth 			t++;
59*7ded4a52SCharles Forsyth 			continue;
60*7ded4a52SCharles Forsyth 		}
61*7ded4a52SCharles Forsyth 		/* quoting and we're on a quote */
62*7ded4a52SCharles Forsyth 		if(t[1] != '\''){
63*7ded4a52SCharles Forsyth 			/* end of quoted section; absorb closing quote */
64*7ded4a52SCharles Forsyth 			t++;
65*7ded4a52SCharles Forsyth 			quoting = 0;
66*7ded4a52SCharles Forsyth 			continue;
67*7ded4a52SCharles Forsyth 		}
68*7ded4a52SCharles Forsyth 		/* doubled quote; fold one quote into two */
69*7ded4a52SCharles Forsyth 		t += 2;
70*7ded4a52SCharles Forsyth 	}
71*7ded4a52SCharles Forsyth 	return t;
72*7ded4a52SCharles Forsyth }
74*7ded4a52SCharles Forsyth int
gettokens(char * s,char ** args,int maxargs,char * sep)75*7ded4a52SCharles Forsyth gettokens(char *s, char **args, int maxargs, char *sep)
76*7ded4a52SCharles Forsyth {
77*7ded4a52SCharles Forsyth 	int nargs;
78*7ded4a52SCharles Forsyth 
79*7ded4a52SCharles Forsyth 	for(nargs=0; nargs<maxargs; nargs++){
80*7ded4a52SCharles Forsyth 		while(*s!='\0' && utfrune(sep, *s)!=nil)
81*7ded4a52SCharles Forsyth 			*s++ = '\0';
82*7ded4a52SCharles Forsyth 		if(*s == '\0')
83*7ded4a52SCharles Forsyth 			break;
84*7ded4a52SCharles Forsyth 		args[nargs] = s;
85*7ded4a52SCharles Forsyth 		s = etoken(s, sep);
86*7ded4a52SCharles Forsyth 	}
87*7ded4a52SCharles Forsyth 
88*7ded4a52SCharles Forsyth 	return nargs;
89*7ded4a52SCharles Forsyth }
9137da2899SCharles.Forsyth int
tokenize(char * s,char ** args,int maxargs)9237da2899SCharles.Forsyth tokenize(char *s, char **args, int maxargs)
9337da2899SCharles.Forsyth {
9437da2899SCharles.Forsyth 	int nargs;
9537da2899SCharles.Forsyth 
9637da2899SCharles.Forsyth 	for(nargs=0; nargs<maxargs; nargs++){
9737da2899SCharles.Forsyth 		while(*s!='\0' && utfrune(qsep, *s)!=nil)
9837da2899SCharles.Forsyth 			s++;
9937da2899SCharles.Forsyth 		if(*s == '\0')
10037da2899SCharles.Forsyth 			break;
10137da2899SCharles.Forsyth 		args[nargs] = s;
102*7ded4a52SCharles Forsyth 		s = qtoken(s, qsep);
10337da2899SCharles.Forsyth 	}
10437da2899SCharles.Forsyth 
10537da2899SCharles.Forsyth 	return nargs;
10637da2899SCharles.Forsyth }