xref: /inferno-os/libkern/tokenize.c (revision 3cd4f1d15146c08f05206d6328ecbc1c7fdc8dfa)
137da2899SCharles.Forsyth #include "lib9.h"
237da2899SCharles.Forsyth 
/* separator set handed to utfrune/qtoken by tokenize: space, tab, CR, LF */
static char qsep[] = " \t\r\n";
437da2899SCharles.Forsyth 
537da2899SCharles.Forsyth static char*
qtoken(char * s,char * sep)6*3cd4f1d1SCharles Forsyth qtoken(char *s, char *sep)
737da2899SCharles.Forsyth {
837da2899SCharles.Forsyth 	int quoting;
937da2899SCharles.Forsyth 	char *t;
1037da2899SCharles.Forsyth 
1137da2899SCharles.Forsyth 	quoting = 0;
1237da2899SCharles.Forsyth 	t = s;	/* s is output string, t is input string */
13*3cd4f1d1SCharles Forsyth 	while(*t!='\0' && (quoting || utfrune(sep, *t)==nil)){
1437da2899SCharles.Forsyth 		if(*t != '\''){
1537da2899SCharles.Forsyth 			*s++ = *t++;
1637da2899SCharles.Forsyth 			continue;
1737da2899SCharles.Forsyth 		}
1837da2899SCharles.Forsyth 		/* *t is a quote */
1937da2899SCharles.Forsyth 		if(!quoting){
2037da2899SCharles.Forsyth 			quoting = 1;
2137da2899SCharles.Forsyth 			t++;
2237da2899SCharles.Forsyth 			continue;
2337da2899SCharles.Forsyth 		}
2437da2899SCharles.Forsyth 		/* quoting and we're on a quote */
2537da2899SCharles.Forsyth 		if(t[1] != '\''){
2637da2899SCharles.Forsyth 			/* end of quoted section; absorb closing quote */
2737da2899SCharles.Forsyth 			t++;
2837da2899SCharles.Forsyth 			quoting = 0;
2937da2899SCharles.Forsyth 			continue;
3037da2899SCharles.Forsyth 		}
3137da2899SCharles.Forsyth 		/* doubled quote; fold one quote into two */
3237da2899SCharles.Forsyth 		t++;
3337da2899SCharles.Forsyth 		*s++ = *t++;
3437da2899SCharles.Forsyth 	}
3537da2899SCharles.Forsyth 	if(*s != '\0'){
3637da2899SCharles.Forsyth 		*s = '\0';
3737da2899SCharles.Forsyth 		if(t == s)
3837da2899SCharles.Forsyth 			t++;
3937da2899SCharles.Forsyth 	}
4037da2899SCharles.Forsyth 	return t;
4137da2899SCharles.Forsyth }
4237da2899SCharles.Forsyth 
43*3cd4f1d1SCharles Forsyth static char*
etoken(char * t,char * sep)44*3cd4f1d1SCharles Forsyth etoken(char *t, char *sep)
45*3cd4f1d1SCharles Forsyth {
46*3cd4f1d1SCharles Forsyth 	int quoting;
47*3cd4f1d1SCharles Forsyth 
48*3cd4f1d1SCharles Forsyth 	/* move to end of next token */
49*3cd4f1d1SCharles Forsyth 	quoting = 0;
50*3cd4f1d1SCharles Forsyth 	while(*t!='\0' && (quoting || utfrune(sep, *t)==nil)){
51*3cd4f1d1SCharles Forsyth 		if(*t != '\''){
52*3cd4f1d1SCharles Forsyth 			t++;
53*3cd4f1d1SCharles Forsyth 			continue;
54*3cd4f1d1SCharles Forsyth 		}
55*3cd4f1d1SCharles Forsyth 		/* *t is a quote */
56*3cd4f1d1SCharles Forsyth 		if(!quoting){
57*3cd4f1d1SCharles Forsyth 			quoting = 1;
58*3cd4f1d1SCharles Forsyth 			t++;
59*3cd4f1d1SCharles Forsyth 			continue;
60*3cd4f1d1SCharles Forsyth 		}
61*3cd4f1d1SCharles Forsyth 		/* quoting and we're on a quote */
62*3cd4f1d1SCharles Forsyth 		if(t[1] != '\''){
63*3cd4f1d1SCharles Forsyth 			/* end of quoted section; absorb closing quote */
64*3cd4f1d1SCharles Forsyth 			t++;
65*3cd4f1d1SCharles Forsyth 			quoting = 0;
66*3cd4f1d1SCharles Forsyth 			continue;
67*3cd4f1d1SCharles Forsyth 		}
68*3cd4f1d1SCharles Forsyth 		/* doubled quote; fold one quote into two */
69*3cd4f1d1SCharles Forsyth 		t += 2;
70*3cd4f1d1SCharles Forsyth 	}
71*3cd4f1d1SCharles Forsyth 	return t;
72*3cd4f1d1SCharles Forsyth }
73*3cd4f1d1SCharles Forsyth 
74*3cd4f1d1SCharles Forsyth int
gettokens(char * s,char ** args,int maxargs,char * sep)75*3cd4f1d1SCharles Forsyth gettokens(char *s, char **args, int maxargs, char *sep)
76*3cd4f1d1SCharles Forsyth {
77*3cd4f1d1SCharles Forsyth 	int nargs;
78*3cd4f1d1SCharles Forsyth 
79*3cd4f1d1SCharles Forsyth 	for(nargs=0; nargs<maxargs; nargs++){
80*3cd4f1d1SCharles Forsyth 		while(*s!='\0' && utfrune(sep, *s)!=nil)
81*3cd4f1d1SCharles Forsyth 			*s++ = '\0';
82*3cd4f1d1SCharles Forsyth 		if(*s == '\0')
83*3cd4f1d1SCharles Forsyth 			break;
84*3cd4f1d1SCharles Forsyth 		args[nargs] = s;
85*3cd4f1d1SCharles Forsyth 		s = etoken(s, sep);
86*3cd4f1d1SCharles Forsyth 	}
87*3cd4f1d1SCharles Forsyth 
88*3cd4f1d1SCharles Forsyth 	return nargs;
89*3cd4f1d1SCharles Forsyth }
90*3cd4f1d1SCharles Forsyth 
9137da2899SCharles.Forsyth int
tokenize(char * s,char ** args,int maxargs)9237da2899SCharles.Forsyth tokenize(char *s, char **args, int maxargs)
9337da2899SCharles.Forsyth {
9437da2899SCharles.Forsyth 	int nargs;
9537da2899SCharles.Forsyth 
9637da2899SCharles.Forsyth 	for(nargs=0; nargs<maxargs; nargs++){
9737da2899SCharles.Forsyth 		while(*s!='\0' && utfrune(qsep, *s)!=nil)
9837da2899SCharles.Forsyth 			s++;
9937da2899SCharles.Forsyth 		if(*s == '\0')
10037da2899SCharles.Forsyth 			break;
10137da2899SCharles.Forsyth 		args[nargs] = s;
102*3cd4f1d1SCharles Forsyth 		s = qtoken(s, qsep);
10337da2899SCharles.Forsyth 	}
10437da2899SCharles.Forsyth 
10537da2899SCharles.Forsyth 	return nargs;
10637da2899SCharles.Forsyth }
107