xref: /minix3/external/mit/expat/dist/xmlwf/ct.c (revision 1230fdc108a70388f87f1b3abdb6731e789a6d94)
1*1230fdc1SLionel Sambuc #define CHARSET_MAX 41
2*1230fdc1SLionel Sambuc 
/*
 * Scan the next token of a MIME-style header value, starting at *pp.
 *
 * Whitespace is skipped, and so is text inside parentheses (comments,
 * which may nest).  A token is one of:
 *   - a single ';', '/' or '=' character;
 *   - a quoted string, including its opening and closing '"';
 *   - an atom: a run of other characters ended by whitespace, '(',
 *     ';', '/', '=', '"' or the end of the input.
 *
 * A backslash causes the character after it to be skipped without being
 * interpreted, whatever the current state is.
 *
 * Returns a pointer to the first character of the token and leaves *pp
 * just past its last character, so the token is the range [result, *pp).
 * Returns 0 when no token can be produced: the input ended (outside an
 * atom), a ')' was seen with no open comment and outside a quoted
 * string, or a backslash was the last character of the input.
 */
static const char *
getTok(const char **pp)
{
  /* Values above inComment are used as the nesting depth of comments. */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;

  for (;;) {
    switch (**pp) {
    case '\0':
      /* The end of the input also ends an atom.  *pp stays on the
         terminator, so the following call reports end of input. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Open a comment, or go one level deeper in a nested one. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state;                /* close one comment level */
      else if (state != inString)
        return 0;               /* ')' without a matching '(' */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      /* Outside strings and comments these are one-character tokens. */
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      /* Step onto the escaped character; the increment at the bottom of
         the loop then steps past it. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;                  /* the token includes the closing quote */
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* Inside a comment a quote has no special meaning. */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
69*1230fdc1SLionel Sambuc 
/*
 * Compare the token [start, end) with the NUL-terminated string key,
 * ignoring the case of the token.
 *
 * key must be lowercase ASCII letters: each token character is accepted
 * when it equals the key character or the uppercase letter derived from
 * it.
 *
 * Returns 1 when the token and key have the same length and every
 * character matches, 0 otherwise.  A null start (no token) never matches.
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* The token is longer than key.  Stop here: the uppercase mapping
       below is meaningless for '\0', and continuing would read past the
       end of key. */
    if (*key == '\0')
      return 0;
    if (*start != *key && *start != 'A' + (*key - 'a'))
      return 0;
  }
  /* The token is exhausted; it matches only if key is exhausted too. */
  return *key == '\0';
}
82*1230fdc1SLionel Sambuc 
83*1230fdc1SLionel Sambuc void
getXMLCharset(const char * buf,char * charset)84*1230fdc1SLionel Sambuc getXMLCharset(const char *buf, char *charset)
85*1230fdc1SLionel Sambuc {
86*1230fdc1SLionel Sambuc   const char *next, *p;
87*1230fdc1SLionel Sambuc 
88*1230fdc1SLionel Sambuc   charset[0] = '\0';
89*1230fdc1SLionel Sambuc   next = buf;
90*1230fdc1SLionel Sambuc   p = getTok(&next);
91*1230fdc1SLionel Sambuc   if (matchkey(p, next, "text"))
92*1230fdc1SLionel Sambuc     strcpy(charset, "us-ascii");
93*1230fdc1SLionel Sambuc   else if (!matchkey(p, next, "application"))
94*1230fdc1SLionel Sambuc     return;
95*1230fdc1SLionel Sambuc   p = getTok(&next);
96*1230fdc1SLionel Sambuc   if (!p || *p != '/')
97*1230fdc1SLionel Sambuc     return;
98*1230fdc1SLionel Sambuc   p = getTok(&next);
99*1230fdc1SLionel Sambuc   if (matchkey(p, next, "xml"))
100*1230fdc1SLionel Sambuc     isXml = 1;
101*1230fdc1SLionel Sambuc   p = getTok(&next);
102*1230fdc1SLionel Sambuc   while (p) {
103*1230fdc1SLionel Sambuc     if (*p == ';') {
104*1230fdc1SLionel Sambuc       p = getTok(&next);
105*1230fdc1SLionel Sambuc       if (matchkey(p, next, "charset")) {
106*1230fdc1SLionel Sambuc         p = getTok(&next);
107*1230fdc1SLionel Sambuc         if (p && *p == '=') {
108*1230fdc1SLionel Sambuc           p = getTok(&next);
109*1230fdc1SLionel Sambuc           if (p) {
110*1230fdc1SLionel Sambuc             char *s = charset;
111*1230fdc1SLionel Sambuc             if (*p == '"') {
112*1230fdc1SLionel Sambuc               while (++p != next - 1) {
113*1230fdc1SLionel Sambuc                 if (*p == '\\')
114*1230fdc1SLionel Sambuc                   ++p;
115*1230fdc1SLionel Sambuc                 if (s == charset + CHARSET_MAX - 1) {
116*1230fdc1SLionel Sambuc                   charset[0] = '\0';
117*1230fdc1SLionel Sambuc                   break;
118*1230fdc1SLionel Sambuc                 }
119*1230fdc1SLionel Sambuc                 *s++ = *p;
120*1230fdc1SLionel Sambuc               }
121*1230fdc1SLionel Sambuc               *s++ = '\0';
122*1230fdc1SLionel Sambuc             }
123*1230fdc1SLionel Sambuc             else {
124*1230fdc1SLionel Sambuc               if (next - p > CHARSET_MAX - 1)
125*1230fdc1SLionel Sambuc                 break;
126*1230fdc1SLionel Sambuc               while (p != next)
127*1230fdc1SLionel Sambuc                 *s++ = *p++;
128*1230fdc1SLionel Sambuc               *s = 0;
129*1230fdc1SLionel Sambuc               break;
130*1230fdc1SLionel Sambuc             }
131*1230fdc1SLionel Sambuc           }
132*1230fdc1SLionel Sambuc         }
133*1230fdc1SLionel Sambuc       }
134*1230fdc1SLionel Sambuc     }
135*1230fdc1SLionel Sambuc   else
136*1230fdc1SLionel Sambuc     p = getTok(&next);
137*1230fdc1SLionel Sambuc   }
138*1230fdc1SLionel Sambuc }
139*1230fdc1SLionel Sambuc 
140*1230fdc1SLionel Sambuc int
main(int argc,char ** argv)141*1230fdc1SLionel Sambuc main(int argc, char **argv)
142*1230fdc1SLionel Sambuc {
143*1230fdc1SLionel Sambuc   char buf[CHARSET_MAX];
144*1230fdc1SLionel Sambuc   getXMLCharset(argv[1], buf);
145*1230fdc1SLionel Sambuc   printf("charset = \"%s\"\n", buf);
146*1230fdc1SLionel Sambuc   return 0;
147*1230fdc1SLionel Sambuc }
148