/* xref: /minix3/external/mit/expat/dist/xmlwf/xmlmime.c (revision 1230fdc108a70388f87f1b3abdb6731e789a6d94) */
#include <string.h>
#include "xmlmime.h"

/* Scan the next lexical token of a MIME header value.

   pp  in/out cursor: *pp is the position to start scanning from and is
       advanced as input is consumed.

   Whitespace (' ', '\t', '\r', '\n') and parenthesised comments, which
   may nest, are skipped while no token has been started.  A backslash
   makes the scanner step over the following character without
   interpreting it; in the initial state that character does not start
   an atom.

   Returns one of:
     - a pointer to a special character (';', '/' or '='), with *pp left
       just past it;
     - a pointer to the opening '"' of a quoted string, with *pp left
       just past the closing '"';
     - a pointer to the first character of an atom, with *pp left on the
       character that ended it (whitespace, '(', ';', '/', '=', '"' or
       the terminating NUL);
     - NULL when the input ends before a token starts, when a quoted
       string or comment is unterminated, when a backslash is followed
       by NUL, or when a ')' is seen outside a comment or quoted string.
*/
static const char *
getTok(const char **pp)
{
  /* inComment means one level of nesting; inComment+1 means two levels etc */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = NULL;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* End of input terminates an atom; anything else is incomplete. */
      if (state == inAtom)
        return tokStart;
      return NULL;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Open a comment, or nest one level deeper; literal inside a string. */
      if (state != inString)
        state++;
      break;
    case ')':
      /* state > init means we are inside at least one comment level. */
      if (state > init)
        --state;
      else if (state != inString)
        return NULL;
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      /* A special is a one-character token: return it and step past it. */
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      /* Step onto the escaped character; the ++*pp below steps over it. */
      ++*pp;
      if (**pp == '\0')
        return NULL;
      break;
    case '"':
      switch (state) {
      case inString:
        /* Closing quote: consume it so *pp ends just past the string. */
        ++*pp;
        return tokStart;
      case inAtom:
        /* The quote ends the atom and is left for the next call. */
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}

/* key must be lowercase ASCII */

/* Case-insensitively compare the token [start, end) with key.

   start  first character of the token, or NULL (treated as "no match").
   end    one past the last character of the token.
   key    NUL-terminated keyword; expected to be lowercase ASCII letters.

   Returns 1 when the token has exactly the length of key and every
   character equals the key character or its ASCII uppercase form;
   returns 0 otherwise.

   The scan stops as soon as key is exhausted, so a token longer than
   key can never cause a read beyond key's terminating NUL.
*/
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* Token is longer than key: no match, and do not walk past the NUL. */
    if (*key == '\0')
      return 0;
    if (*start == *key)
      continue;
    /* Only lowercase letters have an uppercase counterpart to accept. */
    if (*key >= 'a' && *key <= 'z' && *start == 'A' + (*key - 'a'))
      continue;
    return 0;
  }
  /* Token exhausted: it matches only if key is exhausted as well. */
  return *key == '\0';
}

87*1230fdc1SLionel Sambuc void
getXMLCharset(const char * buf,char * charset)88*1230fdc1SLionel Sambuc getXMLCharset(const char *buf, char *charset)
89*1230fdc1SLionel Sambuc {
90*1230fdc1SLionel Sambuc   const char *next, *p;
91*1230fdc1SLionel Sambuc 
92*1230fdc1SLionel Sambuc   charset[0] = '\0';
93*1230fdc1SLionel Sambuc   next = buf;
94*1230fdc1SLionel Sambuc   p = getTok(&next);
95*1230fdc1SLionel Sambuc   if (matchkey(p, next, "text"))
96*1230fdc1SLionel Sambuc     strcpy(charset, "us-ascii");
97*1230fdc1SLionel Sambuc   else if (!matchkey(p, next, "application"))
98*1230fdc1SLionel Sambuc     return;
99*1230fdc1SLionel Sambuc   p = getTok(&next);
100*1230fdc1SLionel Sambuc   if (!p || *p != '/')
101*1230fdc1SLionel Sambuc     return;
102*1230fdc1SLionel Sambuc   p = getTok(&next);
103*1230fdc1SLionel Sambuc #if 0
104*1230fdc1SLionel Sambuc   if (!matchkey(p, next, "xml") && charset[0] == '\0')
105*1230fdc1SLionel Sambuc     return;
106*1230fdc1SLionel Sambuc #endif
107*1230fdc1SLionel Sambuc   p = getTok(&next);
108*1230fdc1SLionel Sambuc   while (p) {
109*1230fdc1SLionel Sambuc     if (*p == ';') {
110*1230fdc1SLionel Sambuc       p = getTok(&next);
111*1230fdc1SLionel Sambuc       if (matchkey(p, next, "charset")) {
112*1230fdc1SLionel Sambuc         p = getTok(&next);
113*1230fdc1SLionel Sambuc         if (p && *p == '=') {
114*1230fdc1SLionel Sambuc           p = getTok(&next);
115*1230fdc1SLionel Sambuc           if (p) {
116*1230fdc1SLionel Sambuc             char *s = charset;
117*1230fdc1SLionel Sambuc             if (*p == '"') {
118*1230fdc1SLionel Sambuc               while (++p != next - 1) {
119*1230fdc1SLionel Sambuc                 if (*p == '\\')
120*1230fdc1SLionel Sambuc                   ++p;
121*1230fdc1SLionel Sambuc                 if (s == charset + CHARSET_MAX - 1) {
122*1230fdc1SLionel Sambuc                   charset[0] = '\0';
123*1230fdc1SLionel Sambuc                   break;
124*1230fdc1SLionel Sambuc                 }
125*1230fdc1SLionel Sambuc                 *s++ = *p;
126*1230fdc1SLionel Sambuc               }
127*1230fdc1SLionel Sambuc               *s++ = '\0';
128*1230fdc1SLionel Sambuc             }
129*1230fdc1SLionel Sambuc             else {
130*1230fdc1SLionel Sambuc               if (next - p > CHARSET_MAX - 1)
131*1230fdc1SLionel Sambuc                 break;
132*1230fdc1SLionel Sambuc               while (p != next)
133*1230fdc1SLionel Sambuc                 *s++ = *p++;
134*1230fdc1SLionel Sambuc               *s = 0;
135*1230fdc1SLionel Sambuc               break;
136*1230fdc1SLionel Sambuc             }
137*1230fdc1SLionel Sambuc           }
138*1230fdc1SLionel Sambuc         }
139*1230fdc1SLionel Sambuc         break;
140*1230fdc1SLionel Sambuc       }
141*1230fdc1SLionel Sambuc     }
142*1230fdc1SLionel Sambuc   else
143*1230fdc1SLionel Sambuc     p = getTok(&next);
144*1230fdc1SLionel Sambuc   }
145*1230fdc1SLionel Sambuc }
146*1230fdc1SLionel Sambuc 
#ifdef TEST

#include <stdio.h>

/* Manual test driver, built only when TEST is defined.

   Prints argv[1], runs getXMLCharset() on it and prints the resulting
   charset.  Returns 1 when no argument is given, 0 otherwise.
*/
int
main(int argc, char *argv[])
{
  char buf[CHARSET_MAX];
  if (argc <= 1)
    return 1;
  printf("%s\n", argv[1]);
  getXMLCharset(argv[1], buf);
  printf("charset=\"%s\"\n", buf);
  return 0;
}

#endif /* TEST */