#include <stddef.h>
#include <string.h>
#include "xmlmime.h"

/*
 * Scan the next lexical token of a MIME header value.
 *
 * On entry *pp points at the text still to be scanned; on return *pp has
 * been advanced so that the token occupies [return value, *pp).
 *
 * Tokens are:
 *   - an atom: a run of characters ended by whitespace, '(', ';', '/',
 *     '=', '"' or the end of the string (the terminator is not consumed);
 *   - a double-quoted string: the returned pointer is the opening quote
 *     and *pp ends up just past the closing quote;
 *   - one of the single characters ';', '/' or '='.
 *
 * Whitespace between tokens and parenthesised comments (which may nest)
 * are skipped.  A backslash makes the scanner step over the following
 * character without interpreting it.
 *
 * Returns NULL when the string ends with no atom in progress (this
 * includes an unterminated quoted string or comment), when a ')' appears
 * that neither closes a comment nor lies inside a quoted string, or when
 * a backslash is the last character of the string.
 */
static const char *
getTok(const char **pp)
{
  /* inComment means one level of nesting; inComment+1 means two levels etc */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = NULL;

  for (;;) {
    switch (**pp) {
    case '\0':
      /* Only an atom may be ended by the end of the input. */
      return state == inAtom ? tokStart : NULL;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* init -> inComment, or one level deeper if already in a comment */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state;            /* leave one comment level */
      else if (state != inString)
        return NULL;        /* ')' with no comment open */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++;     /* single-character token */
      break;
    case '\\':
      /* Step onto the escaped character; the ++*pp below steps past it. */
      ++*pp;
      if (**pp == '\0')
        return NULL;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;              /* consume the closing quote */
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* inside a comment: quotes carry no meaning */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}

/*
 * Compare the token [start, end) with the NUL-terminated string key,
 * ignoring case for the letters 'a'..'z' of key.
 *
 * key must be lowercase ASCII.
 *
 * Returns 1 when the token and key have the same length and every
 * character matches, 0 otherwise (including when start is NULL).
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    const char k = *key;

    /* Token is longer than key: stop before stepping past key's NUL. */
    if (k == '\0')
      return 0;
    if (*start == k)
      continue;
    /* Only lowercase letters have an uppercase form to try. */
    if (k < 'a' || k > 'z' || *start != 'A' + (k - 'a'))
      return 0;
  }
  /* Token exhausted: it matches only if key is exhausted too. */
  return *key == '\0';
}

87*1230fdc1SLionel Sambuc void
getXMLCharset(const char * buf,char * charset)88*1230fdc1SLionel Sambuc getXMLCharset(const char *buf, char *charset)
89*1230fdc1SLionel Sambuc {
90*1230fdc1SLionel Sambuc const char *next, *p;
91*1230fdc1SLionel Sambuc
92*1230fdc1SLionel Sambuc charset[0] = '\0';
93*1230fdc1SLionel Sambuc next = buf;
94*1230fdc1SLionel Sambuc p = getTok(&next);
95*1230fdc1SLionel Sambuc if (matchkey(p, next, "text"))
96*1230fdc1SLionel Sambuc strcpy(charset, "us-ascii");
97*1230fdc1SLionel Sambuc else if (!matchkey(p, next, "application"))
98*1230fdc1SLionel Sambuc return;
99*1230fdc1SLionel Sambuc p = getTok(&next);
100*1230fdc1SLionel Sambuc if (!p || *p != '/')
101*1230fdc1SLionel Sambuc return;
102*1230fdc1SLionel Sambuc p = getTok(&next);
103*1230fdc1SLionel Sambuc #if 0
104*1230fdc1SLionel Sambuc if (!matchkey(p, next, "xml") && charset[0] == '\0')
105*1230fdc1SLionel Sambuc return;
106*1230fdc1SLionel Sambuc #endif
107*1230fdc1SLionel Sambuc p = getTok(&next);
108*1230fdc1SLionel Sambuc while (p) {
109*1230fdc1SLionel Sambuc if (*p == ';') {
110*1230fdc1SLionel Sambuc p = getTok(&next);
111*1230fdc1SLionel Sambuc if (matchkey(p, next, "charset")) {
112*1230fdc1SLionel Sambuc p = getTok(&next);
113*1230fdc1SLionel Sambuc if (p && *p == '=') {
114*1230fdc1SLionel Sambuc p = getTok(&next);
115*1230fdc1SLionel Sambuc if (p) {
116*1230fdc1SLionel Sambuc char *s = charset;
117*1230fdc1SLionel Sambuc if (*p == '"') {
118*1230fdc1SLionel Sambuc while (++p != next - 1) {
119*1230fdc1SLionel Sambuc if (*p == '\\')
120*1230fdc1SLionel Sambuc ++p;
121*1230fdc1SLionel Sambuc if (s == charset + CHARSET_MAX - 1) {
122*1230fdc1SLionel Sambuc charset[0] = '\0';
123*1230fdc1SLionel Sambuc break;
124*1230fdc1SLionel Sambuc }
125*1230fdc1SLionel Sambuc *s++ = *p;
126*1230fdc1SLionel Sambuc }
127*1230fdc1SLionel Sambuc *s++ = '\0';
128*1230fdc1SLionel Sambuc }
129*1230fdc1SLionel Sambuc else {
130*1230fdc1SLionel Sambuc if (next - p > CHARSET_MAX - 1)
131*1230fdc1SLionel Sambuc break;
132*1230fdc1SLionel Sambuc while (p != next)
133*1230fdc1SLionel Sambuc *s++ = *p++;
134*1230fdc1SLionel Sambuc *s = 0;
135*1230fdc1SLionel Sambuc break;
136*1230fdc1SLionel Sambuc }
137*1230fdc1SLionel Sambuc }
138*1230fdc1SLionel Sambuc }
139*1230fdc1SLionel Sambuc break;
140*1230fdc1SLionel Sambuc }
141*1230fdc1SLionel Sambuc }
142*1230fdc1SLionel Sambuc else
143*1230fdc1SLionel Sambuc p = getTok(&next);
144*1230fdc1SLionel Sambuc }
145*1230fdc1SLionel Sambuc }
146*1230fdc1SLionel Sambuc
#ifdef TEST

#include <stdio.h>

/*
 * Test driver, built only when TEST is defined.
 *
 * Prints argv[1], passes it to getXMLCharset() and prints the charset it
 * produced.  Returns 1 when no argument was given, 0 otherwise.
 */
int
main(int argc, char *argv[])
{
  char buf[CHARSET_MAX];

  if (argc <= 1)
    return 1;
  printf("%s\n", argv[1]);
  getXMLCharset(argv[1], buf);
  printf("charset=\"%s\"\n", buf);
  return 0;
}

#endif /* TEST */