#include <stdio.h>
#include <string.h>

/* Size in bytes of a charset-name buffer, including the terminating NUL. */
#define CHARSET_MAX 41

/*
 * Scan the next token from the NUL-terminated string at *pp.
 *
 * Tokens are:
 *   - one of the single characters ';', '/' or '=';
 *   - a double-quoted string (backslash escapes the following character);
 *   - an atom: a run of other characters ended by whitespace, '(', '"',
 *     one of the single-character tokens, or the end of the input.
 * Whitespace and parenthesised comments, which may nest, are skipped.
 *
 * Returns a pointer to the first character of the token (the opening quote
 * for a quoted string) and advances *pp to just past the token: past the
 * single character, past the closing quote, or onto the character that
 * ended an atom.
 *
 * Returns 0 when no token is available: end of input, an unterminated
 * string or comment, a backslash that is the last character, or a ')'
 * seen outside any comment and outside a quoted string.
 */
static const char *
getTok(const char **pp)
{
  /* inComment means one level of comment nesting, inComment + 1 means two
     levels, and so on; every state greater than init is a comment depth. */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;

  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom that runs up to the end of the input is still a complete
         token.  *pp is left on the terminator, so the next call returns 0. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Opens a comment, or nests one level deeper inside a comment. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state;
      else if (state != inString)
        return 0; /* ')' with no comment open */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      /* The delimiter is itself a one-character token. */
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      /* Skip the escaped character, whatever it is. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp; /* consume the closing quote */
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        break; /* quotes inside a comment are ignored */
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}

/*
 * Compare the token [start, end) with the NUL-terminated string key,
 * ignoring the case of ASCII letters in the token.
 *
 * key must be lowercase ASCII.
 *
 * Returns 1 when the token has exactly the length of key and every
 * character matches, 0 otherwise.  A null start (no token) never matches.
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    const char k = *key;

    /* Token is longer than key.  Stopping here also keeps key from being
       advanced past its terminator. */
    if (k == '\0')
      return 0;
    if (*start == k)
      continue;
    /* Accept the uppercase form, but only where key holds a letter. */
    if (k >= 'a' && k <= 'z' && *start == 'A' + (k - 'a'))
      continue;
    return 0;
  }
  /* Token exhausted: it matches only if key is exhausted too. */
  return *key == '\0';
}

83*1230fdc1SLionel Sambuc void
getXMLCharset(const char * buf,char * charset)84*1230fdc1SLionel Sambuc getXMLCharset(const char *buf, char *charset)
85*1230fdc1SLionel Sambuc {
86*1230fdc1SLionel Sambuc const char *next, *p;
87*1230fdc1SLionel Sambuc
88*1230fdc1SLionel Sambuc charset[0] = '\0';
89*1230fdc1SLionel Sambuc next = buf;
90*1230fdc1SLionel Sambuc p = getTok(&next);
91*1230fdc1SLionel Sambuc if (matchkey(p, next, "text"))
92*1230fdc1SLionel Sambuc strcpy(charset, "us-ascii");
93*1230fdc1SLionel Sambuc else if (!matchkey(p, next, "application"))
94*1230fdc1SLionel Sambuc return;
95*1230fdc1SLionel Sambuc p = getTok(&next);
96*1230fdc1SLionel Sambuc if (!p || *p != '/')
97*1230fdc1SLionel Sambuc return;
98*1230fdc1SLionel Sambuc p = getTok(&next);
99*1230fdc1SLionel Sambuc if (matchkey(p, next, "xml"))
100*1230fdc1SLionel Sambuc isXml = 1;
101*1230fdc1SLionel Sambuc p = getTok(&next);
102*1230fdc1SLionel Sambuc while (p) {
103*1230fdc1SLionel Sambuc if (*p == ';') {
104*1230fdc1SLionel Sambuc p = getTok(&next);
105*1230fdc1SLionel Sambuc if (matchkey(p, next, "charset")) {
106*1230fdc1SLionel Sambuc p = getTok(&next);
107*1230fdc1SLionel Sambuc if (p && *p == '=') {
108*1230fdc1SLionel Sambuc p = getTok(&next);
109*1230fdc1SLionel Sambuc if (p) {
110*1230fdc1SLionel Sambuc char *s = charset;
111*1230fdc1SLionel Sambuc if (*p == '"') {
112*1230fdc1SLionel Sambuc while (++p != next - 1) {
113*1230fdc1SLionel Sambuc if (*p == '\\')
114*1230fdc1SLionel Sambuc ++p;
115*1230fdc1SLionel Sambuc if (s == charset + CHARSET_MAX - 1) {
116*1230fdc1SLionel Sambuc charset[0] = '\0';
117*1230fdc1SLionel Sambuc break;
118*1230fdc1SLionel Sambuc }
119*1230fdc1SLionel Sambuc *s++ = *p;
120*1230fdc1SLionel Sambuc }
121*1230fdc1SLionel Sambuc *s++ = '\0';
122*1230fdc1SLionel Sambuc }
123*1230fdc1SLionel Sambuc else {
124*1230fdc1SLionel Sambuc if (next - p > CHARSET_MAX - 1)
125*1230fdc1SLionel Sambuc break;
126*1230fdc1SLionel Sambuc while (p != next)
127*1230fdc1SLionel Sambuc *s++ = *p++;
128*1230fdc1SLionel Sambuc *s = 0;
129*1230fdc1SLionel Sambuc break;
130*1230fdc1SLionel Sambuc }
131*1230fdc1SLionel Sambuc }
132*1230fdc1SLionel Sambuc }
133*1230fdc1SLionel Sambuc }
134*1230fdc1SLionel Sambuc }
135*1230fdc1SLionel Sambuc else
136*1230fdc1SLionel Sambuc p = getTok(&next);
137*1230fdc1SLionel Sambuc }
138*1230fdc1SLionel Sambuc }
139*1230fdc1SLionel Sambuc
140*1230fdc1SLionel Sambuc int
main(int argc,char ** argv)141*1230fdc1SLionel Sambuc main(int argc, char **argv)
142*1230fdc1SLionel Sambuc {
143*1230fdc1SLionel Sambuc char buf[CHARSET_MAX];
144*1230fdc1SLionel Sambuc getXMLCharset(argv[1], buf);
145*1230fdc1SLionel Sambuc printf("charset = \"%s\"\n", buf);
146*1230fdc1SLionel Sambuc return 0;
147*1230fdc1SLionel Sambuc }
148