1*1230fdc1SLionel Sambuc /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2*1230fdc1SLionel Sambuc See the file COPYING for copying permission.
3*1230fdc1SLionel Sambuc */
4*1230fdc1SLionel Sambuc
5*1230fdc1SLionel Sambuc /* This file is included! */
6*1230fdc1SLionel Sambuc #ifdef XML_TOK_IMPL_C
7*1230fdc1SLionel Sambuc
/* Fallback validity predicate.  When the including file has not supplied
   its own IS_INVALID_CHAR, every multi-byte sequence is treated as valid:
   the test expands to the constant 0 and ignores all three arguments. */
#ifndef IS_INVALID_CHAR
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
11*1230fdc1SLionel Sambuc
/* Switch case for the lead byte of an n-byte character (BT_LEAD<n>).
   NOTE: relies on `enc` and `end` being in scope at the expansion site;
   they are not macro parameters.
     - fewer than n bytes left before `end`: the enclosing function
       returns XML_TOK_PARTIAL_CHAR;
     - IS_INVALID_CHAR rejects the sequence: *nextTokPtr is set to the
       offending character and the enclosing function returns
       XML_TOK_INVALID;
     - otherwise ptr is advanced by n and the switch is left. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
  case BT_LEAD ## n: \
    if (end - (ptr) < (n)) \
      return XML_TOK_PARTIAL_CHAR; \
    if (IS_INVALID_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += (n); \
    break;
22*1230fdc1SLionel Sambuc
/* Switch cases shared by scanners that accept arbitrary character data.
   Lead bytes of 2-, 3- and 4-byte characters are handled by
   INVALID_LEAD_CASE; the byte types BT_NONXML, BT_MALFORM and BT_TRAIL
   are always rejected: *nextTokPtr is set to ptr and the enclosing
   function returns XML_TOK_INVALID. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_NONXML: \
  case BT_MALFORM: \
  case BT_TRAIL: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
32*1230fdc1SLionel Sambuc
/* Switch case for an n-byte character (BT_LEAD<n>) inside a name.
     - fewer than n bytes left before end: the enclosing function returns
       XML_TOK_PARTIAL_CHAR;
     - IS_NAME_CHAR rejects the character: *nextTokPtr is set to it and
       the enclosing function returns XML_TOK_INVALID;
     - otherwise ptr is advanced by n and the switch is left. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
  case BT_LEAD ## n: \
    if ((end) - (ptr) < (n)) \
      return XML_TOK_PARTIAL_CHAR; \
    if (!IS_NAME_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += (n); \
    break;
43*1230fdc1SLionel Sambuc
/* Switch cases that consume one name character.
     - BT_NONASCII is checked with IS_NAME_CHAR_MINBPC; on failure
       *nextTokPtr is set to ptr and the enclosing function returns
       XML_TOK_INVALID, on success it shares the code below;
     - BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME and BT_MINUS are accepted
       as-is and ptr advances by MINBPC(enc);
     - multi-byte lead bytes are delegated to CHECK_NAME_CASE. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
60*1230fdc1SLionel Sambuc
/* Switch case for an n-byte character (BT_LEAD<n>) at the start of a name.
     - fewer than n bytes left before end: the enclosing function returns
       XML_TOK_PARTIAL_CHAR;
     - IS_NMSTRT_CHAR rejects the character: *nextTokPtr is set to it and
       the enclosing function returns XML_TOK_INVALID;
     - otherwise ptr is advanced by n and the switch is left. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
  case BT_LEAD ## n: \
    if ((end) - (ptr) < (n)) \
      return XML_TOK_PARTIAL_CHAR; \
    if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += (n); \
    break;
71*1230fdc1SLionel Sambuc
/* Switch cases that consume one name-start character.
     - BT_NONASCII is checked with IS_NMSTRT_CHAR_MINBPC; on failure
       *nextTokPtr is set to ptr and the enclosing function returns
       XML_TOK_INVALID, on success it shares the code below;
     - BT_NMSTRT and BT_HEX are accepted as-is and ptr advances by
       MINBPC(enc);
     - multi-byte lead bytes are delegated to CHECK_NMSTRT_CASE. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
85*1230fdc1SLionel Sambuc
/* Name-decoration hook for the functions defined below.  When the
   including file has not defined PREFIX, identifiers are used unchanged. */
#ifndef PREFIX
#define PREFIX(ident) ident
#endif
89*1230fdc1SLionel Sambuc
90*1230fdc1SLionel Sambuc /* ptr points to character following "<!-" */
91*1230fdc1SLionel Sambuc
92*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanComment)93*1230fdc1SLionel Sambuc PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
94*1230fdc1SLionel Sambuc const char *end, const char **nextTokPtr)
95*1230fdc1SLionel Sambuc {
96*1230fdc1SLionel Sambuc if (ptr != end) {
97*1230fdc1SLionel Sambuc if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
98*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
99*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
100*1230fdc1SLionel Sambuc }
101*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
102*1230fdc1SLionel Sambuc while (ptr != end) {
103*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
104*1230fdc1SLionel Sambuc INVALID_CASES(ptr, nextTokPtr)
105*1230fdc1SLionel Sambuc case BT_MINUS:
106*1230fdc1SLionel Sambuc if ((ptr += MINBPC(enc)) == end)
107*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
108*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
109*1230fdc1SLionel Sambuc if ((ptr += MINBPC(enc)) == end)
110*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
111*1230fdc1SLionel Sambuc if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
112*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
113*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
114*1230fdc1SLionel Sambuc }
115*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
116*1230fdc1SLionel Sambuc return XML_TOK_COMMENT;
117*1230fdc1SLionel Sambuc }
118*1230fdc1SLionel Sambuc break;
119*1230fdc1SLionel Sambuc default:
120*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
121*1230fdc1SLionel Sambuc break;
122*1230fdc1SLionel Sambuc }
123*1230fdc1SLionel Sambuc }
124*1230fdc1SLionel Sambuc }
125*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
126*1230fdc1SLionel Sambuc }
127*1230fdc1SLionel Sambuc
128*1230fdc1SLionel Sambuc /* ptr points to character following "<!" */
129*1230fdc1SLionel Sambuc
130*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanDecl)131*1230fdc1SLionel Sambuc PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
132*1230fdc1SLionel Sambuc const char *end, const char **nextTokPtr)
133*1230fdc1SLionel Sambuc {
134*1230fdc1SLionel Sambuc if (ptr == end)
135*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
136*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
137*1230fdc1SLionel Sambuc case BT_MINUS:
138*1230fdc1SLionel Sambuc return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
139*1230fdc1SLionel Sambuc case BT_LSQB:
140*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
141*1230fdc1SLionel Sambuc return XML_TOK_COND_SECT_OPEN;
142*1230fdc1SLionel Sambuc case BT_NMSTRT:
143*1230fdc1SLionel Sambuc case BT_HEX:
144*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
145*1230fdc1SLionel Sambuc break;
146*1230fdc1SLionel Sambuc default:
147*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
148*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
149*1230fdc1SLionel Sambuc }
150*1230fdc1SLionel Sambuc while (ptr != end) {
151*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
152*1230fdc1SLionel Sambuc case BT_PERCNT:
153*1230fdc1SLionel Sambuc if (ptr + MINBPC(enc) == end)
154*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
155*1230fdc1SLionel Sambuc /* don't allow <!ENTITY% foo "whatever"> */
156*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
157*1230fdc1SLionel Sambuc case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
158*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
159*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
160*1230fdc1SLionel Sambuc }
161*1230fdc1SLionel Sambuc /* fall through */
162*1230fdc1SLionel Sambuc case BT_S: case BT_CR: case BT_LF:
163*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
164*1230fdc1SLionel Sambuc return XML_TOK_DECL_OPEN;
165*1230fdc1SLionel Sambuc case BT_NMSTRT:
166*1230fdc1SLionel Sambuc case BT_HEX:
167*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
168*1230fdc1SLionel Sambuc break;
169*1230fdc1SLionel Sambuc default:
170*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
171*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
172*1230fdc1SLionel Sambuc }
173*1230fdc1SLionel Sambuc }
174*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
175*1230fdc1SLionel Sambuc }
176*1230fdc1SLionel Sambuc
177*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(checkPiTarget)178*1230fdc1SLionel Sambuc PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
179*1230fdc1SLionel Sambuc const char *end, int *tokPtr)
180*1230fdc1SLionel Sambuc {
181*1230fdc1SLionel Sambuc int upper = 0;
182*1230fdc1SLionel Sambuc *tokPtr = XML_TOK_PI;
183*1230fdc1SLionel Sambuc if (end - ptr != MINBPC(enc)*3)
184*1230fdc1SLionel Sambuc return 1;
185*1230fdc1SLionel Sambuc switch (BYTE_TO_ASCII(enc, ptr)) {
186*1230fdc1SLionel Sambuc case ASCII_x:
187*1230fdc1SLionel Sambuc break;
188*1230fdc1SLionel Sambuc case ASCII_X:
189*1230fdc1SLionel Sambuc upper = 1;
190*1230fdc1SLionel Sambuc break;
191*1230fdc1SLionel Sambuc default:
192*1230fdc1SLionel Sambuc return 1;
193*1230fdc1SLionel Sambuc }
194*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
195*1230fdc1SLionel Sambuc switch (BYTE_TO_ASCII(enc, ptr)) {
196*1230fdc1SLionel Sambuc case ASCII_m:
197*1230fdc1SLionel Sambuc break;
198*1230fdc1SLionel Sambuc case ASCII_M:
199*1230fdc1SLionel Sambuc upper = 1;
200*1230fdc1SLionel Sambuc break;
201*1230fdc1SLionel Sambuc default:
202*1230fdc1SLionel Sambuc return 1;
203*1230fdc1SLionel Sambuc }
204*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
205*1230fdc1SLionel Sambuc switch (BYTE_TO_ASCII(enc, ptr)) {
206*1230fdc1SLionel Sambuc case ASCII_l:
207*1230fdc1SLionel Sambuc break;
208*1230fdc1SLionel Sambuc case ASCII_L:
209*1230fdc1SLionel Sambuc upper = 1;
210*1230fdc1SLionel Sambuc break;
211*1230fdc1SLionel Sambuc default:
212*1230fdc1SLionel Sambuc return 1;
213*1230fdc1SLionel Sambuc }
214*1230fdc1SLionel Sambuc if (upper)
215*1230fdc1SLionel Sambuc return 0;
216*1230fdc1SLionel Sambuc *tokPtr = XML_TOK_XML_DECL;
217*1230fdc1SLionel Sambuc return 1;
218*1230fdc1SLionel Sambuc }
219*1230fdc1SLionel Sambuc
220*1230fdc1SLionel Sambuc /* ptr points to character following "<?" */
221*1230fdc1SLionel Sambuc
222*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanPi)223*1230fdc1SLionel Sambuc PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
224*1230fdc1SLionel Sambuc const char *end, const char **nextTokPtr)
225*1230fdc1SLionel Sambuc {
226*1230fdc1SLionel Sambuc int tok;
227*1230fdc1SLionel Sambuc const char *target = ptr;
228*1230fdc1SLionel Sambuc if (ptr == end)
229*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
230*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
231*1230fdc1SLionel Sambuc CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
232*1230fdc1SLionel Sambuc default:
233*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
234*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
235*1230fdc1SLionel Sambuc }
236*1230fdc1SLionel Sambuc while (ptr != end) {
237*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
238*1230fdc1SLionel Sambuc CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
239*1230fdc1SLionel Sambuc case BT_S: case BT_CR: case BT_LF:
240*1230fdc1SLionel Sambuc if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
241*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
242*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
243*1230fdc1SLionel Sambuc }
244*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
245*1230fdc1SLionel Sambuc while (ptr != end) {
246*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
247*1230fdc1SLionel Sambuc INVALID_CASES(ptr, nextTokPtr)
248*1230fdc1SLionel Sambuc case BT_QUEST:
249*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
250*1230fdc1SLionel Sambuc if (ptr == end)
251*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
252*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
253*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
254*1230fdc1SLionel Sambuc return tok;
255*1230fdc1SLionel Sambuc }
256*1230fdc1SLionel Sambuc break;
257*1230fdc1SLionel Sambuc default:
258*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
259*1230fdc1SLionel Sambuc break;
260*1230fdc1SLionel Sambuc }
261*1230fdc1SLionel Sambuc }
262*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
263*1230fdc1SLionel Sambuc case BT_QUEST:
264*1230fdc1SLionel Sambuc if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
265*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
266*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
267*1230fdc1SLionel Sambuc }
268*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
269*1230fdc1SLionel Sambuc if (ptr == end)
270*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
271*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
272*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
273*1230fdc1SLionel Sambuc return tok;
274*1230fdc1SLionel Sambuc }
275*1230fdc1SLionel Sambuc /* fall through */
276*1230fdc1SLionel Sambuc default:
277*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
278*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
279*1230fdc1SLionel Sambuc }
280*1230fdc1SLionel Sambuc }
281*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
282*1230fdc1SLionel Sambuc }
283*1230fdc1SLionel Sambuc
284*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanCdataSection)285*1230fdc1SLionel Sambuc PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
286*1230fdc1SLionel Sambuc const char *end, const char **nextTokPtr)
287*1230fdc1SLionel Sambuc {
288*1230fdc1SLionel Sambuc static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
289*1230fdc1SLionel Sambuc ASCII_T, ASCII_A, ASCII_LSQB };
290*1230fdc1SLionel Sambuc int i;
291*1230fdc1SLionel Sambuc /* CDATA[ */
292*1230fdc1SLionel Sambuc if (end - ptr < 6 * MINBPC(enc))
293*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
294*1230fdc1SLionel Sambuc for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
295*1230fdc1SLionel Sambuc if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
296*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
297*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
298*1230fdc1SLionel Sambuc }
299*1230fdc1SLionel Sambuc }
300*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
301*1230fdc1SLionel Sambuc return XML_TOK_CDATA_SECT_OPEN;
302*1230fdc1SLionel Sambuc }
303*1230fdc1SLionel Sambuc
304*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(cdataSectionTok)305*1230fdc1SLionel Sambuc PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
306*1230fdc1SLionel Sambuc const char *end, const char **nextTokPtr)
307*1230fdc1SLionel Sambuc {
308*1230fdc1SLionel Sambuc if (ptr == end)
309*1230fdc1SLionel Sambuc return XML_TOK_NONE;
310*1230fdc1SLionel Sambuc if (MINBPC(enc) > 1) {
311*1230fdc1SLionel Sambuc size_t n = end - ptr;
312*1230fdc1SLionel Sambuc if (n & (MINBPC(enc) - 1)) {
313*1230fdc1SLionel Sambuc n &= ~(MINBPC(enc) - 1);
314*1230fdc1SLionel Sambuc if (n == 0)
315*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
316*1230fdc1SLionel Sambuc end = ptr + n;
317*1230fdc1SLionel Sambuc }
318*1230fdc1SLionel Sambuc }
319*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
320*1230fdc1SLionel Sambuc case BT_RSQB:
321*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
322*1230fdc1SLionel Sambuc if (ptr == end)
323*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
324*1230fdc1SLionel Sambuc if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
325*1230fdc1SLionel Sambuc break;
326*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
327*1230fdc1SLionel Sambuc if (ptr == end)
328*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
329*1230fdc1SLionel Sambuc if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
330*1230fdc1SLionel Sambuc ptr -= MINBPC(enc);
331*1230fdc1SLionel Sambuc break;
332*1230fdc1SLionel Sambuc }
333*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
334*1230fdc1SLionel Sambuc return XML_TOK_CDATA_SECT_CLOSE;
335*1230fdc1SLionel Sambuc case BT_CR:
336*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
337*1230fdc1SLionel Sambuc if (ptr == end)
338*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
339*1230fdc1SLionel Sambuc if (BYTE_TYPE(enc, ptr) == BT_LF)
340*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
341*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
342*1230fdc1SLionel Sambuc return XML_TOK_DATA_NEWLINE;
343*1230fdc1SLionel Sambuc case BT_LF:
344*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
345*1230fdc1SLionel Sambuc return XML_TOK_DATA_NEWLINE;
346*1230fdc1SLionel Sambuc INVALID_CASES(ptr, nextTokPtr)
347*1230fdc1SLionel Sambuc default:
348*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
349*1230fdc1SLionel Sambuc break;
350*1230fdc1SLionel Sambuc }
351*1230fdc1SLionel Sambuc while (ptr != end) {
352*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
353*1230fdc1SLionel Sambuc #define LEAD_CASE(n) \
354*1230fdc1SLionel Sambuc case BT_LEAD ## n: \
355*1230fdc1SLionel Sambuc if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
356*1230fdc1SLionel Sambuc *nextTokPtr = ptr; \
357*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS; \
358*1230fdc1SLionel Sambuc } \
359*1230fdc1SLionel Sambuc ptr += n; \
360*1230fdc1SLionel Sambuc break;
361*1230fdc1SLionel Sambuc LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
362*1230fdc1SLionel Sambuc #undef LEAD_CASE
363*1230fdc1SLionel Sambuc case BT_NONXML:
364*1230fdc1SLionel Sambuc case BT_MALFORM:
365*1230fdc1SLionel Sambuc case BT_TRAIL:
366*1230fdc1SLionel Sambuc case BT_CR:
367*1230fdc1SLionel Sambuc case BT_LF:
368*1230fdc1SLionel Sambuc case BT_RSQB:
369*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
370*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
371*1230fdc1SLionel Sambuc default:
372*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
373*1230fdc1SLionel Sambuc break;
374*1230fdc1SLionel Sambuc }
375*1230fdc1SLionel Sambuc }
376*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
377*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
378*1230fdc1SLionel Sambuc }
379*1230fdc1SLionel Sambuc
380*1230fdc1SLionel Sambuc /* ptr points to character following "</" */
381*1230fdc1SLionel Sambuc
382*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanEndTag)383*1230fdc1SLionel Sambuc PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
384*1230fdc1SLionel Sambuc const char *end, const char **nextTokPtr)
385*1230fdc1SLionel Sambuc {
386*1230fdc1SLionel Sambuc if (ptr == end)
387*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
388*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
389*1230fdc1SLionel Sambuc CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
390*1230fdc1SLionel Sambuc default:
391*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
392*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
393*1230fdc1SLionel Sambuc }
394*1230fdc1SLionel Sambuc while (ptr != end) {
395*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
396*1230fdc1SLionel Sambuc CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
397*1230fdc1SLionel Sambuc case BT_S: case BT_CR: case BT_LF:
398*1230fdc1SLionel Sambuc for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
399*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
400*1230fdc1SLionel Sambuc case BT_S: case BT_CR: case BT_LF:
401*1230fdc1SLionel Sambuc break;
402*1230fdc1SLionel Sambuc case BT_GT:
403*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
404*1230fdc1SLionel Sambuc return XML_TOK_END_TAG;
405*1230fdc1SLionel Sambuc default:
406*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
407*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
408*1230fdc1SLionel Sambuc }
409*1230fdc1SLionel Sambuc }
410*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
411*1230fdc1SLionel Sambuc #ifdef XML_NS
412*1230fdc1SLionel Sambuc case BT_COLON:
413*1230fdc1SLionel Sambuc /* no need to check qname syntax here,
414*1230fdc1SLionel Sambuc since end-tag must match exactly */
415*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
416*1230fdc1SLionel Sambuc break;
417*1230fdc1SLionel Sambuc #endif
418*1230fdc1SLionel Sambuc case BT_GT:
419*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
420*1230fdc1SLionel Sambuc return XML_TOK_END_TAG;
421*1230fdc1SLionel Sambuc default:
422*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
423*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
424*1230fdc1SLionel Sambuc }
425*1230fdc1SLionel Sambuc }
426*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
427*1230fdc1SLionel Sambuc }
428*1230fdc1SLionel Sambuc
/* ptr points to character following "&#x" */
430*1230fdc1SLionel Sambuc
431*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanHexCharRef)432*1230fdc1SLionel Sambuc PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
433*1230fdc1SLionel Sambuc const char *end, const char **nextTokPtr)
434*1230fdc1SLionel Sambuc {
435*1230fdc1SLionel Sambuc if (ptr != end) {
436*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
437*1230fdc1SLionel Sambuc case BT_DIGIT:
438*1230fdc1SLionel Sambuc case BT_HEX:
439*1230fdc1SLionel Sambuc break;
440*1230fdc1SLionel Sambuc default:
441*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
442*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
443*1230fdc1SLionel Sambuc }
444*1230fdc1SLionel Sambuc for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
445*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
446*1230fdc1SLionel Sambuc case BT_DIGIT:
447*1230fdc1SLionel Sambuc case BT_HEX:
448*1230fdc1SLionel Sambuc break;
449*1230fdc1SLionel Sambuc case BT_SEMI:
450*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
451*1230fdc1SLionel Sambuc return XML_TOK_CHAR_REF;
452*1230fdc1SLionel Sambuc default:
453*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
454*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
455*1230fdc1SLionel Sambuc }
456*1230fdc1SLionel Sambuc }
457*1230fdc1SLionel Sambuc }
458*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
459*1230fdc1SLionel Sambuc }
460*1230fdc1SLionel Sambuc
461*1230fdc1SLionel Sambuc /* ptr points to character following "&#" */
462*1230fdc1SLionel Sambuc
463*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanCharRef)464*1230fdc1SLionel Sambuc PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
465*1230fdc1SLionel Sambuc const char *end, const char **nextTokPtr)
466*1230fdc1SLionel Sambuc {
467*1230fdc1SLionel Sambuc if (ptr != end) {
468*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_x))
469*1230fdc1SLionel Sambuc return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
470*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
471*1230fdc1SLionel Sambuc case BT_DIGIT:
472*1230fdc1SLionel Sambuc break;
473*1230fdc1SLionel Sambuc default:
474*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
475*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
476*1230fdc1SLionel Sambuc }
477*1230fdc1SLionel Sambuc for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
478*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
479*1230fdc1SLionel Sambuc case BT_DIGIT:
480*1230fdc1SLionel Sambuc break;
481*1230fdc1SLionel Sambuc case BT_SEMI:
482*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
483*1230fdc1SLionel Sambuc return XML_TOK_CHAR_REF;
484*1230fdc1SLionel Sambuc default:
485*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
486*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
487*1230fdc1SLionel Sambuc }
488*1230fdc1SLionel Sambuc }
489*1230fdc1SLionel Sambuc }
490*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
491*1230fdc1SLionel Sambuc }
492*1230fdc1SLionel Sambuc
493*1230fdc1SLionel Sambuc /* ptr points to character following "&" */
494*1230fdc1SLionel Sambuc
495*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanRef)496*1230fdc1SLionel Sambuc PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
497*1230fdc1SLionel Sambuc const char **nextTokPtr)
498*1230fdc1SLionel Sambuc {
499*1230fdc1SLionel Sambuc if (ptr == end)
500*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
501*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
502*1230fdc1SLionel Sambuc CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
503*1230fdc1SLionel Sambuc case BT_NUM:
504*1230fdc1SLionel Sambuc return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
505*1230fdc1SLionel Sambuc default:
506*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
507*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
508*1230fdc1SLionel Sambuc }
509*1230fdc1SLionel Sambuc while (ptr != end) {
510*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
511*1230fdc1SLionel Sambuc CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
512*1230fdc1SLionel Sambuc case BT_SEMI:
513*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
514*1230fdc1SLionel Sambuc return XML_TOK_ENTITY_REF;
515*1230fdc1SLionel Sambuc default:
516*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
517*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
518*1230fdc1SLionel Sambuc }
519*1230fdc1SLionel Sambuc }
520*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
521*1230fdc1SLionel Sambuc }
522*1230fdc1SLionel Sambuc
523*1230fdc1SLionel Sambuc /* ptr points to character following first character of attribute name */
524*1230fdc1SLionel Sambuc
525*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanAtts)526*1230fdc1SLionel Sambuc PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
527*1230fdc1SLionel Sambuc const char **nextTokPtr)
528*1230fdc1SLionel Sambuc {
529*1230fdc1SLionel Sambuc #ifdef XML_NS
530*1230fdc1SLionel Sambuc int hadColon = 0;
531*1230fdc1SLionel Sambuc #endif
532*1230fdc1SLionel Sambuc while (ptr != end) {
533*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
534*1230fdc1SLionel Sambuc CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
535*1230fdc1SLionel Sambuc #ifdef XML_NS
536*1230fdc1SLionel Sambuc case BT_COLON:
537*1230fdc1SLionel Sambuc if (hadColon) {
538*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
539*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
540*1230fdc1SLionel Sambuc }
541*1230fdc1SLionel Sambuc hadColon = 1;
542*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
543*1230fdc1SLionel Sambuc if (ptr == end)
544*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
545*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
546*1230fdc1SLionel Sambuc CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
547*1230fdc1SLionel Sambuc default:
548*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
549*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
550*1230fdc1SLionel Sambuc }
551*1230fdc1SLionel Sambuc break;
552*1230fdc1SLionel Sambuc #endif
553*1230fdc1SLionel Sambuc case BT_S: case BT_CR: case BT_LF:
554*1230fdc1SLionel Sambuc for (;;) {
555*1230fdc1SLionel Sambuc int t;
556*1230fdc1SLionel Sambuc
557*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
558*1230fdc1SLionel Sambuc if (ptr == end)
559*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
560*1230fdc1SLionel Sambuc t = BYTE_TYPE(enc, ptr);
561*1230fdc1SLionel Sambuc if (t == BT_EQUALS)
562*1230fdc1SLionel Sambuc break;
563*1230fdc1SLionel Sambuc switch (t) {
564*1230fdc1SLionel Sambuc case BT_S:
565*1230fdc1SLionel Sambuc case BT_LF:
566*1230fdc1SLionel Sambuc case BT_CR:
567*1230fdc1SLionel Sambuc break;
568*1230fdc1SLionel Sambuc default:
569*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
570*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
571*1230fdc1SLionel Sambuc }
572*1230fdc1SLionel Sambuc }
573*1230fdc1SLionel Sambuc /* fall through */
574*1230fdc1SLionel Sambuc case BT_EQUALS:
575*1230fdc1SLionel Sambuc {
576*1230fdc1SLionel Sambuc int open;
577*1230fdc1SLionel Sambuc #ifdef XML_NS
578*1230fdc1SLionel Sambuc hadColon = 0;
579*1230fdc1SLionel Sambuc #endif
580*1230fdc1SLionel Sambuc for (;;) {
581*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
582*1230fdc1SLionel Sambuc if (ptr == end)
583*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
584*1230fdc1SLionel Sambuc open = BYTE_TYPE(enc, ptr);
585*1230fdc1SLionel Sambuc if (open == BT_QUOT || open == BT_APOS)
586*1230fdc1SLionel Sambuc break;
587*1230fdc1SLionel Sambuc switch (open) {
588*1230fdc1SLionel Sambuc case BT_S:
589*1230fdc1SLionel Sambuc case BT_LF:
590*1230fdc1SLionel Sambuc case BT_CR:
591*1230fdc1SLionel Sambuc break;
592*1230fdc1SLionel Sambuc default:
593*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
594*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
595*1230fdc1SLionel Sambuc }
596*1230fdc1SLionel Sambuc }
597*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
598*1230fdc1SLionel Sambuc /* in attribute value */
599*1230fdc1SLionel Sambuc for (;;) {
600*1230fdc1SLionel Sambuc int t;
601*1230fdc1SLionel Sambuc if (ptr == end)
602*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
603*1230fdc1SLionel Sambuc t = BYTE_TYPE(enc, ptr);
604*1230fdc1SLionel Sambuc if (t == open)
605*1230fdc1SLionel Sambuc break;
606*1230fdc1SLionel Sambuc switch (t) {
607*1230fdc1SLionel Sambuc INVALID_CASES(ptr, nextTokPtr)
608*1230fdc1SLionel Sambuc case BT_AMP:
609*1230fdc1SLionel Sambuc {
610*1230fdc1SLionel Sambuc int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
611*1230fdc1SLionel Sambuc if (tok <= 0) {
612*1230fdc1SLionel Sambuc if (tok == XML_TOK_INVALID)
613*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
614*1230fdc1SLionel Sambuc return tok;
615*1230fdc1SLionel Sambuc }
616*1230fdc1SLionel Sambuc break;
617*1230fdc1SLionel Sambuc }
618*1230fdc1SLionel Sambuc case BT_LT:
619*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
620*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
621*1230fdc1SLionel Sambuc default:
622*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
623*1230fdc1SLionel Sambuc break;
624*1230fdc1SLionel Sambuc }
625*1230fdc1SLionel Sambuc }
626*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
627*1230fdc1SLionel Sambuc if (ptr == end)
628*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
629*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
630*1230fdc1SLionel Sambuc case BT_S:
631*1230fdc1SLionel Sambuc case BT_CR:
632*1230fdc1SLionel Sambuc case BT_LF:
633*1230fdc1SLionel Sambuc break;
634*1230fdc1SLionel Sambuc case BT_SOL:
635*1230fdc1SLionel Sambuc goto sol;
636*1230fdc1SLionel Sambuc case BT_GT:
637*1230fdc1SLionel Sambuc goto gt;
638*1230fdc1SLionel Sambuc default:
639*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
640*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
641*1230fdc1SLionel Sambuc }
642*1230fdc1SLionel Sambuc /* ptr points to closing quote */
643*1230fdc1SLionel Sambuc for (;;) {
644*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
645*1230fdc1SLionel Sambuc if (ptr == end)
646*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
647*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
648*1230fdc1SLionel Sambuc CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
649*1230fdc1SLionel Sambuc case BT_S: case BT_CR: case BT_LF:
650*1230fdc1SLionel Sambuc continue;
651*1230fdc1SLionel Sambuc case BT_GT:
652*1230fdc1SLionel Sambuc gt:
653*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
654*1230fdc1SLionel Sambuc return XML_TOK_START_TAG_WITH_ATTS;
655*1230fdc1SLionel Sambuc case BT_SOL:
656*1230fdc1SLionel Sambuc sol:
657*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
658*1230fdc1SLionel Sambuc if (ptr == end)
659*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
660*1230fdc1SLionel Sambuc if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
661*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
662*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
663*1230fdc1SLionel Sambuc }
664*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
665*1230fdc1SLionel Sambuc return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
666*1230fdc1SLionel Sambuc default:
667*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
668*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
669*1230fdc1SLionel Sambuc }
670*1230fdc1SLionel Sambuc break;
671*1230fdc1SLionel Sambuc }
672*1230fdc1SLionel Sambuc break;
673*1230fdc1SLionel Sambuc }
674*1230fdc1SLionel Sambuc default:
675*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
676*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
677*1230fdc1SLionel Sambuc }
678*1230fdc1SLionel Sambuc }
679*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
680*1230fdc1SLionel Sambuc }
681*1230fdc1SLionel Sambuc
682*1230fdc1SLionel Sambuc /* ptr points to character following "<" */
683*1230fdc1SLionel Sambuc
684*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanLt)685*1230fdc1SLionel Sambuc PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
686*1230fdc1SLionel Sambuc const char **nextTokPtr)
687*1230fdc1SLionel Sambuc {
688*1230fdc1SLionel Sambuc #ifdef XML_NS
689*1230fdc1SLionel Sambuc int hadColon;
690*1230fdc1SLionel Sambuc #endif
691*1230fdc1SLionel Sambuc if (ptr == end)
692*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
693*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
694*1230fdc1SLionel Sambuc CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
695*1230fdc1SLionel Sambuc case BT_EXCL:
696*1230fdc1SLionel Sambuc if ((ptr += MINBPC(enc)) == end)
697*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
698*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
699*1230fdc1SLionel Sambuc case BT_MINUS:
700*1230fdc1SLionel Sambuc return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
701*1230fdc1SLionel Sambuc case BT_LSQB:
702*1230fdc1SLionel Sambuc return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
703*1230fdc1SLionel Sambuc end, nextTokPtr);
704*1230fdc1SLionel Sambuc }
705*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
706*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
707*1230fdc1SLionel Sambuc case BT_QUEST:
708*1230fdc1SLionel Sambuc return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
709*1230fdc1SLionel Sambuc case BT_SOL:
710*1230fdc1SLionel Sambuc return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
711*1230fdc1SLionel Sambuc default:
712*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
713*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
714*1230fdc1SLionel Sambuc }
715*1230fdc1SLionel Sambuc #ifdef XML_NS
716*1230fdc1SLionel Sambuc hadColon = 0;
717*1230fdc1SLionel Sambuc #endif
718*1230fdc1SLionel Sambuc /* we have a start-tag */
719*1230fdc1SLionel Sambuc while (ptr != end) {
720*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
721*1230fdc1SLionel Sambuc CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
722*1230fdc1SLionel Sambuc #ifdef XML_NS
723*1230fdc1SLionel Sambuc case BT_COLON:
724*1230fdc1SLionel Sambuc if (hadColon) {
725*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
726*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
727*1230fdc1SLionel Sambuc }
728*1230fdc1SLionel Sambuc hadColon = 1;
729*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
730*1230fdc1SLionel Sambuc if (ptr == end)
731*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
732*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
733*1230fdc1SLionel Sambuc CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
734*1230fdc1SLionel Sambuc default:
735*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
736*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
737*1230fdc1SLionel Sambuc }
738*1230fdc1SLionel Sambuc break;
739*1230fdc1SLionel Sambuc #endif
740*1230fdc1SLionel Sambuc case BT_S: case BT_CR: case BT_LF:
741*1230fdc1SLionel Sambuc {
742*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
743*1230fdc1SLionel Sambuc while (ptr != end) {
744*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
745*1230fdc1SLionel Sambuc CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
746*1230fdc1SLionel Sambuc case BT_GT:
747*1230fdc1SLionel Sambuc goto gt;
748*1230fdc1SLionel Sambuc case BT_SOL:
749*1230fdc1SLionel Sambuc goto sol;
750*1230fdc1SLionel Sambuc case BT_S: case BT_CR: case BT_LF:
751*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
752*1230fdc1SLionel Sambuc continue;
753*1230fdc1SLionel Sambuc default:
754*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
755*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
756*1230fdc1SLionel Sambuc }
757*1230fdc1SLionel Sambuc return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
758*1230fdc1SLionel Sambuc }
759*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
760*1230fdc1SLionel Sambuc }
761*1230fdc1SLionel Sambuc case BT_GT:
762*1230fdc1SLionel Sambuc gt:
763*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
764*1230fdc1SLionel Sambuc return XML_TOK_START_TAG_NO_ATTS;
765*1230fdc1SLionel Sambuc case BT_SOL:
766*1230fdc1SLionel Sambuc sol:
767*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
768*1230fdc1SLionel Sambuc if (ptr == end)
769*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
770*1230fdc1SLionel Sambuc if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
771*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
772*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
773*1230fdc1SLionel Sambuc }
774*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
775*1230fdc1SLionel Sambuc return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
776*1230fdc1SLionel Sambuc default:
777*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
778*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
779*1230fdc1SLionel Sambuc }
780*1230fdc1SLionel Sambuc }
781*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
782*1230fdc1SLionel Sambuc }
783*1230fdc1SLionel Sambuc
784*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(contentTok)785*1230fdc1SLionel Sambuc PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
786*1230fdc1SLionel Sambuc const char **nextTokPtr)
787*1230fdc1SLionel Sambuc {
788*1230fdc1SLionel Sambuc if (ptr == end)
789*1230fdc1SLionel Sambuc return XML_TOK_NONE;
790*1230fdc1SLionel Sambuc if (MINBPC(enc) > 1) {
791*1230fdc1SLionel Sambuc size_t n = end - ptr;
792*1230fdc1SLionel Sambuc if (n & (MINBPC(enc) - 1)) {
793*1230fdc1SLionel Sambuc n &= ~(MINBPC(enc) - 1);
794*1230fdc1SLionel Sambuc if (n == 0)
795*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
796*1230fdc1SLionel Sambuc end = ptr + n;
797*1230fdc1SLionel Sambuc }
798*1230fdc1SLionel Sambuc }
799*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
800*1230fdc1SLionel Sambuc case BT_LT:
801*1230fdc1SLionel Sambuc return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
802*1230fdc1SLionel Sambuc case BT_AMP:
803*1230fdc1SLionel Sambuc return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
804*1230fdc1SLionel Sambuc case BT_CR:
805*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
806*1230fdc1SLionel Sambuc if (ptr == end)
807*1230fdc1SLionel Sambuc return XML_TOK_TRAILING_CR;
808*1230fdc1SLionel Sambuc if (BYTE_TYPE(enc, ptr) == BT_LF)
809*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
810*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
811*1230fdc1SLionel Sambuc return XML_TOK_DATA_NEWLINE;
812*1230fdc1SLionel Sambuc case BT_LF:
813*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
814*1230fdc1SLionel Sambuc return XML_TOK_DATA_NEWLINE;
815*1230fdc1SLionel Sambuc case BT_RSQB:
816*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
817*1230fdc1SLionel Sambuc if (ptr == end)
818*1230fdc1SLionel Sambuc return XML_TOK_TRAILING_RSQB;
819*1230fdc1SLionel Sambuc if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
820*1230fdc1SLionel Sambuc break;
821*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
822*1230fdc1SLionel Sambuc if (ptr == end)
823*1230fdc1SLionel Sambuc return XML_TOK_TRAILING_RSQB;
824*1230fdc1SLionel Sambuc if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
825*1230fdc1SLionel Sambuc ptr -= MINBPC(enc);
826*1230fdc1SLionel Sambuc break;
827*1230fdc1SLionel Sambuc }
828*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
829*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
830*1230fdc1SLionel Sambuc INVALID_CASES(ptr, nextTokPtr)
831*1230fdc1SLionel Sambuc default:
832*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
833*1230fdc1SLionel Sambuc break;
834*1230fdc1SLionel Sambuc }
835*1230fdc1SLionel Sambuc while (ptr != end) {
836*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
837*1230fdc1SLionel Sambuc #define LEAD_CASE(n) \
838*1230fdc1SLionel Sambuc case BT_LEAD ## n: \
839*1230fdc1SLionel Sambuc if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
840*1230fdc1SLionel Sambuc *nextTokPtr = ptr; \
841*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS; \
842*1230fdc1SLionel Sambuc } \
843*1230fdc1SLionel Sambuc ptr += n; \
844*1230fdc1SLionel Sambuc break;
845*1230fdc1SLionel Sambuc LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
846*1230fdc1SLionel Sambuc #undef LEAD_CASE
847*1230fdc1SLionel Sambuc case BT_RSQB:
848*1230fdc1SLionel Sambuc if (ptr + MINBPC(enc) != end) {
849*1230fdc1SLionel Sambuc if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
850*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
851*1230fdc1SLionel Sambuc break;
852*1230fdc1SLionel Sambuc }
853*1230fdc1SLionel Sambuc if (ptr + 2*MINBPC(enc) != end) {
854*1230fdc1SLionel Sambuc if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
855*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
856*1230fdc1SLionel Sambuc break;
857*1230fdc1SLionel Sambuc }
858*1230fdc1SLionel Sambuc *nextTokPtr = ptr + 2*MINBPC(enc);
859*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
860*1230fdc1SLionel Sambuc }
861*1230fdc1SLionel Sambuc }
862*1230fdc1SLionel Sambuc /* fall through */
863*1230fdc1SLionel Sambuc case BT_AMP:
864*1230fdc1SLionel Sambuc case BT_LT:
865*1230fdc1SLionel Sambuc case BT_NONXML:
866*1230fdc1SLionel Sambuc case BT_MALFORM:
867*1230fdc1SLionel Sambuc case BT_TRAIL:
868*1230fdc1SLionel Sambuc case BT_CR:
869*1230fdc1SLionel Sambuc case BT_LF:
870*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
871*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
872*1230fdc1SLionel Sambuc default:
873*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
874*1230fdc1SLionel Sambuc break;
875*1230fdc1SLionel Sambuc }
876*1230fdc1SLionel Sambuc }
877*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
878*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
879*1230fdc1SLionel Sambuc }
880*1230fdc1SLionel Sambuc
881*1230fdc1SLionel Sambuc /* ptr points to character following "%" */
882*1230fdc1SLionel Sambuc
883*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanPercent)884*1230fdc1SLionel Sambuc PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
885*1230fdc1SLionel Sambuc const char **nextTokPtr)
886*1230fdc1SLionel Sambuc {
887*1230fdc1SLionel Sambuc if (ptr == end)
888*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
889*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
890*1230fdc1SLionel Sambuc CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
891*1230fdc1SLionel Sambuc case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
892*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
893*1230fdc1SLionel Sambuc return XML_TOK_PERCENT;
894*1230fdc1SLionel Sambuc default:
895*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
896*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
897*1230fdc1SLionel Sambuc }
898*1230fdc1SLionel Sambuc while (ptr != end) {
899*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
900*1230fdc1SLionel Sambuc CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
901*1230fdc1SLionel Sambuc case BT_SEMI:
902*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
903*1230fdc1SLionel Sambuc return XML_TOK_PARAM_ENTITY_REF;
904*1230fdc1SLionel Sambuc default:
905*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
906*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
907*1230fdc1SLionel Sambuc }
908*1230fdc1SLionel Sambuc }
909*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
910*1230fdc1SLionel Sambuc }
911*1230fdc1SLionel Sambuc
912*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanPoundName)913*1230fdc1SLionel Sambuc PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
914*1230fdc1SLionel Sambuc const char **nextTokPtr)
915*1230fdc1SLionel Sambuc {
916*1230fdc1SLionel Sambuc if (ptr == end)
917*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
918*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
919*1230fdc1SLionel Sambuc CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
920*1230fdc1SLionel Sambuc default:
921*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
922*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
923*1230fdc1SLionel Sambuc }
924*1230fdc1SLionel Sambuc while (ptr != end) {
925*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
926*1230fdc1SLionel Sambuc CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
927*1230fdc1SLionel Sambuc case BT_CR: case BT_LF: case BT_S:
928*1230fdc1SLionel Sambuc case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
929*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
930*1230fdc1SLionel Sambuc return XML_TOK_POUND_NAME;
931*1230fdc1SLionel Sambuc default:
932*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
933*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
934*1230fdc1SLionel Sambuc }
935*1230fdc1SLionel Sambuc }
936*1230fdc1SLionel Sambuc return -XML_TOK_POUND_NAME;
937*1230fdc1SLionel Sambuc }
938*1230fdc1SLionel Sambuc
939*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(scanLit)940*1230fdc1SLionel Sambuc PREFIX(scanLit)(int open, const ENCODING *enc,
941*1230fdc1SLionel Sambuc const char *ptr, const char *end,
942*1230fdc1SLionel Sambuc const char **nextTokPtr)
943*1230fdc1SLionel Sambuc {
944*1230fdc1SLionel Sambuc while (ptr != end) {
945*1230fdc1SLionel Sambuc int t = BYTE_TYPE(enc, ptr);
946*1230fdc1SLionel Sambuc switch (t) {
947*1230fdc1SLionel Sambuc INVALID_CASES(ptr, nextTokPtr)
948*1230fdc1SLionel Sambuc case BT_QUOT:
949*1230fdc1SLionel Sambuc case BT_APOS:
950*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
951*1230fdc1SLionel Sambuc if (t != open)
952*1230fdc1SLionel Sambuc break;
953*1230fdc1SLionel Sambuc if (ptr == end)
954*1230fdc1SLionel Sambuc return -XML_TOK_LITERAL;
955*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
956*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
957*1230fdc1SLionel Sambuc case BT_S: case BT_CR: case BT_LF:
958*1230fdc1SLionel Sambuc case BT_GT: case BT_PERCNT: case BT_LSQB:
959*1230fdc1SLionel Sambuc return XML_TOK_LITERAL;
960*1230fdc1SLionel Sambuc default:
961*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
962*1230fdc1SLionel Sambuc }
963*1230fdc1SLionel Sambuc default:
964*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
965*1230fdc1SLionel Sambuc break;
966*1230fdc1SLionel Sambuc }
967*1230fdc1SLionel Sambuc }
968*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
969*1230fdc1SLionel Sambuc }
970*1230fdc1SLionel Sambuc
971*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(prologTok)972*1230fdc1SLionel Sambuc PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
973*1230fdc1SLionel Sambuc const char **nextTokPtr)
974*1230fdc1SLionel Sambuc {
975*1230fdc1SLionel Sambuc int tok;
976*1230fdc1SLionel Sambuc if (ptr == end)
977*1230fdc1SLionel Sambuc return XML_TOK_NONE;
978*1230fdc1SLionel Sambuc if (MINBPC(enc) > 1) {
979*1230fdc1SLionel Sambuc size_t n = end - ptr;
980*1230fdc1SLionel Sambuc if (n & (MINBPC(enc) - 1)) {
981*1230fdc1SLionel Sambuc n &= ~(MINBPC(enc) - 1);
982*1230fdc1SLionel Sambuc if (n == 0)
983*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
984*1230fdc1SLionel Sambuc end = ptr + n;
985*1230fdc1SLionel Sambuc }
986*1230fdc1SLionel Sambuc }
987*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
988*1230fdc1SLionel Sambuc case BT_QUOT:
989*1230fdc1SLionel Sambuc return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
990*1230fdc1SLionel Sambuc case BT_APOS:
991*1230fdc1SLionel Sambuc return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
992*1230fdc1SLionel Sambuc case BT_LT:
993*1230fdc1SLionel Sambuc {
994*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
995*1230fdc1SLionel Sambuc if (ptr == end)
996*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
997*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
998*1230fdc1SLionel Sambuc case BT_EXCL:
999*1230fdc1SLionel Sambuc return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1000*1230fdc1SLionel Sambuc case BT_QUEST:
1001*1230fdc1SLionel Sambuc return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1002*1230fdc1SLionel Sambuc case BT_NMSTRT:
1003*1230fdc1SLionel Sambuc case BT_HEX:
1004*1230fdc1SLionel Sambuc case BT_NONASCII:
1005*1230fdc1SLionel Sambuc case BT_LEAD2:
1006*1230fdc1SLionel Sambuc case BT_LEAD3:
1007*1230fdc1SLionel Sambuc case BT_LEAD4:
1008*1230fdc1SLionel Sambuc *nextTokPtr = ptr - MINBPC(enc);
1009*1230fdc1SLionel Sambuc return XML_TOK_INSTANCE_START;
1010*1230fdc1SLionel Sambuc }
1011*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1012*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
1013*1230fdc1SLionel Sambuc }
1014*1230fdc1SLionel Sambuc case BT_CR:
1015*1230fdc1SLionel Sambuc if (ptr + MINBPC(enc) == end) {
1016*1230fdc1SLionel Sambuc *nextTokPtr = end;
1017*1230fdc1SLionel Sambuc /* indicate that this might be part of a CR/LF pair */
1018*1230fdc1SLionel Sambuc return -XML_TOK_PROLOG_S;
1019*1230fdc1SLionel Sambuc }
1020*1230fdc1SLionel Sambuc /* fall through */
1021*1230fdc1SLionel Sambuc case BT_S: case BT_LF:
1022*1230fdc1SLionel Sambuc for (;;) {
1023*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1024*1230fdc1SLionel Sambuc if (ptr == end)
1025*1230fdc1SLionel Sambuc break;
1026*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
1027*1230fdc1SLionel Sambuc case BT_S: case BT_LF:
1028*1230fdc1SLionel Sambuc break;
1029*1230fdc1SLionel Sambuc case BT_CR:
1030*1230fdc1SLionel Sambuc /* don't split CR/LF pair */
1031*1230fdc1SLionel Sambuc if (ptr + MINBPC(enc) != end)
1032*1230fdc1SLionel Sambuc break;
1033*1230fdc1SLionel Sambuc /* fall through */
1034*1230fdc1SLionel Sambuc default:
1035*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1036*1230fdc1SLionel Sambuc return XML_TOK_PROLOG_S;
1037*1230fdc1SLionel Sambuc }
1038*1230fdc1SLionel Sambuc }
1039*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1040*1230fdc1SLionel Sambuc return XML_TOK_PROLOG_S;
1041*1230fdc1SLionel Sambuc case BT_PERCNT:
1042*1230fdc1SLionel Sambuc return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1043*1230fdc1SLionel Sambuc case BT_COMMA:
1044*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1045*1230fdc1SLionel Sambuc return XML_TOK_COMMA;
1046*1230fdc1SLionel Sambuc case BT_LSQB:
1047*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1048*1230fdc1SLionel Sambuc return XML_TOK_OPEN_BRACKET;
1049*1230fdc1SLionel Sambuc case BT_RSQB:
1050*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1051*1230fdc1SLionel Sambuc if (ptr == end)
1052*1230fdc1SLionel Sambuc return -XML_TOK_CLOSE_BRACKET;
1053*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1054*1230fdc1SLionel Sambuc if (ptr + MINBPC(enc) == end)
1055*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
1056*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1057*1230fdc1SLionel Sambuc *nextTokPtr = ptr + 2*MINBPC(enc);
1058*1230fdc1SLionel Sambuc return XML_TOK_COND_SECT_CLOSE;
1059*1230fdc1SLionel Sambuc }
1060*1230fdc1SLionel Sambuc }
1061*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1062*1230fdc1SLionel Sambuc return XML_TOK_CLOSE_BRACKET;
1063*1230fdc1SLionel Sambuc case BT_LPAR:
1064*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1065*1230fdc1SLionel Sambuc return XML_TOK_OPEN_PAREN;
1066*1230fdc1SLionel Sambuc case BT_RPAR:
1067*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1068*1230fdc1SLionel Sambuc if (ptr == end)
1069*1230fdc1SLionel Sambuc return -XML_TOK_CLOSE_PAREN;
1070*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
1071*1230fdc1SLionel Sambuc case BT_AST:
1072*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1073*1230fdc1SLionel Sambuc return XML_TOK_CLOSE_PAREN_ASTERISK;
1074*1230fdc1SLionel Sambuc case BT_QUEST:
1075*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1076*1230fdc1SLionel Sambuc return XML_TOK_CLOSE_PAREN_QUESTION;
1077*1230fdc1SLionel Sambuc case BT_PLUS:
1078*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1079*1230fdc1SLionel Sambuc return XML_TOK_CLOSE_PAREN_PLUS;
1080*1230fdc1SLionel Sambuc case BT_CR: case BT_LF: case BT_S:
1081*1230fdc1SLionel Sambuc case BT_GT: case BT_COMMA: case BT_VERBAR:
1082*1230fdc1SLionel Sambuc case BT_RPAR:
1083*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1084*1230fdc1SLionel Sambuc return XML_TOK_CLOSE_PAREN;
1085*1230fdc1SLionel Sambuc }
1086*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1087*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
1088*1230fdc1SLionel Sambuc case BT_VERBAR:
1089*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1090*1230fdc1SLionel Sambuc return XML_TOK_OR;
1091*1230fdc1SLionel Sambuc case BT_GT:
1092*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1093*1230fdc1SLionel Sambuc return XML_TOK_DECL_CLOSE;
1094*1230fdc1SLionel Sambuc case BT_NUM:
1095*1230fdc1SLionel Sambuc return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1096*1230fdc1SLionel Sambuc #define LEAD_CASE(n) \
1097*1230fdc1SLionel Sambuc case BT_LEAD ## n: \
1098*1230fdc1SLionel Sambuc if (end - ptr < n) \
1099*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL_CHAR; \
1100*1230fdc1SLionel Sambuc if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1101*1230fdc1SLionel Sambuc ptr += n; \
1102*1230fdc1SLionel Sambuc tok = XML_TOK_NAME; \
1103*1230fdc1SLionel Sambuc break; \
1104*1230fdc1SLionel Sambuc } \
1105*1230fdc1SLionel Sambuc if (IS_NAME_CHAR(enc, ptr, n)) { \
1106*1230fdc1SLionel Sambuc ptr += n; \
1107*1230fdc1SLionel Sambuc tok = XML_TOK_NMTOKEN; \
1108*1230fdc1SLionel Sambuc break; \
1109*1230fdc1SLionel Sambuc } \
1110*1230fdc1SLionel Sambuc *nextTokPtr = ptr; \
1111*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
1112*1230fdc1SLionel Sambuc LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1113*1230fdc1SLionel Sambuc #undef LEAD_CASE
1114*1230fdc1SLionel Sambuc case BT_NMSTRT:
1115*1230fdc1SLionel Sambuc case BT_HEX:
1116*1230fdc1SLionel Sambuc tok = XML_TOK_NAME;
1117*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1118*1230fdc1SLionel Sambuc break;
1119*1230fdc1SLionel Sambuc case BT_DIGIT:
1120*1230fdc1SLionel Sambuc case BT_NAME:
1121*1230fdc1SLionel Sambuc case BT_MINUS:
1122*1230fdc1SLionel Sambuc #ifdef XML_NS
1123*1230fdc1SLionel Sambuc case BT_COLON:
1124*1230fdc1SLionel Sambuc #endif
1125*1230fdc1SLionel Sambuc tok = XML_TOK_NMTOKEN;
1126*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1127*1230fdc1SLionel Sambuc break;
1128*1230fdc1SLionel Sambuc case BT_NONASCII:
1129*1230fdc1SLionel Sambuc if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1130*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1131*1230fdc1SLionel Sambuc tok = XML_TOK_NAME;
1132*1230fdc1SLionel Sambuc break;
1133*1230fdc1SLionel Sambuc }
1134*1230fdc1SLionel Sambuc if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1135*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1136*1230fdc1SLionel Sambuc tok = XML_TOK_NMTOKEN;
1137*1230fdc1SLionel Sambuc break;
1138*1230fdc1SLionel Sambuc }
1139*1230fdc1SLionel Sambuc /* fall through */
1140*1230fdc1SLionel Sambuc default:
1141*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1142*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
1143*1230fdc1SLionel Sambuc }
1144*1230fdc1SLionel Sambuc while (ptr != end) {
1145*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
1146*1230fdc1SLionel Sambuc CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1147*1230fdc1SLionel Sambuc case BT_GT: case BT_RPAR: case BT_COMMA:
1148*1230fdc1SLionel Sambuc case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1149*1230fdc1SLionel Sambuc case BT_S: case BT_CR: case BT_LF:
1150*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1151*1230fdc1SLionel Sambuc return tok;
1152*1230fdc1SLionel Sambuc #ifdef XML_NS
1153*1230fdc1SLionel Sambuc case BT_COLON:
1154*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1155*1230fdc1SLionel Sambuc switch (tok) {
1156*1230fdc1SLionel Sambuc case XML_TOK_NAME:
1157*1230fdc1SLionel Sambuc if (ptr == end)
1158*1230fdc1SLionel Sambuc return XML_TOK_PARTIAL;
1159*1230fdc1SLionel Sambuc tok = XML_TOK_PREFIXED_NAME;
1160*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
1161*1230fdc1SLionel Sambuc CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1162*1230fdc1SLionel Sambuc default:
1163*1230fdc1SLionel Sambuc tok = XML_TOK_NMTOKEN;
1164*1230fdc1SLionel Sambuc break;
1165*1230fdc1SLionel Sambuc }
1166*1230fdc1SLionel Sambuc break;
1167*1230fdc1SLionel Sambuc case XML_TOK_PREFIXED_NAME:
1168*1230fdc1SLionel Sambuc tok = XML_TOK_NMTOKEN;
1169*1230fdc1SLionel Sambuc break;
1170*1230fdc1SLionel Sambuc }
1171*1230fdc1SLionel Sambuc break;
1172*1230fdc1SLionel Sambuc #endif
1173*1230fdc1SLionel Sambuc case BT_PLUS:
1174*1230fdc1SLionel Sambuc if (tok == XML_TOK_NMTOKEN) {
1175*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1176*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
1177*1230fdc1SLionel Sambuc }
1178*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1179*1230fdc1SLionel Sambuc return XML_TOK_NAME_PLUS;
1180*1230fdc1SLionel Sambuc case BT_AST:
1181*1230fdc1SLionel Sambuc if (tok == XML_TOK_NMTOKEN) {
1182*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1183*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
1184*1230fdc1SLionel Sambuc }
1185*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1186*1230fdc1SLionel Sambuc return XML_TOK_NAME_ASTERISK;
1187*1230fdc1SLionel Sambuc case BT_QUEST:
1188*1230fdc1SLionel Sambuc if (tok == XML_TOK_NMTOKEN) {
1189*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1190*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
1191*1230fdc1SLionel Sambuc }
1192*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1193*1230fdc1SLionel Sambuc return XML_TOK_NAME_QUESTION;
1194*1230fdc1SLionel Sambuc default:
1195*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1196*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
1197*1230fdc1SLionel Sambuc }
1198*1230fdc1SLionel Sambuc }
1199*1230fdc1SLionel Sambuc return -tok;
1200*1230fdc1SLionel Sambuc }
1201*1230fdc1SLionel Sambuc
1202*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(attributeValueTok)1203*1230fdc1SLionel Sambuc PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
1204*1230fdc1SLionel Sambuc const char *end, const char **nextTokPtr)
1205*1230fdc1SLionel Sambuc {
1206*1230fdc1SLionel Sambuc const char *start;
1207*1230fdc1SLionel Sambuc if (ptr == end)
1208*1230fdc1SLionel Sambuc return XML_TOK_NONE;
1209*1230fdc1SLionel Sambuc start = ptr;
1210*1230fdc1SLionel Sambuc while (ptr != end) {
1211*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
1212*1230fdc1SLionel Sambuc #define LEAD_CASE(n) \
1213*1230fdc1SLionel Sambuc case BT_LEAD ## n: ptr += n; break;
1214*1230fdc1SLionel Sambuc LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1215*1230fdc1SLionel Sambuc #undef LEAD_CASE
1216*1230fdc1SLionel Sambuc case BT_AMP:
1217*1230fdc1SLionel Sambuc if (ptr == start)
1218*1230fdc1SLionel Sambuc return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1219*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1220*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
1221*1230fdc1SLionel Sambuc case BT_LT:
1222*1230fdc1SLionel Sambuc /* this is for inside entity references */
1223*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1224*1230fdc1SLionel Sambuc return XML_TOK_INVALID;
1225*1230fdc1SLionel Sambuc case BT_LF:
1226*1230fdc1SLionel Sambuc if (ptr == start) {
1227*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1228*1230fdc1SLionel Sambuc return XML_TOK_DATA_NEWLINE;
1229*1230fdc1SLionel Sambuc }
1230*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1231*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
1232*1230fdc1SLionel Sambuc case BT_CR:
1233*1230fdc1SLionel Sambuc if (ptr == start) {
1234*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1235*1230fdc1SLionel Sambuc if (ptr == end)
1236*1230fdc1SLionel Sambuc return XML_TOK_TRAILING_CR;
1237*1230fdc1SLionel Sambuc if (BYTE_TYPE(enc, ptr) == BT_LF)
1238*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1239*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1240*1230fdc1SLionel Sambuc return XML_TOK_DATA_NEWLINE;
1241*1230fdc1SLionel Sambuc }
1242*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1243*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
1244*1230fdc1SLionel Sambuc case BT_S:
1245*1230fdc1SLionel Sambuc if (ptr == start) {
1246*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1247*1230fdc1SLionel Sambuc return XML_TOK_ATTRIBUTE_VALUE_S;
1248*1230fdc1SLionel Sambuc }
1249*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1250*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
1251*1230fdc1SLionel Sambuc default:
1252*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1253*1230fdc1SLionel Sambuc break;
1254*1230fdc1SLionel Sambuc }
1255*1230fdc1SLionel Sambuc }
1256*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1257*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
1258*1230fdc1SLionel Sambuc }
1259*1230fdc1SLionel Sambuc
1260*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(entityValueTok)1261*1230fdc1SLionel Sambuc PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
1262*1230fdc1SLionel Sambuc const char *end, const char **nextTokPtr)
1263*1230fdc1SLionel Sambuc {
1264*1230fdc1SLionel Sambuc const char *start;
1265*1230fdc1SLionel Sambuc if (ptr == end)
1266*1230fdc1SLionel Sambuc return XML_TOK_NONE;
1267*1230fdc1SLionel Sambuc start = ptr;
1268*1230fdc1SLionel Sambuc while (ptr != end) {
1269*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
1270*1230fdc1SLionel Sambuc #define LEAD_CASE(n) \
1271*1230fdc1SLionel Sambuc case BT_LEAD ## n: ptr += n; break;
1272*1230fdc1SLionel Sambuc LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1273*1230fdc1SLionel Sambuc #undef LEAD_CASE
1274*1230fdc1SLionel Sambuc case BT_AMP:
1275*1230fdc1SLionel Sambuc if (ptr == start)
1276*1230fdc1SLionel Sambuc return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1277*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1278*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
1279*1230fdc1SLionel Sambuc case BT_PERCNT:
1280*1230fdc1SLionel Sambuc if (ptr == start) {
1281*1230fdc1SLionel Sambuc int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1282*1230fdc1SLionel Sambuc end, nextTokPtr);
1283*1230fdc1SLionel Sambuc return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1284*1230fdc1SLionel Sambuc }
1285*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1286*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
1287*1230fdc1SLionel Sambuc case BT_LF:
1288*1230fdc1SLionel Sambuc if (ptr == start) {
1289*1230fdc1SLionel Sambuc *nextTokPtr = ptr + MINBPC(enc);
1290*1230fdc1SLionel Sambuc return XML_TOK_DATA_NEWLINE;
1291*1230fdc1SLionel Sambuc }
1292*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1293*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
1294*1230fdc1SLionel Sambuc case BT_CR:
1295*1230fdc1SLionel Sambuc if (ptr == start) {
1296*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1297*1230fdc1SLionel Sambuc if (ptr == end)
1298*1230fdc1SLionel Sambuc return XML_TOK_TRAILING_CR;
1299*1230fdc1SLionel Sambuc if (BYTE_TYPE(enc, ptr) == BT_LF)
1300*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1301*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1302*1230fdc1SLionel Sambuc return XML_TOK_DATA_NEWLINE;
1303*1230fdc1SLionel Sambuc }
1304*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1305*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
1306*1230fdc1SLionel Sambuc default:
1307*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1308*1230fdc1SLionel Sambuc break;
1309*1230fdc1SLionel Sambuc }
1310*1230fdc1SLionel Sambuc }
1311*1230fdc1SLionel Sambuc *nextTokPtr = ptr;
1312*1230fdc1SLionel Sambuc return XML_TOK_DATA_CHARS;
1313*1230fdc1SLionel Sambuc }
1314*1230fdc1SLionel Sambuc
#ifdef XML_DTD

/* Scans forward to the "]]>" that closes the current section, keeping
 * track of nested "<![" ... "]]>" pairs on the way.
 *
 * Returns XML_TOK_IGNORE_SECT with *nextTokPtr just past the closing
 * "]]>" of the outermost level, XML_TOK_PARTIAL if the input ends first,
 * or whatever the INVALID_CASES arms return for malformed or non-XML
 * characters.  For encodings with MINBPC(enc) > 1 trailing bytes that do
 * not form a full code unit are ignored.
 */
static int PTRCALL
PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
                         const char *end, const char **nextTokPtr)
{
  /* number of nested "<![" openers that are still unclosed */
  int depth = 0;

  if (MINBPC(enc) > 1) {
    size_t avail = end - ptr;
    size_t partial = avail & (MINBPC(enc) - 1);
    if (partial != 0)
      end = ptr + (avail - partial);
  }

  while (ptr != end) {
    switch (BYTE_TYPE(enc, ptr)) {
    INVALID_CASES(ptr, nextTokPtr)
    case BT_LT:
      /* "<![" opens a nested section; any shorter match is skipped */
      ptr += MINBPC(enc);
      if (ptr == end)
        return XML_TOK_PARTIAL;
      if (!CHAR_MATCHES(enc, ptr, ASCII_EXCL))
        break;
      ptr += MINBPC(enc);
      if (ptr == end)
        return XML_TOK_PARTIAL;
      if (!CHAR_MATCHES(enc, ptr, ASCII_LSQB))
        break;
      ++depth;
      ptr += MINBPC(enc);
      break;
    case BT_RSQB:
      /* "]]>" closes the innermost open section */
      ptr += MINBPC(enc);
      if (ptr == end)
        return XML_TOK_PARTIAL;
      if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
        break;
      ptr += MINBPC(enc);
      if (ptr == end)
        return XML_TOK_PARTIAL;
      if (!CHAR_MATCHES(enc, ptr, ASCII_GT))
        break;
      ptr += MINBPC(enc);
      if (depth == 0) {
        *nextTokPtr = ptr;
        return XML_TOK_IGNORE_SECT;
      }
      --depth;
      break;
    default:
      ptr += MINBPC(enc);
      break;
    }
  }
  return XML_TOK_PARTIAL;
}

#endif /* XML_DTD */
1369*1230fdc1SLionel Sambuc
1370*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(isPublicId)1371*1230fdc1SLionel Sambuc PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1372*1230fdc1SLionel Sambuc const char **badPtr)
1373*1230fdc1SLionel Sambuc {
1374*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1375*1230fdc1SLionel Sambuc end -= MINBPC(enc);
1376*1230fdc1SLionel Sambuc for (; ptr != end; ptr += MINBPC(enc)) {
1377*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
1378*1230fdc1SLionel Sambuc case BT_DIGIT:
1379*1230fdc1SLionel Sambuc case BT_HEX:
1380*1230fdc1SLionel Sambuc case BT_MINUS:
1381*1230fdc1SLionel Sambuc case BT_APOS:
1382*1230fdc1SLionel Sambuc case BT_LPAR:
1383*1230fdc1SLionel Sambuc case BT_RPAR:
1384*1230fdc1SLionel Sambuc case BT_PLUS:
1385*1230fdc1SLionel Sambuc case BT_COMMA:
1386*1230fdc1SLionel Sambuc case BT_SOL:
1387*1230fdc1SLionel Sambuc case BT_EQUALS:
1388*1230fdc1SLionel Sambuc case BT_QUEST:
1389*1230fdc1SLionel Sambuc case BT_CR:
1390*1230fdc1SLionel Sambuc case BT_LF:
1391*1230fdc1SLionel Sambuc case BT_SEMI:
1392*1230fdc1SLionel Sambuc case BT_EXCL:
1393*1230fdc1SLionel Sambuc case BT_AST:
1394*1230fdc1SLionel Sambuc case BT_PERCNT:
1395*1230fdc1SLionel Sambuc case BT_NUM:
1396*1230fdc1SLionel Sambuc #ifdef XML_NS
1397*1230fdc1SLionel Sambuc case BT_COLON:
1398*1230fdc1SLionel Sambuc #endif
1399*1230fdc1SLionel Sambuc break;
1400*1230fdc1SLionel Sambuc case BT_S:
1401*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1402*1230fdc1SLionel Sambuc *badPtr = ptr;
1403*1230fdc1SLionel Sambuc return 0;
1404*1230fdc1SLionel Sambuc }
1405*1230fdc1SLionel Sambuc break;
1406*1230fdc1SLionel Sambuc case BT_NAME:
1407*1230fdc1SLionel Sambuc case BT_NMSTRT:
1408*1230fdc1SLionel Sambuc if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1409*1230fdc1SLionel Sambuc break;
1410*1230fdc1SLionel Sambuc default:
1411*1230fdc1SLionel Sambuc switch (BYTE_TO_ASCII(enc, ptr)) {
1412*1230fdc1SLionel Sambuc case 0x24: /* $ */
1413*1230fdc1SLionel Sambuc case 0x40: /* @ */
1414*1230fdc1SLionel Sambuc break;
1415*1230fdc1SLionel Sambuc default:
1416*1230fdc1SLionel Sambuc *badPtr = ptr;
1417*1230fdc1SLionel Sambuc return 0;
1418*1230fdc1SLionel Sambuc }
1419*1230fdc1SLionel Sambuc break;
1420*1230fdc1SLionel Sambuc }
1421*1230fdc1SLionel Sambuc }
1422*1230fdc1SLionel Sambuc return 1;
1423*1230fdc1SLionel Sambuc }
1424*1230fdc1SLionel Sambuc
1425*1230fdc1SLionel Sambuc /* This must only be called for a well-formed start-tag or empty
1426*1230fdc1SLionel Sambuc element tag. Returns the number of attributes. Pointers to the
1427*1230fdc1SLionel Sambuc first attsMax attributes are stored in atts.
1428*1230fdc1SLionel Sambuc */
1429*1230fdc1SLionel Sambuc
1430*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(getAtts)1431*1230fdc1SLionel Sambuc PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1432*1230fdc1SLionel Sambuc int attsMax, ATTRIBUTE *atts)
1433*1230fdc1SLionel Sambuc {
1434*1230fdc1SLionel Sambuc enum { other, inName, inValue } state = inName;
1435*1230fdc1SLionel Sambuc int nAtts = 0;
1436*1230fdc1SLionel Sambuc int open = 0; /* defined when state == inValue;
1437*1230fdc1SLionel Sambuc initialization just to shut up compilers */
1438*1230fdc1SLionel Sambuc
1439*1230fdc1SLionel Sambuc for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1440*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
1441*1230fdc1SLionel Sambuc #define START_NAME \
1442*1230fdc1SLionel Sambuc if (state == other) { \
1443*1230fdc1SLionel Sambuc if (nAtts < attsMax) { \
1444*1230fdc1SLionel Sambuc atts[nAtts].name = ptr; \
1445*1230fdc1SLionel Sambuc atts[nAtts].normalized = 1; \
1446*1230fdc1SLionel Sambuc } \
1447*1230fdc1SLionel Sambuc state = inName; \
1448*1230fdc1SLionel Sambuc }
1449*1230fdc1SLionel Sambuc #define LEAD_CASE(n) \
1450*1230fdc1SLionel Sambuc case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1451*1230fdc1SLionel Sambuc LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1452*1230fdc1SLionel Sambuc #undef LEAD_CASE
1453*1230fdc1SLionel Sambuc case BT_NONASCII:
1454*1230fdc1SLionel Sambuc case BT_NMSTRT:
1455*1230fdc1SLionel Sambuc case BT_HEX:
1456*1230fdc1SLionel Sambuc START_NAME
1457*1230fdc1SLionel Sambuc break;
1458*1230fdc1SLionel Sambuc #undef START_NAME
1459*1230fdc1SLionel Sambuc case BT_QUOT:
1460*1230fdc1SLionel Sambuc if (state != inValue) {
1461*1230fdc1SLionel Sambuc if (nAtts < attsMax)
1462*1230fdc1SLionel Sambuc atts[nAtts].valuePtr = ptr + MINBPC(enc);
1463*1230fdc1SLionel Sambuc state = inValue;
1464*1230fdc1SLionel Sambuc open = BT_QUOT;
1465*1230fdc1SLionel Sambuc }
1466*1230fdc1SLionel Sambuc else if (open == BT_QUOT) {
1467*1230fdc1SLionel Sambuc state = other;
1468*1230fdc1SLionel Sambuc if (nAtts < attsMax)
1469*1230fdc1SLionel Sambuc atts[nAtts].valueEnd = ptr;
1470*1230fdc1SLionel Sambuc nAtts++;
1471*1230fdc1SLionel Sambuc }
1472*1230fdc1SLionel Sambuc break;
1473*1230fdc1SLionel Sambuc case BT_APOS:
1474*1230fdc1SLionel Sambuc if (state != inValue) {
1475*1230fdc1SLionel Sambuc if (nAtts < attsMax)
1476*1230fdc1SLionel Sambuc atts[nAtts].valuePtr = ptr + MINBPC(enc);
1477*1230fdc1SLionel Sambuc state = inValue;
1478*1230fdc1SLionel Sambuc open = BT_APOS;
1479*1230fdc1SLionel Sambuc }
1480*1230fdc1SLionel Sambuc else if (open == BT_APOS) {
1481*1230fdc1SLionel Sambuc state = other;
1482*1230fdc1SLionel Sambuc if (nAtts < attsMax)
1483*1230fdc1SLionel Sambuc atts[nAtts].valueEnd = ptr;
1484*1230fdc1SLionel Sambuc nAtts++;
1485*1230fdc1SLionel Sambuc }
1486*1230fdc1SLionel Sambuc break;
1487*1230fdc1SLionel Sambuc case BT_AMP:
1488*1230fdc1SLionel Sambuc if (nAtts < attsMax)
1489*1230fdc1SLionel Sambuc atts[nAtts].normalized = 0;
1490*1230fdc1SLionel Sambuc break;
1491*1230fdc1SLionel Sambuc case BT_S:
1492*1230fdc1SLionel Sambuc if (state == inName)
1493*1230fdc1SLionel Sambuc state = other;
1494*1230fdc1SLionel Sambuc else if (state == inValue
1495*1230fdc1SLionel Sambuc && nAtts < attsMax
1496*1230fdc1SLionel Sambuc && atts[nAtts].normalized
1497*1230fdc1SLionel Sambuc && (ptr == atts[nAtts].valuePtr
1498*1230fdc1SLionel Sambuc || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1499*1230fdc1SLionel Sambuc || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1500*1230fdc1SLionel Sambuc || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1501*1230fdc1SLionel Sambuc atts[nAtts].normalized = 0;
1502*1230fdc1SLionel Sambuc break;
1503*1230fdc1SLionel Sambuc case BT_CR: case BT_LF:
1504*1230fdc1SLionel Sambuc /* This case ensures that the first attribute name is counted
1505*1230fdc1SLionel Sambuc Apart from that we could just change state on the quote. */
1506*1230fdc1SLionel Sambuc if (state == inName)
1507*1230fdc1SLionel Sambuc state = other;
1508*1230fdc1SLionel Sambuc else if (state == inValue && nAtts < attsMax)
1509*1230fdc1SLionel Sambuc atts[nAtts].normalized = 0;
1510*1230fdc1SLionel Sambuc break;
1511*1230fdc1SLionel Sambuc case BT_GT:
1512*1230fdc1SLionel Sambuc case BT_SOL:
1513*1230fdc1SLionel Sambuc if (state != inValue)
1514*1230fdc1SLionel Sambuc return nAtts;
1515*1230fdc1SLionel Sambuc break;
1516*1230fdc1SLionel Sambuc default:
1517*1230fdc1SLionel Sambuc break;
1518*1230fdc1SLionel Sambuc }
1519*1230fdc1SLionel Sambuc }
1520*1230fdc1SLionel Sambuc /* not reached */
1521*1230fdc1SLionel Sambuc }
1522*1230fdc1SLionel Sambuc
1523*1230fdc1SLionel Sambuc static int PTRFASTCALL
PREFIX(charRefNumber)1524*1230fdc1SLionel Sambuc PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
1525*1230fdc1SLionel Sambuc {
1526*1230fdc1SLionel Sambuc int result = 0;
1527*1230fdc1SLionel Sambuc /* skip &# */
1528*1230fdc1SLionel Sambuc ptr += 2*MINBPC(enc);
1529*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1530*1230fdc1SLionel Sambuc for (ptr += MINBPC(enc);
1531*1230fdc1SLionel Sambuc !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1532*1230fdc1SLionel Sambuc ptr += MINBPC(enc)) {
1533*1230fdc1SLionel Sambuc int c = BYTE_TO_ASCII(enc, ptr);
1534*1230fdc1SLionel Sambuc switch (c) {
1535*1230fdc1SLionel Sambuc case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1536*1230fdc1SLionel Sambuc case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1537*1230fdc1SLionel Sambuc result <<= 4;
1538*1230fdc1SLionel Sambuc result |= (c - ASCII_0);
1539*1230fdc1SLionel Sambuc break;
1540*1230fdc1SLionel Sambuc case ASCII_A: case ASCII_B: case ASCII_C:
1541*1230fdc1SLionel Sambuc case ASCII_D: case ASCII_E: case ASCII_F:
1542*1230fdc1SLionel Sambuc result <<= 4;
1543*1230fdc1SLionel Sambuc result += 10 + (c - ASCII_A);
1544*1230fdc1SLionel Sambuc break;
1545*1230fdc1SLionel Sambuc case ASCII_a: case ASCII_b: case ASCII_c:
1546*1230fdc1SLionel Sambuc case ASCII_d: case ASCII_e: case ASCII_f:
1547*1230fdc1SLionel Sambuc result <<= 4;
1548*1230fdc1SLionel Sambuc result += 10 + (c - ASCII_a);
1549*1230fdc1SLionel Sambuc break;
1550*1230fdc1SLionel Sambuc }
1551*1230fdc1SLionel Sambuc if (result >= 0x110000)
1552*1230fdc1SLionel Sambuc return -1;
1553*1230fdc1SLionel Sambuc }
1554*1230fdc1SLionel Sambuc }
1555*1230fdc1SLionel Sambuc else {
1556*1230fdc1SLionel Sambuc for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1557*1230fdc1SLionel Sambuc int c = BYTE_TO_ASCII(enc, ptr);
1558*1230fdc1SLionel Sambuc result *= 10;
1559*1230fdc1SLionel Sambuc result += (c - ASCII_0);
1560*1230fdc1SLionel Sambuc if (result >= 0x110000)
1561*1230fdc1SLionel Sambuc return -1;
1562*1230fdc1SLionel Sambuc }
1563*1230fdc1SLionel Sambuc }
1564*1230fdc1SLionel Sambuc return checkCharRefNumber(result);
1565*1230fdc1SLionel Sambuc }
1566*1230fdc1SLionel Sambuc
1567*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(predefinedEntityName)1568*1230fdc1SLionel Sambuc PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
1569*1230fdc1SLionel Sambuc const char *end)
1570*1230fdc1SLionel Sambuc {
1571*1230fdc1SLionel Sambuc switch ((end - ptr)/MINBPC(enc)) {
1572*1230fdc1SLionel Sambuc case 2:
1573*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1574*1230fdc1SLionel Sambuc switch (BYTE_TO_ASCII(enc, ptr)) {
1575*1230fdc1SLionel Sambuc case ASCII_l:
1576*1230fdc1SLionel Sambuc return ASCII_LT;
1577*1230fdc1SLionel Sambuc case ASCII_g:
1578*1230fdc1SLionel Sambuc return ASCII_GT;
1579*1230fdc1SLionel Sambuc }
1580*1230fdc1SLionel Sambuc }
1581*1230fdc1SLionel Sambuc break;
1582*1230fdc1SLionel Sambuc case 3:
1583*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1584*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1585*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1586*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1587*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_p))
1588*1230fdc1SLionel Sambuc return ASCII_AMP;
1589*1230fdc1SLionel Sambuc }
1590*1230fdc1SLionel Sambuc }
1591*1230fdc1SLionel Sambuc break;
1592*1230fdc1SLionel Sambuc case 4:
1593*1230fdc1SLionel Sambuc switch (BYTE_TO_ASCII(enc, ptr)) {
1594*1230fdc1SLionel Sambuc case ASCII_q:
1595*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1596*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1597*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1598*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1599*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1600*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_t))
1601*1230fdc1SLionel Sambuc return ASCII_QUOT;
1602*1230fdc1SLionel Sambuc }
1603*1230fdc1SLionel Sambuc }
1604*1230fdc1SLionel Sambuc break;
1605*1230fdc1SLionel Sambuc case ASCII_a:
1606*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1607*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1608*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1609*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1610*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1611*1230fdc1SLionel Sambuc if (CHAR_MATCHES(enc, ptr, ASCII_s))
1612*1230fdc1SLionel Sambuc return ASCII_APOS;
1613*1230fdc1SLionel Sambuc }
1614*1230fdc1SLionel Sambuc }
1615*1230fdc1SLionel Sambuc break;
1616*1230fdc1SLionel Sambuc }
1617*1230fdc1SLionel Sambuc }
1618*1230fdc1SLionel Sambuc return 0;
1619*1230fdc1SLionel Sambuc }
1620*1230fdc1SLionel Sambuc
1621*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(sameName)1622*1230fdc1SLionel Sambuc PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
1623*1230fdc1SLionel Sambuc {
1624*1230fdc1SLionel Sambuc for (;;) {
1625*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr1)) {
1626*1230fdc1SLionel Sambuc #define LEAD_CASE(n) \
1627*1230fdc1SLionel Sambuc case BT_LEAD ## n: \
1628*1230fdc1SLionel Sambuc if (*ptr1++ != *ptr2++) \
1629*1230fdc1SLionel Sambuc return 0;
1630*1230fdc1SLionel Sambuc LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1631*1230fdc1SLionel Sambuc #undef LEAD_CASE
1632*1230fdc1SLionel Sambuc /* fall through */
1633*1230fdc1SLionel Sambuc if (*ptr1++ != *ptr2++)
1634*1230fdc1SLionel Sambuc return 0;
1635*1230fdc1SLionel Sambuc break;
1636*1230fdc1SLionel Sambuc case BT_NONASCII:
1637*1230fdc1SLionel Sambuc case BT_NMSTRT:
1638*1230fdc1SLionel Sambuc #ifdef XML_NS
1639*1230fdc1SLionel Sambuc case BT_COLON:
1640*1230fdc1SLionel Sambuc #endif
1641*1230fdc1SLionel Sambuc case BT_HEX:
1642*1230fdc1SLionel Sambuc case BT_DIGIT:
1643*1230fdc1SLionel Sambuc case BT_NAME:
1644*1230fdc1SLionel Sambuc case BT_MINUS:
1645*1230fdc1SLionel Sambuc if (*ptr2++ != *ptr1++)
1646*1230fdc1SLionel Sambuc return 0;
1647*1230fdc1SLionel Sambuc if (MINBPC(enc) > 1) {
1648*1230fdc1SLionel Sambuc if (*ptr2++ != *ptr1++)
1649*1230fdc1SLionel Sambuc return 0;
1650*1230fdc1SLionel Sambuc if (MINBPC(enc) > 2) {
1651*1230fdc1SLionel Sambuc if (*ptr2++ != *ptr1++)
1652*1230fdc1SLionel Sambuc return 0;
1653*1230fdc1SLionel Sambuc if (MINBPC(enc) > 3) {
1654*1230fdc1SLionel Sambuc if (*ptr2++ != *ptr1++)
1655*1230fdc1SLionel Sambuc return 0;
1656*1230fdc1SLionel Sambuc }
1657*1230fdc1SLionel Sambuc }
1658*1230fdc1SLionel Sambuc }
1659*1230fdc1SLionel Sambuc break;
1660*1230fdc1SLionel Sambuc default:
1661*1230fdc1SLionel Sambuc if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1662*1230fdc1SLionel Sambuc return 1;
1663*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr2)) {
1664*1230fdc1SLionel Sambuc case BT_LEAD2:
1665*1230fdc1SLionel Sambuc case BT_LEAD3:
1666*1230fdc1SLionel Sambuc case BT_LEAD4:
1667*1230fdc1SLionel Sambuc case BT_NONASCII:
1668*1230fdc1SLionel Sambuc case BT_NMSTRT:
1669*1230fdc1SLionel Sambuc #ifdef XML_NS
1670*1230fdc1SLionel Sambuc case BT_COLON:
1671*1230fdc1SLionel Sambuc #endif
1672*1230fdc1SLionel Sambuc case BT_HEX:
1673*1230fdc1SLionel Sambuc case BT_DIGIT:
1674*1230fdc1SLionel Sambuc case BT_NAME:
1675*1230fdc1SLionel Sambuc case BT_MINUS:
1676*1230fdc1SLionel Sambuc return 0;
1677*1230fdc1SLionel Sambuc default:
1678*1230fdc1SLionel Sambuc return 1;
1679*1230fdc1SLionel Sambuc }
1680*1230fdc1SLionel Sambuc }
1681*1230fdc1SLionel Sambuc }
1682*1230fdc1SLionel Sambuc /* not reached */
1683*1230fdc1SLionel Sambuc }
1684*1230fdc1SLionel Sambuc
1685*1230fdc1SLionel Sambuc static int PTRCALL
PREFIX(nameMatchesAscii)1686*1230fdc1SLionel Sambuc PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
1687*1230fdc1SLionel Sambuc const char *end1, const char *ptr2)
1688*1230fdc1SLionel Sambuc {
1689*1230fdc1SLionel Sambuc for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1690*1230fdc1SLionel Sambuc if (ptr1 == end1)
1691*1230fdc1SLionel Sambuc return 0;
1692*1230fdc1SLionel Sambuc if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1693*1230fdc1SLionel Sambuc return 0;
1694*1230fdc1SLionel Sambuc }
1695*1230fdc1SLionel Sambuc return ptr1 == end1;
1696*1230fdc1SLionel Sambuc }
1697*1230fdc1SLionel Sambuc
1698*1230fdc1SLionel Sambuc static int PTRFASTCALL
PREFIX(nameLength)1699*1230fdc1SLionel Sambuc PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1700*1230fdc1SLionel Sambuc {
1701*1230fdc1SLionel Sambuc const char *start = ptr;
1702*1230fdc1SLionel Sambuc for (;;) {
1703*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
1704*1230fdc1SLionel Sambuc #define LEAD_CASE(n) \
1705*1230fdc1SLionel Sambuc case BT_LEAD ## n: ptr += n; break;
1706*1230fdc1SLionel Sambuc LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1707*1230fdc1SLionel Sambuc #undef LEAD_CASE
1708*1230fdc1SLionel Sambuc case BT_NONASCII:
1709*1230fdc1SLionel Sambuc case BT_NMSTRT:
1710*1230fdc1SLionel Sambuc #ifdef XML_NS
1711*1230fdc1SLionel Sambuc case BT_COLON:
1712*1230fdc1SLionel Sambuc #endif
1713*1230fdc1SLionel Sambuc case BT_HEX:
1714*1230fdc1SLionel Sambuc case BT_DIGIT:
1715*1230fdc1SLionel Sambuc case BT_NAME:
1716*1230fdc1SLionel Sambuc case BT_MINUS:
1717*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1718*1230fdc1SLionel Sambuc break;
1719*1230fdc1SLionel Sambuc default:
1720*1230fdc1SLionel Sambuc return (int)(ptr - start);
1721*1230fdc1SLionel Sambuc }
1722*1230fdc1SLionel Sambuc }
1723*1230fdc1SLionel Sambuc }
1724*1230fdc1SLionel Sambuc
1725*1230fdc1SLionel Sambuc static const char * PTRFASTCALL
PREFIX(skipS)1726*1230fdc1SLionel Sambuc PREFIX(skipS)(const ENCODING *enc, const char *ptr)
1727*1230fdc1SLionel Sambuc {
1728*1230fdc1SLionel Sambuc for (;;) {
1729*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
1730*1230fdc1SLionel Sambuc case BT_LF:
1731*1230fdc1SLionel Sambuc case BT_CR:
1732*1230fdc1SLionel Sambuc case BT_S:
1733*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1734*1230fdc1SLionel Sambuc break;
1735*1230fdc1SLionel Sambuc default:
1736*1230fdc1SLionel Sambuc return ptr;
1737*1230fdc1SLionel Sambuc }
1738*1230fdc1SLionel Sambuc }
1739*1230fdc1SLionel Sambuc }
1740*1230fdc1SLionel Sambuc
1741*1230fdc1SLionel Sambuc static void PTRCALL
PREFIX(updatePosition)1742*1230fdc1SLionel Sambuc PREFIX(updatePosition)(const ENCODING *enc,
1743*1230fdc1SLionel Sambuc const char *ptr,
1744*1230fdc1SLionel Sambuc const char *end,
1745*1230fdc1SLionel Sambuc POSITION *pos)
1746*1230fdc1SLionel Sambuc {
1747*1230fdc1SLionel Sambuc while (ptr < end) {
1748*1230fdc1SLionel Sambuc switch (BYTE_TYPE(enc, ptr)) {
1749*1230fdc1SLionel Sambuc #define LEAD_CASE(n) \
1750*1230fdc1SLionel Sambuc case BT_LEAD ## n: \
1751*1230fdc1SLionel Sambuc ptr += n; \
1752*1230fdc1SLionel Sambuc break;
1753*1230fdc1SLionel Sambuc LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1754*1230fdc1SLionel Sambuc #undef LEAD_CASE
1755*1230fdc1SLionel Sambuc case BT_LF:
1756*1230fdc1SLionel Sambuc pos->columnNumber = (XML_Size)-1;
1757*1230fdc1SLionel Sambuc pos->lineNumber++;
1758*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1759*1230fdc1SLionel Sambuc break;
1760*1230fdc1SLionel Sambuc case BT_CR:
1761*1230fdc1SLionel Sambuc pos->lineNumber++;
1762*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1763*1230fdc1SLionel Sambuc if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
1764*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1765*1230fdc1SLionel Sambuc pos->columnNumber = (XML_Size)-1;
1766*1230fdc1SLionel Sambuc break;
1767*1230fdc1SLionel Sambuc default:
1768*1230fdc1SLionel Sambuc ptr += MINBPC(enc);
1769*1230fdc1SLionel Sambuc break;
1770*1230fdc1SLionel Sambuc }
1771*1230fdc1SLionel Sambuc pos->columnNumber++;
1772*1230fdc1SLionel Sambuc }
1773*1230fdc1SLionel Sambuc }
1774*1230fdc1SLionel Sambuc
1775*1230fdc1SLionel Sambuc #undef DO_LEAD_CASE
1776*1230fdc1SLionel Sambuc #undef MULTIBYTE_CASES
1777*1230fdc1SLionel Sambuc #undef INVALID_CASES
1778*1230fdc1SLionel Sambuc #undef CHECK_NAME_CASE
1779*1230fdc1SLionel Sambuc #undef CHECK_NAME_CASES
1780*1230fdc1SLionel Sambuc #undef CHECK_NMSTRT_CASE
1781*1230fdc1SLionel Sambuc #undef CHECK_NMSTRT_CASES
1782*1230fdc1SLionel Sambuc
1783*1230fdc1SLionel Sambuc #endif /* XML_TOK_IMPL_C */
1784