Geant4  9.6.p02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
xmltok_impl.c
Go to the documentation of this file.
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2  See the file COPYING for copying permission.
3 */
4 
5 /* This file is included! */
6 #ifdef XML_TOK_IMPL_C
7 
/* If the including file has not supplied IS_INVALID_CHAR, fall back to a
   version that never rejects a multi-byte sequence. */
#ifndef IS_INVALID_CHAR
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif

/* The macros below expand to `case` labels and are meant to be placed
   directly inside a `switch (BYTE_TYPE(enc, ptr))`.  They may `return`
   from the enclosing function. */

/* Case for an n-byte lead byte: return XML_TOK_PARTIAL_CHAR when fewer than
   n bytes remain, XML_TOK_INVALID (with *nextTokPtr = ptr) when
   IS_INVALID_CHAR rejects the sequence, otherwise skip the n bytes.
   Note: `enc` and `end` are taken from the enclosing scope, not from the
   macro arguments. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - ptr < n) \
        return XML_TOK_PARTIAL_CHAR; \
      if (IS_INVALID_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      ptr += n; \
      break;

/* Cases for every byte type that can make the input invalid: the 2/3/4-byte
   lead cases above, plus BT_NONXML, BT_MALFORM and BT_TRAIL, which always
   yield XML_TOK_INVALID at ptr. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_NONXML: \
  case BT_MALFORM: \
  case BT_TRAIL: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;

/* Case for an n-byte lead byte inside a name: the sequence must satisfy
   IS_NAME_CHAR, otherwise XML_TOK_INVALID is returned at ptr. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NAME_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     ptr += n; \
     break;

/* Cases that consume one name character.  A BT_NONASCII unit is checked
   with IS_NAME_CHAR_MINBPC and then deliberately falls through to the
   single-unit advance shared with the ASCII name byte types. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    ptr += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)

/* Case for an n-byte lead byte at the start of a name: the sequence must
   satisfy IS_NMSTRT_CHAR, otherwise XML_TOK_INVALID is returned at ptr. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     ptr += n; \
     break;

/* Cases that consume one name-start character.  A BT_NONASCII unit is
   checked with IS_NMSTRT_CHAR_MINBPC and then deliberately falls through
   to the single-unit advance. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
    ptr += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)

/* PREFIX decorates every function name defined in this file; when the
   including file has not defined it, names are used undecorated. */
#ifndef PREFIX
#define PREFIX(ident) ident
#endif
89 
90 /* ptr points to character following "<!-" */
91 
92 static int PTRCALL
93 PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
94  const char *end, const char **nextTokPtr)
95 {
96  if (ptr != end) {
97  if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
98  *nextTokPtr = ptr;
99  return XML_TOK_INVALID;
100  }
101  ptr += MINBPC(enc);
102  while (ptr != end) {
103  switch (BYTE_TYPE(enc, ptr)) {
104  INVALID_CASES(ptr, nextTokPtr)
105  case BT_MINUS:
106  if ((ptr += MINBPC(enc)) == end)
107  return XML_TOK_PARTIAL;
108  if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
109  if ((ptr += MINBPC(enc)) == end)
110  return XML_TOK_PARTIAL;
111  if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
112  *nextTokPtr = ptr;
113  return XML_TOK_INVALID;
114  }
115  *nextTokPtr = ptr + MINBPC(enc);
116  return XML_TOK_COMMENT;
117  }
118  break;
119  default:
120  ptr += MINBPC(enc);
121  break;
122  }
123  }
124  }
125  return XML_TOK_PARTIAL;
126 }
127 
128 /* ptr points to character following "<!" */
129 
130 static int PTRCALL
131 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
132  const char *end, const char **nextTokPtr)
133 {
134  if (ptr == end)
135  return XML_TOK_PARTIAL;
136  switch (BYTE_TYPE(enc, ptr)) {
137  case BT_MINUS:
138  return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
139  case BT_LSQB:
140  *nextTokPtr = ptr + MINBPC(enc);
141  return XML_TOK_COND_SECT_OPEN;
142  case BT_NMSTRT:
143  case BT_HEX:
144  ptr += MINBPC(enc);
145  break;
146  default:
147  *nextTokPtr = ptr;
148  return XML_TOK_INVALID;
149  }
150  while (ptr != end) {
151  switch (BYTE_TYPE(enc, ptr)) {
152  case BT_PERCNT:
153  if (ptr + MINBPC(enc) == end)
154  return XML_TOK_PARTIAL;
155  /* don't allow <!ENTITY% foo "whatever"> */
156  switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
157  case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
158  *nextTokPtr = ptr;
159  return XML_TOK_INVALID;
160  }
161  /* fall through */
162  case BT_S: case BT_CR: case BT_LF:
163  *nextTokPtr = ptr;
164  return XML_TOK_DECL_OPEN;
165  case BT_NMSTRT:
166  case BT_HEX:
167  ptr += MINBPC(enc);
168  break;
169  default:
170  *nextTokPtr = ptr;
171  return XML_TOK_INVALID;
172  }
173  }
174  return XML_TOK_PARTIAL;
175 }
176 
177 static int PTRCALL
178 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
179  const char *end, int *tokPtr)
180 {
181  int upper = 0;
182  *tokPtr = XML_TOK_PI;
183  if (end - ptr != MINBPC(enc)*3)
184  return 1;
185  switch (BYTE_TO_ASCII(enc, ptr)) {
186  case ASCII_x:
187  break;
188  case ASCII_X:
189  upper = 1;
190  break;
191  default:
192  return 1;
193  }
194  ptr += MINBPC(enc);
195  switch (BYTE_TO_ASCII(enc, ptr)) {
196  case ASCII_m:
197  break;
198  case ASCII_M:
199  upper = 1;
200  break;
201  default:
202  return 1;
203  }
204  ptr += MINBPC(enc);
205  switch (BYTE_TO_ASCII(enc, ptr)) {
206  case ASCII_l:
207  break;
208  case ASCII_L:
209  upper = 1;
210  break;
211  default:
212  return 1;
213  }
214  if (upper)
215  return 0;
216  *tokPtr = XML_TOK_XML_DECL;
217  return 1;
218 }
219 
220 /* ptr points to character following "<?" */
221 
222 static int PTRCALL
223 PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
224  const char *end, const char **nextTokPtr)
225 {
226  int tok;
227  const char *target = ptr;
228  if (ptr == end)
229  return XML_TOK_PARTIAL;
230  switch (BYTE_TYPE(enc, ptr)) {
231  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
232  default:
233  *nextTokPtr = ptr;
234  return XML_TOK_INVALID;
235  }
236  while (ptr != end) {
237  switch (BYTE_TYPE(enc, ptr)) {
238  CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
239  case BT_S: case BT_CR: case BT_LF:
240  if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
241  *nextTokPtr = ptr;
242  return XML_TOK_INVALID;
243  }
244  ptr += MINBPC(enc);
245  while (ptr != end) {
246  switch (BYTE_TYPE(enc, ptr)) {
247  INVALID_CASES(ptr, nextTokPtr)
248  case BT_QUEST:
249  ptr += MINBPC(enc);
250  if (ptr == end)
251  return XML_TOK_PARTIAL;
252  if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
253  *nextTokPtr = ptr + MINBPC(enc);
254  return tok;
255  }
256  break;
257  default:
258  ptr += MINBPC(enc);
259  break;
260  }
261  }
262  return XML_TOK_PARTIAL;
263  case BT_QUEST:
264  if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
265  *nextTokPtr = ptr;
266  return XML_TOK_INVALID;
267  }
268  ptr += MINBPC(enc);
269  if (ptr == end)
270  return XML_TOK_PARTIAL;
271  if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
272  *nextTokPtr = ptr + MINBPC(enc);
273  return tok;
274  }
275  /* fall through */
276  default:
277  *nextTokPtr = ptr;
278  return XML_TOK_INVALID;
279  }
280  }
281  return XML_TOK_PARTIAL;
282 }
283 
284 static int PTRCALL
285 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
286  const char *end, const char **nextTokPtr)
287 {
288  static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
290  int i;
291  /* CDATA[ */
292  if (end - ptr < 6 * MINBPC(enc))
293  return XML_TOK_PARTIAL;
294  for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
295  if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
296  *nextTokPtr = ptr;
297  return XML_TOK_INVALID;
298  }
299  }
300  *nextTokPtr = ptr;
302 }
303 
304 static int PTRCALL
305 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
306  const char *end, const char **nextTokPtr)
307 {
308  if (ptr == end)
309  return XML_TOK_NONE;
310  if (MINBPC(enc) > 1) {
311  size_t n = end - ptr;
312  if (n & (MINBPC(enc) - 1)) {
313  n &= ~(MINBPC(enc) - 1);
314  if (n == 0)
315  return XML_TOK_PARTIAL;
316  end = ptr + n;
317  }
318  }
319  switch (BYTE_TYPE(enc, ptr)) {
320  case BT_RSQB:
321  ptr += MINBPC(enc);
322  if (ptr == end)
323  return XML_TOK_PARTIAL;
324  if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
325  break;
326  ptr += MINBPC(enc);
327  if (ptr == end)
328  return XML_TOK_PARTIAL;
329  if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
330  ptr -= MINBPC(enc);
331  break;
332  }
333  *nextTokPtr = ptr + MINBPC(enc);
335  case BT_CR:
336  ptr += MINBPC(enc);
337  if (ptr == end)
338  return XML_TOK_PARTIAL;
339  if (BYTE_TYPE(enc, ptr) == BT_LF)
340  ptr += MINBPC(enc);
341  *nextTokPtr = ptr;
342  return XML_TOK_DATA_NEWLINE;
343  case BT_LF:
344  *nextTokPtr = ptr + MINBPC(enc);
345  return XML_TOK_DATA_NEWLINE;
346  INVALID_CASES(ptr, nextTokPtr)
347  default:
348  ptr += MINBPC(enc);
349  break;
350  }
351  while (ptr != end) {
352  switch (BYTE_TYPE(enc, ptr)) {
353 #define LEAD_CASE(n) \
354  case BT_LEAD ## n: \
355  if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
356  *nextTokPtr = ptr; \
357  return XML_TOK_DATA_CHARS; \
358  } \
359  ptr += n; \
360  break;
361  LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
362 #undef LEAD_CASE
363  case BT_NONXML:
364  case BT_MALFORM:
365  case BT_TRAIL:
366  case BT_CR:
367  case BT_LF:
368  case BT_RSQB:
369  *nextTokPtr = ptr;
370  return XML_TOK_DATA_CHARS;
371  default:
372  ptr += MINBPC(enc);
373  break;
374  }
375  }
376  *nextTokPtr = ptr;
377  return XML_TOK_DATA_CHARS;
378 }
379 
380 /* ptr points to character following "</" */
381 
382 static int PTRCALL
383 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
384  const char *end, const char **nextTokPtr)
385 {
386  if (ptr == end)
387  return XML_TOK_PARTIAL;
388  switch (BYTE_TYPE(enc, ptr)) {
389  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
390  default:
391  *nextTokPtr = ptr;
392  return XML_TOK_INVALID;
393  }
394  while (ptr != end) {
395  switch (BYTE_TYPE(enc, ptr)) {
396  CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
397  case BT_S: case BT_CR: case BT_LF:
398  for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
399  switch (BYTE_TYPE(enc, ptr)) {
400  case BT_S: case BT_CR: case BT_LF:
401  break;
402  case BT_GT:
403  *nextTokPtr = ptr + MINBPC(enc);
404  return XML_TOK_END_TAG;
405  default:
406  *nextTokPtr = ptr;
407  return XML_TOK_INVALID;
408  }
409  }
410  return XML_TOK_PARTIAL;
411 #ifdef XML_NS
412  case BT_COLON:
413  /* no need to check qname syntax here,
414  since end-tag must match exactly */
415  ptr += MINBPC(enc);
416  break;
417 #endif
418  case BT_GT:
419  *nextTokPtr = ptr + MINBPC(enc);
420  return XML_TOK_END_TAG;
421  default:
422  *nextTokPtr = ptr;
423  return XML_TOK_INVALID;
424  }
425  }
426  return XML_TOK_PARTIAL;
427 }
428 
429 /* ptr points to character following "&#X" */
430 
431 static int PTRCALL
432 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
433  const char *end, const char **nextTokPtr)
434 {
435  if (ptr != end) {
436  switch (BYTE_TYPE(enc, ptr)) {
437  case BT_DIGIT:
438  case BT_HEX:
439  break;
440  default:
441  *nextTokPtr = ptr;
442  return XML_TOK_INVALID;
443  }
444  for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
445  switch (BYTE_TYPE(enc, ptr)) {
446  case BT_DIGIT:
447  case BT_HEX:
448  break;
449  case BT_SEMI:
450  *nextTokPtr = ptr + MINBPC(enc);
451  return XML_TOK_CHAR_REF;
452  default:
453  *nextTokPtr = ptr;
454  return XML_TOK_INVALID;
455  }
456  }
457  }
458  return XML_TOK_PARTIAL;
459 }
460 
461 /* ptr points to character following "&#" */
462 
463 static int PTRCALL
464 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
465  const char *end, const char **nextTokPtr)
466 {
467  if (ptr != end) {
468  if (CHAR_MATCHES(enc, ptr, ASCII_x))
469  return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
470  switch (BYTE_TYPE(enc, ptr)) {
471  case BT_DIGIT:
472  break;
473  default:
474  *nextTokPtr = ptr;
475  return XML_TOK_INVALID;
476  }
477  for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
478  switch (BYTE_TYPE(enc, ptr)) {
479  case BT_DIGIT:
480  break;
481  case BT_SEMI:
482  *nextTokPtr = ptr + MINBPC(enc);
483  return XML_TOK_CHAR_REF;
484  default:
485  *nextTokPtr = ptr;
486  return XML_TOK_INVALID;
487  }
488  }
489  }
490  return XML_TOK_PARTIAL;
491 }
492 
493 /* ptr points to character following "&" */
494 
495 static int PTRCALL
496 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
497  const char **nextTokPtr)
498 {
499  if (ptr == end)
500  return XML_TOK_PARTIAL;
501  switch (BYTE_TYPE(enc, ptr)) {
502  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
503  case BT_NUM:
504  return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
505  default:
506  *nextTokPtr = ptr;
507  return XML_TOK_INVALID;
508  }
509  while (ptr != end) {
510  switch (BYTE_TYPE(enc, ptr)) {
511  CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
512  case BT_SEMI:
513  *nextTokPtr = ptr + MINBPC(enc);
514  return XML_TOK_ENTITY_REF;
515  default:
516  *nextTokPtr = ptr;
517  return XML_TOK_INVALID;
518  }
519  }
520  return XML_TOK_PARTIAL;
521 }
522 
523 /* ptr points to character following first character of attribute name */
524 
525 static int PTRCALL
526 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
527  const char **nextTokPtr)
528 {
529 #ifdef XML_NS
530  int hadColon = 0;
531 #endif
532  while (ptr != end) {
533  switch (BYTE_TYPE(enc, ptr)) {
534  CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
535 #ifdef XML_NS
536  case BT_COLON:
537  if (hadColon) {
538  *nextTokPtr = ptr;
539  return XML_TOK_INVALID;
540  }
541  hadColon = 1;
542  ptr += MINBPC(enc);
543  if (ptr == end)
544  return XML_TOK_PARTIAL;
545  switch (BYTE_TYPE(enc, ptr)) {
546  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
547  default:
548  *nextTokPtr = ptr;
549  return XML_TOK_INVALID;
550  }
551  break;
552 #endif
553  case BT_S: case BT_CR: case BT_LF:
554  for (;;) {
555  int t;
556 
557  ptr += MINBPC(enc);
558  if (ptr == end)
559  return XML_TOK_PARTIAL;
560  t = BYTE_TYPE(enc, ptr);
561  if (t == BT_EQUALS)
562  break;
563  switch (t) {
564  case BT_S:
565  case BT_LF:
566  case BT_CR:
567  break;
568  default:
569  *nextTokPtr = ptr;
570  return XML_TOK_INVALID;
571  }
572  }
573  /* fall through */
574  case BT_EQUALS:
575  {
576  int open;
577 #ifdef XML_NS
578  hadColon = 0;
579 #endif
580  for (;;) {
581  ptr += MINBPC(enc);
582  if (ptr == end)
583  return XML_TOK_PARTIAL;
584  open = BYTE_TYPE(enc, ptr);
585  if (open == BT_QUOT || open == BT_APOS)
586  break;
587  switch (open) {
588  case BT_S:
589  case BT_LF:
590  case BT_CR:
591  break;
592  default:
593  *nextTokPtr = ptr;
594  return XML_TOK_INVALID;
595  }
596  }
597  ptr += MINBPC(enc);
598  /* in attribute value */
599  for (;;) {
600  int t;
601  if (ptr == end)
602  return XML_TOK_PARTIAL;
603  t = BYTE_TYPE(enc, ptr);
604  if (t == open)
605  break;
606  switch (t) {
607  INVALID_CASES(ptr, nextTokPtr)
608  case BT_AMP:
609  {
610  int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
611  if (tok <= 0) {
612  if (tok == XML_TOK_INVALID)
613  *nextTokPtr = ptr;
614  return tok;
615  }
616  break;
617  }
618  case BT_LT:
619  *nextTokPtr = ptr;
620  return XML_TOK_INVALID;
621  default:
622  ptr += MINBPC(enc);
623  break;
624  }
625  }
626  ptr += MINBPC(enc);
627  if (ptr == end)
628  return XML_TOK_PARTIAL;
629  switch (BYTE_TYPE(enc, ptr)) {
630  case BT_S:
631  case BT_CR:
632  case BT_LF:
633  break;
634  case BT_SOL:
635  goto sol;
636  case BT_GT:
637  goto gt;
638  default:
639  *nextTokPtr = ptr;
640  return XML_TOK_INVALID;
641  }
642  /* ptr points to closing quote */
643  for (;;) {
644  ptr += MINBPC(enc);
645  if (ptr == end)
646  return XML_TOK_PARTIAL;
647  switch (BYTE_TYPE(enc, ptr)) {
648  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
649  case BT_S: case BT_CR: case BT_LF:
650  continue;
651  case BT_GT:
652  gt:
653  *nextTokPtr = ptr + MINBPC(enc);
655  case BT_SOL:
656  sol:
657  ptr += MINBPC(enc);
658  if (ptr == end)
659  return XML_TOK_PARTIAL;
660  if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
661  *nextTokPtr = ptr;
662  return XML_TOK_INVALID;
663  }
664  *nextTokPtr = ptr + MINBPC(enc);
666  default:
667  *nextTokPtr = ptr;
668  return XML_TOK_INVALID;
669  }
670  break;
671  }
672  break;
673  }
674  default:
675  *nextTokPtr = ptr;
676  return XML_TOK_INVALID;
677  }
678  }
679  return XML_TOK_PARTIAL;
680 }
681 
682 /* ptr points to character following "<" */
683 
684 static int PTRCALL
685 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
686  const char **nextTokPtr)
687 {
688 #ifdef XML_NS
689  int hadColon;
690 #endif
691  if (ptr == end)
692  return XML_TOK_PARTIAL;
693  switch (BYTE_TYPE(enc, ptr)) {
694  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
695  case BT_EXCL:
696  if ((ptr += MINBPC(enc)) == end)
697  return XML_TOK_PARTIAL;
698  switch (BYTE_TYPE(enc, ptr)) {
699  case BT_MINUS:
700  return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
701  case BT_LSQB:
702  return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
703  end, nextTokPtr);
704  }
705  *nextTokPtr = ptr;
706  return XML_TOK_INVALID;
707  case BT_QUEST:
708  return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
709  case BT_SOL:
710  return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
711  default:
712  *nextTokPtr = ptr;
713  return XML_TOK_INVALID;
714  }
715 #ifdef XML_NS
716  hadColon = 0;
717 #endif
718  /* we have a start-tag */
719  while (ptr != end) {
720  switch (BYTE_TYPE(enc, ptr)) {
721  CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
722 #ifdef XML_NS
723  case BT_COLON:
724  if (hadColon) {
725  *nextTokPtr = ptr;
726  return XML_TOK_INVALID;
727  }
728  hadColon = 1;
729  ptr += MINBPC(enc);
730  if (ptr == end)
731  return XML_TOK_PARTIAL;
732  switch (BYTE_TYPE(enc, ptr)) {
733  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
734  default:
735  *nextTokPtr = ptr;
736  return XML_TOK_INVALID;
737  }
738  break;
739 #endif
740  case BT_S: case BT_CR: case BT_LF:
741  {
742  ptr += MINBPC(enc);
743  while (ptr != end) {
744  switch (BYTE_TYPE(enc, ptr)) {
745  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
746  case BT_GT:
747  goto gt;
748  case BT_SOL:
749  goto sol;
750  case BT_S: case BT_CR: case BT_LF:
751  ptr += MINBPC(enc);
752  continue;
753  default:
754  *nextTokPtr = ptr;
755  return XML_TOK_INVALID;
756  }
757  return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
758  }
759  return XML_TOK_PARTIAL;
760  }
761  case BT_GT:
762  gt:
763  *nextTokPtr = ptr + MINBPC(enc);
765  case BT_SOL:
766  sol:
767  ptr += MINBPC(enc);
768  if (ptr == end)
769  return XML_TOK_PARTIAL;
770  if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
771  *nextTokPtr = ptr;
772  return XML_TOK_INVALID;
773  }
774  *nextTokPtr = ptr + MINBPC(enc);
776  default:
777  *nextTokPtr = ptr;
778  return XML_TOK_INVALID;
779  }
780  }
781  return XML_TOK_PARTIAL;
782 }
783 
784 static int PTRCALL
785 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
786  const char **nextTokPtr)
787 {
788  if (ptr == end)
789  return XML_TOK_NONE;
790  if (MINBPC(enc) > 1) {
791  size_t n = end - ptr;
792  if (n & (MINBPC(enc) - 1)) {
793  n &= ~(MINBPC(enc) - 1);
794  if (n == 0)
795  return XML_TOK_PARTIAL;
796  end = ptr + n;
797  }
798  }
799  switch (BYTE_TYPE(enc, ptr)) {
800  case BT_LT:
801  return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
802  case BT_AMP:
803  return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
804  case BT_CR:
805  ptr += MINBPC(enc);
806  if (ptr == end)
807  return XML_TOK_TRAILING_CR;
808  if (BYTE_TYPE(enc, ptr) == BT_LF)
809  ptr += MINBPC(enc);
810  *nextTokPtr = ptr;
811  return XML_TOK_DATA_NEWLINE;
812  case BT_LF:
813  *nextTokPtr = ptr + MINBPC(enc);
814  return XML_TOK_DATA_NEWLINE;
815  case BT_RSQB:
816  ptr += MINBPC(enc);
817  if (ptr == end)
818  return XML_TOK_TRAILING_RSQB;
819  if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
820  break;
821  ptr += MINBPC(enc);
822  if (ptr == end)
823  return XML_TOK_TRAILING_RSQB;
824  if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
825  ptr -= MINBPC(enc);
826  break;
827  }
828  *nextTokPtr = ptr;
829  return XML_TOK_INVALID;
830  INVALID_CASES(ptr, nextTokPtr)
831  default:
832  ptr += MINBPC(enc);
833  break;
834  }
835  while (ptr != end) {
836  switch (BYTE_TYPE(enc, ptr)) {
837 #define LEAD_CASE(n) \
838  case BT_LEAD ## n: \
839  if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
840  *nextTokPtr = ptr; \
841  return XML_TOK_DATA_CHARS; \
842  } \
843  ptr += n; \
844  break;
845  LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
846 #undef LEAD_CASE
847  case BT_RSQB:
848  if (ptr + MINBPC(enc) != end) {
849  if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
850  ptr += MINBPC(enc);
851  break;
852  }
853  if (ptr + 2*MINBPC(enc) != end) {
854  if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
855  ptr += MINBPC(enc);
856  break;
857  }
858  *nextTokPtr = ptr + 2*MINBPC(enc);
859  return XML_TOK_INVALID;
860  }
861  }
862  /* fall through */
863  case BT_AMP:
864  case BT_LT:
865  case BT_NONXML:
866  case BT_MALFORM:
867  case BT_TRAIL:
868  case BT_CR:
869  case BT_LF:
870  *nextTokPtr = ptr;
871  return XML_TOK_DATA_CHARS;
872  default:
873  ptr += MINBPC(enc);
874  break;
875  }
876  }
877  *nextTokPtr = ptr;
878  return XML_TOK_DATA_CHARS;
879 }
880 
881 /* ptr points to character following "%" */
882 
883 static int PTRCALL
884 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
885  const char **nextTokPtr)
886 {
887  if (ptr == end)
888  return -XML_TOK_PERCENT;
889  switch (BYTE_TYPE(enc, ptr)) {
890  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
891  case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
892  *nextTokPtr = ptr;
893  return XML_TOK_PERCENT;
894  default:
895  *nextTokPtr = ptr;
896  return XML_TOK_INVALID;
897  }
898  while (ptr != end) {
899  switch (BYTE_TYPE(enc, ptr)) {
900  CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
901  case BT_SEMI:
902  *nextTokPtr = ptr + MINBPC(enc);
904  default:
905  *nextTokPtr = ptr;
906  return XML_TOK_INVALID;
907  }
908  }
909  return XML_TOK_PARTIAL;
910 }
911 
912 static int PTRCALL
913 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
914  const char **nextTokPtr)
915 {
916  if (ptr == end)
917  return XML_TOK_PARTIAL;
918  switch (BYTE_TYPE(enc, ptr)) {
919  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
920  default:
921  *nextTokPtr = ptr;
922  return XML_TOK_INVALID;
923  }
924  while (ptr != end) {
925  switch (BYTE_TYPE(enc, ptr)) {
926  CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
927  case BT_CR: case BT_LF: case BT_S:
928  case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
929  *nextTokPtr = ptr;
930  return XML_TOK_POUND_NAME;
931  default:
932  *nextTokPtr = ptr;
933  return XML_TOK_INVALID;
934  }
935  }
936  return -XML_TOK_POUND_NAME;
937 }
938 
939 static int PTRCALL
940 PREFIX(scanLit)(int open, const ENCODING *enc,
941  const char *ptr, const char *end,
942  const char **nextTokPtr)
943 {
944  while (ptr != end) {
945  int t = BYTE_TYPE(enc, ptr);
946  switch (t) {
947  INVALID_CASES(ptr, nextTokPtr)
948  case BT_QUOT:
949  case BT_APOS:
950  ptr += MINBPC(enc);
951  if (t != open)
952  break;
953  if (ptr == end)
954  return -XML_TOK_LITERAL;
955  *nextTokPtr = ptr;
956  switch (BYTE_TYPE(enc, ptr)) {
957  case BT_S: case BT_CR: case BT_LF:
958  case BT_GT: case BT_PERCNT: case BT_LSQB:
959  return XML_TOK_LITERAL;
960  default:
961  return XML_TOK_INVALID;
962  }
963  default:
964  ptr += MINBPC(enc);
965  break;
966  }
967  }
968  return XML_TOK_PARTIAL;
969 }
970 
971 static int PTRCALL
972 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
973  const char **nextTokPtr)
974 {
975  int tok;
976  if (ptr == end)
977  return XML_TOK_NONE;
978  if (MINBPC(enc) > 1) {
979  size_t n = end - ptr;
980  if (n & (MINBPC(enc) - 1)) {
981  n &= ~(MINBPC(enc) - 1);
982  if (n == 0)
983  return XML_TOK_PARTIAL;
984  end = ptr + n;
985  }
986  }
987  switch (BYTE_TYPE(enc, ptr)) {
988  case BT_QUOT:
989  return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
990  case BT_APOS:
991  return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
992  case BT_LT:
993  {
994  ptr += MINBPC(enc);
995  if (ptr == end)
996  return XML_TOK_PARTIAL;
997  switch (BYTE_TYPE(enc, ptr)) {
998  case BT_EXCL:
999  return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1000  case BT_QUEST:
1001  return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1002  case BT_NMSTRT:
1003  case BT_HEX:
1004  case BT_NONASCII:
1005  case BT_LEAD2:
1006  case BT_LEAD3:
1007  case BT_LEAD4:
1008  *nextTokPtr = ptr - MINBPC(enc);
1009  return XML_TOK_INSTANCE_START;
1010  }
1011  *nextTokPtr = ptr;
1012  return XML_TOK_INVALID;
1013  }
1014  case BT_CR:
1015  if (ptr + MINBPC(enc) == end) {
1016  *nextTokPtr = end;
1017  /* indicate that this might be part of a CR/LF pair */
1018  return -XML_TOK_PROLOG_S;
1019  }
1020  /* fall through */
1021  case BT_S: case BT_LF:
1022  for (;;) {
1023  ptr += MINBPC(enc);
1024  if (ptr == end)
1025  break;
1026  switch (BYTE_TYPE(enc, ptr)) {
1027  case BT_S: case BT_LF:
1028  break;
1029  case BT_CR:
1030  /* don't split CR/LF pair */
1031  if (ptr + MINBPC(enc) != end)
1032  break;
1033  /* fall through */
1034  default:
1035  *nextTokPtr = ptr;
1036  return XML_TOK_PROLOG_S;
1037  }
1038  }
1039  *nextTokPtr = ptr;
1040  return XML_TOK_PROLOG_S;
1041  case BT_PERCNT:
1042  return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1043  case BT_COMMA:
1044  *nextTokPtr = ptr + MINBPC(enc);
1045  return XML_TOK_COMMA;
1046  case BT_LSQB:
1047  *nextTokPtr = ptr + MINBPC(enc);
1048  return XML_TOK_OPEN_BRACKET;
1049  case BT_RSQB:
1050  ptr += MINBPC(enc);
1051  if (ptr == end)
1052  return -XML_TOK_CLOSE_BRACKET;
1053  if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1054  if (ptr + MINBPC(enc) == end)
1055  return XML_TOK_PARTIAL;
1056  if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1057  *nextTokPtr = ptr + 2*MINBPC(enc);
1058  return XML_TOK_COND_SECT_CLOSE;
1059  }
1060  }
1061  *nextTokPtr = ptr;
1062  return XML_TOK_CLOSE_BRACKET;
1063  case BT_LPAR:
1064  *nextTokPtr = ptr + MINBPC(enc);
1065  return XML_TOK_OPEN_PAREN;
1066  case BT_RPAR:
1067  ptr += MINBPC(enc);
1068  if (ptr == end)
1069  return -XML_TOK_CLOSE_PAREN;
1070  switch (BYTE_TYPE(enc, ptr)) {
1071  case BT_AST:
1072  *nextTokPtr = ptr + MINBPC(enc);
1074  case BT_QUEST:
1075  *nextTokPtr = ptr + MINBPC(enc);
1077  case BT_PLUS:
1078  *nextTokPtr = ptr + MINBPC(enc);
1079  return XML_TOK_CLOSE_PAREN_PLUS;
1080  case BT_CR: case BT_LF: case BT_S:
1081  case BT_GT: case BT_COMMA: case BT_VERBAR:
1082  case BT_RPAR:
1083  *nextTokPtr = ptr;
1084  return XML_TOK_CLOSE_PAREN;
1085  }
1086  *nextTokPtr = ptr;
1087  return XML_TOK_INVALID;
1088  case BT_VERBAR:
1089  *nextTokPtr = ptr + MINBPC(enc);
1090  return XML_TOK_OR;
1091  case BT_GT:
1092  *nextTokPtr = ptr + MINBPC(enc);
1093  return XML_TOK_DECL_CLOSE;
1094  case BT_NUM:
1095  return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1096 #define LEAD_CASE(n) \
1097  case BT_LEAD ## n: \
1098  if (end - ptr < n) \
1099  return XML_TOK_PARTIAL_CHAR; \
1100  if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1101  ptr += n; \
1102  tok = XML_TOK_NAME; \
1103  break; \
1104  } \
1105  if (IS_NAME_CHAR(enc, ptr, n)) { \
1106  ptr += n; \
1107  tok = XML_TOK_NMTOKEN; \
1108  break; \
1109  } \
1110  *nextTokPtr = ptr; \
1111  return XML_TOK_INVALID;
1112  LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1113 #undef LEAD_CASE
1114  case BT_NMSTRT:
1115  case BT_HEX:
1116  tok = XML_TOK_NAME;
1117  ptr += MINBPC(enc);
1118  break;
1119  case BT_DIGIT:
1120  case BT_NAME:
1121  case BT_MINUS:
1122 #ifdef XML_NS
1123  case BT_COLON:
1124 #endif
1125  tok = XML_TOK_NMTOKEN;
1126  ptr += MINBPC(enc);
1127  break;
1128  case BT_NONASCII:
1129  if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1130  ptr += MINBPC(enc);
1131  tok = XML_TOK_NAME;
1132  break;
1133  }
1134  if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1135  ptr += MINBPC(enc);
1136  tok = XML_TOK_NMTOKEN;
1137  break;
1138  }
1139  /* fall through */
1140  default:
1141  *nextTokPtr = ptr;
1142  return XML_TOK_INVALID;
1143  }
1144  while (ptr != end) {
1145  switch (BYTE_TYPE(enc, ptr)) {
1146  CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1147  case BT_GT: case BT_RPAR: case BT_COMMA:
1148  case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1149  case BT_S: case BT_CR: case BT_LF:
1150  *nextTokPtr = ptr;
1151  return tok;
1152 #ifdef XML_NS
1153  case BT_COLON:
1154  ptr += MINBPC(enc);
1155  switch (tok) {
1156  case XML_TOK_NAME:
1157  if (ptr == end)
1158  return XML_TOK_PARTIAL;
1159  tok = XML_TOK_PREFIXED_NAME;
1160  switch (BYTE_TYPE(enc, ptr)) {
1161  CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1162  default:
1163  tok = XML_TOK_NMTOKEN;
1164  break;
1165  }
1166  break;
1167  case XML_TOK_PREFIXED_NAME:
1168  tok = XML_TOK_NMTOKEN;
1169  break;
1170  }
1171  break;
1172 #endif
1173  case BT_PLUS:
1174  if (tok == XML_TOK_NMTOKEN) {
1175  *nextTokPtr = ptr;
1176  return XML_TOK_INVALID;
1177  }
1178  *nextTokPtr = ptr + MINBPC(enc);
1179  return XML_TOK_NAME_PLUS;
1180  case BT_AST:
1181  if (tok == XML_TOK_NMTOKEN) {
1182  *nextTokPtr = ptr;
1183  return XML_TOK_INVALID;
1184  }
1185  *nextTokPtr = ptr + MINBPC(enc);
1186  return XML_TOK_NAME_ASTERISK;
1187  case BT_QUEST:
1188  if (tok == XML_TOK_NMTOKEN) {
1189  *nextTokPtr = ptr;
1190  return XML_TOK_INVALID;
1191  }
1192  *nextTokPtr = ptr + MINBPC(enc);
1193  return XML_TOK_NAME_QUESTION;
1194  default:
1195  *nextTokPtr = ptr;
1196  return XML_TOK_INVALID;
1197  }
1198  }
1199  return -tok;
1200 }
1201 
1202 static int PTRCALL
1203 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
1204  const char *end, const char **nextTokPtr)
1205 {
1206  const char *start;
1207  if (ptr == end)
1208  return XML_TOK_NONE;
1209  start = ptr;
1210  while (ptr != end) {
1211  switch (BYTE_TYPE(enc, ptr)) {
1212 #define LEAD_CASE(n) \
1213  case BT_LEAD ## n: ptr += n; break;
1214  LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1215 #undef LEAD_CASE
1216  case BT_AMP:
1217  if (ptr == start)
1218  return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1219  *nextTokPtr = ptr;
1220  return XML_TOK_DATA_CHARS;
1221  case BT_LT:
1222  /* this is for inside entity references */
1223  *nextTokPtr = ptr;
1224  return XML_TOK_INVALID;
1225  case BT_LF:
1226  if (ptr == start) {
1227  *nextTokPtr = ptr + MINBPC(enc);
1228  return XML_TOK_DATA_NEWLINE;
1229  }
1230  *nextTokPtr = ptr;
1231  return XML_TOK_DATA_CHARS;
1232  case BT_CR:
1233  if (ptr == start) {
1234  ptr += MINBPC(enc);
1235  if (ptr == end)
1236  return XML_TOK_TRAILING_CR;
1237  if (BYTE_TYPE(enc, ptr) == BT_LF)
1238  ptr += MINBPC(enc);
1239  *nextTokPtr = ptr;
1240  return XML_TOK_DATA_NEWLINE;
1241  }
1242  *nextTokPtr = ptr;
1243  return XML_TOK_DATA_CHARS;
1244  case BT_S:
1245  if (ptr == start) {
1246  *nextTokPtr = ptr + MINBPC(enc);
1248  }
1249  *nextTokPtr = ptr;
1250  return XML_TOK_DATA_CHARS;
1251  default:
1252  ptr += MINBPC(enc);
1253  break;
1254  }
1255  }
1256  *nextTokPtr = ptr;
1257  return XML_TOK_DATA_CHARS;
1258 }
1259 
1260 static int PTRCALL
1261 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
1262  const char *end, const char **nextTokPtr)
1263 {
1264  const char *start;
1265  if (ptr == end)
1266  return XML_TOK_NONE;
1267  start = ptr;
1268  while (ptr != end) {
1269  switch (BYTE_TYPE(enc, ptr)) {
1270 #define LEAD_CASE(n) \
1271  case BT_LEAD ## n: ptr += n; break;
1272  LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1273 #undef LEAD_CASE
1274  case BT_AMP:
1275  if (ptr == start)
1276  return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1277  *nextTokPtr = ptr;
1278  return XML_TOK_DATA_CHARS;
1279  case BT_PERCNT:
1280  if (ptr == start) {
1281  int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1282  end, nextTokPtr);
1283  return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1284  }
1285  *nextTokPtr = ptr;
1286  return XML_TOK_DATA_CHARS;
1287  case BT_LF:
1288  if (ptr == start) {
1289  *nextTokPtr = ptr + MINBPC(enc);
1290  return XML_TOK_DATA_NEWLINE;
1291  }
1292  *nextTokPtr = ptr;
1293  return XML_TOK_DATA_CHARS;
1294  case BT_CR:
1295  if (ptr == start) {
1296  ptr += MINBPC(enc);
1297  if (ptr == end)
1298  return XML_TOK_TRAILING_CR;
1299  if (BYTE_TYPE(enc, ptr) == BT_LF)
1300  ptr += MINBPC(enc);
1301  *nextTokPtr = ptr;
1302  return XML_TOK_DATA_NEWLINE;
1303  }
1304  *nextTokPtr = ptr;
1305  return XML_TOK_DATA_CHARS;
1306  default:
1307  ptr += MINBPC(enc);
1308  break;
1309  }
1310  }
1311  *nextTokPtr = ptr;
1312  return XML_TOK_DATA_CHARS;
1313 }
1314 
1315 #ifdef XML_DTD
1316 
1317 static int PTRCALL
1318 PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
1319  const char *end, const char **nextTokPtr)
1320 {
1321  int level = 0;
1322  if (MINBPC(enc) > 1) {
1323  size_t n = end - ptr;
1324  if (n & (MINBPC(enc) - 1)) {
1325  n &= ~(MINBPC(enc) - 1);
1326  end = ptr + n;
1327  }
1328  }
1329  while (ptr != end) {
1330  switch (BYTE_TYPE(enc, ptr)) {
1331  INVALID_CASES(ptr, nextTokPtr)
1332  case BT_LT:
1333  if ((ptr += MINBPC(enc)) == end)
1334  return XML_TOK_PARTIAL;
1335  if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1336  if ((ptr += MINBPC(enc)) == end)
1337  return XML_TOK_PARTIAL;
1338  if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1339  ++level;
1340  ptr += MINBPC(enc);
1341  }
1342  }
1343  break;
1344  case BT_RSQB:
1345  if ((ptr += MINBPC(enc)) == end)
1346  return XML_TOK_PARTIAL;
1347  if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1348  if ((ptr += MINBPC(enc)) == end)
1349  return XML_TOK_PARTIAL;
1350  if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1351  ptr += MINBPC(enc);
1352  if (level == 0) {
1353  *nextTokPtr = ptr;
1354  return XML_TOK_IGNORE_SECT;
1355  }
1356  --level;
1357  }
1358  }
1359  break;
1360  default:
1361  ptr += MINBPC(enc);
1362  break;
1363  }
1364  }
1365  return XML_TOK_PARTIAL;
1366 }
1367 
1368 #endif /* XML_DTD */
1369 
1370 static int PTRCALL
1371 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1372  const char **badPtr)
1373 {
1374  ptr += MINBPC(enc);
1375  end -= MINBPC(enc);
1376  for (; ptr != end; ptr += MINBPC(enc)) {
1377  switch (BYTE_TYPE(enc, ptr)) {
1378  case BT_DIGIT:
1379  case BT_HEX:
1380  case BT_MINUS:
1381  case BT_APOS:
1382  case BT_LPAR:
1383  case BT_RPAR:
1384  case BT_PLUS:
1385  case BT_COMMA:
1386  case BT_SOL:
1387  case BT_EQUALS:
1388  case BT_QUEST:
1389  case BT_CR:
1390  case BT_LF:
1391  case BT_SEMI:
1392  case BT_EXCL:
1393  case BT_AST:
1394  case BT_PERCNT:
1395  case BT_NUM:
1396 #ifdef XML_NS
1397  case BT_COLON:
1398 #endif
1399  break;
1400  case BT_S:
1401  if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1402  *badPtr = ptr;
1403  return 0;
1404  }
1405  break;
1406  case BT_NAME:
1407  case BT_NMSTRT:
1408  if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1409  break;
1410  default:
1411  switch (BYTE_TO_ASCII(enc, ptr)) {
1412  case 0x24: /* $ */
1413  case 0x40: /* @ */
1414  break;
1415  default:
1416  *badPtr = ptr;
1417  return 0;
1418  }
1419  break;
1420  }
1421  }
1422  return 1;
1423 }
1424 
1425 /* This must only be called for a well-formed start-tag or empty
1426  element tag. Returns the number of attributes. Pointers to the
1427  first attsMax attributes are stored in atts.
1428 */
1429 
1430 static int PTRCALL
1431 PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1432  int attsMax, ATTRIBUTE *atts)
1433 {
1434  enum { other, inName, inValue } state = inName;
1435  int nAtts = 0;
1436  int open = 0; /* defined when state == inValue;
1437  initialization just to shut up compilers */
1438 
1439  for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1440  switch (BYTE_TYPE(enc, ptr)) {
1441 #define START_NAME \
1442  if (state == other) { \
1443  if (nAtts < attsMax) { \
1444  atts[nAtts].name = ptr; \
1445  atts[nAtts].normalized = 1; \
1446  } \
1447  state = inName; \
1448  }
1449 #define LEAD_CASE(n) \
1450  case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1451  LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1452 #undef LEAD_CASE
1453  case BT_NONASCII:
1454  case BT_NMSTRT:
1455  case BT_HEX:
1456  START_NAME
1457  break;
1458 #undef START_NAME
1459  case BT_QUOT:
1460  if (state != inValue) {
1461  if (nAtts < attsMax)
1462  atts[nAtts].valuePtr = ptr + MINBPC(enc);
1463  state = inValue;
1464  open = BT_QUOT;
1465  }
1466  else if (open == BT_QUOT) {
1467  state = other;
1468  if (nAtts < attsMax)
1469  atts[nAtts].valueEnd = ptr;
1470  nAtts++;
1471  }
1472  break;
1473  case BT_APOS:
1474  if (state != inValue) {
1475  if (nAtts < attsMax)
1476  atts[nAtts].valuePtr = ptr + MINBPC(enc);
1477  state = inValue;
1478  open = BT_APOS;
1479  }
1480  else if (open == BT_APOS) {
1481  state = other;
1482  if (nAtts < attsMax)
1483  atts[nAtts].valueEnd = ptr;
1484  nAtts++;
1485  }
1486  break;
1487  case BT_AMP:
1488  if (nAtts < attsMax)
1489  atts[nAtts].normalized = 0;
1490  break;
1491  case BT_S:
1492  if (state == inName)
1493  state = other;
1494  else if (state == inValue
1495  && nAtts < attsMax
1496  && atts[nAtts].normalized
1497  && (ptr == atts[nAtts].valuePtr
1498  || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1499  || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1500  || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1501  atts[nAtts].normalized = 0;
1502  break;
1503  case BT_CR: case BT_LF:
1504  /* This case ensures that the first attribute name is counted
1505  Apart from that we could just change state on the quote. */
1506  if (state == inName)
1507  state = other;
1508  else if (state == inValue && nAtts < attsMax)
1509  atts[nAtts].normalized = 0;
1510  break;
1511  case BT_GT:
1512  case BT_SOL:
1513  if (state != inValue)
1514  return nAtts;
1515  break;
1516  default:
1517  break;
1518  }
1519  }
1520  /* not reached */
1521 }
1522 
1523 static int PTRFASTCALL
1524 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
1525 {
1526  int result = 0;
1527  /* skip &# */
1528  ptr += 2*MINBPC(enc);
1529  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1530  for (ptr += MINBPC(enc);
1531  !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1532  ptr += MINBPC(enc)) {
1533  int c = BYTE_TO_ASCII(enc, ptr);
1534  switch (c) {
1535  case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1536  case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1537  result <<= 4;
1538  result |= (c - ASCII_0);
1539  break;
1540  case ASCII_A: case ASCII_B: case ASCII_C:
1541  case ASCII_D: case ASCII_E: case ASCII_F:
1542  result <<= 4;
1543  result += 10 + (c - ASCII_A);
1544  break;
1545  case ASCII_a: case ASCII_b: case ASCII_c:
1546  case ASCII_d: case ASCII_e: case ASCII_f:
1547  result <<= 4;
1548  result += 10 + (c - ASCII_a);
1549  break;
1550  }
1551  if (result >= 0x110000)
1552  return -1;
1553  }
1554  }
1555  else {
1556  for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1557  int c = BYTE_TO_ASCII(enc, ptr);
1558  result *= 10;
1559  result += (c - ASCII_0);
1560  if (result >= 0x110000)
1561  return -1;
1562  }
1563  }
1564  return checkCharRefNumber(result);
1565 }
1566 
1567 static int PTRCALL
1568 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
1569  const char *end)
1570 {
1571  switch ((end - ptr)/MINBPC(enc)) {
1572  case 2:
1573  if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1574  switch (BYTE_TO_ASCII(enc, ptr)) {
1575  case ASCII_l:
1576  return ASCII_LT;
1577  case ASCII_g:
1578  return ASCII_GT;
1579  }
1580  }
1581  break;
1582  case 3:
1583  if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1584  ptr += MINBPC(enc);
1585  if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1586  ptr += MINBPC(enc);
1587  if (CHAR_MATCHES(enc, ptr, ASCII_p))
1588  return ASCII_AMP;
1589  }
1590  }
1591  break;
1592  case 4:
1593  switch (BYTE_TO_ASCII(enc, ptr)) {
1594  case ASCII_q:
1595  ptr += MINBPC(enc);
1596  if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1597  ptr += MINBPC(enc);
1598  if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1599  ptr += MINBPC(enc);
1600  if (CHAR_MATCHES(enc, ptr, ASCII_t))
1601  return ASCII_QUOT;
1602  }
1603  }
1604  break;
1605  case ASCII_a:
1606  ptr += MINBPC(enc);
1607  if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1608  ptr += MINBPC(enc);
1609  if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1610  ptr += MINBPC(enc);
1611  if (CHAR_MATCHES(enc, ptr, ASCII_s))
1612  return ASCII_APOS;
1613  }
1614  }
1615  break;
1616  }
1617  }
1618  return 0;
1619 }
1620 
1621 static int PTRCALL
1622 PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
1623 {
1624  for (;;) {
1625  switch (BYTE_TYPE(enc, ptr1)) {
1626 #define LEAD_CASE(n) \
1627  case BT_LEAD ## n: \
1628  if (*ptr1++ != *ptr2++) \
1629  return 0;
1630  LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1631 #undef LEAD_CASE
1632  /* fall through */
1633  if (*ptr1++ != *ptr2++)
1634  return 0;
1635  break;
1636  case BT_NONASCII:
1637  case BT_NMSTRT:
1638 #ifdef XML_NS
1639  case BT_COLON:
1640 #endif
1641  case BT_HEX:
1642  case BT_DIGIT:
1643  case BT_NAME:
1644  case BT_MINUS:
1645  if (*ptr2++ != *ptr1++)
1646  return 0;
1647  if (MINBPC(enc) > 1) {
1648  if (*ptr2++ != *ptr1++)
1649  return 0;
1650  if (MINBPC(enc) > 2) {
1651  if (*ptr2++ != *ptr1++)
1652  return 0;
1653  if (MINBPC(enc) > 3) {
1654  if (*ptr2++ != *ptr1++)
1655  return 0;
1656  }
1657  }
1658  }
1659  break;
1660  default:
1661  if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1662  return 1;
1663  switch (BYTE_TYPE(enc, ptr2)) {
1664  case BT_LEAD2:
1665  case BT_LEAD3:
1666  case BT_LEAD4:
1667  case BT_NONASCII:
1668  case BT_NMSTRT:
1669 #ifdef XML_NS
1670  case BT_COLON:
1671 #endif
1672  case BT_HEX:
1673  case BT_DIGIT:
1674  case BT_NAME:
1675  case BT_MINUS:
1676  return 0;
1677  default:
1678  return 1;
1679  }
1680  }
1681  }
1682  /* not reached */
1683 }
1684 
1685 static int PTRCALL
1686 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
1687  const char *end1, const char *ptr2)
1688 {
1689  for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1690  if (ptr1 == end1)
1691  return 0;
1692  if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1693  return 0;
1694  }
1695  return ptr1 == end1;
1696 }
1697 
1698 static int PTRFASTCALL
1699 PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1700 {
1701  const char *start = ptr;
1702  for (;;) {
1703  switch (BYTE_TYPE(enc, ptr)) {
1704 #define LEAD_CASE(n) \
1705  case BT_LEAD ## n: ptr += n; break;
1706  LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1707 #undef LEAD_CASE
1708  case BT_NONASCII:
1709  case BT_NMSTRT:
1710 #ifdef XML_NS
1711  case BT_COLON:
1712 #endif
1713  case BT_HEX:
1714  case BT_DIGIT:
1715  case BT_NAME:
1716  case BT_MINUS:
1717  ptr += MINBPC(enc);
1718  break;
1719  default:
1720  return (int)(ptr - start);
1721  }
1722  }
1723 }
1724 
1725 static const char * PTRFASTCALL
1726 PREFIX(skipS)(const ENCODING *enc, const char *ptr)
1727 {
1728  for (;;) {
1729  switch (BYTE_TYPE(enc, ptr)) {
1730  case BT_LF:
1731  case BT_CR:
1732  case BT_S:
1733  ptr += MINBPC(enc);
1734  break;
1735  default:
1736  return ptr;
1737  }
1738  }
1739 }
1740 
1741 static void PTRCALL
1742 PREFIX(updatePosition)(const ENCODING *enc,
1743  const char *ptr,
1744  const char *end,
1745  POSITION *pos)
1746 {
1747  while (ptr != end) {
1748  switch (BYTE_TYPE(enc, ptr)) {
1749 #define LEAD_CASE(n) \
1750  case BT_LEAD ## n: \
1751  ptr += n; \
1752  break;
1753  LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1754 #undef LEAD_CASE
1755  case BT_LF:
1756  pos->columnNumber = (XML_Size)-1;
1757  pos->lineNumber++;
1758  ptr += MINBPC(enc);
1759  break;
1760  case BT_CR:
1761  pos->lineNumber++;
1762  ptr += MINBPC(enc);
1763  if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
1764  ptr += MINBPC(enc);
1765  pos->columnNumber = (XML_Size)-1;
1766  break;
1767  default:
1768  ptr += MINBPC(enc);
1769  break;
1770  }
1771  pos->columnNumber++;
1772  }
1773 }
1774 
/* This file is #included rather than compiled on its own, so release the
   helper macros here instead of leaving them defined in the including
   file. */
#undef CHECK_NMSTRT_CASES
#undef CHECK_NMSTRT_CASE
#undef CHECK_NAME_CASES
#undef CHECK_NAME_CASE
#undef INVALID_CASES
#undef MULTIBYTE_CASES
#undef DO_LEAD_CASE
1782 
1783 #endif /* XML_TOK_IMPL_C */