5 #if defined(__clang__) || defined(__GNUC__)
6 #pragma GCC diagnostic ignored "-Wunused-parameter"
7 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
12 #ifdef COMPILED_FROM_DSP
13 #include "winconfig.h"
14 #elif defined(MACOS_CLASSIC)
15 #include "macconfig.h"
16 #elif defined(__amigaos__)
17 #include "amigaconfig.h"
18 #elif defined(__WATCOMC__)
19 #include "watcomconfig.h"
21 #ifdef HAVE_EXPAT_CONFIG_H
22 #include <expat_config.h>
26 #include "expat_external.h"
32 #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
34 #define IGNORE_SECTION_TOK_VTABLE
38 { PREFIX(prologTok), PREFIX(contentTok), \
39 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
40 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
42 PREFIX(nameMatchesAscii), \
46 PREFIX(charRefNumber), \
47 PREFIX(predefinedEntityName), \
48 PREFIX(updatePosition), \
51 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
/* Tests the naming bit for the UCS-2 character with high byte `hi` and low
   byte `lo`.  `pages[hi]` selects a group of eight 32-bit words inside
   namingBitmap, `lo >> 5` selects the word within that group and
   `lo & 0x1F` selects the bit.  Evaluates to nonzero when the bit is set.
   The mask is built from an unsigned 1 so that bit 31 can be selected
   without overflowing a signed int. */
#define UCS2_GET_NAMING(pages, hi, lo) \
  (namingBitmap[((pages)[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
/* Tests the naming bit for a character encoded as the two UTF-8 bytes
   byte[0] and byte[1].  Bits 2..4 of byte[0] index `pages`; the low two
   bits of byte[0] together with bit 5 of byte[1] select the word inside
   the page, and the low five bits of byte[1] select the bit.  Evaluates to
   nonzero when the bit is set.  `byte` must point at unsigned chars.
   The mask is built from an unsigned 1 so that bit 31 can be selected
   without overflowing a signed int. */
#define UTF8_GET_NAMING2(pages, byte) \
    (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                      + ((((byte)[0]) & 3) << 1) \
                      + ((((byte)[1]) >> 5) & 1)] \
         & (1u << (((byte)[1]) & 0x1F)))
71 #define UTF8_GET_NAMING3(pages, byte) \
72 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
73 + ((((byte)[1]) >> 2) & 0xF)] \
75 + ((((byte)[1]) & 3) << 1) \
76 + ((((byte)[2]) >> 5) & 1)] \
77 & (1 << (((byte)[2]) & 0x1F)))
79 #define UTF8_GET_NAMING(pages, p, n) \
81 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
83 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
/* Nonzero when the two bytes at `p` are not an acceptable 2-byte UTF-8
   sequence: the lead byte is below 0xC2, or the second byte does not have
   the 10xxxxxx continuation form.  `p` must point at unsigned chars.
   The argument is fully parenthesized so that pointer expressions such as
   `buf + 1` are dereferenced as a whole. */
#define UTF8_INVALID2(p) \
  ((*(p)) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
99 #define UTF8_INVALID3(p) \
100 (((p)[2] & 0x80) == 0 \
102 ((*p) == 0xEF && (p)[1] == 0xBF \
106 ((p)[2] & 0xC0) == 0xC0) \
110 (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
112 ((p)[1] & 0x80) == 0 \
114 ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))
116 #define UTF8_INVALID4(p) \
117 (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \
119 ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \
123 (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
125 ((p)[1] & 0x80) == 0 \
127 ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
129 static int PTRFASTCALL
135 static int PTRFASTCALL
141 static int PTRFASTCALL
147 #define utf8_isName4 isNever
149 static int PTRFASTCALL
155 static int PTRFASTCALL
161 #define utf8_isNmstrt4 isNever
163 static int PTRFASTCALL
169 static int PTRFASTCALL
175 static int PTRFASTCALL
185 int (PTRFASTCALL *byteType)(
const ENCODING *,
const char *);
186 int (PTRFASTCALL *isNameMin)(
const ENCODING *,
const char *);
187 int (PTRFASTCALL *isNmstrtMin)(
const ENCODING *,
const char *);
188 int (PTRFASTCALL *byteToAscii)(
const ENCODING *,
const char *);
189 int (PTRCALL *charMatches)(
const ENCODING *,
const char *,
int);
191 int (PTRFASTCALL *isName2)(
const ENCODING *,
const char *);
192 int (PTRFASTCALL *isName3)(
const ENCODING *,
const char *);
193 int (PTRFASTCALL *isName4)(
const ENCODING *,
const char *);
194 int (PTRFASTCALL *isNmstrt2)(
const ENCODING *,
const char *);
195 int (PTRFASTCALL *isNmstrt3)(
const ENCODING *,
const char *);
196 int (PTRFASTCALL *isNmstrt4)(
const ENCODING *,
const char *);
197 int (PTRFASTCALL *isInvalid2)(
const ENCODING *,
const char *);
198 int (PTRFASTCALL *isInvalid3)(
const ENCODING *,
const char *);
199 int (PTRFASTCALL *isInvalid4)(
const ENCODING *,
const char *);
202 #define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *) (enc))
206 #define STANDARD_VTABLE(E) \
215 #define STANDARD_VTABLE(E)
219 #define NORMAL_VTABLE(E) \
232 #include "xmltok_impl.h"
236 #define sb_isNameMin isNever
237 #define sb_isNmstrtMin isNever
241 #define MINBPC(enc) ((enc)->minBytesPerChar)
244 #define MINBPC(enc) 1
247 #define SB_BYTE_TYPE(enc, p) \
248 (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
251 static int PTRFASTCALL
252 sb_byteType(
const ENCODING *enc,
const char *p)
256 #define BYTE_TYPE(enc, p) \
257 (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
259 #define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
263 #define BYTE_TO_ASCII(enc, p) \
264 (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
265 static int PTRFASTCALL
266 sb_byteToAscii(
const ENCODING *enc,
const char *p)
271 #define BYTE_TO_ASCII(enc, p) (*(p))
274 #define IS_NAME_CHAR(enc, p, n) \
275 (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p))
276 #define IS_NMSTRT_CHAR(enc, p, n) \
277 (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p))
278 #define IS_INVALID_CHAR(enc, p, n) \
279 (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p))
282 #define IS_NAME_CHAR_MINBPC(enc, p) \
283 (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
284 #define IS_NMSTRT_CHAR_MINBPC(enc, p) \
285 (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
287 #define IS_NAME_CHAR_MINBPC(enc, p) (0)
288 #define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
292 #define CHAR_MATCHES(enc, p, c) \
293 (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
295 sb_charMatches(
const ENCODING *enc,
const char *p,
int c)
/* Single-byte comparison: nonzero when the byte at `p` equals `c`.
   `enc` is unused in this variant.  `c` is parenthesized so that an
   expression argument is compared as a whole. */
#define CHAR_MATCHES(enc, p, c) (*(p) == (c))
304 #define PREFIX(ident) normal_ ## ident
305 #define XML_TOK_IMPL_C
307 #undef XML_TOK_IMPL_C
314 #undef IS_NAME_CHAR_MINBPC
315 #undef IS_NMSTRT_CHAR
316 #undef IS_NMSTRT_CHAR_MINBPC
317 #undef IS_INVALID_CHAR
328 const char **fromP,
const char *fromLim,
329 char **toP,
const char *toLim)
333 if (fromLim - *fromP > toLim - *toP) {
335 for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
336 if (((
unsigned char)fromLim[-1] & 0xc0) != 0x80)
339 for (to = *toP, from = *fromP; from != fromLim; from++, to++)
347 const char **fromP,
const char *fromLim,
348 unsigned short **toP,
const unsigned short *toLim)
350 unsigned short *to = *toP;
351 const char *from = *fromP;
352 while (from != fromLim && to != toLim) {
355 *to++ = (
unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
359 *to++ = (
unsigned short)(((from[0] & 0xf) << 12)
360 | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
368 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
369 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
371 to[0] = (
unsigned short)((n >> 10) | 0xD800);
372 to[1] = (
unsigned short)((n & 0x3FF) | 0xDC00);
391 #include "asciitab.h"
401 #define BT_COLON BT_NMSTRT
402 #include "asciitab.h"
414 #include "iasciitab.h"
425 #define BT_COLON BT_NMSTRT
426 #include "iasciitab.h"
435 const char **fromP,
const char *fromLim,
436 char **toP,
const char *toLim)
440 if (*fromP == fromLim)
442 c = (
unsigned char)**fromP;
444 if (toLim - *toP < 2)
447 *(*toP)++ = (char)((c & 0x3f) | 0x80);
453 *(*toP)++ = *(*fromP)++;
460 const char **fromP,
const char *fromLim,
461 unsigned short **toP,
const unsigned short *toLim)
463 while (*fromP != fromLim && *toP != toLim)
464 *(*toP)++ = (
unsigned char)*(*fromP)++;
472 #include "asciitab.h"
473 #include "latin1tab.h"
483 #define BT_COLON BT_NMSTRT
484 #include "asciitab.h"
486 #include "latin1tab.h"
493 const char **fromP,
const char *fromLim,
494 char **toP,
const char *toLim)
496 while (*fromP != fromLim && *toP != toLim)
497 *(*toP)++ = *(*fromP)++;
505 #include "asciitab.h"
516 #define BT_COLON BT_NMSTRT
517 #include "asciitab.h"
524 static int PTRFASTCALL
527 switch ((
unsigned char)hi) {
528 case 0xD8:
case 0xD9:
case 0xDA:
case 0xDB:
530 case 0xDC:
case 0xDD:
case 0xDE:
case 0xDF:
533 switch ((
unsigned char)lo) {
543 #define DEFINE_UTF16_TO_UTF8(E) \
544 static void PTRCALL \
545 E ## toUtf8(const ENCODING *enc, \
546 const char **fromP, const char *fromLim, \
547 char **toP, const char *toLim) \
550 for (from = *fromP; from != fromLim; from += 2) { \
553 unsigned char lo = GET_LO(from); \
554 unsigned char hi = GET_HI(from); \
558 if (*toP == toLim) { \
566 case 0x1: case 0x2: case 0x3: \
567 case 0x4: case 0x5: case 0x6: case 0x7: \
568 if (toLim - *toP < 2) { \
572 *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
573 *(*toP)++ = ((lo & 0x3f) | 0x80); \
576 if (toLim - *toP < 3) { \
581 *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
582 *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
583 *(*toP)++ = ((lo & 0x3f) | 0x80); \
585 case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
586 if (toLim - *toP < 4) { \
590 plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
591 *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
592 *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
594 lo2 = GET_LO(from); \
595 *(*toP)++ = (((lo & 0x3) << 4) \
596 | ((GET_HI(from) & 0x3) << 2) \
599 *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
606 #define DEFINE_UTF16_TO_UTF16(E) \
607 static void PTRCALL \
608 E ## toUtf16(const ENCODING *enc, \
609 const char **fromP, const char *fromLim, \
610 unsigned short **toP, const unsigned short *toLim) \
613 if (fromLim - *fromP > ((toLim - *toP) << 1) \
614 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
616 for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
617 *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
620 #define SET2(ptr, ch) \
621 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
622 #define GET_LO(ptr) ((unsigned char)(ptr)[0])
623 #define GET_HI(ptr) ((unsigned char)(ptr)[1])
632 #define SET2(ptr, ch) \
633 (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
634 #define GET_LO(ptr) ((unsigned char)(ptr)[1])
635 #define GET_HI(ptr) ((unsigned char)(ptr)[0])
644 #define LITTLE2_BYTE_TYPE(enc, p) \
646 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
647 : unicode_byte_type((p)[1], (p)[0]))
/* Little-endian two-byte helpers: (p)[0] is the low byte and (p)[1] the
   high byte of the character at `p`.  `enc` is unused by all of them.
   Every macro argument is parenthesized so expression arguments expand
   as a unit. */

/* The low byte when the high byte is zero, otherwise -1. */
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
/* Nonzero when the high byte is zero and the low byte equals `c`. */
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == (c))
/* Naming-bitmap lookup through namePages for the character at `p`. */
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
/* Naming-bitmap lookup through nmstrtPages for the character at `p`. */
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
657 static int PTRFASTCALL
658 little2_byteType(
const ENCODING *
enc,
const char *p)
663 static int PTRFASTCALL
664 little2_byteToAscii(
const ENCODING *
enc,
const char *p)
670 little2_charMatches(
const ENCODING *
enc,
const char *p,
int c)
675 static int PTRFASTCALL
676 little2_isNameMin(
const ENCODING *
enc,
const char *p)
681 static int PTRFASTCALL
682 little2_isNmstrtMin(
const ENCODING *
enc,
const char *p)
688 #define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
693 #define PREFIX(ident) little2_ ## ident
694 #define MINBPC(enc) 2
696 #define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
697 #define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
698 #define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
699 #define IS_NAME_CHAR(enc, p, n) 0
700 #define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
701 #define IS_NMSTRT_CHAR(enc, p, n) (0)
702 #define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
704 #define XML_TOK_IMPL_C
706 #undef XML_TOK_IMPL_C
713 #undef IS_NAME_CHAR_MINBPC
714 #undef IS_NMSTRT_CHAR
715 #undef IS_NMSTRT_CHAR_MINBPC
716 #undef IS_INVALID_CHAR
724 #if BYTEORDER == 1234
731 #include "asciitab.h"
732 #include "latin1tab.h"
741 #if BYTEORDER == 1234
748 #define BT_COLON BT_NMSTRT
749 #include "asciitab.h"
751 #include "latin1tab.h"
756 #if BYTEORDER != 4321
763 #include "iasciitab.h"
764 #include "latin1tab.h"
774 #define BT_COLON BT_NMSTRT
775 #include "iasciitab.h"
777 #include "latin1tab.h"
785 #define BIG2_BYTE_TYPE(enc, p) \
787 ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
788 : unicode_byte_type((p)[0], (p)[1]))
/* Big-endian two-byte helpers: (p)[0] is the high byte and (p)[1] the
   low byte of the character at `p`.  `enc` is unused by all of them.
   Every macro argument is parenthesized so expression arguments expand
   as a unit. */

/* The low byte when the high byte is zero, otherwise -1. */
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
/* Nonzero when the high byte is zero and the low byte equals `c`. */
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == (c))
/* Naming-bitmap lookup through namePages for the character at `p`. */
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
/* Naming-bitmap lookup through nmstrtPages for the character at `p`. */
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
798 static int PTRFASTCALL
799 big2_byteType(
const ENCODING *
enc,
const char *p)
804 static int PTRFASTCALL
805 big2_byteToAscii(
const ENCODING *
enc,
const char *p)
811 big2_charMatches(
const ENCODING *
enc,
const char *p,
int c)
816 static int PTRFASTCALL
817 big2_isNameMin(
const ENCODING *
enc,
const char *p)
822 static int PTRFASTCALL
823 big2_isNmstrtMin(
const ENCODING *
enc,
const char *p)
829 #define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
834 #define PREFIX(ident) big2_ ## ident
835 #define MINBPC(enc) 2
837 #define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
838 #define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
839 #define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
840 #define IS_NAME_CHAR(enc, p, n) 0
841 #define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
842 #define IS_NMSTRT_CHAR(enc, p, n) (0)
843 #define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
845 #define XML_TOK_IMPL_C
847 #undef XML_TOK_IMPL_C
854 #undef IS_NAME_CHAR_MINBPC
855 #undef IS_NMSTRT_CHAR
856 #undef IS_NMSTRT_CHAR_MINBPC
857 #undef IS_INVALID_CHAR
865 #if BYTEORDER == 4321
872 #include "asciitab.h"
873 #include "latin1tab.h"
882 #if BYTEORDER == 4321
889 #define BT_COLON BT_NMSTRT
890 #include "asciitab.h"
892 #include "latin1tab.h"
897 #if BYTEORDER != 1234
904 #include "iasciitab.h"
905 #include "latin1tab.h"
915 #define BT_COLON BT_NMSTRT
916 #include "iasciitab.h"
918 #include "latin1tab.h"
933 if (ASCII_a <= c1 && c1 <= ASCII_z)
934 c1 += ASCII_A - ASCII_a;
935 if (ASCII_a <= c2 && c2 <= ASCII_z)
936 c2 += ASCII_A - ASCII_a;
949 normal_updatePosition(&utf8_encoding.
enc, ptr, end, pos);
953 toAscii(
const ENCODING *
enc,
const char *ptr,
const char *end)
957 XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
984 const char **namePtr,
985 const char **nameEndPtr,
987 const char **nextTokPtr)
1000 ptr += enc->minBytesPerChar;
1013 if (c == ASCII_EQUALS) {
1020 ptr += enc->minBytesPerChar;
1022 if (c != ASCII_EQUALS) {
1028 ptr += enc->minBytesPerChar;
1030 if (ptr == *namePtr) {
1034 ptr += enc->minBytesPerChar;
1037 ptr += enc->minBytesPerChar;
1040 if (c != ASCII_QUOT && c != ASCII_APOS) {
1045 ptr += enc->minBytesPerChar;
1047 for (;; ptr += enc->minBytesPerChar) {
1051 if (!(ASCII_a <= c && c <= ASCII_z)
1052 && !(ASCII_A <= c && c <= ASCII_Z)
1053 && !(ASCII_0 <= c && c <= ASCII_9)
1054 && c != ASCII_PERIOD
1056 && c != ASCII_UNDERSCORE) {
1061 *nextTokPtr = ptr + enc->minBytesPerChar;
1066 ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n,
'\0'
1070 ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g,
'\0'
1074 ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o,
1075 ASCII_n, ASCII_e,
'\0'
1079 ASCII_y, ASCII_e, ASCII_s,
'\0'
1083 ASCII_n, ASCII_o,
'\0'
1090 int isGeneralTextEntity,
1091 const ENCODING *
enc,
1094 const char **badPtr,
1095 const char **versionPtr,
1096 const char **versionEndPtr,
1097 const char **encodingName,
1101 const char *val = NULL;
1102 const char *
name = NULL;
1103 const char *nameEnd = NULL;
1104 ptr += 5 * enc->minBytesPerChar;
1105 end -= 2 * enc->minBytesPerChar;
1111 if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
1112 if (!isGeneralTextEntity) {
1121 *versionEndPtr = ptr;
1127 if (isGeneralTextEntity) {
1135 if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
1136 int c =
toAscii(enc, val, end);
1137 if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) {
1142 *encodingName = val;
1144 *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
1152 if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
1153 || isGeneralTextEntity) {
1157 if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
1161 else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
1170 ptr += enc->minBytesPerChar;
1181 switch (result >> 8) {
1182 case 0xD8:
case 0xD9:
case 0xDA:
case 0xDB:
1183 case 0xDC:
case 0xDD:
case 0xDE:
case 0xDF:
1186 if (latin1_encoding.
type[result] == BT_NONXML)
1190 if (result == 0xFFFE || result == 0xFFFF)
1215 buf[1] = (char)((c & 0x3f) | 0x80);
1220 buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
1221 buf[2] = (char)((c & 0x3f) | 0x80);
1226 buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
1227 buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
1228 buf[3] = (char)((c & 0x3f) | 0x80);
1239 if (charNum < 0x10000) {
1240 buf[0] = (
unsigned short)charNum;
1243 if (charNum < 0x110000) {
1245 buf[0] = (
unsigned short)((charNum >> 10) + 0xD800);
1246 buf[1] = (
unsigned short)((charNum & 0x3FF) + 0xDC00);
1260 #define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc))
1268 static int PTRFASTCALL
1278 static int PTRFASTCALL
1288 static int PTRFASTCALL
1298 const char **fromP,
const char *fromLim,
1299 char **toP,
const char *toLim)
1302 char buf[XML_UTF8_ENCODE_MAX];
1306 if (*fromP == fromLim)
1308 utf8 = uenc->
utf8[(
unsigned char)**fromP];
1313 if (n > toLim - *toP)
1320 if (n > toLim - *toP)
1325 *(*toP)++ = *utf8++;
1332 const char **fromP,
const char *fromLim,
1333 unsigned short **toP,
const unsigned short *toLim)
1336 while (*fromP != fromLim && *toP != toLim) {
1337 unsigned short c = uenc->
utf16[(
unsigned char)**fromP];
1339 c = (
unsigned short)
1360 for (i = 0; i < 128; i++)
1361 if (latin1_encoding.
type[i] != BT_OTHER
1362 && latin1_encoding.
type[i] != BT_NONXML
1365 for (i = 0; i < 256; i++) {
1370 e->
utf16[i] = 0xFFFF;
1377 e->
normal.
type[i] = (
unsigned char)(BT_LEAD2 - (c + 2));
1381 else if (c < 0x80) {
1382 if (latin1_encoding.
type[c] != BT_OTHER
1383 && latin1_encoding.
type[c] != BT_NONXML
1388 e->
utf8[i][1] = (char)c;
1389 e->
utf16[i] = (
unsigned short)(c == 0 ? 0xFFFF : c);
1394 e->
utf16[i] = 0xFFFF;
1408 e->
utf16[i] = (
unsigned short)c;
1444 ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9,
1445 ASCII_MINUS, ASCII_1,
'\0'
1448 ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I,
1452 ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8,
'\0'
1455 ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6,
'\0'
1458 ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E,
1462 ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E,
1469 static const char *
const encodingNames[] = {
1480 for (i = 0; i < (int)(
sizeof(encodingNames)/
sizeof(encodingNames[0])); i++)
1481 if (
streqci(name, encodingNames[i]))
/* Read the value stored in the isUtf16 field of enc->initEnc, as an int. */
#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
/* Store `i`, narrowed to char, in the isUtf16 field of enc->initEnc.
   `i` is parenthesized so the cast applies to the whole argument
   expression rather than only its first operand. */
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
1503 const INIT_ENCODING *enc,
1507 const char **nextTokPtr)
1509 const ENCODING **encPtr;
1512 return XML_TOK_NONE;
1513 encPtr = enc->encPtr;
1514 if (ptr + 1 == end) {
1518 if (state != XML_CONTENT_STATE)
1519 return XML_TOK_PARTIAL;
1527 return XML_TOK_PARTIAL;
1529 switch ((
unsigned char)*ptr) {
1534 && state == XML_CONTENT_STATE)
1539 return XML_TOK_PARTIAL;
1543 switch (((
unsigned char)ptr[0] << 8) | (
unsigned char)ptr[1]) {
1546 && state == XML_CONTENT_STATE)
1548 *nextTokPtr = ptr + 2;
1555 && state == XML_CONTENT_STATE)
1558 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1561 && state == XML_CONTENT_STATE)
1563 *nextTokPtr = ptr + 2;
1574 if (state == XML_CONTENT_STATE) {
1581 return XML_TOK_PARTIAL;
1582 if ((
unsigned char)ptr[2] == 0xBF) {
1583 *nextTokPtr = ptr + 3;
1589 if (ptr[0] ==
'\0') {
1599 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1601 else if (ptr[1] ==
'\0') {
1611 if (state == XML_CONTENT_STATE)
1614 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1620 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1626 #define XML_TOK_NS_C
1634 #define NS(x) x ## NS
1635 #define ns(x) x ## _ns
1637 #define XML_TOK_NS_C
static const char KW_ISO_8859_1[]
static int PTRFASTCALL unicode_byte_type(char hi, char lo)
#define AS_NORMAL_ENCODING(enc)
static c2_factory< G4double > c2
#define INIT_ENC_INDEX(enc)
static void PTRCALL latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static int PTRFASTCALL utf8_isNmstrt3(const ENCODING *enc, const char *p)
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
static int toAscii(const ENCODING *enc, const char *ptr, const char *end)
static void PTRCALL initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end, POSITION *pos)
#define LITTLE2_BYTE_TYPE(enc, p)
static const struct normal_encoding utf8_encoding
#define SB_BYTE_TYPE(enc, p)
static int PTRFASTCALL unknown_isName(const ENCODING *enc, const char *p)
static int initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc, int state, const char *ptr, const char *end, const char **nextTokPtr)
#define UCS2_GET_NAMING(pages, hi, lo)
static const char KW_UTF_8[]
#define UTF8_GET_NAMING2(pages, byte)
#define AS_UNKNOWN_ENCODING(enc)
static int PTRFASTCALL unknown_isInvalid(const ENCODING *enc, const char *p)
static int FASTCALL checkCharRefNumber(int)
static const char KW_version[]
int FASTCALL XmlUtf16Encode(int charNum, unsigned short *buf)
int XmlSizeOfUnknownEncoding(void)
#define LITTLE2_BYTE_TO_ASCII(enc, p)
static int PTRFASTCALL utf8_isInvalid3(const ENCODING *enc, const char *p)
#define LITTLE2_CHAR_MATCHES(enc, p, c)
static const char KW_encoding[]
static const struct normal_encoding big2_encoding
int FASTCALL XmlUtf8Encode(int c, char *buf)
int(PTRFASTCALL *isName2)(const ENCODING *
static void PTRCALL unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static const struct normal_encoding internal_utf8_encoding
static const struct normal_encoding ascii_encoding
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p)
static int min3(int a, int b, int c)
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
static int PTRFASTCALL unknown_isNmstrt(const ENCODING *enc, const char *p)
static int FASTCALL getEncodingIndex(const char *name)
static int FASTCALL isSpace(int c)
static const struct normal_encoding latin1_encoding
static const struct normal_encoding internal_little2_encoding
static const char KW_UTF_16[]
static int PTRFASTCALL utf8_isInvalid2(const ENCODING *enc, const char *p)
static int FASTCALL streqci(const char *s1, const char *s2)
static const struct normal_encoding internal_big2_encoding
#define BIG2_CHAR_MATCHES(enc, p, c)
static const char KW_no[]
static int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *, const char *), int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **versionEndPtr, const char **encodingName, const ENCODING **encoding, int *standalone)
#define BIG2_BYTE_TO_ASCII(enc, p)
static int PTRFASTCALL utf8_isInvalid4(const ENCODING *enc, const char *p)
static int PTRFASTCALL isNever(const ENCODING *enc, const char *p)
#define DEFINE_UTF16_TO_UTF8(E)
static int PTRFASTCALL utf8_isName3(const ENCODING *enc, const char *p)
static const char KW_yes[]
#define UTF8_GET_NAMING3(pages, byte)
static void PTRCALL utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static int PTRFASTCALL utf8_isNmstrt2(const ENCODING *enc, const char *p)
static void PTRCALL latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)
#define BIG2_BYTE_TYPE(enc, p)
#define XmlInitUnknownEncodingNS
struct normal_encoding normal
ENCODING * XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, void *userData)
static const char KW_UTF_16BE[]
static int PTRFASTCALL utf8_isName2(const ENCODING *enc, const char *p)
unsigned short utf16[256]
static const char KW_US_ASCII[]
static const char KW_UTF_16LE[]
static void PTRCALL utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)
static int parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end, const char **namePtr, const char **nameEndPtr, const char **valPtr, const char **nextTokPtr)
static const struct normal_encoding little2_encoding
static void PTRCALL unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
static const char KW_standalone[]
static void PTRCALL ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
#define DEFINE_UTF16_TO_UTF16(E)
static const G4double pos
#define STANDARD_VTABLE(E)