2 Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
3 See the file copying.txt for copying permission.
/* Default validity test: unless IS_INVALID_CHAR is already defined, it
   expands to 0, so no multi-byte sequence is rejected through it. */
6 #ifndef IS_INVALID_CHAR
7 #define IS_INVALID_CHAR(enc, ptr, n) (0)
/* INVALID_LEAD_CASE handles one n-byte lead case.  The visible lines return
   XML_TOK_PARTIAL_CHAR, or store ptr in *nextTokPtr and return
   XML_TOK_INVALID when IS_INVALID_CHAR(enc, ptr, n) is true.  Note that
   `enc` is not a macro parameter: it is picked up from the scope of the
   expansion site.  INVALID_CASES instantiates the lead case for n = 2, 3
   and 4 and ends with a branch that stores ptr in *nextTokPtr and returns
   XML_TOK_INVALID.
   NOTE(review): some lines of these macros are not visible in this excerpt
   (e.g. the condition guarding the XML_TOK_PARTIAL_CHAR return). */
10 #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
13 return XML_TOK_PARTIAL_CHAR; \
14 if (IS_INVALID_CHAR(enc, ptr, n)) { \
15 *(nextTokPtr) = (ptr); \
16 return XML_TOK_INVALID; \
21 #define INVALID_CASES(ptr, nextTokPtr) \
22 INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
23 INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
24 INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
28 *(nextTokPtr) = (ptr); \
29 return XML_TOK_INVALID;
/* CHECK_NAME_CASE / CHECK_NAME_CASES: switch cases for a character inside a
   name.  The visible lines return XML_TOK_PARTIAL_CHAR, or return
   XML_TOK_INVALID when the character fails IS_NAME_CHAR (n-byte form) or
   IS_NAME_CHAR_MINBPC.  CHECK_NAME_CASES expands CHECK_NAME_CASE for
   n = 2, 3 and 4. */
31 #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
34 return XML_TOK_PARTIAL_CHAR; \
35 if (!IS_NAME_CHAR(enc, ptr, n)) { \
37 return XML_TOK_INVALID; \
42 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
44 if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
46 return XML_TOK_INVALID; \
55 CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
56 CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
57 CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
/* CHECK_NMSTRT_CASE / CHECK_NMSTRT_CASES: same shape as the name-character
   macros, but test the first character of a name with IS_NMSTRT_CHAR /
   IS_NMSTRT_CHAR_MINBPC. */
59 #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
62 return XML_TOK_PARTIAL_CHAR; \
63 if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
65 return XML_TOK_INVALID; \
70 #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
72 if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
74 return XML_TOK_INVALID; \
80 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
81 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
82 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
/* Identity mapping: PREFIX(name) expands to name unchanged. */
85 #define PREFIX(ident) ident
88 /* ptr points to character following "<!-" */
/* Scans a comment.  Visible behaviour: the character at ptr must be '-',
   otherwise the result is XML_TOK_INVALID; characters are classified with
   BYTE_TYPE and INVALID_CASES; once "--" has been seen the next character
   must be '>' (otherwise XML_TOK_INVALID), in which case *nextTokPtr is set
   just past it and XML_TOK_COMMENT is returned.  Running out of input
   yields XML_TOK_PARTIAL.
   NOTE(review): parts of the body are not visible in this excerpt. */
91 int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
92 const char **nextTokPtr)
95 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
97 return XML_TOK_INVALID;
101 switch (BYTE_TYPE(enc, ptr)) {
102 INVALID_CASES(ptr, nextTokPtr)
104 if ((ptr += MINBPC(enc)) == end)
105 return XML_TOK_PARTIAL;
106 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
107 if ((ptr += MINBPC(enc)) == end)
108 return XML_TOK_PARTIAL;
109 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
111 return XML_TOK_INVALID;
113 *nextTokPtr = ptr + MINBPC(enc);
114 return XML_TOK_COMMENT;
123 return XML_TOK_PARTIAL;
126 /* ptr points to character following "<!" */
/* Scans the opening of a declaration.  Visible outcomes: delegation to
   scanComment on the following character; XML_TOK_COND_SECT_OPEN with
   *nextTokPtr one character past ptr; XML_TOK_DECL_OPEN when the name is
   followed by whitespace; XML_TOK_INVALID for unexpected characters,
   including a character that is itself followed by whitespace or '%'
   (the "<!ENTITY% foo" form); XML_TOK_PARTIAL when input ends first.
   NOTE(review): the case labels selecting these branches are not visible
   in this excerpt. */
129 int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
130 const char **nextTokPtr)
133 return XML_TOK_PARTIAL;
134 switch (BYTE_TYPE(enc, ptr)) {
136 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
138 *nextTokPtr = ptr + MINBPC(enc);
139 return XML_TOK_COND_SECT_OPEN;
146 return XML_TOK_INVALID;
149 switch (BYTE_TYPE(enc, ptr)) {
151 if (ptr + MINBPC(enc) == end)
152 return XML_TOK_PARTIAL;
153 /* don't allow <!ENTITY% foo "whatever"> */
154 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
155 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
157 return XML_TOK_INVALID;
160 case BT_S: case BT_CR: case BT_LF:
162 return XML_TOK_DECL_OPEN;
169 return XML_TOK_INVALID;
172 return XML_TOK_PARTIAL;
/* Classifies a processing-instruction target.  *tokPtr is first set to
   XML_TOK_PI.  The target length is compared against exactly three
   MINBPC-sized units; three successive BYTE_TO_ASCII switches then examine
   characters, and on the visible final path *tokPtr becomes
   XML_TOK_XML_DECL.  scanPi treats a zero return value as an invalid
   target.
   NOTE(review): the remaining parameters, the statement under the length
   test and the switch bodies are not visible in this excerpt. */
176 int PREFIX(checkPiTarget)(const ENCODING * enc ATTR_UNUSED,
182 *tokPtr = XML_TOK_PI;
183 if (end - ptr != MINBPC(enc)*3)
185 switch (BYTE_TO_ASCII(enc, ptr)) {
195 switch (BYTE_TO_ASCII(enc, ptr)) {
205 switch (BYTE_TO_ASCII(enc, ptr)) {
216 *tokPtr = XML_TOK_XML_DECL;
220 /* ptr points to character following "<?" */
/* Scans a processing instruction.  `target` remembers where the target name
   starts so that checkPiTarget can classify it once the name ends.  The
   first character goes through CHECK_NMSTRT_CASES and later ones through
   CHECK_NAME_CASES.  When the target is followed by whitespace, or on the
   second visible path, checkPiTarget is consulted and a zero result leads
   to XML_TOK_INVALID.  A '>' match sets *nextTokPtr just past it.  Running
   out of input yields XML_TOK_PARTIAL.
   NOTE(review): the declaration of `tok` and several branches are not
   visible in this excerpt. */
223 int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
224 const char **nextTokPtr)
227 const char *target = ptr;
229 return XML_TOK_PARTIAL;
230 switch (BYTE_TYPE(enc, ptr)) {
231 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
234 return XML_TOK_INVALID;
237 switch (BYTE_TYPE(enc, ptr)) {
238 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
239 case BT_S: case BT_CR: case BT_LF:
240 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
242 return XML_TOK_INVALID;
246 switch (BYTE_TYPE(enc, ptr)) {
247 INVALID_CASES(ptr, nextTokPtr)
251 return XML_TOK_PARTIAL;
252 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
253 *nextTokPtr = ptr + MINBPC(enc);
262 return XML_TOK_PARTIAL;
264 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
266 return XML_TOK_INVALID;
270 return XML_TOK_PARTIAL;
271 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
272 *nextTokPtr = ptr + MINBPC(enc);
278 return XML_TOK_INVALID;
281 return XML_TOK_PARTIAL;
/* Matches the six characters C, D, A, T, A, '[' starting at ptr.
   Returns XML_TOK_PARTIAL when fewer than six characters are available,
   XML_TOK_INVALID on the first mismatch, and XML_TOK_CDATA_SECT_OPEN once
   all six have matched.
   NOTE(review): the middle parameters and the *nextTokPtr stores are not
   visible in this excerpt. */
286 int PREFIX(scanCdataSection)(const ENCODING * enc ATTR_UNUSED,
289 const char ** nextTokPtr)
291 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB };
294 if (end - ptr < 6 * MINBPC(enc))
295 return XML_TOK_PARTIAL;
296 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
297 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
299 return XML_TOK_INVALID;
303 return XML_TOK_CDATA_SECT_OPEN;
/* Tokenizes input inside a CDATA section.  For encodings whose minimum
   character size exceeds one byte, the available length n = end - ptr is
   rounded down to a multiple of MINBPC(enc); the mask arithmetic presumes
   MINBPC(enc) is a power of two.  Visible results: XML_TOK_PARTIAL when
   input ends early, XML_TOK_CDATA_SECT_CLOSE after the ']' and '>' checks
   succeed, XML_TOK_DATA_NEWLINE for line ends (with a look-ahead for
   BT_LF), and XML_TOK_DATA_CHARS for a run of ordinary data.  The local
   LEAD_CASE macro ends the data run before a multi-byte character that is
   incomplete or flagged by IS_INVALID_CHAR.
   NOTE(review): many lines of the body are not visible in this excerpt. */
307 int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
308 const char **nextTokPtr)
312 if (MINBPC(enc) > 1) {
313 size_t n = end - ptr;
314 if (n & (MINBPC(enc) - 1)) {
315 n &= ~(MINBPC(enc) - 1);
317 return XML_TOK_PARTIAL;
321 switch (BYTE_TYPE(enc, ptr)) {
325 return XML_TOK_PARTIAL;
326 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
330 return XML_TOK_PARTIAL;
331 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
335 *nextTokPtr = ptr + MINBPC(enc);
336 return XML_TOK_CDATA_SECT_CLOSE;
340 return XML_TOK_PARTIAL;
341 if (BYTE_TYPE(enc, ptr) == BT_LF)
344 return XML_TOK_DATA_NEWLINE;
346 *nextTokPtr = ptr + MINBPC(enc);
347 return XML_TOK_DATA_NEWLINE;
348 INVALID_CASES(ptr, nextTokPtr)
354 switch (BYTE_TYPE(enc, ptr)) {
355 #define LEAD_CASE(n) \
357 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
359 return XML_TOK_DATA_CHARS; \
363 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
372 return XML_TOK_DATA_CHARS;
379 return XML_TOK_DATA_CHARS;
382 /* ptr points to character following "</" */
/* Scans an end tag.  The first character goes through CHECK_NMSTRT_CASES
   and the following ones through CHECK_NAME_CASES.  After whitespace the
   inner loop skips further whitespace and accepts only the character that
   yields XML_TOK_END_TAG (with *nextTokPtr just past it); anything else is
   XML_TOK_INVALID.  Running out of input yields XML_TOK_PARTIAL.
   NOTE(review): the case labels for the closing character and other
   branches are not visible in this excerpt. */
385 int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
386 const char **nextTokPtr)
389 return XML_TOK_PARTIAL;
390 switch (BYTE_TYPE(enc, ptr)) {
391 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
394 return XML_TOK_INVALID;
397 switch (BYTE_TYPE(enc, ptr)) {
398 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
399 case BT_S: case BT_CR: case BT_LF:
400 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
401 switch (BYTE_TYPE(enc, ptr)) {
402 case BT_S: case BT_CR: case BT_LF:
405 *nextTokPtr = ptr + MINBPC(enc);
406 return XML_TOK_END_TAG;
409 return XML_TOK_INVALID;
412 return XML_TOK_PARTIAL;
415 /* no need to check qname syntax here, since end-tag must match exactly */
420 *nextTokPtr = ptr + MINBPC(enc);
421 return XML_TOK_END_TAG;
424 return XML_TOK_INVALID;
427 return XML_TOK_PARTIAL;
430 /* ptr points to character following "&#x" */
/* Scans the digits of a hexadecimal character reference; scanCharRef calls
   this after matching a lowercase 'x' (ASCII_x).  The first character is
   checked separately, then the loop walks one MINBPC-sized unit at a time.
   Returns XML_TOK_CHAR_REF with *nextTokPtr just past the terminating
   character, XML_TOK_INVALID for an unexpected character, and
   XML_TOK_PARTIAL when input ends first.
   NOTE(review): the case labels inside both switches are not visible in
   this excerpt. */
433 int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
434 const char **nextTokPtr)
437 switch (BYTE_TYPE(enc, ptr)) {
443 return XML_TOK_INVALID;
445 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
446 switch (BYTE_TYPE(enc, ptr)) {
451 *nextTokPtr = ptr + MINBPC(enc);
452 return XML_TOK_CHAR_REF;
455 return XML_TOK_INVALID;
459 return XML_TOK_PARTIAL;
462 /* ptr points to character following "&#" */
/* Scans a numeric character reference.  If the character at ptr is a
   lowercase 'x', the rest is handed to scanHexCharRef starting at the
   following character.  Otherwise the first character is checked and the
   loop walks the remaining ones.  Returns XML_TOK_CHAR_REF with
   *nextTokPtr just past the terminating character, XML_TOK_INVALID for an
   unexpected character, and XML_TOK_PARTIAL when input ends first.
   NOTE(review): the case labels inside both switches are not visible in
   this excerpt. */
465 int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
466 const char **nextTokPtr)
469 if (CHAR_MATCHES(enc, ptr, ASCII_x))
470 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
471 switch (BYTE_TYPE(enc, ptr)) {
476 return XML_TOK_INVALID;
478 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
479 switch (BYTE_TYPE(enc, ptr)) {
483 *nextTokPtr = ptr + MINBPC(enc);
484 return XML_TOK_CHAR_REF;
487 return XML_TOK_INVALID;
491 return XML_TOK_PARTIAL;
494 /* ptr points to character following "&" */
/* Scans a reference.  The first character either starts a name
   (CHECK_NMSTRT_CASES) or selects the branch that delegates to scanCharRef
   on the following character; anything else is XML_TOK_INVALID.  Name
   characters are then consumed through CHECK_NAME_CASES until the
   character that yields XML_TOK_ENTITY_REF, with *nextTokPtr just past
   it.  Running out of input yields XML_TOK_PARTIAL.
   NOTE(review): the case labels for the delegating and terminating
   branches are not visible in this excerpt. */
497 int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
498 const char **nextTokPtr)
501 return XML_TOK_PARTIAL;
502 switch (BYTE_TYPE(enc, ptr)) {
503 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
505 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
508 return XML_TOK_INVALID;
511 switch (BYTE_TYPE(enc, ptr)) {
512 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
514 *nextTokPtr = ptr + MINBPC(enc);
515 return XML_TOK_ENTITY_REF;
518 return XML_TOK_INVALID;
521 return XML_TOK_PARTIAL;
524 /* ptr points to character following first character of attribute name */
/* Scans the attributes of a start tag.  Visible structure: attribute-name
   characters are validated with CHECK_NAME_CASES / CHECK_NMSTRT_CASES;
   `open` records the byte type of the opening quote (BT_QUOT or BT_APOS);
   inside the value, INVALID_CASES rejects bad multi-byte input and
   references are checked by calling scanRef, which advances ptr through
   its nextTokPtr argument.  After the closing quote the function either
   continues with another attribute or returns
   XML_TOK_START_TAG_WITH_ATTS / XML_TOK_EMPTY_ELEMENT_WITH_ATTS with
   *nextTokPtr just past the tag.  Malformed input yields XML_TOK_INVALID
   and truncated input XML_TOK_PARTIAL.
   NOTE(review): a large part of the body, including loop headers and case
   labels, is not visible in this excerpt. */
527 int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
528 const char **nextTokPtr)
534 switch (BYTE_TYPE(enc, ptr)) {
535 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
540 return XML_TOK_INVALID;
545 return XML_TOK_PARTIAL;
546 switch (BYTE_TYPE(enc, ptr)) {
547 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
550 return XML_TOK_INVALID;
554 case BT_S: case BT_CR: case BT_LF:
560 return XML_TOK_PARTIAL;
561 t = BYTE_TYPE(enc, ptr);
571 return XML_TOK_INVALID;
585 return XML_TOK_PARTIAL;
586 open = BYTE_TYPE(enc, ptr);
587 if (open == BT_QUOT || open == BT_APOS)
596 return XML_TOK_INVALID;
600 /* in attribute value */
604 return XML_TOK_PARTIAL;
605 t = BYTE_TYPE(enc, ptr);
609 INVALID_CASES(ptr, nextTokPtr)
612 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
614 if (tok == XML_TOK_INVALID)
622 return XML_TOK_INVALID;
630 return XML_TOK_PARTIAL;
631 switch (BYTE_TYPE(enc, ptr)) {
642 return XML_TOK_INVALID;
644 /* ptr points to closing quote */
648 return XML_TOK_PARTIAL;
649 switch (BYTE_TYPE(enc, ptr)) {
650 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
651 case BT_S: case BT_CR: case BT_LF:
655 *nextTokPtr = ptr + MINBPC(enc);
656 return XML_TOK_START_TAG_WITH_ATTS;
661 return XML_TOK_PARTIAL;
662 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
664 return XML_TOK_INVALID;
666 *nextTokPtr = ptr + MINBPC(enc);
667 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
670 return XML_TOK_INVALID;
678 return XML_TOK_INVALID;
681 return XML_TOK_PARTIAL;
684 /* ptr points to character following "<" */
/* Dispatches on what follows '<'.  Visible delegations: scanComment and
   scanCdataSection (after one further character has been consumed),
   scanPi, scanEndTag, and scanAtts once a start-tag name is followed by an
   attribute.  For a start tag without attributes it returns
   XML_TOK_START_TAG_NO_ATTS, or XML_TOK_EMPTY_ELEMENT_NO_ATTS when the
   final '>' check succeeds, each with *nextTokPtr just past the tag.
   Malformed input yields XML_TOK_INVALID and truncated input
   XML_TOK_PARTIAL.
   NOTE(review): the case labels selecting each branch are not visible in
   this excerpt. */
687 int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
688 const char **nextTokPtr)
694 return XML_TOK_PARTIAL;
695 switch (BYTE_TYPE(enc, ptr)) {
696 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
698 if ((ptr += MINBPC(enc)) == end)
699 return XML_TOK_PARTIAL;
700 switch (BYTE_TYPE(enc, ptr)) {
702 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
704 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
707 return XML_TOK_INVALID;
709 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
711 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
714 return XML_TOK_INVALID;
719 /* we have a start-tag */
721 switch (BYTE_TYPE(enc, ptr)) {
722 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
727 return XML_TOK_INVALID;
732 return XML_TOK_PARTIAL;
733 switch (BYTE_TYPE(enc, ptr)) {
734 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
737 return XML_TOK_INVALID;
741 case BT_S: case BT_CR: case BT_LF:
745 switch (BYTE_TYPE(enc, ptr)) {
746 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
751 case BT_S: case BT_CR: case BT_LF:
756 return XML_TOK_INVALID;
758 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
760 return XML_TOK_PARTIAL;
764 *nextTokPtr = ptr + MINBPC(enc);
765 return XML_TOK_START_TAG_NO_ATTS;
770 return XML_TOK_PARTIAL;
771 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
773 return XML_TOK_INVALID;
775 *nextTokPtr = ptr + MINBPC(enc);
776 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
779 return XML_TOK_INVALID;
782 return XML_TOK_PARTIAL;
/* Tokenizes element content.  For encodings whose minimum character size
   exceeds one byte, the available length is first rounded down to a
   multiple of MINBPC(enc); the mask arithmetic presumes MINBPC(enc) is a
   power of two.  The first character selects the token: markup is handed
   to scanLt, references to scanRef; line ends produce
   XML_TOK_DATA_NEWLINE, or XML_TOK_TRAILING_CR when the input ends there;
   a ']' at the end of input produces XML_TOK_TRAILING_RSQB.  In the data
   loop a ']' followed by ']' and '>' is reported as XML_TOK_INVALID with
   *nextTokPtr two characters past the first ']'.  Otherwise a run of data
   is returned as XML_TOK_DATA_CHARS; the local LEAD_CASE macro ends the
   run before a multi-byte character that is incomplete or flagged by
   IS_INVALID_CHAR.
   NOTE(review): many lines of the body are not visible in this excerpt. */
786 int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
787 const char **nextTokPtr)
791 if (MINBPC(enc) > 1) {
792 size_t n = end - ptr;
793 if (n & (MINBPC(enc) - 1)) {
794 n &= ~(MINBPC(enc) - 1);
796 return XML_TOK_PARTIAL;
800 switch (BYTE_TYPE(enc, ptr)) {
802 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
804 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
808 return XML_TOK_TRAILING_CR;
809 if (BYTE_TYPE(enc, ptr) == BT_LF)
812 return XML_TOK_DATA_NEWLINE;
814 *nextTokPtr = ptr + MINBPC(enc);
815 return XML_TOK_DATA_NEWLINE;
819 return XML_TOK_TRAILING_RSQB;
820 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
824 return XML_TOK_TRAILING_RSQB;
825 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
830 return XML_TOK_INVALID;
831 INVALID_CASES(ptr, nextTokPtr)
837 switch (BYTE_TYPE(enc, ptr)) {
838 #define LEAD_CASE(n) \
840 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
842 return XML_TOK_DATA_CHARS; \
846 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
849 if (ptr + MINBPC(enc) != end) {
850 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
854 if (ptr + 2*MINBPC(enc) != end) {
855 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
859 *nextTokPtr = ptr + 2*MINBPC(enc);
860 return XML_TOK_INVALID;
872 return XML_TOK_DATA_CHARS;
879 return XML_TOK_DATA_CHARS;
882 /* ptr points to character following "%" */
/* Scans what follows a '%'.  If the next character is whitespace or
   another '%', the '%' stands alone and XML_TOK_PERCENT is returned.  If
   it starts a name (CHECK_NMSTRT_CASES), name characters are consumed
   through CHECK_NAME_CASES until the character that yields
   XML_TOK_PARAM_ENTITY_REF, with *nextTokPtr just past it.  Other input
   is XML_TOK_INVALID; truncated input is XML_TOK_PARTIAL.
   NOTE(review): the terminating case label and the *nextTokPtr store for
   XML_TOK_PERCENT are not visible in this excerpt. */
885 int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
886 const char **nextTokPtr)
889 return XML_TOK_PARTIAL;
890 switch (BYTE_TYPE(enc, ptr)) {
891 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
892 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
894 return XML_TOK_PERCENT;
897 return XML_TOK_INVALID;
900 switch (BYTE_TYPE(enc, ptr)) {
901 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
903 *nextTokPtr = ptr + MINBPC(enc);
904 return XML_TOK_PARAM_ENTITY_REF;
907 return XML_TOK_INVALID;
910 return XML_TOK_PARTIAL;
/* Scans a name introduced by '#' (prologTok calls this on the character
   after the one that selected it).  The first character must start a name
   (CHECK_NMSTRT_CASES); following characters go through CHECK_NAME_CASES.
   Whitespace, ')', '>', '%' or '|' ends the name and yields
   XML_TOK_POUND_NAME; other characters yield XML_TOK_INVALID.  If the
   input ends while still inside the name, the negated value
   -XML_TOK_POUND_NAME is returned.
   NOTE(review): the meaning callers attach to the negated token is not
   visible in this excerpt. */
914 int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
915 const char **nextTokPtr)
918 return XML_TOK_PARTIAL;
919 switch (BYTE_TYPE(enc, ptr)) {
920 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
923 return XML_TOK_INVALID;
926 switch (BYTE_TYPE(enc, ptr)) {
927 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
928 case BT_CR: case BT_LF: case BT_S:
929 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
931 return XML_TOK_POUND_NAME;
934 return XML_TOK_INVALID;
937 return -XML_TOK_POUND_NAME;
/* Scans a quoted literal; `open` is the byte type of the opening quote
   (prologTok passes BT_QUOT or BT_APOS).  Each character's byte type is
   read into t and INVALID_CASES rejects bad multi-byte input.  On one
   visible path the negated value -XML_TOK_LITERAL is returned; otherwise
   the character examined next must be whitespace, '>', '%' or '[' for
   XML_TOK_LITERAL, and anything else is XML_TOK_INVALID.  Truncated input
   yields XML_TOK_PARTIAL.
   NOTE(review): the loop header, the comparison of t against `open` and
   the condition for the negated return are not visible in this excerpt. */
941 int PREFIX(scanLit)(int open, const ENCODING *enc,
942 const char *ptr, const char *end,
943 const char **nextTokPtr)
946 int t = BYTE_TYPE(enc, ptr);
948 INVALID_CASES(ptr, nextTokPtr)
955 return -XML_TOK_LITERAL;
957 switch (BYTE_TYPE(enc, ptr)) {
958 case BT_S: case BT_CR: case BT_LF:
959 case BT_GT: case BT_PERCNT: case BT_LSQB:
960 return XML_TOK_LITERAL;
962 return XML_TOK_INVALID;
969 return XML_TOK_PARTIAL;
/* Tokenizes the prolog.  For encodings whose minimum character size
   exceeds one byte, the available length is first rounded down to a
   multiple of MINBPC(enc); the mask arithmetic presumes MINBPC(enc) is a
   power of two.  The first character selects the token:
     - quotes delegate to scanLit with BT_QUOT or BT_APOS;
     - markup delegates to scanDecl or scanPi, or returns
       XML_TOK_INSTANCE_START with *nextTokPtr one character before ptr;
     - whitespace runs return XML_TOK_PROLOG_S (negated on the visible
       end-of-input path) and avoid splitting a CR/LF pair;
     - '%' delegates to scanPercent and a '#'-style name to scanPoundName;
     - punctuation returns the matching single-character token; "]]>" is
       XML_TOK_COND_SECT_CLOSE, and a closing parenthesis may combine with
       a following '*', '?' or '+'.
   Otherwise a name or name token is accumulated: `tok` starts as
   XML_TOK_NAME or XML_TOK_NMTOKEN, may become XML_TOK_PREFIXED_NAME, and
   a trailing '+', '*' or '?' is rejected with XML_TOK_INVALID when tok is
   XML_TOK_NMTOKEN.  Negated return values appear where the input ends
   right after a complete-looking token.
   NOTE(review): most case labels and several statements are not visible
   in this excerpt; the bullet list is inferred from the visible returns
   and should be confirmed against the full body. */
973 int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
974 const char **nextTokPtr)
979 if (MINBPC(enc) > 1) {
980 size_t n = end - ptr;
981 if (n & (MINBPC(enc) - 1)) {
982 n &= ~(MINBPC(enc) - 1);
984 return XML_TOK_PARTIAL;
988 switch (BYTE_TYPE(enc, ptr)) {
990 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
992 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
997 return XML_TOK_PARTIAL;
998 switch (BYTE_TYPE(enc, ptr)) {
1000 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1002 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1009 *nextTokPtr = ptr - MINBPC(enc);
1010 return XML_TOK_INSTANCE_START;
1013 return XML_TOK_INVALID;
1016 if (ptr + MINBPC(enc) == end)
1017 return -XML_TOK_PROLOG_S;
1019 case BT_S: case BT_LF:
1024 switch (BYTE_TYPE(enc, ptr)) {
1025 case BT_S: case BT_LF:
1028 /* don't split CR/LF pair */
1029 if (ptr + MINBPC(enc) != end)
1034 return XML_TOK_PROLOG_S;
1038 return XML_TOK_PROLOG_S;
1040 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1042 *nextTokPtr = ptr + MINBPC(enc);
1043 return XML_TOK_COMMA;
1045 *nextTokPtr = ptr + MINBPC(enc);
1046 return XML_TOK_OPEN_BRACKET;
1050 return -XML_TOK_CLOSE_BRACKET;
1051 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1052 if (ptr + MINBPC(enc) == end)
1053 return XML_TOK_PARTIAL;
1054 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1055 *nextTokPtr = ptr + 2*MINBPC(enc);
1056 return XML_TOK_COND_SECT_CLOSE;
1060 return XML_TOK_CLOSE_BRACKET;
1062 *nextTokPtr = ptr + MINBPC(enc);
1063 return XML_TOK_OPEN_PAREN;
1067 return -XML_TOK_CLOSE_PAREN;
1068 switch (BYTE_TYPE(enc, ptr)) {
1070 *nextTokPtr = ptr + MINBPC(enc);
1071 return XML_TOK_CLOSE_PAREN_ASTERISK;
1073 *nextTokPtr = ptr + MINBPC(enc);
1074 return XML_TOK_CLOSE_PAREN_QUESTION;
1076 *nextTokPtr = ptr + MINBPC(enc);
1077 return XML_TOK_CLOSE_PAREN_PLUS;
1078 case BT_CR: case BT_LF: case BT_S:
1079 case BT_GT: case BT_COMMA: case BT_VERBAR:
1082 return XML_TOK_CLOSE_PAREN;
1085 return XML_TOK_INVALID;
1087 *nextTokPtr = ptr + MINBPC(enc);
1090 *nextTokPtr = ptr + MINBPC(enc);
1091 return XML_TOK_DECL_CLOSE;
1093 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1094 #define LEAD_CASE(n) \
1095 case BT_LEAD ## n: \
1096 if (end - ptr < n) \
1097 return XML_TOK_PARTIAL_CHAR; \
1098 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1100 tok = XML_TOK_NAME; \
1103 if (IS_NAME_CHAR(enc, ptr, n)) { \
1105 tok = XML_TOK_NMTOKEN; \
1108 *nextTokPtr = ptr; \
1109 return XML_TOK_INVALID;
1110 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1123 tok = XML_TOK_NMTOKEN;
1127 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1132 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1134 tok = XML_TOK_NMTOKEN;
1140 return XML_TOK_INVALID;
1142 while (ptr != end) {
1143 switch (BYTE_TYPE(enc, ptr)) {
1144 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1145 case BT_GT: case BT_RPAR: case BT_COMMA:
1146 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1147 case BT_S: case BT_CR: case BT_LF:
1156 return XML_TOK_PARTIAL;
1157 tok = XML_TOK_PREFIXED_NAME;
1158 switch (BYTE_TYPE(enc, ptr)) {
1159 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1161 tok = XML_TOK_NMTOKEN;
1165 case XML_TOK_PREFIXED_NAME:
1166 tok = XML_TOK_NMTOKEN;
1172 if (tok == XML_TOK_NMTOKEN) {
1174 return XML_TOK_INVALID;
1176 *nextTokPtr = ptr + MINBPC(enc);
1177 return XML_TOK_NAME_PLUS;
1179 if (tok == XML_TOK_NMTOKEN) {
1181 return XML_TOK_INVALID;
1183 *nextTokPtr = ptr + MINBPC(enc);
1184 return XML_TOK_NAME_ASTERISK;
1186 if (tok == XML_TOK_NMTOKEN) {
1188 return XML_TOK_INVALID;
1190 *nextTokPtr = ptr + MINBPC(enc);
1191 return XML_TOK_NAME_QUESTION;
1194 return XML_TOK_INVALID;
/* Tokenizes the text of an attribute value.  Returns XML_TOK_NONE on the
   first visible early exit.  The loop skips multi-byte characters n bytes
   at a time (local LEAD_CASE) and stops at special characters: a
   reference at the start is handed to scanRef; line ends yield
   XML_TOK_DATA_NEWLINE (or XML_TOK_TRAILING_CR at end of input, with a
   look-ahead for BT_LF); a whitespace character yields
   XML_TOK_ATTRIBUTE_VALUE_S; one branch, used inside entity references,
   yields XML_TOK_INVALID.  When data precedes a special character, or the
   input ends, the pending run is returned as XML_TOK_DATA_CHARS.
   NOTE(review): the case labels and the "run already started" tests are
   not visible in this excerpt. */
1201 int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1202 const char **nextTokPtr)
1206 return XML_TOK_NONE;
1208 while (ptr != end) {
1209 switch (BYTE_TYPE(enc, ptr)) {
1210 #define LEAD_CASE(n) \
1211 case BT_LEAD ## n: ptr += n; break;
1212 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1216 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1218 return XML_TOK_DATA_CHARS;
1220 /* this is for inside entity references */
1222 return XML_TOK_INVALID;
1225 *nextTokPtr = ptr + MINBPC(enc);
1226 return XML_TOK_DATA_NEWLINE;
1229 return XML_TOK_DATA_CHARS;
1234 return XML_TOK_TRAILING_CR;
1235 if (BYTE_TYPE(enc, ptr) == BT_LF)
1238 return XML_TOK_DATA_NEWLINE;
1241 return XML_TOK_DATA_CHARS;
1244 *nextTokPtr = ptr + MINBPC(enc);
1245 return XML_TOK_ATTRIBUTE_VALUE_S;
1248 return XML_TOK_DATA_CHARS;
1255 return XML_TOK_DATA_CHARS;
/* Tokenizes the text of an entity value.  Returns XML_TOK_NONE on the
   first visible early exit.  The loop skips multi-byte characters n bytes
   at a time (local LEAD_CASE) and stops at special characters: a
   reference at the start is handed to scanRef and a parameter-entity
   reference to scanPercent; line ends yield XML_TOK_DATA_NEWLINE (or
   XML_TOK_TRAILING_CR at end of input, with a look-ahead for BT_LF).
   When data precedes a special character, or the input ends, the pending
   run is returned as XML_TOK_DATA_CHARS.
   NOTE(review): the case labels and the "run already started" tests are
   not visible in this excerpt. */
1259 int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1260 const char **nextTokPtr)
1264 return XML_TOK_NONE;
1266 while (ptr != end) {
1267 switch (BYTE_TYPE(enc, ptr)) {
1268 #define LEAD_CASE(n) \
1269 case BT_LEAD ## n: ptr += n; break;
1270 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1274 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1276 return XML_TOK_DATA_CHARS;
1279 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1281 return XML_TOK_DATA_CHARS;
1284 *nextTokPtr = ptr + MINBPC(enc);
1285 return XML_TOK_DATA_NEWLINE;
1288 return XML_TOK_DATA_CHARS;
1293 return XML_TOK_TRAILING_CR;
1294 if (BYTE_TYPE(enc, ptr) == BT_LF)
1297 return XML_TOK_DATA_NEWLINE;
1300 return XML_TOK_DATA_CHARS;
1307 return XML_TOK_DATA_CHARS;
/* Scans an ignored conditional section.  For encodings whose minimum
   character size exceeds one byte, the available length is first rounded
   down to a multiple of MINBPC(enc).  The loop rejects bad multi-byte
   input through INVALID_CASES and recognises two three-character
   sequences: one whose second and third characters are '!' and '[', and
   one whose second and third characters are ']' and '>'.  The latter can
   lead to XML_TOK_IGNORE_SECT.  Running out of input at any point yields
   XML_TOK_PARTIAL.
   NOTE(review): the nesting-level bookkeeping between these checks and
   the case labels are not visible in this excerpt. */
1313 int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
1314 const char **nextTokPtr)
1317 if (MINBPC(enc) > 1) {
1318 size_t n = end - ptr;
1319 if (n & (MINBPC(enc) - 1)) {
1320 n &= ~(MINBPC(enc) - 1);
1324 while (ptr != end) {
1325 switch (BYTE_TYPE(enc, ptr)) {
1326 INVALID_CASES(ptr, nextTokPtr)
1328 if ((ptr += MINBPC(enc)) == end)
1329 return XML_TOK_PARTIAL;
1330 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1331 if ((ptr += MINBPC(enc)) == end)
1332 return XML_TOK_PARTIAL;
1333 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1340 if ((ptr += MINBPC(enc)) == end)
1341 return XML_TOK_PARTIAL;
1342 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1343 if ((ptr += MINBPC(enc)) == end)
1344 return XML_TOK_PARTIAL;
1345 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1349 return XML_TOK_IGNORE_SECT;
1360 return XML_TOK_PARTIAL;
1363 #endif /* XML_DTD */
/* Checks the characters between ptr and end one MINBPC-sized unit at a
   time, classifying each with BYTE_TYPE.  Visible special handling: a tab
   character is tested explicitly, and for some byte types the character
   is accepted only after checking that its ASCII value has no bits above
   0x7f, followed by a switch on specific ASCII values.
   NOTE(review): the accepted byte types, the use of badPtr and the return
   values are not visible in this excerpt. */
1366 int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1367 const char **badPtr)
1371 for (; ptr != end; ptr += MINBPC(enc)) {
1372 switch (BYTE_TYPE(enc, ptr)) {
1396 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1403 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1406 switch (BYTE_TO_ASCII(enc, ptr)) {
1420 /* This must only be called for a well-formed start-tag or empty element tag.
1421 Returns the number of attributes. Pointers to the first attsMax attributes
1422 are stored in atts. */
/* Implementation notes (from the visible lines): a three-state machine
   (other / inName / inValue) starts in inName and walks the tag one
   MINBPC-sized unit at a time, with no end pointer -- hence the
   well-formedness precondition above.  START_NAME records the attribute
   name and sets normalized = 1 when a name starts in state `other`.  A
   quote seen outside a value records valuePtr one character past it; the
   matching quote (same byte type as `open`) records valueEnd.  The
   normalized flag is cleared for values containing line ends, and for a
   whitespace character that is at the very start of the value, is not a
   plain space, or is followed by another space or by the closing quote.
   The visible stores into atts[] are guarded by nAtts < attsMax.
   NOTE(review): state transitions, the nAtts increment and the return
   statement are not visible in this excerpt. */
1425 int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1426 int attsMax, ATTRIBUTE *atts)
1428 enum { other, inName, inValue } state = inName;
1430 int open = 0; /* defined when state == inValue;
1431 initialization just to shut up compilers */
1433 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1434 switch (BYTE_TYPE(enc, ptr)) {
1435 #define START_NAME \
1436 if (state == other) { \
1437 if (nAtts < attsMax) { \
1438 atts[nAtts].name = ptr; \
1439 atts[nAtts].normalized = 1; \
1443 #define LEAD_CASE(n) \
1444 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1445 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1454 if (state != inValue) {
1455 if (nAtts < attsMax)
1456 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1460 else if (open == BT_QUOT) {
1462 if (nAtts < attsMax)
1463 atts[nAtts].valueEnd = ptr;
1468 if (state != inValue) {
1469 if (nAtts < attsMax)
1470 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1474 else if (open == BT_APOS) {
1476 if (nAtts < attsMax)
1477 atts[nAtts].valueEnd = ptr;
1482 if (nAtts < attsMax)
1483 atts[nAtts].normalized = 0;
1486 if (state == inName)
1488 else if (state == inValue
1490 && atts[nAtts].normalized
1491 && (ptr == atts[nAtts].valuePtr
1492 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1493 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1494 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1495 atts[nAtts].normalized = 0;
1497 case BT_CR: case BT_LF:
1498 /* This case ensures that the first attribute name is counted.
1499 Apart from that we could just change state on the quote. */
1500 if (state == inName)
1502 else if (state == inValue && nAtts < attsMax)
1503 atts[nAtts].normalized = 0;
1507 if (state != inValue)
/* Converts an already-scanned character reference to its numeric value.
   ptr is advanced past the first two characters; if the next one is a
   lowercase 'x' the digits up to ';' are read as hexadecimal (0-9, A-F,
   a-f), otherwise as decimal.  There is no end pointer: the loops run
   until ';', so the reference must have been validated beforehand.  In
   both loops the running value is compared against 0x110000 after each
   digit.  The final value is passed through checkCharRefNumber.
   NOTE(review): the declaration of `result`, the shift/multiply steps and
   the statement taken when the value reaches 0x110000 are not visible in
   this excerpt. */
1518 int PREFIX(charRefNumber)(const ENCODING *enc ATTR_UNUSED, const char *ptr)
1522 ptr += 2*MINBPC(enc);
1523 if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1524 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1525 int c = BYTE_TO_ASCII(enc, ptr);
1527 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1528 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1530 result |= (c - ASCII_0);
1532 case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F:
1534 result += 10 + (c - ASCII_A);
1536 case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f:
1538 result += 10 + (c - ASCII_a);
1541 if (result >= 0x110000)
1546 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1547 int c = BYTE_TO_ASCII(enc, ptr);
1549 result += (c - ASCII_0);
1550 if (result >= 0x110000)
1554 return checkCharRefNumber(result);
/* Recognises a small set of fixed entity names by length and by comparing
   characters one at a time.  The name length in characters,
   (end - ptr)/MINBPC(enc), selects the branch.  Visible comparisons: a
   name whose second character is 't' (first character examined by a
   switch); the sequence 'a', 'm', 'p'; and, under a switch on the first
   character, the sequences 'u', 'o', 't' and 'p', 'o', 's'.
   NOTE(review): the remaining parameters, the case labels, the pointer
   advances between comparisons and all return values are not visible in
   this excerpt. */
1558 int PREFIX(predefinedEntityName)(const ENCODING * enc ATTR_UNUSED,
1562 switch ((end - ptr)/MINBPC(enc)) {
1564 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1565 switch (BYTE_TO_ASCII(enc, ptr)) {
1574 if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1576 if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1578 if (CHAR_MATCHES(enc, ptr, ASCII_p))
1584 switch (BYTE_TO_ASCII(enc, ptr)) {
1587 if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1589 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1591 if (CHAR_MATCHES(enc, ptr, ASCII_t))
1598 if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1600 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1602 if (CHAR_MATCHES(enc, ptr, ASCII_s))
/* Compares the names starting at ptr1 and ptr2 byte by byte, driven by the
   byte type of the character at ptr1.  For multi-byte lead characters the
   LEAD_CASE expansions are listed in the order 4, 3, 2 and each compares
   one byte.  For other name characters one byte is always compared, plus
   one more for each step by which MINBPC(enc) exceeds 1, 2 and 3.  When
   ptr1 no longer points at a name character there is a shortcut test for
   single-byte encodings with equal bytes, followed by a switch on the
   byte type at ptr2.
   NOTE(review): neither name has an end pointer, so both are presumably
   terminated by a non-name character -- confirm against callers.  The
   loop header, fall-through structure and return values are not visible
   in this excerpt. */
1613 int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
1616 switch (BYTE_TYPE(enc, ptr1)) {
1617 #define LEAD_CASE(n) \
1618 case BT_LEAD ## n: \
1619 if (*ptr1++ != *ptr2++) \
1621 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1624 if (*ptr1++ != *ptr2++)
1636 if (*ptr2++ != *ptr1++)
1638 if (MINBPC(enc) > 1) {
1639 if (*ptr2++ != *ptr1++)
1641 if (MINBPC(enc) > 2) {
1642 if (*ptr2++ != *ptr1++)
1644 if (MINBPC(enc) > 3) {
1645 if (*ptr2++ != *ptr1++)
1652 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1654 switch (BYTE_TYPE(enc, ptr2)) {
/* Compares an encoded name starting at ptr1 with the NUL-terminated ASCII
   string ptr2.  ptr1 advances by MINBPC(enc) per character while ptr2
   advances by one byte; a mismatch is detected with CHAR_MATCHES.  After
   all of ptr2 has been consumed, the result is non-zero only if ptr1 has
   reached end1, i.e. the encoded name has no extra characters.
   NOTE(review): the remaining parameters and the statements taken inside
   the loop are not visible in this excerpt. */
1677 int PREFIX(nameMatchesAscii)(const ENCODING * enc ATTR_UNUSED,
1682 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1685 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1688 return ptr1 == end1;
/* Walks a name starting at ptr, remembering the start position in `start`
   and advancing n bytes over each n-byte lead character (local
   LEAD_CASE for n = 2, 3, 4).
   NOTE(review): the loop header, the other cases and the return
   expression are not visible in this excerpt; presumably the result is
   the distance from `start` -- confirm. */
1692 int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1694 const char *start = ptr;
1696 switch (BYTE_TYPE(enc, ptr)) {
1697 #define LEAD_CASE(n) \
1698 case BT_LEAD ## n: ptr += n; break;
1699 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
/* Returns a pointer into the input, chosen by inspecting the byte type of
   the character at ptr.
   NOTE(review): only the signature and the switch header are visible
   here; judging by the name this presumably skips leading whitespace --
   confirm against the full body. */
1719 const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
1722 switch (BYTE_TYPE(enc, ptr)) {
/* Advances the position record `pos` over the input between ptr and end.
   Characters are classified with BYTE_TYPE; multi-byte lead characters
   are handled by the local LEAD_CASE macro.  On the two visible line-end
   branches columnNumber is set to (unsigned)-1, and one of them first
   looks ahead for a BT_LF; on another visible branch columnNumber is
   simply incremented.
   NOTE(review): the remaining parameters, the lineNumber updates and the
   statement that follows each (unsigned)-1 assignment are not visible in
   this excerpt; presumably a later increment wraps the column to 0 --
   confirm. */
1735 void PREFIX(updatePosition)(const ENCODING *enc,
1740 while (ptr != end) {
1741 switch (BYTE_TYPE(enc, ptr)) {
1742 #define LEAD_CASE(n) \
1743 case BT_LEAD ## n: \
1746 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1749 pos->columnNumber = (unsigned)-1;
1756 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
1758 pos->columnNumber = (unsigned)-1;
1764 pos->columnNumber++;
/* Remove the helper macros defined at the top of this file.
   NOTE(review): MULTIBYTE_CASES has no visible definition in this
   excerpt, and INVALID_LEAD_CASE, PREFIX and LEAD_CASE are not undefined
   in the visible lines -- confirm whether that is intentional. */
1769 #undef MULTIBYTE_CASES
1770 #undef INVALID_CASES
1771 #undef CHECK_NAME_CASE
1772 #undef CHECK_NAME_CASES
1773 #undef CHECK_NMSTRT_CASE
1774 #undef CHECK_NMSTRT_CASES