2 Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
3 See the file copying.txt for copying permission.
6 #ifndef XMLPARSE_H_INCLUDED
7 #define XMLPARSE_H_INCLUDED
14 #define XMLPARSEAPI /* as nothing */
/* Opaque handle to a parser instance; every API function in this header
   takes or returns one.  It is an untyped pointer, so the compiler cannot
   catch a wrong pointer being passed.  The XML_GetUserData macro in this
   header dereferences the handle as `void **`, i.e. it reads a pointer
   stored at the very start of the object the handle points to. */
17 typedef void *XML_Parser;
/* Selects the character types used by the API.
   XML_Char is the element type of the strings passed to handlers and of the
   encoding/context/base arguments declared in this header; XML_LChar is the
   element type of the string returned by xmlrpc_XML_ErrorString.
   Three configurations are visible: wchar_t (XML_UNICODE_WCHAR_T),
   unsigned short, and plain char.
   NOTE(review): no `#ifdef XML_UNICODE` is visible to pair with the
   `#else` / `#endif` marked "not XML_UNICODE" below, the first comment below
   is never closed on its own line, and nothing visible includes <stddef.h>
   for wchar_t.  Lines appear to be missing from this copy -- confirm against
   the complete header before relying on this conditional structure. */
19 #ifdef XML_UNICODE_WCHAR_T
21 /* XML_UNICODE_WCHAR_T will work only if sizeof(wchar_t) == 2 and wchar_t
24 /* Information is UTF-16 encoded as wchar_ts */
31 typedef wchar_t XML_Char;
32 typedef wchar_t XML_LChar;
34 #else /* not XML_UNICODE_WCHAR_T */
38 /* Information is UTF-16 encoded as unsigned shorts */
39 typedef unsigned short XML_Char;
40 typedef char XML_LChar;
42 #else /* not XML_UNICODE */
44 /* Information is UTF-8 encoded. */
45 typedef char XML_Char;
46 typedef char XML_LChar;
48 #endif /* not XML_UNICODE */
50 #endif /* not XML_UNICODE_WCHAR_T */
53 /* Constructs a new parser; encoding is the encoding specified by the external
54 protocol or null if there is none specified. */
/* encoding: name of the externally specified encoding as an XML_Char
   string, or null.  Returns the new parser handle.
   NOTE(review): the failure value is not stated for this function;
   xmlrpc_XML_ExternalEntityParserCreate documents 0 on out-of-memory,
   presumably the same applies here -- confirm. */
56 XML_Parser XMLPARSEAPI
57 xmlrpc_XML_ParserCreate(const XML_Char * encoding);
59 /* Constructs a new parser and namespace processor. Element type names
60 and attribute names that belong to a namespace will be expanded;
61 unprefixed attribute names are never expanded; unprefixed element type
62 names are expanded only if there is a default namespace. The expanded
63 name is the concatenation of the namespace URI, the namespace separator character,
64 and the local part of the name. If the namespace separator is '\0' then
65 the namespace URI and the local part will be concatenated without any
66 separator. When a namespace is not declared, the name and prefix will be
67 passed through without expansion. */
/* encoding: same meaning as for xmlrpc_XML_ParserCreate.
   namespaceSeparator: the single XML_Char placed between the namespace URI
   and the local part in expanded names; '\0' means the two are concatenated
   with nothing in between.  Returns the new parser handle. */
69 XML_Parser XMLPARSEAPI
70 xmlrpc_XML_ParserCreateNS(const XML_Char *encoding,
71 XML_Char namespaceSeparator);
74 /* atts is array of name/value pairs, terminated by 0;
75 names and values are 0 terminated. */
/* Start-element callback type.  atts is a flat array of name/value string
   pointers ending with a 0 entry.
   NOTE(review): the embedded numbering skips a line between the two lines
   below, and the sibling XML_EndElementHandler takes a `name` parameter;
   a `const XML_Char *name,` parameter line appears to be missing from this
   copy -- confirm against the complete header. */
77 typedef void (*XML_StartElementHandler)(void *userData,
79 const XML_Char **atts);
/* End-element callback type: receives the userData pointer and the
   element's name.  Returns nothing, so it cannot signal an error through
   its return value. */
81 typedef void (*XML_EndElementHandler)(void *userData,
82 const XML_Char *name);
84 /* s is not 0 terminated. */
/* Character-data callback type.
   NOTE(review): the parameter list is cut off after userData in this copy.
   The comment above refers to a string `s` that is not 0 terminated, so a
   pointer plus an explicit length is presumably expected -- confirm against
   the complete header. */
85 typedef void (*XML_CharacterDataHandler)(void *userData,
89 /* target and data are 0 terminated */
/* Processing-instruction callback type: receives userData plus the PI
   target and its data, both as 0-terminated XML_Char strings. */
90 typedef void (*XML_ProcessingInstructionHandler)(void *userData,
91 const XML_Char *target,
92 const XML_Char *data);
94 /* data is 0 terminated */
/* Comment callback type: receives userData and the comment text as a
   0-terminated XML_Char string. */
95 typedef void (*XML_CommentHandler)(void *userData, const XML_Char *data);
/* Callback types for the start and end of a CDATA section.  Both receive
   only userData; no text is passed through these callbacks. */
97 typedef void (*XML_StartCdataSectionHandler)(void *userData);
98 typedef void (*XML_EndCdataSectionHandler)(void *userData);
100 /* This is called for any characters in the XML document for
101 which there is no applicable handler. This includes both
102 characters that are part of markup which is of a kind that is
103 not reported (comments, markup declarations), or characters
104 that are part of a construct which could be reported but
105 for which no handler has been supplied. The characters are passed
106 exactly as they were in the XML document except that
107 they will be encoded in UTF-8. Line boundaries are not normalized.
108 Note that a byte order mark character is not passed to the default handler.
109 There are no guarantees about how characters are divided between calls
110 to the default handler: for example, a comment might be split between
multiple calls. */
/* Default-handler callback type.
   NOTE(review): the parameter list is cut off after userData in this copy;
   the remaining parameters (presumably the character pointer and a length)
   are not visible -- confirm against the complete header. */
113 typedef void (*XML_DefaultHandler)(void *userData,
117 /* This is called for the start of the DOCTYPE declaration when the
118 name of the DOCTYPE is encountered. */
/* Start-of-DOCTYPE callback type: receives userData and the DOCTYPE name
   as an XML_Char string. */
119 typedef void (*XML_StartDoctypeDeclHandler)(void *userData,
120 const XML_Char *doctypeName);
122 /* This is called for the end of the DOCTYPE declaration when the
123 closing > is encountered, but after processing any external subset. */
/* End-of-DOCTYPE callback type: receives only userData. */
124 typedef void (*XML_EndDoctypeDeclHandler)(void *userData);
126 /* This is called for a declaration of an unparsed (NDATA)
127 entity. The base argument is whatever was set by XML_SetBase.
128 The entityName, systemId and notationName arguments will never be null.
129 The other arguments may be. */
/* Unparsed-entity declaration callback type.  Per the comment above,
   entityName, systemId and notationName are never null, so base and
   publicId are the two arguments a handler must null-check. */
131 typedef void (*XML_UnparsedEntityDeclHandler)(void *userData,
132 const XML_Char *entityName,
133 const XML_Char *base,
134 const XML_Char *systemId,
135 const XML_Char *publicId,
136 const XML_Char *notationName);
138 /* This is called for a declaration of notation.
139 The base argument is whatever was set by XML_SetBase.
140 The notationName will never be null. The other arguments can be. */
/* Notation declaration callback type.  Per the comment above, only
   notationName is guaranteed non-null; base, systemId and publicId must be
   null-checked by the handler. */
142 typedef void (*XML_NotationDeclHandler)(void *userData,
143 const XML_Char *notationName,
144 const XML_Char *base,
145 const XML_Char *systemId,
146 const XML_Char *publicId);
/* Callback type for the declaration of an external parsed entity (going by
   the type name).  The parameters match XML_UnparsedEntityDeclHandler
   without notationName.
   NOTE(review): this header does not say which of these arguments may be
   null -- confirm before dereferencing base or publicId. */
148 typedef void (*XML_ExternalParsedEntityDeclHandler)(void *userData,
149 const XML_Char *entityName,
150 const XML_Char *base,
151 const XML_Char *systemId,
152 const XML_Char *publicId);
/* Callback type for the declaration of an internal parsed entity (going by
   the type name).  replacementText comes with an explicit
   replacementTextLength.
   NOTE(review): the explicit length suggests replacementText is not
   0 terminated, and the unit (XML_Char elements vs bytes) is not stated
   here -- confirm. */
154 typedef void (*XML_InternalParsedEntityDeclHandler)(void *userData,
155 const XML_Char *entityName,
156 const XML_Char *replacementText,
157 int replacementTextLength);
159 /* When namespace processing is enabled, these are called once for
160 each namespace declaration. The calls to the start and end element
161 handlers occur between the calls to the start and end namespace
162 declaration handlers. For an xmlns attribute, prefix will be null.
163 For an xmlns="" attribute, uri will be null. */
/* Start-of-namespace-scope callback type.  Per the comment above, prefix is
   null for a default-namespace declaration and uri is null when the default
   namespace is being undeclared, so a handler must null-check both. */
165 typedef void (*XML_StartNamespaceDeclHandler)(void *userData,
166 const XML_Char *prefix,
167 const XML_Char *uri);
/* End-of-namespace-scope callback type: receives userData and the prefix
   whose declaration is going out of scope (null for the default namespace,
   matching the start handler). */
169 typedef void (*XML_EndNamespaceDeclHandler)(void *userData,
170 const XML_Char *prefix);
172 /* This is called if the document is not standalone (it has an
173 external subset or a reference to a parameter entity, but does not
174 have standalone="yes"). If this handler returns 0, then processing
175 will not continue, and the parser will return a
176 XML_ERROR_NOT_STANDALONE error. */
/* Not-standalone callback type.  Unlike most handlers it returns int:
   0 aborts parsing with XML_ERROR_NOT_STANDALONE, per the comment above. */
178 typedef int (*XML_NotStandaloneHandler)(void *userData);
180 /* This is called for a reference to an external parsed general entity.
181 The referenced entity is not automatically parsed.
182 The application can parse it immediately or later using
183 XML_ExternalEntityParserCreate.
184 The parser argument is the parser parsing the entity containing the reference;
185 it can be passed as the parser argument to XML_ExternalEntityParserCreate.
186 The systemId argument is the system identifier as specified in the entity declaration; it will not be null.
188 The base argument is the system identifier that should be used as the base for
189 resolving systemId if systemId was relative; this is set by XML_SetBase; it may be null.
191 The publicId argument is the public identifier as specified in the entity declaration,
192 or null if none was specified; the whitespace in the public identifier
193 will have been normalized as required by the XML spec.
194 The context argument specifies the parsing context in the format
195 expected by the context argument to
196 XML_ExternalEntityParserCreate; context is valid only until the handler
197 returns, so if the referenced entity is to be parsed later, it must be copied.
198 The handler should return 0 if processing should not continue because of
199 a fatal error in the handling of the external entity.
200 In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING error.
202 Note that unlike other handlers the first argument is the parser, not userData. */
/* External-entity-reference callback type.  The first argument is the
   parser handle rather than userData.  Returns int: 0 tells the calling
   parser to stop with an external-entity-handling error.  context is only
   valid for the duration of the call and must be copied if needed later. */
204 typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser,
205 const XML_Char *context,
206 const XML_Char *base,
207 const XML_Char *systemId,
208 const XML_Char *publicId);
210 /* This structure is filled in by the XML_UnknownEncodingHandler
211 to provide information to the parser about encodings that are unknown to the parser.
213 The map[b] member gives information about byte sequences
214 whose first byte is b.
215 If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar value c.
216 If map[b] is -1, then the byte sequence is malformed.
217 If map[b] is -n, where n >= 2, then b is the first byte of an n-byte
218 sequence that encodes a single Unicode scalar value.
219 The data member will be passed as the first argument to the convert function.
220 The convert function is used to convert multibyte sequences;
221 s will point to a n-byte sequence where map[(unsigned char)*s] == -n.
222 The convert function must return the Unicode scalar value
223 represented by this byte sequence or -1 if the byte sequence is malformed.
224 The convert function may be null if the encoding is a single-byte encoding,
225 that is if map[b] >= -1 for all bytes b.
226 When the parser is finished with the encoding, then if release is not null,
227 it will call release passing it the data member;
228 once release has been called, the convert function will not be called again.
230 Expat places certain restrictions on the encodings that are supported
231 using this mechanism.
233 1. Every ASCII character that can appear in a well-formed XML document,
234 other than the characters
    $@\^`{}~
238 must be represented by a single byte, and that byte must be the
239 same byte that represents that character in ASCII.
241 2. No character may require more than 4 bytes to encode.
243 3. All characters encoded must have Unicode scalar values <= 0xFFFF,
244 (ie characters that would be encoded by surrogates in UTF-16
245 are not allowed). Note that this restriction doesn't apply to
246 the built-in support for UTF-8 and UTF-16.
248 4. No Unicode character may be encoded by more than one distinct sequence
of bytes. */
/* convert: maps the multibyte sequence at s to a Unicode scalar value, or
   returns -1 if the sequence is malformed; its first argument is the data
   member.  release: called with the data member when the parser is finished
   with the encoding.
   NOTE(review): these are two members of the XML_Encoding structure that
   the surrounding comments describe.  The struct opener, the `map` and
   `data` members and the closing `} XML_Encoding;` are not visible in this
   copy -- confirm against the complete header. */
254 int (*convert)(void *data, const char *s);
255 void (*release)(void *data);
258 /* This is called for an encoding that is unknown to the parser.
259 The encodingHandlerData argument is that which was passed as the
260 second argument to XML_SetUnknownEncodingHandler.
261 The name argument gives the name of the encoding as specified in
262 the encoding declaration.
263 If the callback can provide information about the encoding,
264 it must fill in the XML_Encoding structure, and return 1.
265 Otherwise it must return 0.
266 If info does not describe a suitable encoding,
267 then the parser will return an XML_UNKNOWN_ENCODING error. */
/* Unknown-encoding callback type.  Returns 1 when it has filled in the
   encoding description, 0 otherwise.
   NOTE(review): the parameter list is cut off after `name` in this copy.
   The comment above refers to an `info` argument (the XML_Encoding
   structure to fill in), which is presumably the missing last parameter --
   confirm against the complete header. */
269 typedef int (*XML_UnknownEncodingHandler)(void *encodingHandlerData,
270 const XML_Char *name,
/* Setter for the start-element and end-element callbacks of `parser`.
   NOTE(review): the return-type line is not visible in this copy, and this
   header does not say whether a null handler is accepted -- confirm. */
274 xmlrpc_XML_SetElementHandler(XML_Parser parser,
275 XML_StartElementHandler start,
276 XML_EndElementHandler end);
/* Setter for the character-data callback of `parser`.
   NOTE(review): the return-type line is not visible in this copy. */
279 xmlrpc_XML_SetCharacterDataHandler(XML_Parser parser,
280 XML_CharacterDataHandler handler);
/* Setter for the processing-instruction callback.
   NOTE(review): both the return-type line and the first parameter line
   (presumably `XML_Parser parser,` as in the sibling setters) are not
   visible in this copy -- confirm. */
283 xmlrpc_XML_SetProcessingInstructionHandler(
285 XML_ProcessingInstructionHandler handler);
/* Setter for the comment callback of `parser`.
   NOTE(review): the return-type line is not visible in this copy. */
287 xmlrpc_XML_SetCommentHandler(XML_Parser parser,
288 XML_CommentHandler handler);
/* Setter for the CDATA-section start and end callbacks of `parser`.
   NOTE(review): the return-type line is not visible in this copy. */
291 xmlrpc_XML_SetCdataSectionHandler(XML_Parser parser,
292 XML_StartCdataSectionHandler start,
293 XML_EndCdataSectionHandler end);
295 /* This sets the default handler and also inhibits expansion of
296 internal entities. The entity reference will be passed to the default
handler. */
/* Setter for the default callback of `parser`; this variant also turns off
   expansion of internal entities (see xmlrpc_XML_SetDefaultHandlerExpand
   for the variant that keeps expansion on).
   NOTE(review): the return-type line is not visible in this copy. */
301 xmlrpc_XML_SetDefaultHandler(XML_Parser parser,
302 XML_DefaultHandler handler);
304 /* This sets the default handler but does not inhibit expansion of internal entities.
305 The entity reference will not be passed to the default handler. */
/* Setter for the default callback of `parser` that leaves internal entity
   expansion enabled; it takes the same handler type as
   xmlrpc_XML_SetDefaultHandler.
   NOTE(review): the return-type line is not visible in this copy. */
308 xmlrpc_XML_SetDefaultHandlerExpand(XML_Parser parser,
309 XML_DefaultHandler handler);
/* Setter for the DOCTYPE start and end callbacks of `parser`.
   NOTE(review): the return-type line is not visible in this copy. */
312 xmlrpc_XML_SetDoctypeDeclHandler(XML_Parser parser,
313 XML_StartDoctypeDeclHandler start,
314 XML_EndDoctypeDeclHandler end);
/* Setter for the unparsed-entity declaration callback of `parser`.
   NOTE(review): the return-type line is not visible in this copy. */
317 xmlrpc_XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
318 XML_UnparsedEntityDeclHandler handler);
/* Setter for the notation declaration callback of `parser`.
   NOTE(review): the return-type line is not visible in this copy. */
321 xmlrpc_XML_SetNotationDeclHandler(XML_Parser parser,
322 XML_NotationDeclHandler handler);
/* Setter for the external-parsed-entity declaration callback.
   NOTE(review): both the return-type line and the first parameter line
   (presumably `XML_Parser parser,`) are not visible in this copy --
   confirm. */
325 xmlrpc_XML_SetExternalParsedEntityDeclHandler(
327 XML_ExternalParsedEntityDeclHandler handler);
/* Setter for the internal-parsed-entity declaration callback.
   NOTE(review): both the return-type line and the first parameter line
   (presumably `XML_Parser parser,`) are not visible in this copy --
   confirm. */
330 xmlrpc_XML_SetInternalParsedEntityDeclHandler(
332 XML_InternalParsedEntityDeclHandler handler);
/* Setter for the namespace-declaration start and end callbacks of `parser`.
   The handler comment in this header says these are called when namespace
   processing is enabled.
   NOTE(review): the return-type line is not visible in this copy. */
335 xmlrpc_XML_SetNamespaceDeclHandler(XML_Parser parser,
336 XML_StartNamespaceDeclHandler start,
337 XML_EndNamespaceDeclHandler end);
/* Setter for the not-standalone callback of `parser`.
   NOTE(review): the return-type line is not visible in this copy. */
340 xmlrpc_XML_SetNotStandaloneHandler(XML_Parser parser,
341 XML_NotStandaloneHandler handler);
/* Setter for the external-entity-reference callback of `parser`.  The
   handler's first argument can be overridden with
   xmlrpc_XML_SetExternalEntityRefHandlerArg.
   NOTE(review): the return-type line is not visible in this copy. */
344 xmlrpc_XML_SetExternalEntityRefHandler(XML_Parser parser,
345 XML_ExternalEntityRefHandler handler);
347 /* If a non-null value for arg is specified here, then it will be
348 passed as the first argument to the external entity ref handler
349 instead of the parser object. */
/* Takes the parser handle (the first parameter is unnamed in this
   prototype) and the replacement first argument `arg`.
   NOTE(review): the return-type line is not visible in this copy. */
352 xmlrpc_XML_SetExternalEntityRefHandlerArg(XML_Parser, void *arg);
/* Setter for the unknown-encoding callback of `parser`.
   encodingHandlerData is handed back to the handler as its first argument,
   per the XML_UnknownEncodingHandler comment in this header.
   NOTE(review): the return-type line is not visible in this copy. */
355 xmlrpc_XML_SetUnknownEncodingHandler(XML_Parser parser,
356 XML_UnknownEncodingHandler handler,
357 void *encodingHandlerData);
359 /* This can be called within a handler for a start element, end element,
360 processing instruction or character data. It causes the corresponding
361 markup to be passed to the default handler. */
/* Takes only the parser handle; the markup it forwards is whatever the
   currently running handler was called for.
   NOTE(review): the return-type line is not visible in this copy. */
364 xmlrpc_XML_DefaultCurrent(XML_Parser parser);
366 /* This value is passed as the userData argument to callbacks. */
/* Stores `userData` for `parser`; the pointer is opaque to the parser and
   can be read back with the XML_GetUserData macro.
   NOTE(review): the return-type line is not visible in this copy. */
368 xmlrpc_XML_SetUserData(XML_Parser parser, void *userData);
370 /* Returns the last value set by XML_SetUserData or null. */
/* Implemented as a macro, not a function: it casts the handle to `void **`
   and dereferences it, so it relies on the implementation keeping the
   userData pointer as the first thing in the parser object (layout not
   visible in this header).  The argument is parenthesised and evaluated
   exactly once.  Passing a null handle dereferences a null pointer. */
371 #define XML_GetUserData(parser) (*(void **)(parser))
373 /* This is equivalent to supplying an encoding argument
374 to XML_ParserCreate. It must not be called after XML_Parse
375 or XML_ParseBuffer. */
/* encoding: encoding name as an XML_Char string, with the same meaning as
   the argument of xmlrpc_XML_ParserCreate.
   NOTE(review): the return-type line is not visible in this copy, so
   whether failure is reported cannot be told from here. */
378 xmlrpc_XML_SetEncoding(XML_Parser parser, const XML_Char *encoding);
380 /* If this function is called, then the parser will be passed
381 as the first argument to callbacks instead of userData.
382 The userData will still be accessible using XML_GetUserData. */
/* Takes only the parser handle; there is no visible counterpart that
   switches back to passing userData.
   NOTE(review): the return-type line is not visible in this copy. */
385 xmlrpc_XML_UseParserAsHandlerArg(XML_Parser parser);
387 /* Sets the base to be used for resolving relative URIs in system
388 identifiers in declarations. Resolving relative identifiers is left
389 to the application: this value will be passed through as the base
390 argument to the XML_ExternalEntityRefHandler, XML_NotationDeclHandler
391 and XML_UnparsedEntityDeclHandler. The base argument will be copied.
392 Returns zero if out of memory, non-zero otherwise. */
/* base: XML_Char string that the parser copies, so the caller keeps
   ownership of its own buffer.  The result must be checked: zero means the
   copy could not be allocated.
   NOTE(review): the return-type line is not visible in this copy
   (presumably int, given the documented zero/non-zero result). */
395 xmlrpc_XML_SetBase(XML_Parser parser, const XML_Char *base);
/* Returns a pointer to const XML_Char; the caller must not modify it.
   NOTE(review): presumably this is the parser's own copy of the base set
   with xmlrpc_XML_SetBase, and null if none was set; neither the lifetime
   nor the null case is documented here -- confirm. */
397 const XML_Char XMLPARSEAPI *
398 xmlrpc_XML_GetBase(XML_Parser parser);
400 /* Returns the number of the attribute/value pairs passed in last call
401 to the XML_StartElementHandler that were specified in the start-tag
402 rather than defaulted. Each attribute/value pair counts as 2; thus
403 this corresponds to an index into the atts array passed to the
404 XML_StartElementHandler. */
/* The documented result counts array slots, not attributes: each
   attribute contributes two (name and value), so it can be used directly
   as an index into atts.
   NOTE(review): the return-type line is not visible in this copy. */
407 xmlrpc_XML_GetSpecifiedAttributeCount(XML_Parser parser);
409 /* Returns the index of the ID attribute passed in the last call to
410 XML_StartElementHandler, or -1 if there is no ID attribute. Each
411 attribute/value pair counts as 2; thus this corresponds to an index
412 into the atts array passed to the XML_StartElementHandler. */
/* The documented result is an index into atts (the name slot of the ID
   attribute) or -1; callers must test for -1 before indexing.
   NOTE(review): the return-type line is not visible in this copy. */
415 xmlrpc_XML_GetIdAttributeIndex(XML_Parser parser);
417 /* Parses some input. Returns 0 if a fatal error is detected.
418 The last call to XML_Parse must have isFinal true;
419 len may be zero for this call (or any other). */
/* s: raw input, typed `const char *` regardless of how XML_Char is
   configured; len: size of that input as an int; isFinal: non-zero on the
   last chunk.
   NOTE(review): the return-type line is not visible in this copy
   (presumably int, given the documented 0-on-error result). */
422 xmlrpc_XML_Parse(XML_Parser parser, const char *s, int len, int isFinal);
/* NOTE(review): undocumented in this header and the return-type line is
   not visible in this copy.  Presumably it returns a parser-owned buffer of
   at least `len` bytes for the caller to fill before calling
   xmlrpc_XML_ParseBuffer, and null on allocation failure -- confirm against
   the implementation. */
425 xmlrpc_XML_GetBuffer(XML_Parser parser, int len);
/* Takes no data pointer, only a length and the isFinal flag.
   NOTE(review): undocumented in this header and the return-type line is
   not visible in this copy.  Presumably it parses `len` bytes previously
   written into the buffer obtained from xmlrpc_XML_GetBuffer and returns 0
   on a fatal error like xmlrpc_XML_Parse -- confirm. */
428 xmlrpc_XML_ParseBuffer(XML_Parser parser, int len, int isFinal);
430 /* Creates an XML_Parser object that can parse an external general
431 entity; context is a '\0'-terminated string specifying the parse
432 context; encoding is a '\0'-terminated string giving the name of the
433 externally specified encoding, or null if there is no externally
434 specified encoding. The context string consists of a sequence of
435 tokens separated by formfeeds (\f); a token consisting of a name
436 specifies that the general entity of the name is open; a token of the
437 form prefix=uri specifies the namespace for a particular prefix; a
438 token of the form =uri specifies the default namespace. This can be
439 called at any point after the first call to an
440 ExternalEntityRefHandler so long as the parser has not yet been
441 freed. The new parser is completely independent and may safely be
442 used in a separate thread. The handlers and userData are initialized
443 from the parser argument. Returns 0 if out of memory. Otherwise
444 returns a new XML_Parser object. */
/* parser: the parser whose handlers and userData seed the new one.
   context: '\0'-terminated parse-context string, as received by the
   external entity ref handler.
   encoding: '\0'-terminated encoding name, or null.
   Returns the new parser handle, or 0 if out of memory; callers must check
   the result before use. */
446 XML_Parser XMLPARSEAPI
447 xmlrpc_XML_ExternalEntityParserCreate(XML_Parser parser,
448 const XML_Char *context,
449 const XML_Char *encoding);
/* Modes accepted by xmlrpc_XML_SetParamEntityParsing.  No explicit values
   are given, so by the C enumeration rules NEVER is 0, UNLESS_STANDALONE
   is 1 and ALWAYS is 2.
   NOTE(review): the closing `};` of this enum is not visible in this
   copy. */
451 enum XML_ParamEntityParsing {
452 XML_PARAM_ENTITY_PARSING_NEVER,
453 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE,
454 XML_PARAM_ENTITY_PARSING_ALWAYS
457 /* Controls parsing of parameter entities (including the external DTD
458 subset). If parsing of parameter entities is enabled, then references
459 to external parameter entities (including the external DTD subset)
460 will be passed to the handler set with
461 XML_SetExternalEntityRefHandler. The context passed will be 0.
462 Unlike external general entities, external parameter entities can only
463 be parsed synchronously. If the external parameter entity is to be
464 parsed, it must be parsed during the call to the external entity ref
465 handler: the complete sequence of XML_ExternalEntityParserCreate,
466 XML_Parse/XML_ParseBuffer and XML_ParserFree calls must be made during
467 this call. After XML_ExternalEntityParserCreate has been called to
468 create the parser for the external parameter entity (context must be 0
469 for this call), it is illegal to make any calls on the old parser
470 until XML_ParserFree has been called on the newly created parser. If
471 the library has been compiled without support for parameter entity
472 parsing (ie without XML_DTD being defined), then
473 XML_SetParamEntityParsing will return 0 if parsing of parameter
474 entities is requested; otherwise it will return non-zero. */
/* parsing: one of the XML_ParamEntityParsing modes.  The result must be
   checked: 0 means the request could not be honoured.
   NOTE(review): the return-type line is not visible in this copy
   (presumably int, given the documented 0/non-zero result). */
477 xmlrpc_XML_SetParamEntityParsing(XML_Parser parser,
478 enum XML_ParamEntityParsing parsing);
/* Enumerators of the error codes returned by xmlrpc_XML_GetErrorCode.
   NOTE(review): this is only the tail of the enumeration.  The
   `enum XML_Error {` opener, any enumerators that precede
   XML_ERROR_NO_ELEMENTS, and the closing `};` are not visible in this copy,
   so the numeric value of each enumerator cannot be determined from here.
   Do not hard-code numbers; use the names. */
484 XML_ERROR_NO_ELEMENTS,
485 XML_ERROR_INVALID_TOKEN,
486 XML_ERROR_UNCLOSED_TOKEN,
487 XML_ERROR_PARTIAL_CHAR,
488 XML_ERROR_TAG_MISMATCH,
489 XML_ERROR_DUPLICATE_ATTRIBUTE,
490 XML_ERROR_JUNK_AFTER_DOC_ELEMENT,
491 XML_ERROR_PARAM_ENTITY_REF,
492 XML_ERROR_UNDEFINED_ENTITY,
493 XML_ERROR_RECURSIVE_ENTITY_REF,
494 XML_ERROR_ASYNC_ENTITY,
495 XML_ERROR_BAD_CHAR_REF,
496 XML_ERROR_BINARY_ENTITY_REF,
497 XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
498 XML_ERROR_MISPLACED_XML_PI,
499 XML_ERROR_UNKNOWN_ENCODING,
500 XML_ERROR_INCORRECT_ENCODING,
501 XML_ERROR_UNCLOSED_CDATA_SECTION,
502 XML_ERROR_EXTERNAL_ENTITY_HANDLING,
503 XML_ERROR_NOT_STANDALONE
506 /* If xmlrpc_XML_Parse or xmlrpc_XML_ParseBuffer have returned 0, then
507 xmlrpc_XML_GetErrorCode returns information about the error. */
/* Returns an enum XML_Error value for `parser`.  The result can be turned
   into text with xmlrpc_XML_ErrorString, which takes the code as a plain
   int. */
510 enum XML_Error XMLPARSEAPI
511 xmlrpc_XML_GetErrorCode(XML_Parser parser);
513 /* These functions return information about the current parse location.
514 They may be called when XML_Parse or XML_ParseBuffer return 0;
515 in this case the location is the location of the character at which
516 the error was detected.
517 They may also be called from any other callback called to report
518 some parse event; in this case the location is the location of the first
519 of the sequence of characters that generated the event. */
/* The three location queries: line number, column number and byte index
   of the current parse position of `parser`.
   NOTE(review): the return-type lines are not visible in this copy, and
   whether line and column numbers start at 0 or 1 is not stated in this
   header -- confirm before presenting them to users. */
523 xmlrpc_XML_GetCurrentLineNumber(XML_Parser parser);
525 xmlrpc_XML_GetCurrentColumnNumber(XML_Parser parser);
527 xmlrpc_XML_GetCurrentByteIndex(XML_Parser parser);
529 /* Return the number of bytes in the current event.
530 Returns 0 if the event is in an internal entity. */
/* A result of 0 is not an error: per the comment above it means the
   current event lies inside an internal entity.
   NOTE(review): the return-type line is not visible in this copy. */
533 xmlrpc_XML_GetCurrentByteCount(XML_Parser parser);
535 /* For backwards compatibility with previous versions. */
/* Object-like aliases: each old name is replaced textually by the newer
   name on the right.
   NOTE(review): the replacement names are unprefixed
   (XML_GetCurrentLineNumber etc.), while the functions declared in this
   header are xmlrpc_XML_GetCurrentLineNumber etc.  Unless the unprefixed
   names are provided elsewhere, code using these aliases would reference
   undeclared functions -- confirm whether the right-hand sides should carry
   the xmlrpc_ prefix. */
536 #define XML_GetErrorLineNumber XML_GetCurrentLineNumber
537 #define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber
538 #define XML_GetErrorByteIndex XML_GetCurrentByteIndex
540 /* Frees memory used by the parser. */
/* The handle must not be used after this call.
   NOTE(review): the return-type line is not visible in this copy, and
   whether a null handle is accepted is not documented here -- confirm. */
542 xmlrpc_XML_ParserFree(XML_Parser parser);
544 /* Returns a string describing the error. */
/* code: an error code as a plain int (xmlrpc_XML_GetErrorCode returns
   enum XML_Error).  The result is a pointer to const XML_LChar, which is
   char unless XML_UNICODE_WCHAR_T selects wchar_t; the caller must not
   modify it.
   NOTE(review): the result for an out-of-range code and the string's
   lifetime are not documented here -- presumably a static string; confirm
   before freeing or caching. */
545 const XML_LChar XMLPARSEAPI *
546 xmlrpc_XML_ErrorString(int code);