1 /* General SGML Parser code SGML.c
2 ** ========================
10 #include "tcp.h" /* For TOUPPER ! */
12 static void (*contents_treatment) PARAMS((char c)); /* per-character content handler; assigned from a tag's `treat` member */
13 static HTTag *current_tag; /* tag entry located by scanning dtd->tags for the name just read */
14 static attr *current_attribute; /* attribute awaiting a value; 0 when none is pending or the name was unknown */
15 static HTChunk *string =0; /* accumulation buffer; created in SGML_begin (growth step 128) if not yet allocated */
16 static HTElement *element_stack; /* stack of elements linked through ->next; end_element removes entries from the top */
21 PUBLIC CONST char * SGML_default = "";
24 PRIVATE void handle_attribute_name(const char * s)
26 PRIVATE void handle_attribute_name(s)
30 for( current_attribute = current_tag->attributes;
31 current_attribute->name;
32 current_attribute++) {
33 if (0==strcasecomp(current_attribute->name, s))
36 if (!current_attribute->name) {
38 fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n",
39 s, current_tag->name);
40 current_attribute = 0; /* Invalid */
43 current_attribute->present = YES;
44 if (current_attribute->value) {
45 free(current_attribute->value);
46 current_attribute->value = 0;
50 /* Handle attribute value
51 ** ----------------------
54 PRIVATE void handle_attribute_value(const char * s)
56 PRIVATE void handle_attribute_value(s)
60 if (current_attribute) {
61 StrAllocCopy(current_attribute->value, s);
63 if (TRACE) fprintf(stderr, "SGML: Attribute value %s ignored\n", s);
65 current_attribute = 0; /* can't have two assignments! */
72 ** s contains the entity name zero terminated
74 ** If the entity name is unknown, the terminator is treated as
75 ** a printable non-special character in all cases, even if it is '<'
78 PRIVATE void handle_entity(const char * s, entity * entities, char term)
80 PRIVATE void handle_entity(s,entities, term)
87 for(e = entities; e->name; e++) {
88 if (0==strcmp(e->name, s)) {
90 for (p=e->representation; *p; p++) {
91 (*contents_treatment)(*p);
96 /* If entity string not found, display as text */
98 fprintf(stderr, "SGML: Unknown entity %s\n", s);
99 (*contents_treatment)('&');
103 (*contents_treatment)(*p);
106 (*contents_treatment)(term);
112 PRIVATE void end_element(HTTag * old_tag)
114 PRIVATE void end_element(old_tag)
118 if (TRACE) fprintf(stderr, "SGML: End </%s>\n", old_tag->name);
120 if (TRACE) fprintf(stderr,"SGML: Illegal end tag </%s> found.\n",
124 while (element_stack) {/* Loop is error path only */
125 HTElement * N = element_stack;
126 HTTag * t = element_stack->tag;
128 if (old_tag != t) { /* Mismatch: syntax error */
129 if (element_stack->next) { /* This is not the last level */
130 if (TRACE) fprintf(stderr,
131 "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
132 old_tag->name, t->name, t->name);
133 } else { /* last level */
134 if (TRACE) fprintf(stderr,
135 "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
136 old_tag->name, t->name, old_tag->name);
141 element_stack = N->next; /* Remove from stack */
143 (t->end)(t, element_stack); /* Assume tag end */
144 if (element_stack) /* not end of document */
145 contents_treatment = element_stack->tag->treat;
146 if (old_tag == t) return; /* Correct sequence */
148 /* Syntax error path only */
152 "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
159 PRIVATE void start_element(HTTag * new_tag)
161 PRIVATE void start_element(new_tag)
165 if (TRACE) fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
166 (*new_tag->begin)(new_tag, element_stack);
167 if (new_tag->end) { /* i.e. tag not empty */
168 HTElement * N = (HTElement *)malloc(sizeof(HTElement));
169 if (N == NULL) outofmem(__FILE__, "start_element");
170 N->next = element_stack;
173 contents_treatment = new_tag->treat;
183 ** dtd->tags represents the DTD, along with
186 ** default_tag represents the initial and final actions,
187 ** and the character processing, for data outside
188 ** any tags. May not be empty.
191 PRIVATE enum sgml_state { S_text, S_litteral, S_tag, S_tag_gap, /* S_tag: reading a new tag name; S_tag_gap: expecting attribute or '>' */
192 S_attr, S_attr_gap, S_equals, S_value, /* S_attr: reading an attribute; S_attr_gap: expecting attribute, '=' or '>'; S_equals: after "attr =" */
193 S_quoted, S_end, S_entity, S_junk_tag} state; /* S_quoted: quoted attribute value; S_junk_tag: entered for an unknown tag not yet closed by '>' (otherwise S_text) */
195 PUBLIC void SGML_begin ARGS1(SGML_dtd *,dtd)
197 if (!string) string = HTChunkCreate(128); /* Grow by this much */
200 start_element(dtd->default_tag); /* Start document */
203 PUBLIC void SGML_end ARGS1(SGML_dtd *,dtd)
205 end_element(dtd->default_tag); /* End document */
208 PUBLIC void SGML_character ARGS2(SGML_dtd *,dtd, char,c)
213 if (c=='&' && !(element_stack &&
214 element_stack->tag &&
215 element_stack->tag->litteral)) {
221 state = (element_stack &&
222 element_stack->tag &&
223 element_stack->tag->litteral) ?
225 } else (*contents_treatment)(c);
228 /* In literal mode, wait only for the specific end tag!
231 HTChunkPutc(string, c);
232 if ( TOUPPER(c) != ((string->size ==1) ? '/'
233 : element_stack->tag->name[string->size-2])) {
236 /* If complete match, end literal mode */
237 if ((c=='>') && (!element_stack->tag->name[string->size-2])) {
238 end_element(element_stack->tag);
240 current_attribute = (attr *) 0;
243 } /* If Mismatch: recover string. */
244 (*contents_treatment)('<');
245 for (i=0; i<string->size; i++) /* recover */
246 (*contents_treatment)(string->data[i]);
256 HTChunkPutc(string, c);
258 HTChunkTerminate(string);
259 handle_entity(string->data, dtd->entities, c);
266 case S_tag: /* new tag */
268 HTChunkPutc(string, c);
269 else { /* End of tag name */
272 if (TRACE) if (string->size!=0)
273 fprintf(stderr,"SGML: `<%s/' found!\n", string->data);
277 HTChunkTerminate(string) ;
278 for(current_tag = dtd->tags; current_tag->name; current_tag++) {
279 if (0==strcasecomp(current_tag->name, string->data)) {
283 if (!current_tag->name) {
284 if(TRACE) fprintf(stderr, "Unknown tag %s\n",
286 state = (c=='>') ? S_text : S_junk_tag;
290 for (a = current_tag->attributes; a->name; a++ ) {
294 current_attribute = (attr *) 0;
297 if (current_tag->name) start_element(current_tag);
306 case S_tag_gap: /* Expecting attribute or > */
307 if (WHITE(c)) break; /* Gap between attributes */
308 if (c=='>') { /* End of tag */
309 if (current_tag->name) start_element(current_tag);
313 HTChunkPutc(string, c);
314 state = S_attr; /* Get attribute */
317 /* accumulating attribute name */
319 if (WHITE(c) || (c=='>') || (c=='=')) { /* End of word */
320 HTChunkTerminate(string) ;
321 handle_attribute_name(string->data);
323 if (c=='>') { /* End of tag */
324 if (current_tag->name) start_element(current_tag);
328 state = (c=='=' ? S_equals: S_attr_gap);
330 HTChunkPutc(string, c);
334 case S_attr_gap: /* Expecting attribute or = or > */
335 if (WHITE(c)) break; /* Gap after attribute */
336 if (c=='>') { /* End of tag */
337 if (current_tag->name) start_element(current_tag);
344 HTChunkPutc(string, c);
345 state = S_attr; /* Get next attribute */
348 case S_equals: /* After attr = */
349 if (WHITE(c)) break; /* Before attribute value */
350 if (c=='>') { /* End of tag */
351 fprintf(stderr, "SGML: found = but no value\n");
352 if (current_tag->name) start_element(current_tag);
360 HTChunkPutc(string, c);
365 if (WHITE(c) || (c=='>')) { /* End of word */
366 HTChunkTerminate(string) ;
367 handle_attribute_value(string->data);
369 if (c=='>') { /* End of tag */
370 if (current_tag->name) start_element(current_tag);
374 else state = S_tag_gap;
376 HTChunkPutc(string, c);
380 case S_quoted: /* Quoted attribute value */
381 if (c=='"') { /* End of attribute value */
382 HTChunkTerminate(string) ;
383 handle_attribute_value(string->data);
387 HTChunkPutc(string, c);
393 HTChunkPutc(string, c);
394 else { /* End of end tag name */
395 HTChunkTerminate(string) ;
397 if (TRACE) fprintf(stderr,"SGML: `</%s%c' found!\n",
402 for(current_tag = dtd->tags; current_tag->name; current_tag++) {
403 if (0==strcasecomp(current_tag->name, string->data)) {
404 end_element(current_tag);
408 if (!current_tag->name) {
409 if(TRACE) fprintf(stderr,
410 "Unknown end tag </%s>\n", string->data);
413 current_attribute = (attr *) 0;
424 } /* switch on state */