20 extern HTStyleSheet * styleSheet; /* Application-wide */
22 PRIVATE HTParentAnchor * node_anchor; /* Anchor of the document being built; stored by HTML_begin */
25 PRIVATE HTStyle * glossary_style; /* Looked up as "Glossary" in get_styles */
26 PRIVATE HTStyle * list_compact_style; /* NOTE(review): no assignment is visible in this listing -- confirm it is initialised */
27 PRIVATE HTStyle * glossary_compact_style; /* Looked up as "GlossaryCompact" in get_styles */
/* Buffer in which accumulate_string collects the TITLE text; set_title passes its data to the anchor. */
29 PRIVATE HTChunk title = { 0, 128, 0, 0 }; /* Grow by 128 */
32 /* Forward declarations of routines for DTD
34 PRIVATE void no_change PARAMS((HTTag * t, HTElement * e));
35 PRIVATE void begin_litteral PARAMS((HTTag * t, HTElement * e));
36 PRIVATE void begin_element PARAMS((HTTag * t, HTElement * e));
37 PRIVATE void end_element PARAMS((HTTag * t, HTElement * e));
38 PRIVATE void begin_document PARAMS((HTTag * t, HTElement * e));
39 PRIVATE void end_document PARAMS((HTTag * t, HTElement * e));
40 PRIVATE void begin_anchor PARAMS((HTTag * t, HTElement * e));
41 PRIVATE void end_anchor PARAMS((HTTag * t, HTElement * e));
42 PRIVATE void begin_list PARAMS((HTTag * t, HTElement * e));
43 PRIVATE void list_element PARAMS((HTTag * t, HTElement * e));
44 PRIVATE void end_list PARAMS((HTTag * t, HTElement * e));
45 PRIVATE void begin_glossary PARAMS((HTTag * t, HTElement * e));
46 PRIVATE void end_glossary PARAMS((HTTag * t, HTElement * e));
48 PRIVATE int got_styles = 0; /* Tested by begin_document before it calls get_styles */
49 PRIVATE void get_styles NOPARAMS;
51 PRIVATE BOOL style_change; /* Set to YES by change_style when a different style is requested */
52 PRIVATE HTStyle * new_style; /* Style that UPDATE_STYLE applies when style_change is set */
53 PRIVATE HTStyle * old_style; /* Receives new_style when UPDATE_STYLE applies it */
54 PRIVATE BOOL in_word; /* Have just had a non-white character */
56 /* Style buffering avoids dummy paragraph begin/ends.
**
** UPDATE_STYLE: when style_change is set, passes new_style to
** HText_setStyle and records it in old_style. Any further lines of
** the macro are not visible in this listing.
58 #define UPDATE_STYLE if (style_change) { \
59 HText_setStyle(text, new_style); \
60 old_style = new_style; \
/* Request that following text use `style`.
** In the visible lines, a style different from new_style sets
** style_change to YES; the rest of the body is not shown here.
*/
63 PRIVATE void change_style ARGS1(HTStyle *,style)
65 if (new_style!=style) {
66 style_change = YES /* was old_style == new_style */ ;
75 /* Accumulate a character of title
**
** Appends c to the `title` chunk with HTChunkPutc. Listed in the tags
** table as the character routine of TITLE. The two headers below are
** an ANSI and a K&R form of the same definition; presumably a
** preprocessor conditional (not visible here) selects one -- TODO confirm.
78 static void accumulate_string(char c)
80 static void accumulate_string(c)
84 HTChunkPutc(&title, c);
/* Listed in the tags table as the ON_BEGIN routine of TITLE. The body is
** not visible in this listing; presumably it empties the title chunk --
** TODO confirm.
*/
90 PRIVATE void clear_string ARGS2(HTTag *,t, HTElement *,e)
/* Listed in the tags table as the ON_END routine of TITLE.
** Terminates the accumulated title chunk and passes title.data to
** HTAnchor_setTitle for node_anchor. Neither t nor e is used in the
** visible lines.
*/
95 PRIVATE void set_title ARGS2(HTTag *,t, HTElement *,e)
97 HTChunkTerminate(&title);
98 HTAnchor_setTitle(node_anchor, title.data);
101 /* Character handling
/* Listed in the tags table as the ON_BEGIN routine of ISINDEX.
** Calls HTAnchor_setIndex on node_anchor.
*/
103 PRIVATE void set_index ARGS2(HTTag *,t, HTElement *,e)
105 HTAnchor_setIndex(node_anchor);
/* Character routine used by most entries of the tags table.
** Visible lines: on some path a newline or a space is dropped; a single
** space may be appended to text; and c itself may be appended. The
** conditions that select between these are not visible in this listing.
*/
108 PRIVATE void pass_character ARGS1(char, c)
111 if ((c=='\n') || (c==' ')) return; /* Ignore it */
116 HText_appendCharacter(text, ' ');
120 HText_appendCharacter(text, c);
/* Character routine of the XMP, LISTING and PLAINTEXT entries of the
** tags table: appends c to text unchanged.
*/
125 PRIVATE void litteral_text ARGS1(char, c)
127 /* We guarantee that the style is up-to-date in begin_litteral
129 HText_appendCharacter(text, c); /* @@@@@ */
/* Listed in the tags table as the character routine of COMMENT. The body
** is not visible in this listing; presumably it discards c -- TODO confirm.
*/
132 PRIVATE void ignore_text ARGS1(char, c)
/* Listed in the tags table as the ON_BEGIN routine of NEXTID. No
** statement of the body is visible in this listing, only the remark below.
*/
137 PRIVATE void set_next_id ARGS2(HTTag *,t, HTElement *,e)
139 /* @@@@@ Bad SGML anyway */
/* Listed in the tags table as the ON_BEGIN routine of P.
** The visible line appends a paragraph break to text; other lines of
** the body are not shown in this listing.
*/
142 PRIVATE void new_paragraph ARGS2(HTTag *,t, HTElement *,e)
145 HText_appendParagraph(text);
/* Listed in the tags table as the ON_BEGIN routine of DT.
** The visible line appends a paragraph break to text; other lines of
** the body are not shown in this listing.
*/
149 PRIVATE void term ARGS2(HTTag *,t, HTElement *,e)
152 HText_appendParagraph(text);
/* Listed in the tags table as the ON_BEGIN routine of DD.
** The visible line sends a tab through pass_character rather than
** appending it to text directly; other lines are not shown here.
*/
157 PRIVATE void definition ARGS2(HTTag *,t, HTElement *,e)
160 pass_character('\t'); /* Just tab out one stop */
164 /* Our Static DTD for HTML
165 ** -----------------------
**
** entities: table of string pairs, entity name first (as in "bullet"),
** ended by a { 0, 0 } entry. The second string is presumably the
** replacement text -- TODO confirm. Only some entries are visible in
** this listing.
168 static entity entities[] = {
172 { "bullet" , "\267" }, /* @@@ NeXT only */
173 { 0, 0 } /* Terminate list */
/* Attribute list holding only the all-zero entry; named by the tags
** table entries that declare no attributes.
*/
176 static attr no_attr[] = {{ 0, 0 , 0}};
/* Attribute list named by the "A" entry of the tags table. begin_anchor
** reads the .present and .value fields of the entries at A_ID, A_HREF
** and A_TYPE; those index constants and most entries are not visible
** in this listing.
*/
178 static attr a_attr[] = { /* Anchor attributes */
180 { "NAME", 0, 0 }, /* Should be ID */
185 { 0, 0 , 0} /* Terminate list */
/* Attribute list named by the UL, OL, MENU, DIR and DL entries of the
** tags table. LIST_COMPACT is the index that begin_list tests through
** list_attr[LIST_COMPACT].present; the entry at that index is not
** visible in this listing.
*/
187 static attr list_attr[] = {
188 #define LIST_COMPACT 0
190 { 0, 0, 0 } /* Terminate list */
/* Attribute list tested by begin_glossary through
** glossary_attr[GLOSSARY_COMPACT].present; the entry at that index is
** not visible in this listing.
** NOTE(review): no visible entry of the tags table names glossary_attr
** ("DL" names list_attr) -- confirm this table is ever filled in.
*/
193 static attr glossary_attr[] = {
194 #define GLOSSARY_COMPACT 0
196 { 0, 0, 0 } /* Terminate list */
/* Tag record for the document as a whole; HTML_dtd holds its address.
** Its routines are begin_document, pass_character and end_document.
** The style field is 0 here; get_styles sets it to the style named
** "Normal".
*/
199 static HTTag default_tag =
200 { "DOCUMENT", no_attr , 0, 0, begin_document, pass_character, end_document };
201 /* NAME ATTR STYLE LITERAL? ON_BEGIN ON__CHARACTER ON_END
**
** tags[] is indexed elsewhere in this file by the _TAG constants that
** are defined between its entries, so the position of an entry must
** match its constant. The STYLE column is 0 here; get_styles fills it
** in for the tags it names. The all-zero entry ends the table.
** NOTE(review): "DL" names list_attr, while begin_glossary tests
** glossary_attr[GLOSSARY_COMPACT] -- confirm which table is intended.
203 static HTTag tags[] = {
205 { "TITLE", no_attr, 0, 0, clear_string, accumulate_string, set_title },
206 #define ISINDEX_TAG 1
207 { "ISINDEX", no_attr, 0, 0, set_index, 0 , 0 },
209 { "NEXTID", no_attr, 0, 0, set_next_id, 0, 0 },
210 #define ADDRESS_TAG 3
211 { "ADDRESS" , no_attr, 0, 0, begin_element, pass_character, end_element },
213 { "H1" , no_attr, 0, 0, begin_element, pass_character, end_element },
214 { "H2" , no_attr, 0, 0, begin_element, pass_character, end_element },
215 { "H3" , no_attr, 0, 0, begin_element, pass_character, end_element },
216 { "H4" , no_attr, 0, 0, begin_element, pass_character, end_element },
217 { "H5" , no_attr, 0, 0, begin_element, pass_character, end_element },
218 { "H6" , no_attr, 0, 0, begin_element, pass_character, end_element },
219 { "H7" , no_attr, 0, 0, begin_element, pass_character, end_element },
221 { "UL" , list_attr, 0, 0, begin_list, pass_character, end_list },
223 { "OL" , list_attr, 0, 0, begin_list, pass_character, end_list },
225 { "MENU" , list_attr, 0, 0, begin_list, pass_character, end_list },
227 { "DIR" , list_attr, 0, 0, begin_list, pass_character, end_list },
229 { "LI" , no_attr, 0, 0, list_element, pass_character, 0 },
231 { "DL" , list_attr, 0, 0, begin_glossary, pass_character, end_glossary },
232 { "DT" , no_attr, 0, 0, term, pass_character, 0 },
233 { "DD" , no_attr, 0, 0, definition, pass_character, 0 },
234 { "A" , a_attr, 0, 0, begin_anchor, pass_character, end_anchor },
236 { "P" , no_attr, 0, 0, new_paragraph, pass_character, 0 },
238 { "XMP" , no_attr, 0, YES, begin_litteral, litteral_text, end_element },
239 #define LISTING_TAG 22
240 { "LISTING" , no_attr, 0, YES,begin_litteral, litteral_text, end_element },
241 #define PLAINTEXT_TAG 23
242 { "PLAINTEXT", no_attr, 0, YES, begin_litteral, litteral_text, end_element },
243 #define COMMENT_TAG 24
244 { "COMMENT", no_attr, 0, YES, no_change, ignore_text, no_change },
245 { 0, 0, 0, 0, 0, 0 , 0} /* Terminate list */
/* The DTD handed to SGML_begin and SGML_character by the parse routine
** in this file: the tags table, the document-level default tag, and
** the entities table.
*/
248 PUBLIC SGML_dtd HTML_dtd = { tags, &default_tag, entities };
251 /* Flattening the style structure
252 ** ------------------------------
254 On the NeXT, and on any read-only browser, it is simpler for the text to have
255 a sequence of styles, rather than a nested tree of styles. In this
256 case we have to flatten the structure as it arrives from SGML tags into
257 a sequence of styles.
/* Listed in the tags table as the ON_BEGIN routine of A.
** Calls HTAnchor_findChildAndLink with node_anchor as parent and the
** values of the A_ID, A_HREF and A_TYPE attributes (0 for an absent ID
** or HREF; the alternative for an absent TYPE is not visible in this
** listing), then passes the result to HText_beginAnchor.
*/
263 PRIVATE void begin_anchor ARGS2(HTTag *,t, HTElement *,e)
265 HTChildAnchor * source = HTAnchor_findChildAndLink(
266 node_anchor, /* parent */
267 a_attr[A_ID].present ? a_attr[A_ID].value : 0, /* Tag */
268 a_attr[A_HREF].present ? a_attr[A_HREF].value : 0, /* Address */
269 a_attr[A_TYPE].present ?
270 (HTLinkType*)HTAtom_for(a_attr[A_TYPE].value)
274 HText_beginAnchor(text, source);
/* Listed in the tags table as the ON_END routine of A.
** The visible line calls HText_endAnchor on text. The rest of the
** parameter list is not visible in this listing.
*/
277 PRIVATE void end_anchor ARGS2(HTTag *, t,
281 HText_endAnchor(text);
285 /* General SGML Element Handling
286 ** -----------------------------
/* ON_BEGIN routine of ADDRESS and H1 to H7 in the tags table.
** Requests the style stored in the tag record via change_style.
*/
288 PRIVATE void begin_element ARGS2(HTTag *,t, HTElement *,e)
290 change_style((HTStyle*)(t->style));
/* Listed in the tags table as both the ON_BEGIN and the ON_END routine
** of COMMENT. The body is not visible in this listing; presumably it
** does nothing -- TODO confirm.
*/
292 PRIVATE void no_change ARGS2(HTTag *,t, HTElement *,e)
/* ON_BEGIN routine of XMP, LISTING and PLAINTEXT in the tags table.
** The visible line requests the tag's style via change_style. A remark
** in litteral_text says the style is brought up to date here; the line
** doing so is not visible in this listing.
*/
296 PRIVATE void begin_litteral ARGS2(HTTag *,t, HTElement *,e)
298 change_style(t->style);
/* ON_END routine of ADDRESS, H1 to H7, XMP, LISTING and PLAINTEXT in
** the tags table. When e is non-null, requests the style of e's tag
** via change_style; e is presumably the element being returned to --
** TODO confirm. Nothing is requested in the visible lines when e is null.
*/
301 PRIVATE void end_element ARGS2(HTTag *,t, HTElement *,e)
303 if (e) change_style(e->tag->style);
/* ON_BEGIN routine of UL, OL, MENU and DIR in the tags table.
** Requests a style via change_style, chosen by whether
** list_attr[LIST_COMPACT] is present: the style for the present case
** is not visible in this listing, otherwise the tag's own style is used.
*/
308 PRIVATE void begin_list ARGS2(HTTag *,t, HTElement *,e)
310 change_style(list_attr[LIST_COMPACT].present
312 : (HTStyle*)(t->style));
/* ON_END routine of UL, OL, MENU and DIR in the tags table.
** Requests the style of e's tag via change_style.
** NOTE(review): e is dereferenced without the null test that
** end_element applies -- confirm e can never be null here.
*/
316 PRIVATE void end_list ARGS2(HTTag *,t, HTElement *,e)
318 change_style(e->tag->style);
/* Listed in the tags table as the ON_BEGIN routine of LI.
** Unless e's tag is the DIR entry of the tags table, appends a
** paragraph break; the visible lines then append a tab to text.
** Lines between these are not shown in this listing.
*/
322 PRIVATE void list_element ARGS2(HTTag *,t, HTElement *,e)
324 if (e->tag != &tags[DIR_TAG])
325 HText_appendParagraph(text);
327 HText_appendCharacter(text, '\t'); /* Tab @@ nl for UL? */
/* Listed in the tags table as the ON_BEGIN routine of DL.
** Requests glossary_compact_style via change_style when
** glossary_attr[GLOSSARY_COMPACT] is present; the alternative is not
** visible in this listing.
** NOTE(review): the "DL" entry of the tags table names list_attr, not
** glossary_attr -- confirm this test can ever be true.
*/
332 PRIVATE void begin_glossary ARGS2(HTTag *,t, HTElement *,e)
334 change_style(glossary_attr[GLOSSARY_COMPACT].present
335 ? glossary_compact_style
/* Listed in the tags table as the ON_END routine of DL.
** Requests the style of e's tag via change_style. As in end_list, e is
** dereferenced without a null test.
*/
340 PRIVATE void end_glossary ARGS2(HTTag *,t, HTElement *,e)
342 change_style(e->tag->style);
347 /* Begin and End document
348 ** ----------------------
/* Records `anchor` in node_anchor, where the other routines of this
** file read it (set_title, set_index, begin_anchor, begin_document).
*/
350 PUBLIC void HTML_begin ARGS1(HTParentAnchor *,anchor)
352 node_anchor = anchor;
/* ON_BEGIN routine of default_tag.
** Calls get_styles if got_styles is still zero, creates the text
** object for node_anchor with HText_new, starts appending to it, and
** sets its style to default_tag.style. Later lines of the body are
** not visible in this listing.
*/
355 PRIVATE void begin_document ARGS2(HTTag *, t, HTElement *, e)
357 if (!got_styles) get_styles();
358 text = HText_new(node_anchor);
359 HText_beginAppend(text);
360 HText_setStyle(text, default_tag.style);
/* ON_END routine of default_tag: the visible line calls HText_endAppend
** on text.
*/
366 PRIVATE void end_document ARGS2(HTTag *, t, HTElement *, e)
368 HText_endAppend(text);
372 /* Get Styles from style sheet
373 ** ---------------------------
**
** Looks each style up by name in styleSheet with HTStyleNamed and
** stores it in default_tag, in entries of the tags table, or in the
** glossary style variables. H1 to H7 are addressed as H1_TAG plus an
** offset, so those entries must be consecutive in the tags table.
** PLAINTEXT and XMP both receive the style named "Example".
** NOTE(review): no assignment to list_compact_style or to got_styles
** is visible in this listing -- confirm both are set on lines not shown.
375 PRIVATE void get_styles NOARGS
380 default_tag.style = HTStyleNamed(styleSheet, "Normal");
381 tags[H1_TAG].style = HTStyleNamed(styleSheet, "Heading1");
382 tags[H1_TAG+1].style = HTStyleNamed(styleSheet, "Heading2");
383 tags[H1_TAG+2].style = HTStyleNamed(styleSheet, "Heading3");
384 tags[H1_TAG+3].style = HTStyleNamed(styleSheet, "Heading4");
385 tags[H1_TAG+4].style = HTStyleNamed(styleSheet, "Heading5");
386 tags[H1_TAG+5].style = HTStyleNamed(styleSheet, "Heading6");
387 tags[H1_TAG+6].style = HTStyleNamed(styleSheet, "Heading7");
388 tags[DL_TAG].style = HTStyleNamed(styleSheet, "Glossary");
389 tags[UL_TAG].style = HTStyleNamed(styleSheet, "List");
390 tags[OL_TAG].style = HTStyleNamed(styleSheet, "List");
391 tags[MENU_TAG].style = HTStyleNamed(styleSheet, "Menu");
393 tags[DIR_TAG].style = HTStyleNamed(styleSheet, "Dir");
394 glossary_style = HTStyleNamed(styleSheet, "Glossary");
395 glossary_compact_style = HTStyleNamed(styleSheet, "GlossaryCompact");
396 tags[ADDRESS_TAG].style= HTStyleNamed(styleSheet, "Address");
397 tags[PLAINTEXT_TAG].style =
398 tags[XMP_TAG].style = HTStyleNamed(styleSheet, "Example");
399 tags[LISTING_TAG].style = HTStyleNamed(styleSheet, "Listing");
403 /* Parse an HTML file
404 ** ------------------
406 ** This version takes a pointer to the routine to call
407 ** to get each character.
**
** The name line of this routine is not visible in this listing.
** Visible lines: SGML_begin is called on HTML_dtd; a value is fetched
** from next_char, and unless it compares equal to (char)EOF it is
** passed to SGML_character. The break suggests an enclosing loop whose
** header is not shown.
** NOTE(review): next_char returns char, so a data byte equal to
** (char)EOF cannot be told apart from end of input -- confirm this is
** acceptable for the inputs the routine is used on.
411 (HTParentAnchor * anchor, char (*next_char)() )
414 HTParentAnchor * anchor;
419 SGML_begin(&HTML_dtd);
422 character = (*next_char)();
423 if (character == (char)EOF) break;
425 SGML_character(&HTML_dtd, character);