xref: /openbsd-src/gnu/usr.bin/texinfo/makeinfo/html.c (revision 1dc4902c7c259b28d270543b3a1e9e40c3db3fb5)
11cc83814Sespie /* html.c -- html-related utilities.
2*1dc4902cSfgsch    $Id: html.c,v 1.3 2010/06/06 12:31:09 fgsch Exp $
31cc83814Sespie 
4*1dc4902cSfgsch    Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software
5a1acfa9bSespie    Foundation, Inc.
61cc83814Sespie 
71cc83814Sespie    This program is free software; you can redistribute it and/or modify
81cc83814Sespie    it under the terms of the GNU General Public License as published by
91cc83814Sespie    the Free Software Foundation; either version 2, or (at your option)
101cc83814Sespie    any later version.
111cc83814Sespie 
121cc83814Sespie    This program is distributed in the hope that it will be useful,
131cc83814Sespie    but WITHOUT ANY WARRANTY; without even the implied warranty of
141cc83814Sespie    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
151cc83814Sespie    GNU General Public License for more details.
161cc83814Sespie 
171cc83814Sespie    You should have received a copy of the GNU General Public License
181cc83814Sespie    along with this program; if not, write to the Free Software Foundation,
191cc83814Sespie    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
201cc83814Sespie 
211cc83814Sespie #include "system.h"
221cc83814Sespie #include "cmds.h"
23a1acfa9bSespie #include "files.h"
241cc83814Sespie #include "html.h"
251cc83814Sespie #include "lang.h"
261cc83814Sespie #include "makeinfo.h"
27a1acfa9bSespie #include "node.h"
281cc83814Sespie #include "sectioning.h"
291cc83814Sespie 
30a1acfa9bSespie 
/* A growable character buffer.  Text is added one character at a time
   with append_char, which keeps BUFFER null-terminated; embedded null
   characters are therefore not handled.  */

typedef struct
{
  unsigned int size;    /* bytes allocated for BUFFER */
  unsigned int length;  /* bytes in use, not counting the terminator */
  char *buffer;         /* the text itself; NULL until something is appended */
} buffer_type;
40a1acfa9bSespie 
41a1acfa9bSespie static buffer_type *
init_buffer(void)42a1acfa9bSespie init_buffer (void)
43a1acfa9bSespie {
44a1acfa9bSespie   buffer_type *buf = xmalloc (sizeof (buffer_type));
45a1acfa9bSespie   buf->length = 0;
46a1acfa9bSespie   buf->size = 0;
47a1acfa9bSespie   buf->buffer = NULL;
48a1acfa9bSespie 
49a1acfa9bSespie   return buf;
50a1acfa9bSespie }
51a1acfa9bSespie 
52a1acfa9bSespie static void
append_char(buffer_type * buf,int c)53a1acfa9bSespie append_char (buffer_type *buf, int c)
54a1acfa9bSespie {
55a1acfa9bSespie   buf->length++;
56a1acfa9bSespie   if (buf->length >= buf->size)
57a1acfa9bSespie     {
58a1acfa9bSespie       buf->size += 100;
59a1acfa9bSespie       buf->buffer = xrealloc (buf->buffer, buf->size);
60a1acfa9bSespie     }
61a1acfa9bSespie   buf->buffer[buf->length - 1] = c;
62a1acfa9bSespie   buf->buffer[buf->length] = 0;
63a1acfa9bSespie }
64a1acfa9bSespie 
65a1acfa9bSespie /* Read the cascading style-sheet file FILENAME.  Write out any @import
66a1acfa9bSespie    commands, which must come first, by the definition of css.  If the
67a1acfa9bSespie    file contains any actual css code following the @imports, return it;
68a1acfa9bSespie    else return NULL.  */
69a1acfa9bSespie static char *
process_css_file(char * filename)70a1acfa9bSespie process_css_file (char *filename)
71a1acfa9bSespie {
72a1acfa9bSespie   int c;
73a1acfa9bSespie   int lastchar = 0;
74a1acfa9bSespie   FILE *f;
75a1acfa9bSespie   buffer_type *import_text = init_buffer ();
76a1acfa9bSespie   buffer_type *inline_text = init_buffer ();
77a1acfa9bSespie   unsigned lineno = 1;
78a1acfa9bSespie   enum { null_state, comment_state, import_state, inline_state } state
79a1acfa9bSespie     = null_state, prev_state;
80a1acfa9bSespie 
81a1acfa9bSespie   prev_state = null_state;
82a1acfa9bSespie 
83a1acfa9bSespie   /* read from stdin if `-' is the filename.  */
84a1acfa9bSespie   f = STREQ (filename, "-") ? stdin : fopen (filename, "r");
85a1acfa9bSespie   if (!f)
86a1acfa9bSespie     {
87a1acfa9bSespie       error (_("%s: could not open --css-file: %s"), progname, filename);
88a1acfa9bSespie       return NULL;
89a1acfa9bSespie     }
90a1acfa9bSespie 
91a1acfa9bSespie   /* Read the file.  The @import statements must come at the beginning,
92a1acfa9bSespie      with only whitespace and comments allowed before any inline css code.  */
93a1acfa9bSespie   while ((c = getc (f)) >= 0)
94a1acfa9bSespie     {
95a1acfa9bSespie       if (c == '\n')
96a1acfa9bSespie         lineno++;
97a1acfa9bSespie 
98a1acfa9bSespie       switch (state)
99a1acfa9bSespie         {
100a1acfa9bSespie         case null_state: /* between things */
101a1acfa9bSespie           if (c == '@')
102a1acfa9bSespie             { /* Only @import and @charset should switch into
103a1acfa9bSespie                  import_state, other @-commands, such as @media, should
104a1acfa9bSespie                  put us into inline_state.  I don't think any other css
105a1acfa9bSespie                  @-commands start with `i' or `c', although of course
106a1acfa9bSespie                  this will break when such a command is defined.  */
107a1acfa9bSespie               int nextchar = getc (f);
108a1acfa9bSespie               if (nextchar == 'i' || nextchar == 'c')
109a1acfa9bSespie                 {
110a1acfa9bSespie                   append_char (import_text, c);
111a1acfa9bSespie                   state = import_state;
112a1acfa9bSespie                 }
113a1acfa9bSespie               else
114a1acfa9bSespie                 {
115a1acfa9bSespie                   ungetc (nextchar, f);  /* wasn't an @import */
116a1acfa9bSespie                   state = inline_state;
117a1acfa9bSespie                 }
118a1acfa9bSespie             }
119a1acfa9bSespie           else if (c == '/')
120a1acfa9bSespie             { /* possible start of a comment */
121a1acfa9bSespie               int nextchar = getc (f);
122a1acfa9bSespie               if (nextchar == '*')
123a1acfa9bSespie                 state = comment_state;
124a1acfa9bSespie               else
125a1acfa9bSespie                 {
126a1acfa9bSespie                   ungetc (nextchar, f); /* wasn't a comment */
127a1acfa9bSespie                   state = inline_state;
128a1acfa9bSespie                 }
129a1acfa9bSespie             }
130a1acfa9bSespie           else if (isspace (c))
131a1acfa9bSespie             ; /* skip whitespace; maybe should use c_isspace?  */
132a1acfa9bSespie 
133a1acfa9bSespie           else
134a1acfa9bSespie             /* not an @import, not a comment, not whitespace: we must
135a1acfa9bSespie                have started the inline text.  */
136a1acfa9bSespie             state = inline_state;
137a1acfa9bSespie 
138a1acfa9bSespie           if (state == inline_state)
139a1acfa9bSespie             append_char (inline_text, c);
140a1acfa9bSespie 
141a1acfa9bSespie           if (state != null_state)
142a1acfa9bSespie             prev_state = null_state;
143a1acfa9bSespie           break;
144a1acfa9bSespie 
145a1acfa9bSespie         case comment_state:
146a1acfa9bSespie           if (c == '/' && lastchar == '*')
147a1acfa9bSespie             state = prev_state;  /* end of comment */
148a1acfa9bSespie           break;  /* else ignore this comment char */
149a1acfa9bSespie 
150a1acfa9bSespie         case import_state:
151a1acfa9bSespie           append_char (import_text, c);  /* include this import char */
152a1acfa9bSespie           if (c == ';')
153a1acfa9bSespie             { /* done with @import */
154a1acfa9bSespie               append_char (import_text, '\n');  /* make the output nice */
155a1acfa9bSespie               state = null_state;
156a1acfa9bSespie               prev_state = import_state;
157a1acfa9bSespie             }
158a1acfa9bSespie           break;
159a1acfa9bSespie 
160a1acfa9bSespie         case inline_state:
161a1acfa9bSespie           /* No harm in writing out comments, so don't bother parsing
162a1acfa9bSespie              them out, just append everything.  */
163a1acfa9bSespie           append_char (inline_text, c);
164a1acfa9bSespie           break;
165a1acfa9bSespie         }
166a1acfa9bSespie 
167a1acfa9bSespie       lastchar = c;
168a1acfa9bSespie     }
169a1acfa9bSespie 
170*1dc4902cSfgsch   fclose (f);  /* Even closing stdin should be ok, can't read it more
171*1dc4902cSfgsch                   than once? */
172*1dc4902cSfgsch 
173a1acfa9bSespie   /* Reached the end of the file.  We should not be still in a comment.  */
174a1acfa9bSespie   if (state == comment_state)
175a1acfa9bSespie     warning (_("%s:%d: --css-file ended in comment"), filename, lineno);
176a1acfa9bSespie 
177a1acfa9bSespie   /* Write the @import text, if any.  */
178a1acfa9bSespie   if (import_text->buffer)
179a1acfa9bSespie     {
180a1acfa9bSespie       add_word (import_text->buffer);
181a1acfa9bSespie       free (import_text->buffer);
182a1acfa9bSespie       free (import_text);
183a1acfa9bSespie     }
184a1acfa9bSespie 
185a1acfa9bSespie   /* We're wasting the buffer struct memory, but so what.  */
186a1acfa9bSespie   return inline_text->buffer;
187a1acfa9bSespie }
188a1acfa9bSespie 
189a1acfa9bSespie HSTACK *htmlstack = NULL;
190a1acfa9bSespie 
1911cc83814Sespie /* See html.h.  */
1921cc83814Sespie int html_output_head_p = 0;
193a1acfa9bSespie int html_title_written = 0;
1941cc83814Sespie 
1951cc83814Sespie void
html_output_head(void)196a1acfa9bSespie html_output_head (void)
1971cc83814Sespie {
198a1acfa9bSespie   static const char *html_title = NULL;
199a1acfa9bSespie   char *encoding;
2001cc83814Sespie 
2011cc83814Sespie   if (html_output_head_p)
2021cc83814Sespie     return;
2031cc83814Sespie   html_output_head_p = 1;
2041cc83814Sespie 
205a1acfa9bSespie   encoding = current_document_encoding ();
206a1acfa9bSespie 
2073fb98d4aSespie   /* The <title> should not have markup, so use text_expansion.  */
2083fb98d4aSespie   if (!html_title)
209a1acfa9bSespie     html_title = escape_string (title ?
210a1acfa9bSespie         text_expansion (title) : (char *) _("Untitled"));
2111cc83814Sespie 
212a1acfa9bSespie   /* Make sure this is the very first string of the output document.  */
213a1acfa9bSespie   output_paragraph_offset = 0;
214a1acfa9bSespie 
215a1acfa9bSespie   add_html_block_elt_args ("<html lang=\"%s\">\n<head>\n",
216a1acfa9bSespie       language_table[language_code].abbrev);
217a1acfa9bSespie 
218a1acfa9bSespie   /* When splitting, add current node's name to title if it's available and not
219a1acfa9bSespie      Top.  */
220a1acfa9bSespie   if (splitting && current_node && !STREQ (current_node, "Top"))
221a1acfa9bSespie     add_word_args ("<title>%s - %s</title>\n",
222a1acfa9bSespie         escape_string (xstrdup (current_node)), html_title);
223a1acfa9bSespie   else
224a1acfa9bSespie     add_word_args ("<title>%s</title>\n",  html_title);
2251cc83814Sespie 
2261cc83814Sespie   add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html");
227a1acfa9bSespie   if (encoding && *encoding)
228a1acfa9bSespie     add_word_args ("; charset=%s", encoding);
229a1acfa9bSespie 
2301cc83814Sespie   add_word ("\">\n");
2311cc83814Sespie 
2323fb98d4aSespie   if (!document_description)
2333fb98d4aSespie     document_description = html_title;
2343fb98d4aSespie 
235a1acfa9bSespie   add_word_args ("<meta name=\"description\" content=\"%s\">\n",
2363fb98d4aSespie                  document_description);
237a1acfa9bSespie   add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n",
238a1acfa9bSespie                  VERSION);
239a1acfa9bSespie 
240a1acfa9bSespie   /* Navigation bar links.  */
241a1acfa9bSespie   if (!splitting)
242a1acfa9bSespie     add_word ("<link title=\"Top\" rel=\"top\" href=\"#Top\">\n");
243a1acfa9bSespie   else if (tag_table)
244a1acfa9bSespie     {
245a1acfa9bSespie       /* Always put a top link.  */
246a1acfa9bSespie       add_word ("<link title=\"Top\" rel=\"start\" href=\"index.html#Top\">\n");
247a1acfa9bSespie 
248a1acfa9bSespie       /* We already have a top link, avoid duplication.  */
249a1acfa9bSespie       if (tag_table->up && !STREQ (tag_table->up, "Top"))
250a1acfa9bSespie         add_link (tag_table->up, "rel=\"up\"");
251a1acfa9bSespie 
252a1acfa9bSespie       if (tag_table->prev)
253a1acfa9bSespie         add_link (tag_table->prev, "rel=\"prev\"");
254a1acfa9bSespie 
255a1acfa9bSespie       if (tag_table->next)
256a1acfa9bSespie         add_link (tag_table->next, "rel=\"next\"");
257a1acfa9bSespie 
258a1acfa9bSespie       /* fixxme: Look for a way to put links to various indices in the
259a1acfa9bSespie          document.  Also possible candidates to be added here are First and
260a1acfa9bSespie          Last links.  */
261a1acfa9bSespie     }
262a1acfa9bSespie   else
263a1acfa9bSespie     {
264a1acfa9bSespie       /* We are splitting, but we neither have a tag_table.  So this must be
265a1acfa9bSespie          index.html.  So put a link to Top. */
266a1acfa9bSespie       add_word ("<link title=\"Top\" rel=\"start\" href=\"#Top\">\n");
267a1acfa9bSespie     }
268a1acfa9bSespie 
269a1acfa9bSespie   add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \
270a1acfa9bSespie rel=\"generator-home\" title=\"Texinfo Homepage\">\n");
2713fb98d4aSespie 
2723fb98d4aSespie   if (copying_text)
273a1acfa9bSespie     { /* It is not ideal that we include the html markup here within
274a1acfa9bSespie          <head>, so we use text_expansion.  */
2753fb98d4aSespie       insert_string ("<!--\n");
276a1acfa9bSespie       insert_string (text_expansion (copying_text));
2773fb98d4aSespie       insert_string ("-->\n");
2783fb98d4aSespie     }
2793fb98d4aSespie 
280a1acfa9bSespie   /* Put the style definitions in a comment for the sake of browsers
281a1acfa9bSespie      that don't support <style>.  */
282a1acfa9bSespie   add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n");
283a1acfa9bSespie   add_word ("<style type=\"text/css\"><!--\n");
284a1acfa9bSespie 
285a1acfa9bSespie   {
286a1acfa9bSespie     char *css_inline = NULL;
287a1acfa9bSespie 
288a1acfa9bSespie     if (css_include)
289a1acfa9bSespie       /* This writes out any @import commands from the --css-file,
290a1acfa9bSespie          and returns any actual css code following the imports.  */
291a1acfa9bSespie       css_inline = process_css_file (css_include);
292a1acfa9bSespie 
293a1acfa9bSespie     /* This seems cleaner than adding <br>'s at the end of each line for
294a1acfa9bSespie        these "roman" displays.  It's hardly the end of the world if the
295a1acfa9bSespie        browser doesn't do <style>s, in any case; they'll just come out in
296a1acfa9bSespie        typewriter.  */
297a1acfa9bSespie #define CSS_FONT_INHERIT "font-family:inherit"
298a1acfa9bSespie     add_word_args ("  pre.display { %s }\n", CSS_FONT_INHERIT);
299a1acfa9bSespie     add_word_args ("  pre.format  { %s }\n", CSS_FONT_INHERIT);
300a1acfa9bSespie 
301a1acfa9bSespie     /* Alternatively, we could do <font size=-1> in insertion.c, but this
302a1acfa9bSespie        way makes it easier to override.  */
303a1acfa9bSespie #define CSS_FONT_SMALLER "font-size:smaller"
304a1acfa9bSespie     add_word_args ("  pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT,
305a1acfa9bSespie                    CSS_FONT_SMALLER);
306a1acfa9bSespie     add_word_args ("  pre.smallformat  { %s; %s }\n", CSS_FONT_INHERIT,
307a1acfa9bSespie                    CSS_FONT_SMALLER);
308a1acfa9bSespie     add_word_args ("  pre.smallexample { %s }\n", CSS_FONT_SMALLER);
309a1acfa9bSespie     add_word_args ("  pre.smalllisp    { %s }\n", CSS_FONT_SMALLER);
310a1acfa9bSespie 
311a1acfa9bSespie     /* Since HTML doesn't have a sc element, we use span with a bit of
312a1acfa9bSespie        CSS spice instead.  */
313a1acfa9bSespie #define CSS_FONT_SMALL_CAPS "font-variant:small-caps"
314a1acfa9bSespie     add_word_args ("  span.sc    { %s }\n", CSS_FONT_SMALL_CAPS);
315a1acfa9bSespie 
316a1acfa9bSespie     /* Roman (default) font class, closest we can come.  */
317a1acfa9bSespie #define CSS_FONT_ROMAN "font-family:serif; font-weight:normal;"
318a1acfa9bSespie     add_word_args ("  span.roman { %s } \n", CSS_FONT_ROMAN);
319a1acfa9bSespie 
320a1acfa9bSespie     /* Sans serif font class.  */
321a1acfa9bSespie #define CSS_FONT_SANSSERIF "font-family:sans-serif; font-weight:normal;"
322a1acfa9bSespie     add_word_args ("  span.sansserif { %s } \n", CSS_FONT_SANSSERIF);
323a1acfa9bSespie 
324a1acfa9bSespie     /* Write out any css code from the user's --css-file.  */
325a1acfa9bSespie     if (css_inline)
326a1acfa9bSespie       insert_string (css_inline);
327a1acfa9bSespie 
328a1acfa9bSespie     add_word ("--></style>\n");
329a1acfa9bSespie   }
330a1acfa9bSespie 
3313fb98d4aSespie   add_word ("</head>\n<body>\n");
3323fb98d4aSespie 
333a1acfa9bSespie   if (title && !html_title_written && titlepage_cmd_present)
3343fb98d4aSespie     {
335a1acfa9bSespie       add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title);
3363fb98d4aSespie       html_title_written = 1;
3373fb98d4aSespie     }
3381cc83814Sespie 
339a1acfa9bSespie   free (encoding);
340a1acfa9bSespie }
3411cc83814Sespie 
/* Escape the HTML special characters `"', `&', `<' and `>' in STRING.
   If STRING contains none of them it is returned unchanged; otherwise
   a newly-allocated escaped copy is returned and STRING itself is
   freed.  */
char *
escape_string (char *string)
{
  const char *src;
  char *escaped;
  char *dst;
  int srclen = 0;  /* bytes in STRING, terminator included */
  int needed = 0;  /* bytes in the escaped form, terminator included */

  /* First pass: find how much to allocate.  */
  for (src = string; ; src++)
    {
      srclen++;

      if (*src == '"')
        needed += 6;            /* `&quot;' */
      else if (*src == '&')
        needed += 5;            /* `&amp;' */
      else if (*src == '<' || *src == '>')
        needed += 4;            /* `&lt;', `&gt;' */
      else
        needed++;               /* ordinary character, or the terminator */

      if (!*src)
        break;
    }

  if (needed == srclen)
    return string;              /* Already OK. */

  /* Second pass: build the escaped copy.  */
  escaped = xmalloc (needed);
  dst = escaped;
  for (src = string; *src; src++)
    {
      const char *entity;

      switch (*src)
        {
        case '"':
          entity = "&quot;";
          break;
        case '&':
          entity = "&amp;";
          break;
        case '<':
          entity = "&lt;";
          break;
        case '>':
          entity = "&gt;";
          break;
        default:
          entity = NULL;
          break;
        }

      if (entity)
        {
          size_t entity_len = strlen (entity);

          memcpy (dst, entity, entity_len);
          dst += entity_len;
        }
      else
        *dst++ = *src;
    }
  *dst = '\0';

  free (string);
  return escaped;
}
404a1acfa9bSespie 
405a1acfa9bSespie /* Save current tag.  */
406a1acfa9bSespie static void
push_tag(char * tag,char * attribs)407a1acfa9bSespie push_tag (char *tag, char *attribs)
408a1acfa9bSespie {
409a1acfa9bSespie   HSTACK *newstack = xmalloc (sizeof (HSTACK));
410a1acfa9bSespie 
411a1acfa9bSespie   newstack->tag = tag;
412a1acfa9bSespie   newstack->attribs = xstrdup (attribs);
413a1acfa9bSespie   newstack->next = htmlstack;
414a1acfa9bSespie   htmlstack = newstack;
415a1acfa9bSespie }
416a1acfa9bSespie 
417a1acfa9bSespie /* Get last tag.  */
418a1acfa9bSespie static void
pop_tag(void)419a1acfa9bSespie pop_tag (void)
420a1acfa9bSespie {
421a1acfa9bSespie   HSTACK *tos = htmlstack;
422a1acfa9bSespie 
423a1acfa9bSespie   if (!tos)
424a1acfa9bSespie     {
425a1acfa9bSespie       line_error (_("[unexpected] no html tag to pop"));
426a1acfa9bSespie       return;
427a1acfa9bSespie     }
428a1acfa9bSespie 
429a1acfa9bSespie   free (htmlstack->attribs);
430a1acfa9bSespie 
431a1acfa9bSespie   htmlstack = htmlstack->next;
432a1acfa9bSespie   free (tos);
433a1acfa9bSespie }
434a1acfa9bSespie 
435a1acfa9bSespie /* Check if tag is an empty or a whitespace only element.
436a1acfa9bSespie    If so, remove it, keeping whitespace intact.  */
437a1acfa9bSespie int
rollback_empty_tag(char * tag)438a1acfa9bSespie rollback_empty_tag (char *tag)
439a1acfa9bSespie {
440a1acfa9bSespie   int check_position = output_paragraph_offset;
441a1acfa9bSespie   int taglen = strlen (tag);
442a1acfa9bSespie   int rollback_happened = 0;
443a1acfa9bSespie   char *contents = "";
444a1acfa9bSespie   char *contents_canon_white = "";
445a1acfa9bSespie 
446a1acfa9bSespie   /* If output_paragraph is empty, we cannot rollback :-\  */
447a1acfa9bSespie   if (output_paragraph_offset <= 0)
448a1acfa9bSespie     return 0;
449a1acfa9bSespie 
450a1acfa9bSespie   /* Find the end of the previous tag.  */
4517dbdb528Sespie   while (check_position > 0 && output_paragraph[check_position-1] != '>')
452a1acfa9bSespie     check_position--;
453a1acfa9bSespie 
454a1acfa9bSespie   /* Save stuff between tag's end to output_paragraph's end.  */
455a1acfa9bSespie   if (check_position != output_paragraph_offset)
456a1acfa9bSespie     {
457a1acfa9bSespie       contents = xmalloc (output_paragraph_offset - check_position + 1);
458a1acfa9bSespie       memcpy (contents, output_paragraph + check_position,
459a1acfa9bSespie           output_paragraph_offset - check_position);
460a1acfa9bSespie 
461a1acfa9bSespie       contents[output_paragraph_offset - check_position] = '\0';
462a1acfa9bSespie 
463a1acfa9bSespie       contents_canon_white = xstrdup (contents);
464a1acfa9bSespie       canon_white (contents_canon_white);
465a1acfa9bSespie     }
466a1acfa9bSespie 
467a1acfa9bSespie   /* Find the start of the previous tag.  */
4687dbdb528Sespie   while (check_position > 0 && output_paragraph[check_position-1] != '<')
469a1acfa9bSespie     check_position--;
470a1acfa9bSespie 
471a1acfa9bSespie   /* Check to see if this is the tag.  */
472a1acfa9bSespie   if (strncmp ((char *) output_paragraph + check_position, tag, taglen) == 0
473a1acfa9bSespie       && (whitespace (output_paragraph[check_position + taglen])
474a1acfa9bSespie           || output_paragraph[check_position + taglen] == '>'))
475a1acfa9bSespie     {
476a1acfa9bSespie       if (!contents_canon_white || !*contents_canon_white)
477a1acfa9bSespie         {
478a1acfa9bSespie           /* Empty content after whitespace removal, so roll it back.  */
479a1acfa9bSespie           output_paragraph_offset = check_position - 1;
480a1acfa9bSespie           rollback_happened = 1;
481a1acfa9bSespie 
482a1acfa9bSespie           /* Original contents may not be empty (whitespace.)  */
483a1acfa9bSespie           if (contents && *contents)
484a1acfa9bSespie             {
485a1acfa9bSespie               insert_string (contents);
486a1acfa9bSespie               free (contents);
487a1acfa9bSespie             }
488a1acfa9bSespie         }
489a1acfa9bSespie     }
490a1acfa9bSespie 
491a1acfa9bSespie   return rollback_happened;
492a1acfa9bSespie }
4931cc83814Sespie 
4941cc83814Sespie /* Open or close TAG according to START_OR_END. */
4951cc83814Sespie void
496a1acfa9bSespie #if defined (VA_FPRINTF) && __STDC__
insert_html_tag_with_attribute(int start_or_end,char * tag,char * format,...)497a1acfa9bSespie insert_html_tag_with_attribute (int start_or_end, char *tag, char *format, ...)
498a1acfa9bSespie #else
499a1acfa9bSespie insert_html_tag_with_attribute (start_or_end, tag, format, va_alist)
5001cc83814Sespie      int start_or_end;
5011cc83814Sespie      char *tag;
502a1acfa9bSespie      char *format;
503a1acfa9bSespie      va_dcl
504a1acfa9bSespie #endif
5051cc83814Sespie {
506a1acfa9bSespie   char *old_tag = NULL;
507a1acfa9bSespie   char *old_attribs = NULL;
508a1acfa9bSespie   char formatted_attribs[2000]; /* xx no fixed limits */
509a1acfa9bSespie   int do_return = 0;
510a1acfa9bSespie   extern int in_html_elt;
511a1acfa9bSespie 
5121cc83814Sespie   if (start_or_end != START)
513a1acfa9bSespie     pop_tag ();
514a1acfa9bSespie 
515a1acfa9bSespie   if (htmlstack)
516a1acfa9bSespie     {
517a1acfa9bSespie       old_tag = htmlstack->tag;
518a1acfa9bSespie       old_attribs = htmlstack->attribs;
5191cc83814Sespie     }
5201cc83814Sespie 
521a1acfa9bSespie   if (format)
522a1acfa9bSespie     {
523a1acfa9bSespie #ifdef VA_SPRINTF
524a1acfa9bSespie       va_list ap;
525a1acfa9bSespie #endif
526a1acfa9bSespie 
527a1acfa9bSespie       VA_START (ap, format);
528a1acfa9bSespie #ifdef VA_SPRINTF
529a1acfa9bSespie       VA_SPRINTF (formatted_attribs, format, ap);
530a1acfa9bSespie #else
531a1acfa9bSespie       sprintf (formatted_attribs, format, a1, a2, a3, a4, a5, a6, a7, a8);
532a1acfa9bSespie #endif
533a1acfa9bSespie       va_end (ap);
534a1acfa9bSespie     }
535a1acfa9bSespie   else
536a1acfa9bSespie     formatted_attribs[0] = '\0';
537a1acfa9bSespie 
538a1acfa9bSespie   /* Exception: can nest multiple spans.  */
539a1acfa9bSespie   if (htmlstack
540a1acfa9bSespie       && STREQ (htmlstack->tag, tag)
541a1acfa9bSespie       && !(STREQ (tag, "span") && STREQ (old_attribs, formatted_attribs)))
542a1acfa9bSespie     do_return = 1;
543a1acfa9bSespie 
544a1acfa9bSespie   if (start_or_end == START)
545a1acfa9bSespie     push_tag (tag, formatted_attribs);
546a1acfa9bSespie 
547a1acfa9bSespie   if (do_return)
548a1acfa9bSespie     return;
549a1acfa9bSespie 
550a1acfa9bSespie   in_html_elt++;
551a1acfa9bSespie 
552a1acfa9bSespie   /* texinfo.tex doesn't support more than one font attribute
553a1acfa9bSespie      at the same time.  */
554a1acfa9bSespie   if ((start_or_end == START) && old_tag && *old_tag
555a1acfa9bSespie       && !rollback_empty_tag (old_tag))
556a1acfa9bSespie     add_word_args ("</%s>", old_tag);
557a1acfa9bSespie 
558a1acfa9bSespie   if (*tag)
559a1acfa9bSespie     {
560a1acfa9bSespie       if (start_or_end == START)
561a1acfa9bSespie         add_word_args (format ? "<%s %s>" : "<%s>", tag, formatted_attribs);
562a1acfa9bSespie       else if (!rollback_empty_tag (tag))
563a1acfa9bSespie         /* Insert close tag only if we didn't rollback,
564a1acfa9bSespie            in which case the opening tag is removed.  */
565a1acfa9bSespie         add_word_args ("</%s>", tag);
566a1acfa9bSespie     }
567a1acfa9bSespie 
568a1acfa9bSespie   if ((start_or_end != START) && old_tag && *old_tag)
569a1acfa9bSespie     add_word_args (strlen (old_attribs) > 0 ? "<%s %s>" : "<%s>",
570a1acfa9bSespie         old_tag, old_attribs);
571a1acfa9bSespie 
572a1acfa9bSespie   in_html_elt--;
573a1acfa9bSespie }
574a1acfa9bSespie 
/* Open or close TAG according to START_OR_END, with no attributes:
   insert_html_tag_with_attribute is called with a null format.  */
void
insert_html_tag (int start_or_end, char *tag)
{
  char *no_format = NULL;

  insert_html_tag_with_attribute (start_or_end, tag, no_format);
}
580a1acfa9bSespie 
/* Output an HTML <link> to the filename for NODENAME, including
   ATTRIBUTES as extra attributes and the node name as the title.
   Nothing is output if NODENAME is null.  */
void
add_link (char *nodename, char *attributes)
{
  char *title_text;

  if (!nodename)
    return;

  add_html_elt ("<link ");
  add_word_args ("%s", attributes);
  add_word_args (" href=\"");
  add_anchor_name (nodename, 1);

  /* The node name goes inside a double-quoted attribute value, so the
     HTML special characters must be escaped, as html_output_head does
     for the node name in <title>.  The copy is made only after
     add_anchor_name, because that can reach canon_white (NODENAME) and
     the title must show the same text as before.  escape_string
     returns either the duplicate or a fresh allocation, so the result
     is ours to free.  */
  title_text = escape_string (xstrdup (nodename));
  add_word_args ("\" title=\"%s\">\n", title_text);
  free (title_text);
}
5951cc83814Sespie 
/* Output NAME with characters escaped as appropriate for an anchor
   name.  NAME is first passed through canon_white.

   If OLD is zero, URL special characters are written with our _00hh
   convention, and a name that does not start with an ASCII letter is
   prefixed with `g_t'.  (See the manual for details on this scheme.)

   If OLD is nonzero, generate the node name with the 4.6-and-earlier
   convention of %hh (and more special characters output as-is, notably
   - and *).  This is only so that external references to old names can
   still work with HTML generated by the new makeinfo; the gcc folks
   needed this.  Our own HTML does not refer to these names.  */

void
add_escaped_anchor_name (char *name, int old)
{
  static const char ascii_letters[] =
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  char *p;

  canon_white (name);

  /* XHTML does not allow anything but an ASCII letter to start an
     identifier.  Therefore kludge in this constant string if we
     have a nonletter.  */
  if (!old && strchr (ascii_letters, *name) == NULL)
    add_word ("g_t");

  p = name;
  while (*p)
    {
      if (cr_or_whitespace (*p))
        add_char ('-');

      else if (URL_SAFE_CHAR (*p))
        add_char (*p);

      else if (!old)
        /* Cast so characters with the high bit set are treated as >128,
           for example o-umlaut should be 246, not -10.  */
        add_word_args ("_00%x", (unsigned char) *p);

      else if (!OLD_URL_SAFE_CHAR (*p))
        /* Different output convention, but still cast as above.  */
        add_word_args ("%%%x", (unsigned char) *p);

      else
        add_char (*p);

      p++;
    }
}
6371cc83814Sespie 
6381cc83814Sespie /* Insert the text for the name of a reference in an HTML anchor
639a1acfa9bSespie    appropriate for NODENAME.
640a1acfa9bSespie 
641a1acfa9bSespie    If HREF is zero, generate text for name= in the new node name
642a1acfa9bSespie      conversion convention.
643a1acfa9bSespie    If HREF is negative, generate text for name= in the old convention.
644a1acfa9bSespie    If HREF is positive, generate the name for an href= attribute, i.e.,
645a1acfa9bSespie      including the `#' if it's an internal reference.   */
6461cc83814Sespie void
add_anchor_name(char * nodename,int href)647a1acfa9bSespie add_anchor_name (char *nodename, int href)
6481cc83814Sespie {
649a1acfa9bSespie   if (href > 0)
6503fb98d4aSespie     {
6513fb98d4aSespie       if (splitting)
6523fb98d4aSespie 	add_url_name (nodename, href);
6531cc83814Sespie       add_char ('#');
6543fb98d4aSespie     }
6553fb98d4aSespie   /* Always add NODENAME, so that the reference would pinpoint the
6563fb98d4aSespie      exact node on its file.  This is so several nodes could share the
6573fb98d4aSespie      same file, in case of file-name clashes, but also for more
6583fb98d4aSespie      accurate browser positioning.  */
6593fb98d4aSespie   if (strcasecmp (nodename, "(dir)") == 0)
6603fb98d4aSespie     /* Strip the parens, but keep the original letter-case.  */
6613fb98d4aSespie     add_word_args ("%.3s", nodename + 1);
662a1acfa9bSespie   else if (strcasecmp (nodename, "top") == 0)
663a1acfa9bSespie     add_word ("Top");
6643fb98d4aSespie   else
665a1acfa9bSespie     add_escaped_anchor_name (nodename, href < 0);
6661cc83814Sespie }
6673fb98d4aSespie 
/* Emit the file-name part of an HTML URL referring to NODENAME.
   HREF is handed on unchanged to add_nodename_to_filename, which does
   the actual work.  */
void
add_url_name (char *nodename, int href)
{
  add_nodename_to_filename (nodename, href);
}
6753fb98d4aSespie 
676a1acfa9bSespie /* Convert non [A-Za-z0-9] to _00xx, where xx means the hexadecimal
677a1acfa9bSespie    representation of the ASCII character.  Also convert spaces and
678a1acfa9bSespie    newlines to dashes.  */
679a1acfa9bSespie static void
fix_filename(char * filename)680a1acfa9bSespie fix_filename (char *filename)
6813fb98d4aSespie {
682a1acfa9bSespie   int i;
683a1acfa9bSespie   int len = strlen (filename);
684a1acfa9bSespie   char *oldname = xstrdup (filename);
685a1acfa9bSespie 
686a1acfa9bSespie   *filename = '\0';
687a1acfa9bSespie 
688a1acfa9bSespie   for (i = 0; i < len; i++)
6893fb98d4aSespie     {
690a1acfa9bSespie       if (cr_or_whitespace (oldname[i]))
691a1acfa9bSespie         strcat (filename, "-");
692a1acfa9bSespie       else if (URL_SAFE_CHAR (oldname[i]))
693a1acfa9bSespie         strncat (filename, (char *) oldname + i, 1);
694a1acfa9bSespie       else
695a1acfa9bSespie         {
696a1acfa9bSespie           char *hexchar = xmalloc (6 * sizeof (char));
697a1acfa9bSespie           sprintf (hexchar, "_00%x", (unsigned char) oldname[i]);
698a1acfa9bSespie           strcat (filename, hexchar);
699a1acfa9bSespie           free (hexchar);
7003fb98d4aSespie         }
701a1acfa9bSespie 
702a1acfa9bSespie       /* Check if we are nearing boundaries.  */
703a1acfa9bSespie       if (strlen (filename) >= PATH_MAX - 20)
704a1acfa9bSespie         break;
705a1acfa9bSespie     }
706a1acfa9bSespie 
707a1acfa9bSespie   free (oldname);
7083fb98d4aSespie }
7093fb98d4aSespie 
7103fb98d4aSespie /* As we can't look-up a (forward-referenced) nodes' html filename
7113fb98d4aSespie    from the tentry, we take the easy way out.  We assume that
7123fb98d4aSespie    nodenames are unique, and generate the html filename from the
7133fb98d4aSespie    nodename, that's always known.  */
7143fb98d4aSespie static char *
nodename_to_filename_1(char * nodename,int href)715a1acfa9bSespie nodename_to_filename_1 (char *nodename, int href)
7163fb98d4aSespie {
7173fb98d4aSespie   char *p;
7183fb98d4aSespie   char *filename;
7193fb98d4aSespie   char dirname[PATH_MAX];
7203fb98d4aSespie 
7213fb98d4aSespie   if (strcasecmp (nodename, "Top") == 0)
7223fb98d4aSespie     {
7233fb98d4aSespie       /* We want to convert references to the Top node into
7243fb98d4aSespie 	 "index.html#Top".  */
7253fb98d4aSespie       if (href)
7263fb98d4aSespie 	filename = xstrdup ("index.html"); /* "#Top" is added by our callers */
7273fb98d4aSespie       else
7283fb98d4aSespie 	filename = xstrdup ("Top");
7293fb98d4aSespie     }
7303fb98d4aSespie   else if (strcasecmp (nodename, "(dir)") == 0)
7313fb98d4aSespie     /* We want to convert references to the (dir) node into
7323fb98d4aSespie        "../index.html".  */
7333fb98d4aSespie     filename = xstrdup ("../index.html");
7343fb98d4aSespie   else
7353fb98d4aSespie     {
7363fb98d4aSespie       filename = xmalloc (PATH_MAX);
7373fb98d4aSespie       dirname[0] = '\0';
7383fb98d4aSespie       *filename = '\0';
7393fb98d4aSespie 
7403fb98d4aSespie       /* Check for external reference: ``(info-document)node-name''
7413fb98d4aSespie 	 Assume this node lives at: ``../info-document/node-name.html''
7423fb98d4aSespie 
7433fb98d4aSespie 	 We need to handle the special case (sigh): ``(info-document)'',
7443fb98d4aSespie 	 ie, an external top-node, which should translate to:
7453fb98d4aSespie 	 ``../info-document/info-document.html'' */
7463fb98d4aSespie 
7473fb98d4aSespie       p = nodename;
7483fb98d4aSespie       if (*nodename == '(')
7493fb98d4aSespie 	{
7503fb98d4aSespie 	  int length;
7513fb98d4aSespie 
7523fb98d4aSespie 	  p = strchr (nodename, ')');
7533fb98d4aSespie 	  if (p == NULL)
7543fb98d4aSespie 	    {
755a1acfa9bSespie 	      line_error (_("[unexpected] invalid node name: `%s'"), nodename);
756a1acfa9bSespie 	      xexit (1);
7573fb98d4aSespie 	    }
7583fb98d4aSespie 
7593fb98d4aSespie 	  length = p - nodename - 1;
7603fb98d4aSespie 	  if (length > 5 &&
7613fb98d4aSespie 	      FILENAME_CMPN (p - 5, ".info", 5) == 0)
7623fb98d4aSespie 	    length -= 5;
7633fb98d4aSespie 	  /* This is for DOS, and also for Windows and GNU/Linux
7643fb98d4aSespie 	     systems that might have Info files copied from a DOS 8+3
7653fb98d4aSespie 	     filesystem.  */
7663fb98d4aSespie 	  if (length > 4 &&
7673fb98d4aSespie 	      FILENAME_CMPN (p - 4, ".inf", 4) == 0)
7683fb98d4aSespie 	    length -= 4;
7693fb98d4aSespie 	  strcpy (filename, "../");
7703fb98d4aSespie 	  strncpy (dirname, nodename + 1, length);
7713fb98d4aSespie 	  *(dirname + length) = '\0';
7723fb98d4aSespie 	  fix_filename (dirname);
7733fb98d4aSespie 	  strcat (filename, dirname);
7743fb98d4aSespie 	  strcat (filename, "/");
7753fb98d4aSespie 	  p++;
7763fb98d4aSespie 	}
7773fb98d4aSespie 
7783fb98d4aSespie       /* In the case of just (info-document), there will be nothing
7793fb98d4aSespie 	 remaining, and we will refer to ../info-document/, which will
7803fb98d4aSespie 	 work fine.  */
7813fb98d4aSespie       strcat (filename, p);
7823fb98d4aSespie       if (*p)
7833fb98d4aSespie 	{
7843fb98d4aSespie 	  /* Hmm */
7853fb98d4aSespie 	  fix_filename (filename + strlen (filename) - strlen (p));
7863fb98d4aSespie 	  strcat (filename, ".html");
7873fb98d4aSespie 	}
7883fb98d4aSespie     }
7893fb98d4aSespie 
7903fb98d4aSespie   /* Produce a file name suitable for the underlying filesystem.  */
7913fb98d4aSespie   normalize_filename (filename);
7923fb98d4aSespie 
7933fb98d4aSespie #if 0
7943fb98d4aSespie   /* We add ``#Nodified-filename'' anchor to external references to be
7953fb98d4aSespie      prepared for non-split HTML support.  Maybe drop this. */
7963fb98d4aSespie   if (href && *dirname)
7973fb98d4aSespie     {
7983fb98d4aSespie       strcat (filename, "#");
7993fb98d4aSespie       strcat (filename, p);
8003fb98d4aSespie       /* Hmm, again */
8013fb98d4aSespie       fix_filename (filename + strlen (filename) - strlen (p));
8023fb98d4aSespie     }
8033fb98d4aSespie #endif
8043fb98d4aSespie 
8053fb98d4aSespie   return filename;
8063fb98d4aSespie }
8073fb98d4aSespie 
/* Output the HTML file name that corresponds to NODENAME.  HREF is
   passed on to nodename_to_filename_1.

   Ideally the name would only be written when it differs from the
   file currently being produced; for now no such check is made and
   the name is always output.  */
void
add_nodename_to_filename (char *nodename, int href)
{
  char *html_name;

  html_name = nodename_to_filename_1 (nodename, href);
  add_word (html_name);

  /* nodename_to_filename_1 hands back allocated storage.  */
  free (html_name);
}
8183fb98d4aSespie 
/* Return the HTML file name for NODENAME, as computed by
   nodename_to_filename_1 in its href flavour.  */
char *
nodename_to_filename (char *nodename)
{
  /* Callers use the result to build <a href=...>, hence the nonzero
     HREF argument.  */
  const int for_href = 1;

  return nodename_to_filename_1 (nodename, for_href);
}
826