1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4 #include <bio.h>
5
/* sizes and list kinds shared by the font and list stacks */
enum
{
	SSIZE		= 10,	/* depth of fontstack and liststack */

	/* list types stored in liststack[].type */
	Lordered	= 0,
	Lunordered	= 1,
	Lmenu		= 2,
	Ldir		= 3,
};
17
18 Biobuf in, out;
19 int lastc = '\n';
20 int inpre = 0;
21
22 /* stack for fonts */
23 char *fontstack[SSIZE];
24 char *font = "R";
25 int fsp;
26
/*
 * list stack: one entry per open list.
 * type is one of the L* constants; ord is the last number
 * handed out by g_li for an ordered list.
 */
struct
{
	int	type;
	int	ord;
} liststack[SSIZE];
int lsp = 0;
34
/* non-zero while a '"' opened by g_dt still has to be closed */
int quoting = 0;
36
/*
 * One HTML tag ("goobie") and its handlers.
 * dogoobie calls f for <name ...> and ef for </name>, passing the
 * table entry itself and the text after the tag name (or 0).
 */
typedef struct Goobie Goobie;
struct Goobie
{
	char	*name;				/* tag name, compared case-insensitively */
	void	(*f)(Goobie *g, char *arg);	/* opening-tag handler */
	void	(*ef)(Goobie *g, char *arg);	/* closing-tag handler */
};
44
45 void eatwhite(void);
46 void escape(void);
47
48 typedef void Action(Goobie*, char*);
49
50 Action g_ignore;
51 Action g_unexpected;
52 Action g_title;
53 Action g_p;
54 Action g_h;
55 Action g_li;
56 Action g_list, g_listend;
57 Action g_pre;
58 Action g_fpush, g_fpop;
59 Action g_indent, g_exdent;
60 Action g_dt;
61 Action g_display;
62 Action g_displayend;
63 Action g_table, g_tableend, g_caption, g_captionend;
64 Action g_br, g_hr;
65
66 Goobie gtab[] =
67 {
68 "!--", g_ignore, g_unexpected,
69 "!doctype", g_ignore, g_unexpected,
70 "a", g_ignore, g_ignore,
71 "address", g_display, g_displayend,
72 "b", g_fpush, g_fpop,
73 "base", g_ignore, g_unexpected,
74 "blink", g_ignore, g_ignore,
75 "blockquote", g_ignore, g_ignore,
76 "body", g_ignore, g_ignore,
77 "br", g_br, g_unexpected,
78 "caption", g_caption, g_captionend,
79 "center", g_ignore, g_ignore,
80 "cite", g_ignore, g_ignore,
81 "code", g_ignore, g_ignore,
82 "dd", g_ignore, g_unexpected,
83 "dfn", g_ignore, g_ignore,
84 "dir", g_list, g_listend,
85 "div", g_ignore, g_br,
86 "dl", g_indent, g_exdent,
87 "dt", g_dt, g_unexpected,
88 "em", g_ignore, g_ignore,
89 "font", g_ignore, g_ignore,
90 "form", g_ignore, g_ignore,
91 "h1", g_h, g_p,
92 "h2", g_h, g_p,
93 "h3", g_h, g_p,
94 "h4", g_h, g_p,
95 "h5", g_h, g_p,
96 "h6", g_h, g_p,
97 "head", g_ignore, g_ignore,
98 "hr", g_hr, g_unexpected,
99 "html", g_ignore, g_ignore,
100 "i", g_fpush, g_fpop,
101 "input", g_ignore, g_unexpected,
102 "img", g_ignore, g_unexpected,
103 "isindex", g_ignore, g_unexpected,
104 "kbd", g_fpush, g_fpop,
105 "key", g_ignore, g_ignore,
106 "li", g_li, g_unexpected,
107 "link", g_ignore, g_unexpected,
108 "listing", g_ignore, g_ignore,
109 "menu", g_list, g_listend,
110 "meta", g_ignore, g_unexpected,
111 "nextid", g_ignore, g_unexpected,
112 "ol", g_list, g_listend,
113 "option", g_ignore, g_unexpected,
114 "p", g_p, g_ignore,
115 "plaintext", g_ignore, g_unexpected,
116 "pre", g_pre, g_displayend,
117 "samp", g_ignore, g_ignore,
118 "script", g_ignore, g_ignore,
119 "select", g_ignore, g_ignore,
120 "span", g_ignore, g_ignore,
121 "strong", g_ignore, g_ignore,
122 "table", g_table, g_tableend,
123 "textarea", g_ignore, g_ignore,
124 "title", g_title, g_ignore,
125 "tt", g_fpush, g_fpop,
126 "u", g_ignore, g_ignore,
127 "ul", g_list, g_listend,
128 "var", g_ignore, g_ignore,
129 "xmp", g_ignore, g_ignore,
130 0, 0, 0,
131 };
132
133 typedef struct Entity Entity;
134 struct Entity
135 {
136 char *name;
137 Rune value;
138 };
139
140 Entity pl_entity[]=
141 {
142 "#SPACE", L' ', "#RS", L'\n', "#RE", L'\r', "quot", L'"',
143 "AElig", L'Æ', "Aacute", L'Á', "Acirc", L'Â', "Agrave", L'À', "Aring", L'Å',
144 "Atilde", L'Ã', "Auml", L'Ä', "Ccedil", L'Ç', "ETH", L'Ð', "Eacute", L'É',
145 "Ecirc", L'Ê', "Egrave", L'È', "Euml", L'Ë', "Iacute", L'Í', "Icirc", L'Î',
146 "Igrave", L'Ì', "Iuml", L'Ï', "Ntilde", L'Ñ', "Oacute", L'Ó', "Ocirc", L'Ô',
147 "Ograve", L'Ò', "Oslash", L'Ø', "Otilde", L'Õ', "Ouml", L'Ö', "THORN", L'Þ',
148 "Uacute", L'Ú', "Ucirc", L'Û', "Ugrave", L'Ù', "Uuml", L'Ü', "Yacute", L'Ý',
149 "aacute", L'á', "acirc", L'â', "aelig", L'æ', "agrave", L'à', "amp", L'&',
150 "aring", L'å', "atilde", L'ã', "auml", L'ä', "ccedil", L'ç', "eacute", L'é',
151 "ecirc", L'ê', "egrave", L'è', "eth", L'ð', "euml", L'ë', "gt", L'>',
152 "iacute", L'í', "icirc", L'î', "igrave", L'ì', "iuml", L'ï', "lt", L'<',
153 "nbsp", L' ',
154 "ntilde", L'ñ', "oacute", L'ó', "ocirc", L'ô', "ograve", L'ò', "oslash", L'ø',
155 "otilde", L'õ', "ouml", L'ö', "szlig", L'ß', "thorn", L'þ', "uacute", L'ú',
156 "ucirc", L'û', "ugrave", L'ù', "uuml", L'ü', "yacute", L'ý', "yuml", L'ÿ',
157 0
158 };
159
/*
 * Compare two NUL-terminated strings ignoring case.
 *
 * Returns 0 when the strings are equal after folding each byte with
 * tolower, otherwise the difference of the first pair of folded
 * bytes that differ (negative when a sorts first).
 *
 * Bytes are converted to unsigned char before being handed to
 * tolower: passing a negative char value to a ctype function is
 * undefined in standard C.  As a consequence bytes >= 0x80 order
 * after ASCII, as they do with strcmp.
 */
int
cistrcmp(char *a, char *b)
{
	int ca, cb;

	for(;; a++, b++){
		ca = tolower((unsigned char)*a);
		cb = tolower((unsigned char)*b);
		if(ca != cb)
			return ca - cb;
		if(ca == 0)
			return 0;	/* both strings ended together */
	}
}
175
176 int
readupto(char * buf,int n,char d,char notme)177 readupto(char *buf, int n, char d, char notme)
178 {
179 char *p;
180 int c;
181
182 buf[0] = 0;
183 for(p = buf;; p++){
184 c = Bgetc(&in);
185 if(c < 0){
186 *p = 0;
187 return -1;
188 }
189 if(c == notme){
190 Bungetc(&in);
191 return -1;
192 }
193 if(c == d){
194 *p = 0;
195 return 0;
196 }
197 *p = c;
198 if(p == buf + n){
199 *p = 0;
200 Bprint(&out, "<%s", buf);
201 return -1;
202 }
203 }
204 }
205
206 void
dogoobie(void)207 dogoobie(void)
208 {
209 char *arg, *type;
210 Goobie *g;
211 char buf[1024];
212 int closing;
213
214 if(readupto(buf, sizeof(buf), '>', '<') < 0){
215 Bprint(&out, "<%s", buf);
216 return;
217 }
218 type = buf;
219 if(*type == '/'){
220 type++;
221 closing = 1;
222 } else
223 closing = 0;
224 arg = strchr(type, ' ');
225 if(arg == 0)
226 arg = strchr(type, '\r');
227 if(arg == 0)
228 arg = strchr(type, '\n');
229 if(arg)
230 *arg++ = 0;
231 for(g = gtab; g->name; g++)
232 if(cistrcmp(type, g->name) == 0){
233 if(closing){
234 if(g->ef){
235 (*g->ef)(g, arg);
236 return;
237 }
238 } else {
239 if(g->f){
240 (*g->f)(g, arg);
241 return;
242 }
243 }
244 }
245 if(closing)
246 type--;
247 if(arg)
248 Bprint(&out, "<%s %s>\n", type, arg);
249 else
250 Bprint(&out, "<%s>\n", type);
251 }
252
253 void
main(void)254 main(void)
255 {
256 int c, pos;
257
258 Binit(&in, 0, OREAD);
259 Binit(&out, 1, OWRITE);
260
261 pos = 0;
262 for(;;){
263 c = Bgetc(&in);
264 if(c < 0)
265 return;
266 switch(c){
267 case '<':
268 dogoobie();
269 break;
270 case '&':
271 escape();
272 break;
273 case '\r':
274 pos = 0;
275 break;
276 case '\n':
277 if(quoting){
278 Bputc(&out, '"');
279 quoting = 0;
280 }
281 if(lastc != '\n')
282 Bputc(&out, '\n');
283 /* can't emit leading spaces in filled troff docs */
284 if (!inpre)
285 eatwhite();
286 lastc = c;
287 break;
288 default:
289 ++pos;
290 if(!inpre && isascii(c) && isspace(c) && pos > 80){
291 Bputc(&out, '\n');
292 eatwhite();
293 pos = 0;
294 }else
295 Bputc(&out, c);
296 lastc = c;
297 break;
298 }
299 }
300 }
301
302 void
escape(void)303 escape(void)
304 {
305 int c;
306 Entity *e;
307 char buf[8];
308
309 if(readupto(buf, sizeof(buf), ';', '\n') < 0){
310 Bprint(&out, "&%s", buf);
311 return;
312 }
313 for(e = pl_entity; e->name; e++)
314 if(strcmp(buf, e->name) == 0){
315 Bprint(&out, "%C", e->value);
316 return;
317 }
318 if(*buf == '#'){
319 c = atoi(buf+1);
320 if(isascii(c) && isprint(c)){
321 Bputc(&out, c);
322 return;
323 }
324 }
325 Bprint(&out, "&%s;", buf);
326 }
327
328 /*
329 * whitespace is not significant to HTML, but newlines
330 * and leading spaces are significant to troff.
331 */
332 void
eatwhite(void)333 eatwhite(void)
334 {
335 int c;
336
337 for(;;){
338 c = Bgetc(&in);
339 if(c < 0)
340 break;
341 if(!isspace(c)){
342 Bungetc(&in);
343 break;
344 }
345 }
346 }
347
348 /*
349 * print at start of line
350 */
351 void
printsol(char * fmt,...)352 printsol(char *fmt, ...)
353 {
354 va_list arg;
355
356 if(quoting){
357 Bputc(&out, '"');
358 quoting = 0;
359 }
360 if(lastc != '\n')
361 Bputc(&out, '\n');
362 va_start(arg, fmt);
363 Bvprint(&out, fmt, arg);
364 va_end(arg);
365 lastc = '\n';
366 }
367
368 void
g_ignore(Goobie * g,char * arg)369 g_ignore(Goobie *g, char *arg)
370 {
371 USED(g, arg);
372 }
373
374 void
g_unexpected(Goobie * g,char * arg)375 g_unexpected(Goobie *g, char *arg)
376 {
377 USED(arg);
378 fprint(2, "unexpected %s ending\n", g->name);
379 }
380
381 void
g_title(Goobie * g,char * arg)382 g_title(Goobie *g, char *arg)
383 {
384 USED(arg);
385 printsol(".TL\n", g->name);
386 }
387
388 void
g_p(Goobie * g,char * arg)389 g_p(Goobie *g, char *arg)
390 {
391 USED(arg);
392 printsol(".LP\n", g->name);
393 }
394
395 void
g_h(Goobie * g,char * arg)396 g_h(Goobie *g, char *arg)
397 {
398 USED(arg);
399 printsol(".SH %c\n", g->name[1]);
400 }
401
402 void
g_list(Goobie * g,char * arg)403 g_list(Goobie *g, char *arg)
404 {
405 USED(arg);
406
407 if(lsp != SSIZE){
408 switch(g->name[0]){
409 case 'o':
410 liststack[lsp].type = Lordered;
411 liststack[lsp].ord = 0;
412 break;
413 default:
414 liststack[lsp].type = Lunordered;
415 break;
416 }
417 }
418 lsp++;
419 }
420
421 void
g_br(Goobie * g,char * arg)422 g_br(Goobie *g, char *arg)
423 {
424 USED(g, arg);
425 printsol(".br\n");
426 }
427
428 void
g_li(Goobie * g,char * arg)429 g_li(Goobie *g, char *arg)
430 {
431 USED(g, arg);
432 if(lsp <= 0 || lsp > SSIZE){
433 printsol(".IP \\(bu\n");
434 return;
435 }
436 switch(liststack[lsp-1].type){
437 case Lunordered:
438 printsol(".IP \\(bu\n");
439 break;
440 case Lordered:
441 printsol(".IP %d\n", ++liststack[lsp-1].ord);
442 break;
443 }
444 }
445
446 void
g_listend(Goobie * g,char * arg)447 g_listend(Goobie *g, char *arg)
448 {
449 USED(g, arg);
450 if(--lsp < 0)
451 lsp = 0;
452 printsol(".LP\n");
453 }
454
455 void
g_display(Goobie * g,char * arg)456 g_display(Goobie *g, char *arg)
457 {
458 USED(g, arg);
459 printsol(".DS\n");
460 }
461
462 void
g_pre(Goobie * g,char * arg)463 g_pre(Goobie *g, char *arg)
464 {
465 USED(g, arg);
466 printsol(".DS L\n");
467 inpre = 1;
468 }
469
470 void
g_displayend(Goobie * g,char * arg)471 g_displayend(Goobie *g, char *arg)
472 {
473 USED(g, arg);
474 printsol(".DE\n");
475 inpre = 0;
476 }
477
478 void
g_fpush(Goobie * g,char * arg)479 g_fpush(Goobie *g, char *arg)
480 {
481 USED(arg);
482 if(fsp < SSIZE)
483 fontstack[fsp] = font;
484 fsp++;
485 switch(g->name[0]){
486 case 'b':
487 font = "B";
488 break;
489 case 'i':
490 font = "I";
491 break;
492 case 'k': /* kbd */
493 case 't': /* tt */
494 font = "(CW";
495 break;
496 }
497 Bprint(&out, "\\f%s", font);
498 }
499
500 void
g_fpop(Goobie * g,char * arg)501 g_fpop(Goobie *g, char *arg)
502 {
503 USED(g, arg);
504 fsp--;
505 if(fsp < SSIZE)
506 font = fontstack[fsp];
507 else
508 font = "R";
509
510 Bprint(&out, "\\f%s", font);
511 }
512
513 void
g_indent(Goobie * g,char * arg)514 g_indent(Goobie *g, char *arg)
515 {
516 USED(g, arg);
517 printsol(".RS\n");
518 }
519
520 void
g_exdent(Goobie * g,char * arg)521 g_exdent(Goobie *g, char *arg)
522 {
523 USED(g, arg);
524 printsol(".RE\n");
525 }
526
527 void
g_dt(Goobie * g,char * arg)528 g_dt(Goobie *g, char *arg)
529 {
530 USED(g, arg);
531 printsol(".IP \"");
532 quoting = 1;
533 }
534
535 void
g_hr(Goobie * g,char * arg)536 g_hr(Goobie *g, char *arg)
537 {
538 USED(g, arg);
539 printsol(".br\n");
540 printsol("\\l'5i'\n");
541 }
542
543
544 /*
545 <table border>
546 <caption><font size="+1"><b>Cumulative Class Data</b></font></caption>
547 <tr><th rowspan=2>DOSE<br>mg/kg</th><th colspan=2>PARALYSIS</th><th colspan=2>DEATH</th>
548 </tr>
549 <tr><th width=80>Number</th><th width=80>Percent</th><th width=80>Number</th><th width=80>Percent</th>
550 </tr>
551 <tr align=center>
552 <td>0.1</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
553 </tr>
554 <tr align=center>
555 <td>0.2</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
556 </tr>
557 <tr align=center>
558 <td>0.3</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
559 </tr>
560 <tr align=center>
561 <td>0.4</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
562 </tr>
563 <tr align=center>
564 <td>0.5</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
565 </tr>
566 <tr align=center>
567 <td>0.6</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
568 </tr>
569 <tr align=center>
570 <td>0.7</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
571 </tr>
572 <tr align=center>
573 <td>0.8</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
574 </tr>
575 <tr align=center>
576 <td>0.8 oral</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
577 </tr>
578 </table>
579 */
580
581 void
g_table(Goobie * g,char * arg)582 g_table(Goobie *g, char *arg)
583 {
584 USED(g, arg);
585 printsol(".TS\ncenter ;\n");
586 }
587
588 void
g_tableend(Goobie * g,char * arg)589 g_tableend(Goobie *g, char *arg)
590 {
591 USED(g, arg);
592 printsol(".TE\n");
593 }
594
595 void
g_caption(Goobie * g,char * arg)596 g_caption(Goobie *g, char *arg)
597 {
598 USED(g, arg);
599 }
600
601 void
g_captionend(Goobie * g,char * arg)602 g_captionend(Goobie *g, char *arg)
603 {
604 USED(g, arg);
605 }
606