xref: /netbsd-src/external/gpl2/groff/dist/src/preproc/tbl/main.cpp (revision 89a07cf815a29524268025a1139fac4c5190f765)
1 /*	$NetBSD: main.cpp,v 1.1.1.1 2016/01/13 18:41:48 christos Exp $	*/
2 
3 // -*- C++ -*-
4 /* Copyright (C) 1989, 1990, 1991, 1992, 2000, 2001, 2002, 2003, 2004, 2005
5    Free Software Foundation, Inc.
6      Written by James Clark (jjc@jclark.com)
7 
8 This file is part of groff.
9 
10 groff is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
14 
15 groff is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18 for more details.
19 
20 You should have received a copy of the GNU General Public License along
21 with groff; see the file COPYING.  If not, write to the Free Software
22 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
23 
24 #include "table.h"
25 
extern "C" const char *Version_string;

// Non-zero selects compatibility behaviour in the readers below: `.TS',
// `.TE' and `.lf' are then recognised without checking the character that
// follows them, and `\a' inside a table is rewritten as a leader escape.
int compatible_flag = 0;

// Largest values accepted for the `p' (point size) and `v' (vertical
// spacing) column modifiers when a format specification is parsed.
#define MAX_POINT_SIZE 99
#define MAX_VERTICAL_SPACING 72
32 
33 class table_input {
34   FILE *fp;
35   enum { START, MIDDLE,
36 	 REREAD_T, REREAD_TE, REREAD_E,
37 	 LEADER_1, LEADER_2, LEADER_3, LEADER_4,
38 	 END, ERROR } state;
39   string unget_stack;
40 public:
41   table_input(FILE *);
42   int get();
ended()43   int ended() { return unget_stack.empty() && state == END; }
44   void unget(char);
45 };
46 
table_input(FILE * p)47 table_input::table_input(FILE *p)
48 : fp(p), state(START)
49 {
50 }
51 
unget(char c)52 void table_input::unget(char c)
53 {
54   assert(c != '\0');
55   unget_stack += c;
56   if (c == '\n')
57     current_lineno--;
58 }
59 
get()60 int table_input::get()
61 {
62   int len = unget_stack.length();
63   if (len != 0) {
64     unsigned char c = unget_stack[len - 1];
65     unget_stack.set_length(len - 1);
66     if (c == '\n')
67       current_lineno++;
68     return c;
69   }
70   int c;
71   for (;;) {
72     switch (state) {
73     case START:
74       if ((c = getc(fp)) == '.') {
75 	if ((c = getc(fp)) == 'T') {
76 	  if ((c = getc(fp)) == 'E') {
77 	    if (compatible_flag) {
78 	      state = END;
79 	      return EOF;
80 	    }
81 	    else {
82 	      c = getc(fp);
83 	      if (c != EOF)
84 		ungetc(c, fp);
85 	      if (c == EOF || c == ' ' || c == '\n') {
86 		state = END;
87 		return EOF;
88 	      }
89 	      state = REREAD_TE;
90 	      return '.';
91 	    }
92 	  }
93 	  else {
94 	    if (c != EOF)
95 	      ungetc(c, fp);
96 	    state = REREAD_T;
97 	    return '.';
98 	  }
99 	}
100 	else {
101 	  if (c != EOF)
102 	    ungetc(c, fp);
103 	  state = MIDDLE;
104 	  return '.';
105 	}
106       }
107       else if (c == EOF) {
108 	state = ERROR;
109 	return EOF;
110       }
111       else {
112 	if (c == '\n')
113 	  current_lineno++;
114 	else {
115 	  state = MIDDLE;
116 	  if (c == '\0') {
117 	    error("invalid input character code 0");
118 	    break;
119 	  }
120 	}
121 	return c;
122       }
123       break;
124     case MIDDLE:
125       // handle line continuation and uninterpreted leader character
126       if ((c = getc(fp)) == '\\') {
127 	c = getc(fp);
128 	if (c == '\n')
129 	  c = getc(fp);		// perhaps state ought to be START now
130 	else if (c == 'a' && compatible_flag) {
131 	  state = LEADER_1;
132 	  return '\\';
133 	}
134 	else {
135 	  if (c != EOF)
136 	    ungetc(c, fp);
137 	  c = '\\';
138 	}
139       }
140       if (c == EOF) {
141 	state = ERROR;
142 	return EOF;
143       }
144       else {
145 	if (c == '\n') {
146 	  state = START;
147 	  current_lineno++;
148 	}
149 	else if (c == '\0') {
150 	  error("invalid input character code 0");
151 	  break;
152 	}
153 	return c;
154       }
155     case REREAD_T:
156       state = MIDDLE;
157       return 'T';
158     case REREAD_TE:
159       state = REREAD_E;
160       return 'T';
161     case REREAD_E:
162       state = MIDDLE;
163       return 'E';
164     case LEADER_1:
165       state = LEADER_2;
166       return '*';
167     case LEADER_2:
168       state = LEADER_3;
169       return '(';
170     case LEADER_3:
171       state = LEADER_4;
172       return PREFIX_CHAR;
173     case LEADER_4:
174       state = MIDDLE;
175       return LEADER_CHAR;
176     case END:
177     case ERROR:
178       return EOF;
179     }
180   }
181 }
182 
183 void process_input_file(FILE *);
184 void process_table(table_input &in);
185 
process_input_file(FILE * fp)186 void process_input_file(FILE *fp)
187 {
188   enum { START, MIDDLE, HAD_DOT, HAD_T, HAD_TS, HAD_l, HAD_lf } state;
189   state = START;
190   int c;
191   while ((c = getc(fp)) != EOF)
192     switch (state) {
193     case START:
194       if (c == '.')
195 	state = HAD_DOT;
196       else {
197 	if (c == '\n')
198 	  current_lineno++;
199 	else
200 	  state = MIDDLE;
201 	putchar(c);
202       }
203       break;
204     case MIDDLE:
205       if (c == '\n') {
206 	current_lineno++;
207 	state = START;
208       }
209       putchar(c);
210       break;
211     case HAD_DOT:
212       if (c == 'T')
213 	state = HAD_T;
214       else if (c == 'l')
215 	state = HAD_l;
216       else {
217 	putchar('.');
218 	putchar(c);
219 	if (c == '\n') {
220 	  current_lineno++;
221 	  state = START;
222 	}
223 	else
224 	  state = MIDDLE;
225       }
226       break;
227     case HAD_T:
228       if (c == 'S')
229 	state = HAD_TS;
230       else {
231 	putchar('.');
232 	putchar('T');
233 	putchar(c);
234 	if (c == '\n') {
235  	  current_lineno++;
236 	  state = START;
237 	}
238 	else
239 	  state = MIDDLE;
240       }
241       break;
242     case HAD_TS:
243       if (c == ' ' || c == '\n' || compatible_flag) {
244 	putchar('.');
245 	putchar('T');
246 	putchar('S');
247 	while (c != '\n') {
248 	  if (c == EOF) {
249 	    error("end of file at beginning of table");
250 	    return;
251 	  }
252 	  putchar(c);
253 	  c = getc(fp);
254 	}
255 	putchar('\n');
256 	current_lineno++;
257 	{
258 	  table_input input(fp);
259 	  process_table(input);
260 	  set_troff_location(current_filename, current_lineno);
261 	  if (input.ended()) {
262 	    fputs(".TE", stdout);
263 	    while ((c = getc(fp)) != '\n') {
264 	      if (c == EOF) {
265 		putchar('\n');
266 		return;
267 	      }
268 	      putchar(c);
269 	    }
270 	    putchar('\n');
271 	    current_lineno++;
272 	  }
273 	}
274 	state = START;
275       }
276       else {
277 	fputs(".TS", stdout);
278 	putchar(c);
279 	state = MIDDLE;
280       }
281       break;
282     case HAD_l:
283       if (c == 'f')
284 	state = HAD_lf;
285       else {
286 	putchar('.');
287 	putchar('l');
288 	putchar(c);
289 	if (c == '\n') {
290  	  current_lineno++;
291 	  state = START;
292 	}
293 	else
294 	  state = MIDDLE;
295       }
296       break;
297     case HAD_lf:
298       if (c == ' ' || c == '\n' || compatible_flag) {
299 	string line;
300 	while (c != EOF) {
301 	  line += c;
302 	  if (c == '\n') {
303 	    current_lineno++;
304 	    break;
305 	  }
306 	  c = getc(fp);
307 	}
308 	line += '\0';
309 	interpret_lf_args(line.contents());
310 	printf(".lf%s", line.contents());
311 	state = START;
312       }
313       else {
314 	fputs(".lf", stdout);
315 	putchar(c);
316 	state = MIDDLE;
317       }
318       break;
319     default:
320       assert(0);
321     }
322   switch(state) {
323   case START:
324     break;
325   case MIDDLE:
326     putchar('\n');
327     break;
328   case HAD_DOT:
329     fputs(".\n", stdout);
330     break;
331   case HAD_l:
332     fputs(".l\n", stdout);
333     break;
334   case HAD_T:
335     fputs(".T\n", stdout);
336     break;
337   case HAD_lf:
338     fputs(".lf\n", stdout);
339     break;
340   case HAD_TS:
341     fputs(".TS\n", stdout);
342     break;
343   }
344   if (fp != stdin)
345     fclose(fp);
346 }
347 
// Global options of one table, as collected by process_options().
struct options {
  options();

  unsigned flags;               // OR of the table:: option bits
  int linesize;                 // `linesize(n)' argument; 0 if not given
  char delim[2];                // `delim(xy)' pair; both NUL if not given
  char tab_char;                // separator between data entries
  char decimal_point_char;      // character set by `decimalpoint(c)'
};
357 
options()358 options::options()
359 : flags(0), linesize(0), tab_char('\t'), decimal_point_char('.')
360 {
361   delim[0] = delim[1] = '\0';
362 }
363 
364 // Return non-zero if p and q are the same ignoring case.
365 
strieq(const char * p,const char * q)366 int strieq(const char *p, const char *q)
367 {
368   for (; cmlower(*p) == cmlower(*q); p++, q++)
369     if (*p == '\0')
370       return 1;
371   return 0;
372 }
373 
374 // return 0 if we should give up in this table
375 
process_options(table_input & in)376 options *process_options(table_input &in)
377 {
378   options *opt = new options;
379   string line;
380   int level = 0;
381   for (;;) {
382     int c = in.get();
383     if (c == EOF) {
384       int i = line.length();
385       while (--i >= 0)
386 	in.unget(line[i]);
387       return opt;
388     }
389     if (c == '\n') {
390       in.unget(c);
391       int i = line.length();
392       while (--i >= 0)
393 	in.unget(line[i]);
394       return opt;
395     }
396     else if (c == '(')
397       level++;
398     else if (c == ')')
399       level--;
400     else if (c == ';' && level == 0) {
401       line += '\0';
402       break;
403     }
404     line += c;
405   }
406   if (line.empty())
407     return opt;
408   char *p = &line[0];
409   for (;;) {
410     while (!csalpha(*p) && *p != '\0')
411       p++;
412     if (*p == '\0')
413       break;
414     char *q = p;
415     while (csalpha(*q))
416       q++;
417     char *arg = 0;
418     if (*q != '(' && *q != '\0')
419       *q++ = '\0';
420     while (csspace(*q))
421       q++;
422     if (*q == '(') {
423       *q++ = '\0';
424       arg = q;
425       while (*q != ')' && *q != '\0')
426 	q++;
427       if (*q == '\0')
428 	error("missing `)'");
429       else
430 	*q++ = '\0';
431     }
432     if (*p == '\0') {
433       if (arg)
434 	error("argument without option");
435     }
436     else if (strieq(p, "tab")) {
437       if (!arg)
438 	error("`tab' option requires argument in parentheses");
439       else {
440 	if (arg[0] == '\0' || arg[1] != '\0')
441 	  error("argument to `tab' option must be a single character");
442 	else
443 	  opt->tab_char = arg[0];
444       }
445     }
446     else if (strieq(p, "linesize")) {
447       if (!arg)
448 	error("`linesize' option requires argument in parentheses");
449       else {
450 	if (sscanf(arg, "%d", &opt->linesize) != 1)
451 	  error("bad linesize `%s'", arg);
452 	else if (opt->linesize <= 0) {
453 	  error("linesize must be positive");
454 	  opt->linesize = 0;
455 	}
456       }
457     }
458     else if (strieq(p, "delim")) {
459       if (!arg)
460 	error("`delim' option requires argument in parentheses");
461       else if (arg[0] == '\0' || arg[1] == '\0' || arg[2] != '\0')
462 	error("argument to `delim' option must be two characters");
463       else {
464 	opt->delim[0] = arg[0];
465 	opt->delim[1] = arg[1];
466       }
467     }
468     else if (strieq(p, "center") || strieq(p, "centre")) {
469       if (arg)
470 	error("`center' option does not take an argument");
471       opt->flags |= table::CENTER;
472     }
473     else if (strieq(p, "expand")) {
474       if (arg)
475 	error("`expand' option does not take an argument");
476       opt->flags |= table::EXPAND;
477     }
478     else if (strieq(p, "box") || strieq(p, "frame")) {
479       if (arg)
480 	error("`box' option does not take an argument");
481       opt->flags |= table::BOX;
482     }
483     else if (strieq(p, "doublebox") || strieq(p, "doubleframe")) {
484       if (arg)
485 	error("`doublebox' option does not take an argument");
486       opt->flags |= table::DOUBLEBOX;
487     }
488     else if (strieq(p, "allbox")) {
489       if (arg)
490 	error("`allbox' option does not take an argument");
491       opt->flags |= table::ALLBOX;
492     }
493     else if (strieq(p, "nokeep")) {
494       if (arg)
495 	error("`nokeep' option does not take an argument");
496       opt->flags |= table::NOKEEP;
497     }
498     else if (strieq(p, "nospaces")) {
499       if (arg)
500 	error("`nospaces' option does not take an argument");
501       opt->flags |= table::NOSPACES;
502     }
503     else if (strieq(p, "decimalpoint")) {
504       if (!arg)
505 	error("`decimalpoint' option requires argument in parentheses");
506       else {
507 	if (arg[0] == '\0' || arg[1] != '\0')
508 	  error("argument to `decimalpoint' option must be a single character");
509 	else
510 	  opt->decimal_point_char = arg[0];
511       }
512     }
513     else {
514       error("unrecognised global option `%1'", p);
515       // delete opt;
516       // return 0;
517     }
518     p = q;
519   }
520   return opt;
521 }
522 
entry_modifier()523 entry_modifier::entry_modifier()
524 : vertical_alignment(CENTER), zero_width(0), stagger(0)
525 {
526   vertical_spacing.inc = vertical_spacing.val = 0;
527   point_size.inc = point_size.val = 0;
528 }
529 
~entry_modifier()530 entry_modifier::~entry_modifier()
531 {
532 }
533 
entry_format()534 entry_format::entry_format() : type(FORMAT_LEFT)
535 {
536 }
537 
entry_format(format_type t)538 entry_format::entry_format(format_type t) : type(t)
539 {
540 }
541 
debug_print() const542 void entry_format::debug_print() const
543 {
544   switch (type) {
545   case FORMAT_LEFT:
546     putc('l', stderr);
547     break;
548   case FORMAT_CENTER:
549     putc('c', stderr);
550     break;
551   case FORMAT_RIGHT:
552     putc('r', stderr);
553     break;
554   case FORMAT_NUMERIC:
555     putc('n', stderr);
556     break;
557   case FORMAT_ALPHABETIC:
558     putc('a', stderr);
559     break;
560   case FORMAT_SPAN:
561     putc('s', stderr);
562     break;
563   case FORMAT_VSPAN:
564     putc('^', stderr);
565     break;
566   case FORMAT_HLINE:
567     putc('_', stderr);
568     break;
569   case FORMAT_DOUBLE_HLINE:
570     putc('=', stderr);
571     break;
572   default:
573     assert(0);
574     break;
575   }
576   if (point_size.val != 0) {
577     putc('p', stderr);
578     if (point_size.inc > 0)
579       putc('+', stderr);
580     else if (point_size.inc < 0)
581       putc('-', stderr);
582     fprintf(stderr, "%d ", point_size.val);
583   }
584   if (vertical_spacing.val != 0) {
585     putc('v', stderr);
586     if (vertical_spacing.inc > 0)
587       putc('+', stderr);
588     else if (vertical_spacing.inc < 0)
589       putc('-', stderr);
590     fprintf(stderr, "%d ", vertical_spacing.val);
591   }
592   if (!font.empty()) {
593     putc('f', stderr);
594     put_string(font, stderr);
595     putc(' ', stderr);
596   }
597   if (!macro.empty()) {
598     putc('m', stderr);
599     put_string(macro, stderr);
600     putc(' ', stderr);
601   }
602   switch (vertical_alignment) {
603   case entry_modifier::CENTER:
604     break;
605   case entry_modifier::TOP:
606     putc('t', stderr);
607     break;
608   case entry_modifier::BOTTOM:
609     putc('d', stderr);
610     break;
611   }
612   if (zero_width)
613     putc('z', stderr);
614   if (stagger)
615     putc('u', stderr);
616 }
617 
618 struct format {
619   int nrows;
620   int ncolumns;
621   int *separation;
622   string *width;
623   char *equal;
624   entry_format **entry;
625   char **vline;
626 
627   format(int nr, int nc);
628   ~format();
629   void add_rows(int n);
630 };
631 
format(int nr,int nc)632 format::format(int nr, int nc) : nrows(nr), ncolumns(nc)
633 {
634   int i;
635   separation = ncolumns > 1 ? new int[ncolumns - 1] : 0;
636   for (i = 0; i < ncolumns-1; i++)
637     separation[i] = -1;
638   width = new string[ncolumns];
639   equal = new char[ncolumns];
640   for (i = 0; i < ncolumns; i++)
641     equal[i] = 0;
642   entry = new entry_format *[nrows];
643   for (i = 0; i < nrows; i++)
644     entry[i] = new entry_format[ncolumns];
645   vline = new char*[nrows];
646   for (i = 0; i < nrows; i++) {
647     vline[i] = new char[ncolumns+1];
648     for (int j = 0; j < ncolumns+1; j++)
649       vline[i][j] = 0;
650   }
651 }
652 
add_rows(int n)653 void format::add_rows(int n)
654 {
655   int i;
656   char **old_vline = vline;
657   vline = new char*[nrows + n];
658   for (i = 0; i < nrows; i++)
659     vline[i] = old_vline[i];
660   a_delete old_vline;
661   for (i = 0; i < n; i++) {
662     vline[nrows + i] = new char[ncolumns + 1];
663     for (int j = 0; j < ncolumns + 1; j++)
664       vline[nrows + i][j] = 0;
665   }
666   entry_format **old_entry = entry;
667   entry = new entry_format *[nrows + n];
668   for (i = 0; i < nrows; i++)
669     entry[i] = old_entry[i];
670   a_delete old_entry;
671   for (i = 0; i < n; i++)
672     entry[nrows + i] = new entry_format[ncolumns];
673   nrows += n;
674 }
675 
~format()676 format::~format()
677 {
678   a_delete separation;
679   ad_delete(ncolumns) width;
680   a_delete equal;
681   for (int i = 0; i < nrows; i++) {
682     a_delete vline[i];
683     ad_delete(ncolumns) entry[i];
684   }
685   a_delete vline;
686   a_delete entry;
687 }
688 
689 struct input_entry_format : public entry_format {
690   input_entry_format *next;
691   string width;
692   int separation;
693   int vline;
694   int pre_vline;
695   int last_column;
696   int equal;
697   input_entry_format(format_type, input_entry_format * = 0);
698   ~input_entry_format();
699   void debug_print();
700 };
701 
input_entry_format(format_type t,input_entry_format * p)702 input_entry_format::input_entry_format(format_type t, input_entry_format *p)
703 : entry_format(t), next(p)
704 {
705   separation = -1;
706   last_column = 0;
707   vline = 0;
708   pre_vline = 0;
709   equal = 0;
710 }
711 
~input_entry_format()712 input_entry_format::~input_entry_format()
713 {
714 }
715 
free_input_entry_format_list(input_entry_format * list)716 void free_input_entry_format_list(input_entry_format *list)
717 {
718   while (list) {
719     input_entry_format *tem = list;
720     list = list->next;
721     delete tem;
722   }
723 }
724 
debug_print()725 void input_entry_format::debug_print()
726 {
727   int i;
728   for (i = 0; i < pre_vline; i++)
729     putc('|', stderr);
730   entry_format::debug_print();
731   if (!width.empty()) {
732     putc('w', stderr);
733     putc('(', stderr);
734     put_string(width, stderr);
735     putc(')', stderr);
736   }
737   if (equal)
738     putc('e', stderr);
739   if (separation >= 0)
740     fprintf(stderr, "%d", separation);
741   for (i = 0; i < vline; i++)
742     putc('|', stderr);
743   if (last_column)
744     putc(',', stderr);
745 }
746 
747 // Return zero if we should give up on this table.
748 // If this is a continuation format line, current_format will be the current
749 // format line.
750 
process_format(table_input & in,options * opt,format * current_format=0)751 format *process_format(table_input &in, options *opt,
752 		       format *current_format = 0)
753 {
754   input_entry_format *list = 0;
755   int c = in.get();
756   for (;;) {
757     int pre_vline = 0;
758     int got_format = 0;
759     int got_period = 0;
760     format_type t = FORMAT_LEFT;
761     for (;;) {
762       if (c == EOF) {
763 	error("end of input while processing format");
764 	free_input_entry_format_list(list);
765 	return 0;
766       }
767       switch (c) {
768       case 'n':
769       case 'N':
770 	t = FORMAT_NUMERIC;
771 	got_format = 1;
772 	break;
773       case 'a':
774       case 'A':
775 	got_format = 1;
776 	t = FORMAT_ALPHABETIC;
777 	break;
778       case 'c':
779       case 'C':
780 	got_format = 1;
781 	t = FORMAT_CENTER;
782 	break;
783       case 'l':
784       case 'L':
785 	got_format = 1;
786 	t = FORMAT_LEFT;
787 	break;
788       case 'r':
789       case 'R':
790 	got_format = 1;
791 	t = FORMAT_RIGHT;
792 	break;
793       case 's':
794       case 'S':
795 	got_format = 1;
796 	t = FORMAT_SPAN;
797 	break;
798       case '^':
799 	got_format = 1;
800 	t = FORMAT_VSPAN;
801 	break;
802       case '_':
803       case '-':			// tbl also accepts this
804 	got_format = 1;
805 	t = FORMAT_HLINE;
806 	break;
807       case '=':
808 	got_format = 1;
809 	t = FORMAT_DOUBLE_HLINE;
810 	break;
811       case '.':
812 	got_period = 1;
813 	break;
814       case '|':
815 	pre_vline++;
816 	break;
817       case ' ':
818       case '\t':
819       case '\n':
820 	break;
821       default:
822 	if (c == opt->tab_char)
823 	  break;
824 	error("unrecognised format `%1'", char(c));
825 	free_input_entry_format_list(list);
826 	return 0;
827       }
828       if (got_period)
829 	break;
830       c = in.get();
831       if (got_format)
832 	break;
833     }
834     if (got_period)
835       break;
836     list = new input_entry_format(t, list);
837     if (pre_vline)
838       list->pre_vline = pre_vline;
839     int success = 1;
840     do {
841       switch (c) {
842       case 't':
843       case 'T':
844 	c = in.get();
845 	list->vertical_alignment = entry_modifier::TOP;
846 	break;
847       case 'd':
848       case 'D':
849 	c = in.get();
850 	list->vertical_alignment = entry_modifier::BOTTOM;
851 	break;
852       case 'u':
853       case 'U':
854 	c = in.get();
855 	list->stagger = 1;
856 	break;
857       case 'z':
858       case 'Z':
859 	c = in.get();
860 	list->zero_width = 1;
861 	break;
862       case '0':
863       case '1':
864       case '2':
865       case '3':
866       case '4':
867       case '5':
868       case '6':
869       case '7':
870       case '8':
871       case '9':
872 	{
873 	  int w = 0;
874 	  do {
875 	    w = w*10 + (c - '0');
876 	    c = in.get();
877 	  } while (c != EOF && csdigit(c));
878 	  list->separation = w;
879 	}
880 	break;
881       case 'f':
882       case 'F':
883 	do {
884 	  c = in.get();
885 	} while (c == ' ' || c == '\t');
886 	if (c == EOF) {
887 	  error("missing font name");
888 	  break;
889 	}
890 	if (c == '(') {
891 	  for (;;) {
892 	    c = in.get();
893 	    if (c == EOF || c == ' ' || c == '\t') {
894 	      error("missing `)'");
895 	      break;
896 	    }
897 	    if (c == ')') {
898 	      c = in.get();
899 	      break;
900 	    }
901 	    list->font += char(c);
902 	  }
903 	}
904 	else {
905 	  list->font = c;
906 	  char cc = c;
907 	  c = in.get();
908 	  if (!csdigit(cc)
909 	      && c != EOF && c != ' ' && c != '\t' && c != '.' && c != '\n') {
910 	    list->font += char(c);
911 	    c = in.get();
912 	  }
913 	}
914 	break;
915       case 'x':
916       case 'X':
917 	do {
918 	  c = in.get();
919 	} while (c == ' ' || c == '\t');
920 	if (c == EOF) {
921 	  error("missing macro name");
922 	  break;
923 	}
924 	if (c == '(') {
925 	  for (;;) {
926 	    c = in.get();
927 	    if (c == EOF || c == ' ' || c == '\t') {
928 	      error("missing `)'");
929 	      break;
930 	    }
931 	    if (c == ')') {
932 	      c = in.get();
933 	      break;
934 	    }
935 	    list->macro += char(c);
936 	  }
937 	}
938 	else {
939 	  list->macro = c;
940 	  char cc = c;
941 	  c = in.get();
942 	  if (!csdigit(cc)
943 	      && c != EOF && c != ' ' && c != '\t' && c != '.' && c != '\n') {
944 	    list->macro += char(c);
945 	    c = in.get();
946 	  }
947 	}
948 	break;
949       case 'v':
950       case 'V':
951 	c = in.get();
952 	list->vertical_spacing.val = 0;
953 	list->vertical_spacing.inc = 0;
954 	if (c == '+' || c == '-') {
955 	  list->vertical_spacing.inc = (c == '+' ? 1 : -1);
956 	  c = in.get();
957 	}
958 	if (c == EOF || !csdigit(c)) {
959 	  error("`v' modifier must be followed by number");
960 	  list->vertical_spacing.inc = 0;
961 	}
962 	else {
963 	  do {
964 	    list->vertical_spacing.val *= 10;
965 	    list->vertical_spacing.val += c - '0';
966 	    c = in.get();
967 	  } while (c != EOF && csdigit(c));
968 	}
969 	if (list->vertical_spacing.val > MAX_VERTICAL_SPACING
970 	    || list->vertical_spacing.val < -MAX_VERTICAL_SPACING) {
971 	  error("unreasonable vertical spacing");
972 	  list->vertical_spacing.val = 0;
973 	  list->vertical_spacing.inc = 0;
974 	}
975 	break;
976       case 'p':
977       case 'P':
978 	c = in.get();
979 	list->point_size.val = 0;
980 	list->point_size.inc = 0;
981 	if (c == '+' || c == '-') {
982 	  list->point_size.inc = (c == '+' ? 1 : -1);
983 	  c = in.get();
984 	}
985 	if (c == EOF || !csdigit(c)) {
986 	  error("`p' modifier must be followed by number");
987 	  list->point_size.inc = 0;
988 	}
989 	else {
990 	  do {
991 	    list->point_size.val *= 10;
992 	    list->point_size.val += c - '0';
993 	    c = in.get();
994 	  } while (c != EOF && csdigit(c));
995 	}
996 	if (list->point_size.val > MAX_POINT_SIZE
997 	    || list->point_size.val < -MAX_POINT_SIZE) {
998 	  error("unreasonable point size");
999 	  list->point_size.val = 0;
1000 	  list->point_size.inc = 0;
1001 	}
1002 	break;
1003       case 'w':
1004       case 'W':
1005 	c = in.get();
1006 	while (c == ' ' || c == '\t')
1007 	  c = in.get();
1008 	if (c == '(') {
1009 	  list->width = "";
1010 	  c = in.get();
1011 	  while (c != ')') {
1012 	    if (c == EOF || c == '\n') {
1013 	      error("missing `)'");
1014 	      free_input_entry_format_list(list);
1015 	      return 0;
1016 	    }
1017 	    list->width += c;
1018 	    c = in.get();
1019 	  }
1020 	  c = in.get();
1021 	}
1022 	else {
1023 	  if (c == '+' || c == '-') {
1024 	    list->width = char(c);
1025 	    c = in.get();
1026 	  }
1027 	  else
1028 	    list->width = "";
1029 	  if (c == EOF || !csdigit(c))
1030 	    error("bad argument for `w' modifier");
1031 	  else {
1032 	    do {
1033 	      list->width += char(c);
1034 	      c = in.get();
1035 	    } while (c != EOF && csdigit(c));
1036 	  }
1037 	}
1038 	break;
1039       case 'e':
1040       case 'E':
1041 	c = in.get();
1042 	list->equal++;
1043 	break;
1044       case '|':
1045 	c = in.get();
1046 	list->vline++;
1047 	break;
1048       case 'B':
1049       case 'b':
1050 	c = in.get();
1051 	list->font = "B";
1052 	break;
1053       case 'I':
1054       case 'i':
1055 	c = in.get();
1056 	list->font = "I";
1057 	break;
1058       case ' ':
1059       case '\t':
1060 	c = in.get();
1061 	break;
1062       default:
1063 	if (c == opt->tab_char)
1064 	  c = in.get();
1065 	else
1066 	  success = 0;
1067 	break;
1068       }
1069     } while (success);
1070     if (list->vline > 2) {
1071       list->vline = 2;
1072       error("more than 2 vertical bars between key letters");
1073     }
1074     if (c == '\n' || c == ',') {
1075       c = in.get();
1076       list->last_column = 1;
1077     }
1078   }
1079   if (c == '.') {
1080     do {
1081       c = in.get();
1082     } while (c == ' ' || c == '\t');
1083     if (c != '\n') {
1084       error("`.' not last character on line");
1085       free_input_entry_format_list(list);
1086       return 0;
1087     }
1088   }
1089   if (!list) {
1090     error("no format");
1091     free_input_entry_format_list(list);
1092     return 0;
1093   }
1094   list->last_column = 1;
1095   // now reverse the list so that the first row is at the beginning
1096   input_entry_format *rev = 0;
1097   while (list != 0) {
1098     input_entry_format *tem = list->next;
1099     list->next = rev;
1100     rev = list;
1101     list = tem;
1102   }
1103   list = rev;
1104   input_entry_format *tem;
1105 
1106 #if 0
1107   for (tem = list; tem; tem = tem->next)
1108     tem->debug_print();
1109   putc('\n', stderr);
1110 #endif
1111   // compute number of columns and rows
1112   int ncolumns = 0;
1113   int nrows = 0;
1114   int col = 0;
1115   for (tem = list; tem; tem = tem->next) {
1116     if (tem->last_column) {
1117       if (col >= ncolumns)
1118 	ncolumns = col + 1;
1119       col = 0;
1120       nrows++;
1121     }
1122     else
1123       col++;
1124   }
1125   int row;
1126   format *f;
1127   if (current_format) {
1128     if (ncolumns > current_format->ncolumns) {
1129       error("cannot increase the number of columns in a continued format");
1130       free_input_entry_format_list(list);
1131       return 0;
1132     }
1133     f = current_format;
1134     row = f->nrows;
1135     f->add_rows(nrows);
1136   }
1137   else {
1138     f = new format(nrows, ncolumns);
1139     row = 0;
1140   }
1141   col = 0;
1142   for (tem = list; tem; tem = tem->next) {
1143     f->entry[row][col] = *tem;
1144     if (col < ncolumns-1) {
1145       // use the greatest separation
1146       if (tem->separation > f->separation[col]) {
1147 	if (current_format)
1148 	  error("cannot change column separation in continued format");
1149 	else
1150 	  f->separation[col] = tem->separation;
1151       }
1152     }
1153     else if (tem->separation >= 0)
1154       error("column separation specified for last column");
1155     if (tem->equal && !f->equal[col]) {
1156       if (current_format)
1157 	error("cannot change which columns are equal in continued format");
1158       else
1159 	f->equal[col] = 1;
1160     }
1161     if (!tem->width.empty()) {
1162       // use the last width
1163       if (!f->width[col].empty() && f->width[col] != tem->width)
1164 	error("multiple widths for column %1", col+1);
1165       f->width[col] = tem->width;
1166     }
1167     if (tem->pre_vline) {
1168       assert(col == 0);
1169       f->vline[row][col] = tem->pre_vline;
1170     }
1171     f->vline[row][col+1] = tem->vline;
1172     if (tem->last_column) {
1173       row++;
1174       col = 0;
1175     }
1176     else
1177       col++;
1178   }
1179   free_input_entry_format_list(list);
1180   for (col = 0; col < ncolumns; col++) {
1181     entry_format *e = f->entry[f->nrows-1] + col;
1182     if (e->type != FORMAT_HLINE
1183 	&& e->type != FORMAT_DOUBLE_HLINE
1184 	&& e->type != FORMAT_SPAN)
1185       break;
1186   }
1187   if (col >= ncolumns) {
1188     error("last row of format is all lines");
1189     delete f;
1190     return 0;
1191   }
1192   return f;
1193 }
1194 
process_data(table_input & in,format * f,options * opt)1195 table *process_data(table_input &in, format *f, options *opt)
1196 {
1197   char tab_char = opt->tab_char;
1198   int ncolumns = f->ncolumns;
1199   int current_row = 0;
1200   int format_index = 0;
1201   int give_up = 0;
1202   enum { DATA_INPUT_LINE, TROFF_INPUT_LINE, SINGLE_HLINE, DOUBLE_HLINE } type;
1203   table *tbl = new table(ncolumns, opt->flags, opt->linesize,
1204 			 opt->decimal_point_char);
1205   if (opt->delim[0] != '\0')
1206     tbl->set_delim(opt->delim[0], opt->delim[1]);
1207   for (;;) {
1208     // first determine what type of line this is
1209     int c = in.get();
1210     if (c == EOF)
1211       break;
1212     if (c == '.') {
1213       int d = in.get();
1214       if (d != EOF && csdigit(d)) {
1215 	in.unget(d);
1216 	type = DATA_INPUT_LINE;
1217       }
1218       else {
1219 	in.unget(d);
1220 	type = TROFF_INPUT_LINE;
1221       }
1222     }
1223     else if (c == '_' || c == '=') {
1224       int d = in.get();
1225       if (d == '\n') {
1226 	if (c == '_')
1227 	  type = SINGLE_HLINE;
1228 	else
1229 	  type = DOUBLE_HLINE;
1230       }
1231       else {
1232 	in.unget(d);
1233 	type = DATA_INPUT_LINE;
1234       }
1235     }
1236     else {
1237       type = DATA_INPUT_LINE;
1238     }
1239     switch (type) {
1240     case DATA_INPUT_LINE:
1241       {
1242 	string input_entry;
1243 	if (format_index >= f->nrows)
1244 	  format_index = f->nrows - 1;
1245 	// A format row that is all lines doesn't use up a data line.
1246 	while (format_index < f->nrows - 1) {
1247 	  int cnt;
1248 	  for (cnt = 0; cnt < ncolumns; cnt++) {
1249 	    entry_format *e = f->entry[format_index] + cnt;
1250 	    if (e->type != FORMAT_HLINE
1251 		&& e->type != FORMAT_DOUBLE_HLINE
1252 		// Unfortunately tbl treats a span as needing data.
1253 		// && e->type != FORMAT_SPAN
1254 		)
1255 	      break;
1256 	  }
1257 	  if (cnt < ncolumns)
1258 	    break;
1259 	  for (cnt = 0; cnt < ncolumns; cnt++)
1260 	    tbl->add_entry(current_row, cnt, input_entry,
1261 			   f->entry[format_index] + cnt, current_filename,
1262 			   current_lineno);
1263 	  tbl->add_vlines(current_row, f->vline[format_index]);
1264 	  format_index++;
1265 	  current_row++;
1266 	}
1267 	entry_format *line_format = f->entry[format_index];
1268 	int col = 0;
1269 	int row_comment = 0;
1270 	for (;;) {
1271 	  if (c == tab_char || c == '\n') {
1272 	    int ln = current_lineno;
1273 	    if (c == '\n')
1274 	      --ln;
1275 	    if ((opt->flags & table::NOSPACES))
1276 	      input_entry.remove_spaces();
1277 	    while (col < ncolumns
1278 		   && line_format[col].type == FORMAT_SPAN) {
1279 	      tbl->add_entry(current_row, col, "", &line_format[col],
1280 			     current_filename, ln);
1281 	      col++;
1282 	    }
1283 	    if (c == '\n' && input_entry.length() == 2
1284 		&& input_entry[0] == 'T' && input_entry[1] == '{') {
1285 	      input_entry = "";
1286 	      ln++;
1287 	      enum {
1288 		START, MIDDLE, GOT_T, GOT_RIGHT_BRACE, GOT_DOT,
1289 		GOT_l, GOT_lf, END
1290 	      } state = START;
1291 	      while (state != END) {
1292 		c = in.get();
1293 		if (c == EOF)
1294 		  break;
1295 		switch (state) {
1296 		case START:
1297 		  if (c == 'T')
1298 		    state = GOT_T;
1299 		  else if (c == '.')
1300 		    state = GOT_DOT;
1301 		  else {
1302 		    input_entry += c;
1303 		    if (c != '\n')
1304 		      state = MIDDLE;
1305 		  }
1306 		  break;
1307 		case GOT_T:
1308 		  if (c == '}')
1309 		    state = GOT_RIGHT_BRACE;
1310 		  else {
1311 		    input_entry += 'T';
1312 		    input_entry += c;
1313 		    state = c == '\n' ? START : MIDDLE;
1314 		  }
1315 		  break;
1316 		case GOT_DOT:
1317 		  if (c == 'l')
1318 		    state = GOT_l;
1319 		  else {
1320 		    input_entry += '.';
1321 		    input_entry += c;
1322 		    state = c == '\n' ? START : MIDDLE;
1323 		  }
1324 		  break;
1325 		case GOT_l:
1326 		  if (c == 'f')
1327 		    state = GOT_lf;
1328 		  else {
1329 		    input_entry += ".l";
1330 		    input_entry += c;
1331 		    state = c == '\n' ? START : MIDDLE;
1332 		  }
1333 		  break;
1334 		case GOT_lf:
1335 		  if (c == ' ' || c == '\n' || compatible_flag) {
1336 		    string args;
1337 		    input_entry += ".lf";
1338 		    while (c != EOF) {
1339 		      args += c;
1340 		      if (c == '\n')
1341 			break;
1342 		      c = in.get();
1343 		    }
1344 		    args += '\0';
1345 		    interpret_lf_args(args.contents());
1346 		    // remove the '\0'
1347 		    args.set_length(args.length() - 1);
1348 		    input_entry += args;
1349 		    state = START;
1350 		  }
1351 		  else {
1352 		    input_entry += ".lf";
1353 		    input_entry += c;
1354 		    state = MIDDLE;
1355 		  }
1356 		  break;
1357 		case GOT_RIGHT_BRACE:
1358 		  if ((opt->flags & table::NOSPACES)) {
1359 		    while (c == ' ')
1360 		      c = in.get();
1361 		    if (c == EOF)
1362 		      break;
1363 		  }
1364 		  if (c == '\n' || c == tab_char)
1365 		    state = END;
1366 		  else {
1367 		    input_entry += 'T';
1368 		    input_entry += '}';
1369 		    input_entry += c;
1370 		    state = MIDDLE;
1371 		  }
1372 		  break;
1373 		case MIDDLE:
1374 		  if (c == '\n')
1375 		    state = START;
1376 		  input_entry += c;
1377 		  break;
1378 		case END:
1379 		default:
1380 		  assert(0);
1381 		}
1382 	      }
1383 	      if (c == EOF) {
1384 		error("end of data in middle of text block");
1385 		give_up = 1;
1386 		break;
1387 	      }
1388 	    }
1389 	    if (col >= ncolumns) {
1390 	      if (!input_entry.empty()) {
1391 		if (input_entry.length() >= 2
1392 		    && input_entry[0] == '\\'
1393 		    && input_entry[1] == '"')
1394 		  row_comment = 1;
1395 		else if (!row_comment) {
1396 		  if (c == '\n')
1397 		    in.unget(c);
1398 		  input_entry += '\0';
1399 		  error("excess data entry `%1' discarded",
1400 			input_entry.contents());
1401 		  if (c == '\n')
1402 		    (void)in.get();
1403 		}
1404 	      }
1405 	    }
1406 	    else
1407 	      tbl->add_entry(current_row, col, input_entry,
1408 			     &line_format[col], current_filename, ln);
1409 	    col++;
1410 	    if (c == '\n')
1411 	      break;
1412 	    input_entry = "";
1413 	  }
1414 	  else
1415 	    input_entry += c;
1416 	  c = in.get();
1417 	  if (c == EOF)
1418 	    break;
1419 	}
1420 	if (give_up)
1421 	  break;
1422 	input_entry = "";
1423 	for (; col < ncolumns; col++)
1424 	  tbl->add_entry(current_row, col, input_entry, &line_format[col],
1425 			 current_filename, current_lineno - 1);
1426 	tbl->add_vlines(current_row, f->vline[format_index]);
1427 	current_row++;
1428 	format_index++;
1429       }
1430       break;
1431     case TROFF_INPUT_LINE:
1432       {
1433 	string line;
1434 	int ln = current_lineno;
1435 	for (;;) {
1436 	  line += c;
1437 	  if (c == '\n')
1438 	    break;
1439 	  c = in.get();
1440 	  if (c == EOF) {
1441 	    break;
1442 	  }
1443 	}
1444 	tbl->add_text_line(current_row, line, current_filename, ln);
1445 	if (line.length() >= 4
1446 	    && line[0] == '.' && line[1] == 'T' && line[2] == '&') {
1447 	  format *newf = process_format(in, opt, f);
1448 	  if (newf == 0)
1449 	    give_up = 1;
1450 	  else
1451 	    f = newf;
1452 	}
1453 	if (line.length() >= 3
1454 	    && line[0] == '.' && line[1] == 'l' && line[2] == 'f') {
1455 	  line += '\0';
1456 	  interpret_lf_args(line.contents() + 3);
1457 	}
1458       }
1459       break;
1460     case SINGLE_HLINE:
1461       tbl->add_single_hline(current_row);
1462       break;
1463     case DOUBLE_HLINE:
1464       tbl->add_double_hline(current_row);
1465       break;
1466     default:
1467       assert(0);
1468     }
1469     if (give_up)
1470       break;
1471   }
1472   if (!give_up && current_row == 0) {
1473     error("no real data");
1474     give_up = 1;
1475   }
1476   if (give_up) {
1477     delete tbl;
1478     return 0;
1479   }
1480   // Do this here rather than at the beginning in case continued formats
1481   // change it.
1482   int i;
1483   for (i = 0; i < ncolumns - 1; i++)
1484     if (f->separation[i] >= 0)
1485       tbl->set_column_separation(i, f->separation[i]);
1486   for (i = 0; i < ncolumns; i++)
1487     if (!f->width[i].empty())
1488       tbl->set_minimum_width(i, f->width[i]);
1489   for (i = 0; i < ncolumns; i++)
1490     if (f->equal[i])
1491       tbl->set_equal_column(i);
1492   return tbl;
1493 }
1494 
process_table(table_input & in)1495 void process_table(table_input &in)
1496 {
1497   options *opt = 0;
1498   format *form = 0;
1499   table *tbl = 0;
1500   if ((opt = process_options(in)) != 0
1501       && (form = process_format(in, opt)) != 0
1502       && (tbl = process_data(in, form, opt)) != 0) {
1503     tbl->print();
1504     delete tbl;
1505   }
1506   else {
1507     error("giving up on this table");
1508     while (in.get() != EOF)
1509       ;
1510   }
1511   delete opt;
1512   delete form;
1513   if (!in.ended())
1514     error("premature end of file");
1515 }
1516 
usage(FILE * stream)1517 static void usage(FILE *stream)
1518 {
1519   fprintf(stream, "usage: %s [ -vC ] [ files... ]\n", program_name);
1520 }
1521 
main(int argc,char ** argv)1522 int main(int argc, char **argv)
1523 {
1524   program_name = argv[0];
1525   static char stderr_buf[BUFSIZ];
1526   setbuf(stderr, stderr_buf);
1527   int opt;
1528   static const struct option long_options[] = {
1529     { "help", no_argument, 0, CHAR_MAX + 1 },
1530     { "version", no_argument, 0, 'v' },
1531     { NULL, 0, 0, 0 }
1532   };
1533   while ((opt = getopt_long(argc, argv, "vCT:", long_options, NULL)) != EOF)
1534     switch (opt) {
1535     case 'C':
1536       compatible_flag = 1;
1537       break;
1538     case 'v':
1539       {
1540 	printf("GNU tbl (groff) version %s\n", Version_string);
1541 	exit(0);
1542 	break;
1543       }
1544     case 'T':
1545       // I'm sick of getting bug reports from IRIX users
1546       break;
1547     case CHAR_MAX + 1: // --help
1548       usage(stdout);
1549       exit(0);
1550       break;
1551     case '?':
1552       usage(stderr);
1553       exit(1);
1554       break;
1555     default:
1556       assert(0);
1557     }
1558   printf(".if !\\n(.g .ab GNU tbl requires GNU troff.\n"
1559 	 ".if !dTS .ds TS\n"
1560 	 ".if !dTE .ds TE\n");
1561   if (argc > optind) {
1562     for (int i = optind; i < argc; i++)
1563       if (argv[i][0] == '-' && argv[i][1] == '\0') {
1564 	current_filename = "-";
1565 	current_lineno = 1;
1566 	printf(".lf 1 -\n");
1567 	process_input_file(stdin);
1568       }
1569       else {
1570 	errno = 0;
1571 	FILE *fp = fopen(argv[i], "r");
1572 	if (fp == 0)
1573 	  fatal("can't open `%1': %2", argv[i], strerror(errno));
1574 	else {
1575 	  current_lineno = 1;
1576 	  current_filename = argv[i];
1577 	  printf(".lf 1 %s\n", current_filename);
1578 	  process_input_file(fp);
1579 	}
1580       }
1581   }
1582   else {
1583     current_filename = "-";
1584     current_lineno = 1;
1585     printf(".lf 1 -\n");
1586     process_input_file(stdin);
1587   }
1588   if (ferror(stdout) || fflush(stdout) < 0)
1589     fatal("output error");
1590   return 0;
1591 }
1592 
1593