xref: /inferno-os/appl/lib/parseman.b (revision 206fe115321515fd30686e46f830e26205053831)
1implement Parseman;
2
3include "sys.m";
4	sys: Sys;
5
6include "bufio.m";
7	bufio: Bufio;
8	Iobuf: import bufio;
9
10include "man.m";
11
# Pseudo font number used only inside this file for literal text.
# addtext() rewrites it to FONT_BOLD (adding `' quotes when ql is set)
# before the text is placed on a line.
FONT_LITERAL: con -1;
13
# Load the Sys and Bufio modules used by this file.
# Returns nil on success, or a diagnostic string if Bufio cannot be loaded.
init(): string
{
	sys = load Sys Sys->PATH;
	bufio = load Bufio Bufio->PATH;
	if (bufio != nil)
		return nil;
	return sys->sprint("cannot load module: %r");
}
22
# Formatting state carried through the parsing of one manual page.
# T is the viewer type; it must provide textwidth(), which is used to
# measure text against metrics.pagew when filling lines.
#
# NOTE: the field order is significant - parseman() builds the value
# with a positional constructor call.
ParseState: adt[T]
	for{
	T =>
		textwidth: fn(t: self T, text: Text): int;
	}{
	metrics: Metrics;	# page and font measurements supplied by the caller
	ql: int;		# quote Literal text: addtext() wraps FONT_LITERAL text in `'
	margin: int;		# added to indent when a line is started; changed by RS/RE
	mstack: list of int;	# margins saved by RS, restored by RE
	istack: list of int;	# prevailing indents saved by RS, restored by RE
	indent: int;		# indent for the line being built; brk() resets it to prevailindent
	ntlsetindent: int;	# copy prevailindent to indent on n.t.l (the next text line)
	prevailindent: int;	# indent that brk() restores
	curfont: int;		# font given to new text
	curattr: int;		# attribute bits (e.g. ATTR_SMALL) given to new text
	verbatim: int;		# non-zero between nf/EX and fi/EE: no wrapping, break after each line
	pspace: int;		# number of blank lines paragraph() emits
	curline: list of (int, Text);	# most recent first
	curwidth: int;		# width already used on curline, including its indent
	newpara: int;		# non-zero until text is added after a paragraph break
	heading: int;		# heading level stored in new Text: 1 inside SH, 2 inside SS, else 0
	igto: string;		# while set, parseman() skips input up to the line equal to this
	link: string;		# link stored in new Text; set only while IR is processed
	viewer: T;		# provides textwidth()
	setline: chan of list of (int, Text);	# finished lines are sent here

	# addstring() is simply an addtext() of the current font
	addstring: fn(state: self ref ParseState, str: string);
	addtext: fn(state: self ref ParseState, t: list of Text);
	brk: fn(state: self ref ParseState);
	paragraph: fn(state: self ref ParseState);
};
55
# Read a manual page in man macro format from fd and format it.
#
# Each finished output line is sent on setline as a list of
# (indent, Text) pairs; a nil list is sent last to mark the end.
#	metrics	page and font measurements used for filling and indents
#	ql	non-zero to have literal text quoted with `'
#	viewer	supplies textwidth() for measuring text
parseman[T](fd: ref Sys->FD, metrics: Metrics, ql: int, viewer: T, setline: chan of list of (int, Text))
	for{
	T =>
		textwidth: fn(t: self T, text: Text): int;
	}
{
	iob := bufio->fopen(fd, Sys->OREAD);
	if (iob == nil) {
		# nothing can be read; still send the end marker so that
		# the receiver is not left waiting for lines
		setline <-= nil;
		return;
	}
	state := ref ParseState[T](metrics, ql, 0, nil, nil, 0, 0, metrics.indent, FONT_ROMAN, 0, 0, 1, nil, 0, 1, 0, "", nil, viewer, setline);
	while ((l := iob.gets('\n')) != nil) {
		if (l[len l - 1] == '\n')
			l = l[0: len l - 1];
		# inside an .ig block: skip everything up to the terminating line
		if (state.igto != nil && state.igto != l)
			continue;
		state.igto = nil;
		parseline(state, l);
	}

	# finish the page with a one-line gap and a centred footer
	state.pspace = 1;
	state.paragraph();
	footer := Text(FONT_ROMAN, 0, "Inferno Manual", 0, nil);
	textw := state.viewer.textwidth(footer);
#should do 'center' in addtext (state.justify = CENTER)
	state.indent = (state.metrics.pagew - textw) / 2;
	state.addtext(footer::nil);
	state.brk();
	setline <-= nil;
}
83
# Process one input line (without its newline).
# An empty line is a blank output line in verbatim mode and a paragraph
# break otherwise. A line starting with '.' or '\'' is a macro request;
# anything else is text to be added to the current output line.
parseline[T](state: ref ParseState[T], t: string)
	for{
	T =>
		textwidth: fn(t: self T, text: Text): int;
	}
{
	if (len t == 0) {
		if (!state.verbatim) {
			state.paragraph();
			return;
		}
		empty := Text(state.curfont, state.curattr, "", 0, "");
		state.setline <-= (0, empty) :: nil;
		return;
	}

	# the flag applies to this line only; remember it and clear it
	# before the line is processed, since a macro may set it again
	pending := state.ntlsetindent;
	state.ntlsetindent = 0;

	case t[0] {
	'.' or '\'' =>
		parsemacro(state, t[1:]);
	* =>
		state.addtext(parsetext(state, t));
		if (state.verbatim)
			state.brk();
	}

	if (!pending)
		return;
	# switch to the prevailing indent; if what is already on the line
	# reaches past it, start a new line
	state.indent = state.prevailindent;
	if (state.curwidth + state.metrics.en > state.indent + state.margin)
		state.brk();
}
113
# Split the argument part of a request into parameters.
# t is the request without its leading control character and n is the
# index just past the macro name. Blanks and tabs separate parameters
# unless they are inside double quotes; the quote characters are dropped.
# A backslash is kept and the character following it is copied as is,
# so it neither separates parameters nor toggles quoting.
macroparams(t: string, n: int): list of string
{
	rev: list of string;	# parameters found so far, most recent first
	quote := 0;
	param := 0;
	esc := 0;
	p := "";
	for (; n < len t; n++) {
		if (esc)
			esc = 0;
		else {
			case t[n] {
			' ' or '\t' =>
				if (!quote) {
					if (param) {
						rev = p :: rev;
						p = "";
						param = 0;
					}
					continue;
				}
			'"' =>
				# a quote always starts a parameter, so "" gives an empty one
				param = 1;
				quote = !quote;
				continue;
			'\\' =>
				esc = 1;
			}
		}
		param = 1;
		p[len p] = t[n];
	}
	if (param)
		rev = p :: rev;

	params: list of string;
	for (; rev != nil; rev = tl rev)
		params = hd rev :: params;
	return params;
}

# B and I: select font; if there are parameters, set each of them in that
# font and go back to roman, otherwise leave the font for following text.
fontwords[T](state: ref ParseState[T], font: int, params: list of string)
	for{
	T =>
		textwidth: fn(t: self T, text: Text): int;
	}
{
	state.curfont = font;
	if (params == nil)
		return;
	for (; params != nil; params = tl params)
		for (textl := parsetext(state, hd params); textl != nil; textl = tl textl)
			state.addtext(hd textl :: nil);
	state.curfont = FONT_ROMAN;
}

# LR and RL: set the first parameter in font f1 and the second, if any, in f2.
textpair[T](state: ref ParseState[T], f1, f2: int, params: list of string)
	for{
	T =>
		textwidth: fn(t: self T, text: Text): int;
	}
{
	if (params == nil)
		return;
	first := Text(f1, state.curattr, hd params, 0, nil);
	t: list of Text;
	params = tl params;
	if (params == nil)
		t = first :: nil;
	else {
		second := Text(f2, state.curattr, hd params, 0, nil);
		t = first :: second :: nil;
	}
	state.addtext(t);
}

# SH and SS: start a new paragraph and set params as a heading of the
# given level at the given indent, on a line of its own.
sectionhead[T](state: ref ParseState[T], level: int, indent: int, params: list of string)
	for{
	T =>
		textwidth: fn(t: self T, text: Text): int;
	}
{
	state.paragraph();
	state.prevailindent = state.indent;
	state.curfont = FONT_ROMAN;
	state.curattr = 0;
	state.indent = indent;
	state.heading = level;

	for (; params != nil; params = tl params)
		state.addstring(hd params);

	state.heading = 0;
	state.brk();
	# the heading counts as a paragraph break: suppress the blank
	# line before the text that follows it
	state.newpara = 1;
	state.pspace = 1;
}

# Interpret one macro request. t is the input line without its leading
# '.' or '\''. The macro name is at most the first two characters (fewer
# if a blank or tab comes first); the rest is split into parameters.
# Unknown macros are ignored. In verbatim mode every request ends the
# current output line.
parsemacro[T](state: ref ParseState[T], t: string)
	for{
	T =>
		textwidth: fn(t: self T, text: Text): int;
	}
{
	n := 0;
	while (n < len t && n < 2 && t[n] != ' ' && t[n] != '\t')
		n++;
	macro := t[0:n];
	params := macroparams(t, n);

	case macro {
		"ig" =>
			# parseman() discards input until a line equal to igto
			igto := "..";
			if (params != nil)
				igto = "." + hd params;
			state.brk();
			state.igto = igto;
		"sp" =>
			sp := "1";
			if (params != nil)
				sp = hd params;
			d := tval(state.metrics, sp, 'v');
			gap := d / state.metrics.V;
			if (gap < 1)
				gap = 1;
			# paragraph() emits blank lines only when text has been added
			# since the last paragraph break, so repeated calls do not stack
			while (gap--)
				state.paragraph();
		"br" =>
			state.brk();
		"nf" =>
			state.verbatim = 1;
		"fi" =>
			state.verbatim = 0;
		"ti" =>
			state.brk();
			#i := 0;
			#if(params != nil)
			#	i = tval(state.metrics, hd params, 'n');
			#state.ntlsetindent = 1;
			#state.prevailindent = i;
		"in" =>
			state.brk();
			#i := 0;
			#if(params != nil)
			#	i = tval(state.metrics, hd params, 'n');
			#state.indent = i;
			#state.prevailindent = state.indent;
		"1C" =>
			state.brk();
			# not implemented
		"2C" =>
			state.brk();
			# not implemented
		"BI" =>
			altattr(state, FONT_BOLD, FONT_ITALIC, params);
		"BR" =>
			altattr(state, FONT_BOLD, FONT_ROMAN, params);
		"IB" =>
			altattr(state, FONT_ITALIC, FONT_BOLD, params);
		"IR" =>
			# need to determine link if params of valid form
			state.link = convlink(params);
			altattr(state, FONT_ITALIC, FONT_ROMAN, params);
			state.link = nil;
		"RB" =>
			altattr(state, FONT_ROMAN, FONT_BOLD, params);
		"RI" =>
			altattr(state, FONT_ROMAN, FONT_ITALIC, params);
		"B" =>
			fontwords(state, FONT_BOLD, params);
		"I" =>
			fontwords(state, FONT_ITALIC, params);
		"SM" =>
			state.curattr |= ATTR_SMALL;
			if (params != nil) {
				for (; params != nil; params = tl params)
					state.addstring(hd params);
				state.curattr &= ~ATTR_SMALL;
			}
		"L" =>
			state.curfont = FONT_LITERAL;
			if (params != nil) {
				# join the parameters with single blanks inside `'
				str := "`" + hd params;
				for (pl := tl params; pl != nil; pl = tl pl)
					str += " " + hd pl;
				str += "'";
				state.addstring(str);
				state.curfont = FONT_ROMAN;
			}
		"LR" =>
			textpair(state, FONT_LITERAL, FONT_ROMAN, params);
		"RL" =>
			textpair(state, FONT_ROMAN, FONT_LITERAL, params);
		"DT" =>
			# not yet supported
			;
		"EE" =>
			state.brk();
			state.verbatim = 0;
			state.curfont = FONT_ROMAN;
		"EX" =>
			state.brk();
			state.verbatim = 1;
			state.curfont = FONT_BOLD;
		"HP" =>
			state.paragraph();
			i := state.metrics.indent;
			if (params != nil)
				i = tval(state.metrics, hd params, 'n');
			# hanging paragraph: the first line stays at the current indent
			state.prevailindent = state.indent + i;
		"IP" =>
			state.paragraph();
			i := state.metrics.indent;
			if (params != nil) {
				tag := hd params;
				params = tl params;
				# the tag is placed before the indent is increased
				state.addtext(parsetext(state, tag));
				if (params != nil)
					i = tval(state.metrics, hd params, 'n');
			}
			state.indent = state.metrics.indent + i;
			state.prevailindent = state.indent;
		"PD" =>
			state.pspace = 1;
			if (params != nil) {
				v := tval(state.metrics, hd params, 'v') / state.metrics.V;
				state.pspace = v;
			}
		"LP" or "PP" =>
			state.paragraph();
			state.prevailindent = state.indent;
		"RE" =>
			state.brk();
			# an RE without a matching RS is ignored
			if (state.mstack != nil && state.istack != nil) {
				state.margin = hd state.mstack;
				state.mstack = tl state.mstack;
				state.prevailindent = hd state.istack;
				state.indent = state.prevailindent;
				state.istack = tl state.istack;
			}
		"RS" =>
			state.brk();
			i := state.prevailindent - state.metrics.indent;
			if (params != nil)
				i = tval(state.metrics, hd params, 'n');
			state.mstack = state.margin :: state.mstack;
			state.istack = state.prevailindent :: state.istack;
			state.margin += i;
			state.indent = 2 * state.metrics.indent;
			state.prevailindent = state.indent;
		"SH" =>
			state.verbatim = 0;
			sectionhead(state, 1, 0, params);
		"SS" =>
			sectionhead(state, 2, state.metrics.ssindent, params);
		"TF" =>
			state.brk();
			state.pspace = 0;
			i := state.metrics.indent;
			if (params != nil) {
				# make the indent wide enough for the given tag in bold
				str := hd params;
				text := Text(FONT_BOLD, 0, str, 0, nil);
				w := state.viewer.textwidth(text) + 2*state.metrics.em;
				if (w > i)
					i = w;
			}
			state.indent = state.metrics.indent;
			state.prevailindent = state.indent + i;
		"TH" =>
			state.brk();
			if (len params >= 2) {
				# title(section) at both the left and right edge of one line
				str := hd params + "(" + hd tl params + ")";
				txt := Text(FONT_ROMAN, 0, str, 0, nil);
				txtw := state.viewer.textwidth(txt);
				state.indent = 0;
				state.addtext(txt::nil);
				state.indent = state.metrics.pagew - txtw;
				state.addtext(txt::nil);
				state.indent = 0;
				state.brk();
			}
		"TP" =>
			state.paragraph();
			if (state.prevailindent == state.metrics.indent)
				state.prevailindent += state.metrics.indent;
			state.indent = state.metrics.indent;
			# the tag is on the next text line; parseline() switches
			# to prevailindent once that line has been added
			state.ntlsetindent = 1;
			if (params != nil) {
				i := tval(state.metrics, hd params, 'n');
				if (i == 0)
					i = state.metrics.indent;
				state.prevailindent = state.indent + i;
			}
		* =>
			;
	}
	if (state.verbatim)
		state.brk();
}
421
# Map the two-character name of a \(xx special character to the rune
# used to display it; returns -1 for names that are not supported.
specialchar(name: string): int
{
	case name {
	"bu" =>
		return '•';
	"em" =>
		return '—';
	"mi" =>
		return '-';
	"mu" =>
		return '×';
	"*m" =>
		return 'µ';
	"*G" =>
		return 'Γ';
	"*p" =>
		return 'π';
	"*b" =>
		return 'β';
	"<=" =>
		return '≤';
	"->" =>
		return '→';
	}
	return -1;
}

# Convert one line of input text into a list of Text items, one per run
# of characters in the same font. Backslash escapes for a few characters,
# \(xx special characters, \f font changes and \*R are interpreted;
# \f(xx and \*(xx are skipped. On return state.curfont is the font in
# force at the end of the line.
parsetext[T](state: ref ParseState[T], t: string): list of Text
	for{
	T =>
		textwidth: fn(t: self T, text: Text): int;
	}
{
	# need to do better here - spot inline font changes etc
	# we also currently cannot support troff tab stops
	pieces: list of Text;	# finished runs, most recent first
	run := "";
	font := state.curfont;		# font requested for the next character
	lastfont := state.curfont;	# should perhaps be in State
	advance := 1;
	for (i := 0; i < len t; i += advance) {
		advance = 1;
		ch := t[i];
		if (ch == '\\') {
			i++;
			left := len t - i;	# characters remaining, counting t[i]
			if (left <= 0)
				break;
			esc := t[i];
			case esc {
			'-' or '.' or '\\' =>
				ch = esc;
			' ' or '0' =>
				ch = ' ';
			'e' =>
				ch = '\\';
			'|' or '&' or '^' =>
				continue;
			'(' =>
				if (left < 3) {
					# truncated name: drop what is left of the line
					advance = left;
					continue;
				}
				advance = 3;
				ch = specialchar(t[i+1:i+3]);
				if (ch < 0)
					continue;
			'f' =>
				if (left == 1)
					continue;
				if (t[i+1] == '(') {
					# two-character font names are skipped
					if (left > 4)
						advance = 4;
					else
						advance = left;
					continue;
				}
				i++;
				case t[i] {
				'0' or 'R' =>
					font = FONT_ROMAN;
				'1' or 'I' =>
					font = FONT_ITALIC;
				'2' =>
					# should be bold but our 'bold' font is constant width
					font = FONT_ROMAN;
				'5' or 'L' =>
					font = FONT_BOLD;
				'P' =>
					font = lastfont;
				}
				continue;
			'*' =>
				if (left == 1)
					continue;
				case t[i+1] {
				'R' =>
					advance = 2;
					ch = '®';
				'(' =>
					if (left > 4)
						advance = 4;
					else
						advance = left;
					continue;
				}
			* =>
				# unknown escape: emit the backslash itself and
				# look at the following character again as text
				i--;
			}
		}
		if (font != state.curfont) {
			# the font change takes effect at the first character after it:
			# close the run collected in the old font
			if (len run > 0) {
				pieces = Text(state.curfont, state.curattr, run, state.heading, state.link) :: pieces;
				run = "";
			}
			lastfont = state.curfont;
			state.curfont = font;
		}
		run[len run] = ch;
	}
	if (len run > 0)
		pieces = Text(state.curfont, state.curattr, run, state.heading, state.link) :: pieces;
	state.curfont = font;

	ordered: list of Text;
	while (pieces != nil) {
		ordered = hd pieces :: ordered;
		pieces = tl pieces;
	}
	return ordered;
}
546
# Add s to the current line as a single Text item carrying the current
# font, attributes, heading level and link.
ParseState[T].addstring(state: self ref ParseState[T], s: string)
{
	state.addtext(Text(state.curfont, state.curattr, s, state.heading, state.link) :: nil);
}
552
# Append the Text items in t to the line being built, starting new lines
# as needed. Outside verbatim mode an item that does not fit within
# metrics.pagew is split at blanks; the part that fits stays on the
# current line, the line is sent, and the rest goes on the next one.
#
# A blank is put between the text already on the line and the first item
# of t (unless that text already ends in a blank); the remaining items of
# t follow one another without extra space.
ParseState[T].addtext(state: self ref ParseState[T], t: list of Text)
{
#dumptextlist(t);
	# on setting a line copy state.prevailindent to state.indent
	#
	# always make sure that current indent is achieved
	#
	# if FONT_LITERAL and state.ql then convert to FONT_BOLD and
	# quote the text before any other processing

	state.newpara = 0;
	needgap := 1;
	while (t != nil) {
		# this scheme is inadequate...
		# results in mixed formatting at end of line getting split up
		# e.g.
		#	.IR man (1)
		# can get split at the '('

		lead := 0;	# indent recorded with the item; non-zero only when padding is needed
		gapw := 0;	# width of the separating blank, if one is wanted
		item := hd t;
		t = tl t;
		if (state.curline == nil || state.indent + state.margin > state.curwidth) {
			# empty line, or the line has not reached the indent yet:
			# place the item at the indent
			lead = state.indent + state.margin;
			state.curwidth = lead;
			needgap = 0;
			if (!state.verbatim) {
				# drop leading blanks, and items that are nothing else
				item.text = trim(item.text);
				while (item.text == "" && t != nil) {
					item = hd t;
					t = tl t;
					item.text = trim(item.text);
				}
			}
		}

		if (item.font == FONT_LITERAL) {
			if (state.ql)
				item.text = "`" + item.text + "'";
			item.font = FONT_BOLD;
		}
		if (needgap) {
			(nil, prev) := hd state.curline;
			if (prev.text[len prev.text - 1] != ' ') {
				# the blank is measured in the font of the text it follows
				gap := Text(prev.font, prev.attr, " ", 0, nil);
				gapw = state.viewer.textwidth(gap);
			} else
				needgap = 0;
		}

		# try ever shorter prefixes of the item, cutting at blanks,
		# until one fits on the line
		piece := item;
		cut := len item.text;	# length of the prefix being tried
		lastcut := cut;		# length of the prefix tried before this one
		resume := 0;		# where the unplaced remainder starts
		while (cut > 0) {
			piece.text = item.text[0:cut];
			w := state.viewer.textwidth(piece);
			if (state.verbatim || state.curwidth + gapw + w <= state.metrics.pagew) {
				# easy - just add it!
				state.curwidth += gapw + w;
				if (needgap) {
					piece.text = " " + piece.text;
					needgap = 0;
				}
				state.curline = (lead, piece) :: state.curline;
				lead = 0;
				break;
			}
			lastcut = cut;
			# back up to the nearest blank, then over the run of blanks before it
			while (cut > 0 && piece.text[cut-1] != ' ')
				cut--;
			if (cut > 0) {
				resume = cut;
				while (cut > 0 && piece.text[cut-1] == ' ')
					cut--;
			}
		}
		if (cut != len item.text) {
			# couldn't fit whole item onto line
			if (state.curline == nil) {
				# couldn't fit (sub)item on empty line - add it anyway
				# as there is nowhere else to put it
				cut = lastcut;
				piece.text = item.text[0:cut];
				state.curline = (lead, piece) :: state.curline;
				if (resume != 0) {
					item.text = item.text[resume:];
					t = item :: t;
				}
			} else {
				# already stuff on line and we have consumed up to resume of
				# the current item
				if (cut != 0)
					item.text = item.text[resume:];
				t = item :: t;
			}
			state.brk();
		}
		needgap = 0;
	}
}
657
# Return s without its leading blanks (trailing blanks are kept).
trim(s: string): string
{
	start := 0;
	while (start < len s && s[start] == ' ')
		start++;
	return s[start:];
}
664
# End the current output line: if anything has been collected, send it
# on setline in reading order and start an empty line. In every case the
# indent reverts to the prevailing indent.
ParseState[T].brk(state: self ref ParseState)
{
	pending := state.curline;
	if (pending != nil) {
		# curline is kept most recent first; reverse it for output
		ordered: list of (int, Text);
		while (pending != nil) {
			ordered = hd pending :: ordered;
			pending = tl pending;
		}
		state.setline <-= ordered;
		state.curline = nil;
		state.curwidth = 0;
	}
	state.indent = state.prevailindent;
}
677
# Start a new paragraph: end the current line, emit pspace blank lines
# (only if text has been added since the previous paragraph break), and
# reset font, attributes and indent to their defaults.
ParseState[T].paragraph(state: self ref ParseState)
{
	state.brk();
	if (!state.newpara) {
		empty := Text(state.curfont, state.curattr, "", 0, "");
		n := 0;
		while (n < state.pspace) {
			state.setline <-= (0, empty) :: nil;
			n++;
		}
		state.newpara = 1;
	}
	# prevailindent is deliberately left alone here
	state.ntlsetindent = 0;
	state.indent = state.metrics.indent;
	state.curfont = FONT_ROMAN;
	state.curattr = 0;
}
693
# convert troff 'values' into output 'dots'
#
# v is a number optionally followed by one unit letter (i, c, P, m, n,
# p, u or v); defunits is the unit letter assumed when v has none.
# Returns 0 for an empty v or an unknown unit.
tval(m: Metrics, v: string, defunits: int): int
{
	if (v == nil)
		return 0;

	last := len v - 1;
	units := v[last];
	num := v;
	case units {
	'i' or 'c' or 'P' or 'm' or 'n' or 'p' or 'u' or 'v' =>
		num = v[0:last];
	* =>
		units = defunits;
	}
	val := real num;

	case units {
	'i' =>		# inches
		return int (real m.dpi * val);
	'c' =>		# centimetres
		return int ((real m.dpi * val)/2.54);
	'P' =>		# picas, 6 to the inch
		return int ((real m.dpi * val)/ 6.0);
	'm' =>		# ems
		return int (real m.em * val);
	'n' =>		# ens
		return int (real m.en * val);
	'p' =>		# points, 72 to the inch
		return int ((real m.dpi * val)/72.0);
	'u' =>		# already in output units
		return int val;
	'v' =>		# multiples of m.V
		return int (real m.V * val);
	}
	return 0;
}
737
# Set the strings in strs in alternating fonts - f1 for the first, f2 for
# the second, f1 again for the third, and so on - and add the result to
# the current line in one go. The current font is restored afterwards.
altattr[T](state: ref ParseState[T], f1, f2: int, strs: list of string)
	for{
	T =>
		textwidth: fn(t: self T, text: Text): int;
	}
{
	saved := state.curfont;
	rev: list of Text;	# collected items, most recent first
	second := 0;
	while (strs != nil) {
		if (second)
			state.curfont = f2;
		else
			state.curfont = f1;
		second = !second;
		for (piece := parsetext(state, hd strs); piece != nil; piece = tl piece)
			rev = hd piece :: rev;
		strs = tl strs;
	}

	ordered: list of Text;
	while (rev != nil) {
		ordered = hd rev :: ordered;
		rev = tl rev;
	}
	state.addtext(ordered);
	state.curfont = saved;
}
764
# Debugging aid: print the text of every item of t on one line.
dumptextlist(t: list of Text)
{
	sys->print("textlist[");
	while (t != nil) {
		sys->print("(%s)", (hd t).text);
		t = tl t;
	}
	sys->print("]\n");
}
774
# Build a link from the parameters of an IR request.
# The parameters are concatenated and must have the form name(section),
# where section consists of digits and dots; the result is
# "section name". Returns nil if the text does not have that form,
# including when there is no name before the "(".
convlink(params: list of string): string
{
	# merge the texts
	s := "";
	for (; params != nil; params = tl params)
		s += hd params;

	lp := 0;
	while (lp < len s && s[lp] != '(')
		lp++;
	# need a name before the "(" and at least one character after it
	if (lp == 0 || lp+1 >= len s)
		return nil;
	cmd := s[0:lp];

	# the section runs up to ")" or, failing that, to the end
	rest := s[lp+1:];
	rp := 0;
	while (rp < len rest && rest[rp] != ')')
		rp++;
	section := rest[0:rp];
	if (section == nil || !isint(section))
		return nil;

	return section + " " + cmd;
}
799
# Return 1 if s contains nothing but decimal digits and dots
# (which includes the empty string), 0 otherwise.
isint(s: string): int
{
	for (i := 0; i < len s; i++) {
		c := s[i];
		if (c == '.')
			continue;
		if (c < '0' || c > '9')
			return 0;
	}
	return 1;
}
807