xref: /openbsd-src/usr.bin/less/line.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*
2  * Copyright (C) 1984-2012  Mark Nudelman
3  * Modified for use with illumos by Garrett D'Amore.
4  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
5  *
6  * You may distribute under the terms of either the GNU General Public
7  * License or the Less License, as specified in the README file.
8  *
9  * For more information, see the README file.
10  */
11 
12 /*
13  * Routines to manipulate the "line buffer".
14  * The line buffer holds a line of output as it is being built
15  * in preparation for output to the screen.
16  */
17 
18 #include <wchar.h>
19 #include <wctype.h>
20 
21 #include "charset.h"
22 #include "less.h"
23 
static char *linebuf = NULL;	/* Buffer which holds the current output line */
static char *attr = NULL;	/* Extension of linebuf to hold attributes */
int size_linebuf = 0;		/* Size of line buffer (and attr buffer) */

static int cshift;		/* Current left-shift of output line buffer */
int hshift;			/* Desired left-shift of output line buffer */
int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
int ntabstops = 1;		/* Number of tabstops */
int tabdefault = 8;		/* Default repeated tabstops */
off_t highest_hilite;		/* Pos of last hilite in file found so far */

static int curr;		/* Index into linebuf */
static int column;	/* Printable length, accounting for backspaces, etc. */
static int overstrike;		/* Next char should overstrike previous char */
static int is_null_line;	/* There is no current line */
static int lmargin;		/* Left margin */
static char pendc;		/* Char held back by pappend(); '\0' if none */
static off_t pendpos;		/* File position that belongs to pendc */
static char *end_ansi_chars;	/* Chars which end an ANSI escape sequence */
static char *mid_ansi_chars;	/* Chars allowed inside an ANSI sequence */

/* Static functions that are used before they are defined. */
static int attr_swidth(int);
static int attr_ewidth(int);
static int do_append(LWCHAR, char *, off_t);

/* Variables defined in other files. */
extern volatile sig_atomic_t sigs;
extern int bs_mode;
extern int linenums;
extern int ctldisp;
extern int twiddle;
extern int binattr;
extern int status_col;
extern int auto_wrap, ignaw;
extern int bo_s_width, bo_e_width;
extern int ul_s_width, ul_e_width;
extern int bl_s_width, bl_e_width;
extern int so_s_width, so_e_width;
extern int sc_width, sc_height;
extern int utf_mode;
extern off_t start_attnpos;
extern off_t end_attnpos;

/*
 * State used by pappend() while it collects the bytes of one
 * multibyte (UTF-8) character.
 */
static char mbc_buf[MAX_UTF_CHAR_LEN];	/* Bytes collected so far */
static int mbc_buf_len = 0;	/* Expected sequence length; 0 if none pending */
static int mbc_buf_index = 0;	/* Bytes in mbc_buf; reused as back-up count */
static off_t mbc_pos;		/* File position of the sequence's lead byte */
70 
71 /*
72  * Initialize from environment variables.
73  */
74 void
75 init_line(void)
76 {
77 	end_ansi_chars = lgetenv("LESSANSIENDCHARS");
78 	if (end_ansi_chars == NULL || *end_ansi_chars == '\0')
79 		end_ansi_chars = "m";
80 
81 	mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
82 	if (mid_ansi_chars == NULL || *mid_ansi_chars == '\0')
83 		mid_ansi_chars = "0123456789;[?!\"'#%()*+ ";
84 
85 	linebuf = ecalloc(LINEBUF_SIZE, sizeof (char));
86 	attr = ecalloc(LINEBUF_SIZE, sizeof (char));
87 	size_linebuf = LINEBUF_SIZE;
88 }
89 
90 /*
91  * Expand the line buffer.
92  */
93 static int
94 expand_linebuf(void)
95 {
96 	/* Double the size of the line buffer. */
97 	int new_size = size_linebuf * 2;
98 
99 	/* Just realloc to expand the buffer, if we can. */
100 	char *new_buf = recallocarray(linebuf, size_linebuf, new_size, 1);
101 	char *new_attr = recallocarray(attr, size_linebuf, new_size, 1);
102 	if (new_buf == NULL || new_attr == NULL) {
103 		free(new_attr);
104 		free(new_buf);
105 		return (1);
106 	}
107 	linebuf = new_buf;
108 	attr = new_attr;
109 	size_linebuf = new_size;
110 	return (0);
111 }
112 
113 /*
114  * Is a character ASCII?
115  */
116 static int
117 is_ascii_char(LWCHAR ch)
118 {
119 	return (ch <= 0x7F);
120 }
121 
122 /*
123  * Rewind the line buffer.
124  */
125 void
126 prewind(void)
127 {
128 	curr = 0;
129 	column = 0;
130 	cshift = 0;
131 	overstrike = 0;
132 	mbc_buf_len = 0;
133 	is_null_line = 0;
134 	pendc = '\0';
135 	lmargin = 0;
136 	if (status_col)
137 		lmargin += 1;
138 }
139 
140 /*
141  * Insert the line number (of the given position) into the line buffer.
142  */
143 void
144 plinenum(off_t pos)
145 {
146 	off_t linenum = 0;
147 	int i;
148 
149 	if (linenums == OPT_ONPLUS) {
150 		/*
151 		 * Get the line number and put it in the current line.
152 		 * {{ Note: since find_linenum calls forw_raw_line,
153 		 *    it may seek in the input file, requiring the caller
154 		 *    of plinenum to re-seek if necessary. }}
155 		 * {{ Since forw_raw_line modifies linebuf, we must
156 		 *    do this first, before storing anything in linebuf. }}
157 		 */
158 		linenum = find_linenum(pos);
159 	}
160 
161 	/*
162 	 * Display a status column if the -J option is set.
163 	 */
164 	if (status_col) {
165 		linebuf[curr] = ' ';
166 		if (start_attnpos != -1 &&
167 		    pos >= start_attnpos && pos < end_attnpos)
168 			attr[curr] = AT_NORMAL|AT_HILITE;
169 		else
170 			attr[curr] = AT_NORMAL;
171 		curr++;
172 		column++;
173 	}
174 	/*
175 	 * Display the line number at the start of each line
176 	 * if the -N option is set.
177 	 */
178 	if (linenums == OPT_ONPLUS) {
179 		char buf[23];
180 		int n;
181 
182 		postoa(linenum, buf, sizeof(buf));
183 		n = strlen(buf);
184 		if (n < MIN_LINENUM_WIDTH)
185 			n = MIN_LINENUM_WIDTH;
186 		snprintf(linebuf+curr, size_linebuf-curr, "%*s ", n, buf);
187 		n++;	/* One space after the line number. */
188 		for (i = 0; i < n; i++)
189 			attr[curr+i] = AT_NORMAL;
190 		curr += n;
191 		column += n;
192 		lmargin += n;
193 	}
194 
195 	/*
196 	 * Append enough spaces to bring us to the lmargin.
197 	 */
198 	while (column < lmargin) {
199 		linebuf[curr] = ' ';
200 		attr[curr++] = AT_NORMAL;
201 		column++;
202 	}
203 }
204 
/*
 * Shift the input line left.
 * This means discarding N printable chars at the start of the buffer.
 *
 * Everything before lmargin stays in place.  The requested shift is
 * first clamped to what is available beyond the margin.  When ctldisp is
 * OPT_ONPLUS, escape sequences found in the discarded region are kept
 * (copied down) instead of being thrown away.
 *
 * On return curr is the new end of the buffer, and the number of
 * columns actually discarded has been subtracted from column and added
 * to cshift.
 */
static void
pshift(int shift)
{
	LWCHAR prev_ch = 0;	/* Previous char examined, 0 if not known */
	unsigned char c;
	int shifted = 0;	/* Columns discarded so far */
	int to;			/* Index in linebuf where kept data is written */
	int from;		/* Index in linebuf of next byte to examine */
	int len;		/* Length in bytes of the current char */
	int width;		/* Columns taken by the current char */
	int prev_attr;
	int next_attr;

	/* Never shift away more than there is beyond the left margin. */
	if (shift > column - lmargin)
		shift = column - lmargin;
	if (shift > curr - lmargin)
		shift = curr - lmargin;

	to = from = lmargin;
	/*
	 * We keep on going when shifted == shift
	 * to get all combining chars.
	 */
	while (shifted <= shift && from < curr) {
		c = linebuf[from];
		if (ctldisp == OPT_ONPLUS && c == ESC) {
			/* Keep cumulative effect.  */
			linebuf[to] = c;
			attr[to++] = attr[from++];
			/*
			 * Copy the rest of the sequence, up to and including
			 * the first char that is not an ANSI "middle" char.
			 */
			while (from < curr && linebuf[from]) {
				linebuf[to] = linebuf[from];
				attr[to++] = attr[from];
				if (!is_ansi_middle(linebuf[from++]))
					break;
			}
			continue;
		}

		width = 0;

		if (!IS_ASCII_OCTET(c) && utf_mode) {
			/* Assumes well-formedness validation already done.  */
			LWCHAR ch;

			len = utf_len(c);
			if (from + len > curr)
				break;
			ch = get_wchar(linebuf + from);
			/* Composing and combining chars take no column. */
			if (!is_composing_char(ch) &&
			    !is_combining_char(prev_ch, ch))
				width = is_wide_char(ch) ? 2 : 1;
			prev_ch = ch;
		} else {
			len = 1;
			if (c == '\b')
				/* XXX - Incorrect if several '\b' in a row.  */
				width = (utf_mode && is_wide_char(prev_ch)) ?
				    -2 : -1;
			else if (!control_char(c))
				width = 1;
			prev_ch = 0;
		}

		/*
		 * A two column char with only one column left to shift:
		 * replace it by a single blank.
		 */
		if (width == 2 && shift - shifted == 1) {
			/* Should never happen when called by pshift_all().  */
			attr[to] = attr[from];
			/*
			 * Assume a wide_char will never be the first half of a
			 * combining_char pair, so reset prev_ch in case we're
			 * followed by a '\b'.
			 */
			prev_ch = linebuf[to++] = ' ';
			from += len;
			shifted++;
			continue;
		}

		/* Adjust width for magic cookies. */
		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
		if (!is_at_equiv(attr[from], prev_attr) &&
		    !is_at_equiv(attr[from], next_attr)) {
			width += attr_swidth(attr[from]);
			if (from + len < curr)
				width += attr_ewidth(attr[from]);
			if (is_at_equiv(prev_attr, next_attr)) {
				width += attr_ewidth(prev_attr);
				if (from + len < curr)
					width += attr_swidth(next_attr);
			}
		}

		/* Stop as soon as the next char would shift too far. */
		if (shift - shifted < width)
			break;
		from += len;
		shifted += width;
		if (shifted < 0)
			shifted = 0;
	}
	/* Move what remains of the line down to follow the kept data. */
	while (from < curr) {
		linebuf[to] = linebuf[from];
		attr[to++] = attr[from++];
	}
	curr = to;
	column -= shifted;
	cshift += shifted;
}
316 
/*
 * Shift the whole current line away: ask pshift() to discard as many
 * columns as the line currently has.  pshift() itself limits the
 * amount to what lies beyond the left margin.
 */
void
pshift_all(void)
{
	pshift(column);
}
325 
326 /*
327  * Return the printing width of the start (enter) sequence
328  * for a given character attribute.
329  */
330 static int
331 attr_swidth(int a)
332 {
333 	int w = 0;
334 
335 	a = apply_at_specials(a);
336 
337 	if (a & AT_UNDERLINE)
338 		w += ul_s_width;
339 	if (a & AT_BOLD)
340 		w += bo_s_width;
341 	if (a & AT_BLINK)
342 		w += bl_s_width;
343 	if (a & AT_STANDOUT)
344 		w += so_s_width;
345 
346 	return (w);
347 }
348 
349 /*
350  * Return the printing width of the end (exit) sequence
351  * for a given character attribute.
352  */
353 static int
354 attr_ewidth(int a)
355 {
356 	int w = 0;
357 
358 	a = apply_at_specials(a);
359 
360 	if (a & AT_UNDERLINE)
361 		w += ul_e_width;
362 	if (a & AT_BOLD)
363 		w += bo_e_width;
364 	if (a & AT_BLINK)
365 		w += bl_e_width;
366 	if (a & AT_STANDOUT)
367 		w += so_e_width;
368 
369 	return (w);
370 }
371 
372 /*
373  * Return the printing width of a given character and attribute,
374  * if the character were added to the current position in the line buffer.
375  * Adding a character with a given attribute may cause an enter or exit
376  * attribute sequence to be inserted, so this must be taken into account.
377  */
378 static int
379 pwidth(wchar_t ch, int a, wchar_t prev_ch)
380 {
381 	int w;
382 
383 	/*
384 	 * In case of a backspace, back up by the width of the previous
385 	 * character.  If that is non-printable (for example another
386 	 * backspace) or zero width (for example a combining accent),
387 	 * the terminal may actually back up to a character even further
388 	 * back, but we no longer know how wide that may have been.
389 	 * The best guess possible at this point is that it was
390 	 * hopefully width one.
391 	 */
392 	if (ch == L'\b') {
393 		w = wcwidth(prev_ch);
394 		if (w <= 0)
395 			w = 1;
396 		return (-w);
397 	}
398 
399 	w = wcwidth(ch);
400 
401 	/*
402 	 * Non-printable characters can get here if the -r flag is in
403 	 * effect, and possibly in some other situations (XXX check that!).
404 	 * Treat them as zero width.
405 	 * That may not always match their actual behaviour,
406 	 * but there is no reasonable way to be more exact.
407 	 */
408 	if (w == -1)
409 		w = 0;
410 
411 	/*
412 	 * Combining accents take up no space.
413 	 * Some terminals, upon failure to compose them with the
414 	 * characters that precede them, will actually take up one column
415 	 * for the combining accent; there isn't much we could do short
416 	 * of testing the (complex) composition process ourselves and
417 	 * printing a binary representation when it fails.
418 	 */
419 	if (w == 0)
420 		return (0);
421 
422 	/*
423 	 * Other characters take one or two columns,
424 	 * plus the width of any attribute enter/exit sequence.
425 	 */
426 	if (curr > 0 && !is_at_equiv(attr[curr-1], a))
427 		w += attr_ewidth(attr[curr-1]);
428 	if ((apply_at_specials(a) != AT_NORMAL) &&
429 	    (curr == 0 || !is_at_equiv(attr[curr-1], a)))
430 		w += attr_swidth(a);
431 	return (w);
432 }
433 
/*
 * Delete to the previous base character in the line buffer.
 * Return 1 if one is found.
 *
 * Characters are removed from the end of the buffer, one at a time,
 * until one with a width greater than zero has been removed.  curr and
 * column are adjusted for every character removed.  Nothing before
 * lmargin is removed, and removal stops in front of a character that
 * carries the AT_ANSI or AT_BINARY attribute.  Returns 0 when no
 * character with a positive width could be removed.
 */
static int
backc(void)
{
	wchar_t	 ch, prev_ch;
	int	 i, len, width;

	/* Find the first byte of the last character in the buffer. */
	i = curr - 1;
	if (utf_mode) {
		while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))
			i--;
	}
	if (i < lmargin)
		return (0);
	if (utf_mode) {
		len = mbtowc(&ch, linebuf + i, curr - i);
		/* Give up unless the bytes up to curr form one character. */
		if (len == -1 || i + len < curr) {
			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
			return (0);
		}
	} else
		ch = linebuf[i];

	/* This assumes that there is no '\b' in linebuf.  */
	while (curr > lmargin && column > lmargin &&
	    (!(attr[curr - 1] & (AT_ANSI|AT_BINARY)))) {
		/* Drop ch, then step i back to the start of the char before it. */
		curr = i--;
		if (utf_mode) {
			while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))
				i--;
		}
		/* Fetch the character that now ends the buffer, if any. */
		if (i < lmargin)
			prev_ch = L'\0';
		else if (utf_mode) {
			len = mbtowc(&prev_ch, linebuf + i, curr - i);
			if (len == -1 || i + len < curr) {
				(void)mbtowc(NULL, NULL, MB_CUR_MAX);
				prev_ch = L'\0';
			}
		} else
			prev_ch = linebuf[i];
		width = pwidth(ch, attr[curr], prev_ch);
		column -= width;
		/* Removed a character that had a width: done. */
		if (width > 0)
			return (1);
		/* Nothing usable in front of it: give up. */
		if (prev_ch == L'\0')
			return (0);
		/* It had no width: go on and remove the one before it too. */
		ch = prev_ch;
	}
	return (0);
}
488 
489 /*
490  * Is a character the end of an ANSI escape sequence?
491  */
492 static int
493 is_ansi_end(LWCHAR ch)
494 {
495 	if (!is_ascii_char(ch))
496 		return (0);
497 	return (strchr(end_ansi_chars, (char)ch) != NULL);
498 }
499 
500 /*
501  *
502  */
503 int
504 is_ansi_middle(LWCHAR ch)
505 {
506 	if (!is_ascii_char(ch))
507 		return (0);
508 	if (is_ansi_end(ch))
509 		return (0);
510 	return (strchr(mid_ansi_chars, (char)ch) != NULL);
511 }
512 
/*
 * Append a character and attribute to the line buffer.
 *
 * ch is the character and a its attribute.  rep, if not NULL, points
 * to the bytes to store for ch (their number is taken from utf_len()
 * of the first byte); if rep is NULL, ch itself is stored as one byte.
 * pos is the file position of the character, used to decide whether
 * it has to be highlighted.
 *
 * Returns 0 if the character was stored (or if an unrecognized escape
 * sequence was removed), 1 if the character does not fit on the screen
 * or the line buffer could not be expanded.
 */
static int
store_char(LWCHAR ch, char a, char *rep, off_t pos)
{
	int i;
	int w;
	int replen;
	char cs;
	int matches;

	if (is_hilited(pos, pos+1, 0, &matches)) {
		/*
		 * This character should be highlighted.
		 * Override the attribute passed in.
		 */
		if (a != AT_ANSI) {
			if (highest_hilite != -1 && pos > highest_hilite)
				highest_hilite = pos;
			a |= AT_HILITE;
		}
	}

	/* w stays -1 until the width of the character is known. */
	w = -1;
	if (ctldisp == OPT_ONPLUS) {
		/*
		 * Set i to the beginning of an ANSI escape sequence
		 * that was begun and not yet ended, or to -1 otherwise.
		 */
		for (i = curr - 1; i >= 0; i--) {
			if (linebuf[i] == ESC)
				break;
			/* Setting i to 0 ends the loop with i == -1. */
			if (!is_ansi_middle(linebuf[i]))
				i = 0;
		}
		if (i >= 0 && !is_ansi_end(ch) && !is_ansi_middle(ch)) {
			/* Remove whole unrecognized sequence.  */
			curr = i;
			return (0);
		}
		if (i >= 0 || ch == ESC) {
			a = AT_ANSI;  /* Will force re-AT_'ing around it. */
			w = 0;
		}
	}
	if (w == -1) {
		wchar_t prev_ch;

		/*
		 * Find the character in front of this one; pwidth()
		 * needs it.  Use a blank if there is none or if it
		 * cannot be decoded.
		 */
		if (utf_mode) {
			for (i = curr - 1; i >= 0; i--)
				if (!IS_UTF8_TRAIL(linebuf[i]))
					break;
			if (i >= 0) {
				w = mbtowc(&prev_ch, linebuf + i, curr - i);
				if (w == -1 || i + w < curr) {
					(void)mbtowc(NULL, NULL, MB_CUR_MAX);
					prev_ch = L' ';
				}
			} else
				prev_ch = L' ';
		} else
			prev_ch = curr > 0 ? linebuf[curr - 1] : L' ';
		w = pwidth(ch, a, prev_ch);
	}

	if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
		/*
		 * Won't fit on screen.
		 */
		return (1);

	if (rep == NULL) {
		cs = (char)ch;
		rep = &cs;
		replen = 1;
	} else {
		replen = utf_len(rep[0]);
	}
	/*
	 * Some room is always kept free at the end of the buffer
	 * (NOTE(review): presumably for what pdone() appends without
	 * checking the size - confirm).
	 */
	if (curr + replen >= size_linebuf-6) {
		/*
		 * Won't fit in line buffer.
		 * Try to expand it.
		 */
		if (expand_linebuf())
			return (1);
	}

	while (replen-- > 0) {
		linebuf[curr] = *rep++;
		attr[curr] = a;
		curr++;
	}
	column += w;
	return (0);
}
609 
610 /*
611  * Append a tab to the line buffer.
612  * Store spaces to represent the tab.
613  */
614 static int
615 store_tab(int attr, off_t pos)
616 {
617 	int to_tab = column + cshift - lmargin;
618 	int i;
619 
620 	if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
621 		to_tab = tabdefault -
622 		    ((to_tab - tabstops[ntabstops-1]) % tabdefault);
623 	else {
624 		for (i = ntabstops - 2; i >= 0; i--)
625 			if (to_tab >= tabstops[i])
626 				break;
627 		to_tab = tabstops[i+1] - to_tab;
628 	}
629 
630 	if (column + to_tab - 1 + pwidth(' ', attr, 0) +
631 	    attr_ewidth(attr) > sc_width)
632 		return (1);
633 
634 	do {
635 		if (store_char(' ', attr, " ", pos))
636 			return (1);
637 	} while (--to_tab > 0);
638 	return (0);
639 }
640 
641 static int
642 store_prchar(char c, off_t pos)
643 {
644 	char *s;
645 
646 	/*
647 	 * Convert to printable representation.
648 	 */
649 	s = prchar(c);
650 
651 	/*
652 	 * Make sure we can get the entire representation
653 	 * of the character on this line.
654 	 */
655 	if (column + (int)strlen(s) - 1 +
656 	    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
657 		return (1);
658 
659 	for (; *s != 0; s++) {
660 		if (store_char(*s, AT_BINARY, NULL, pos))
661 			return (1);
662 	}
663 	return (0);
664 }
665 
666 static int
667 flush_mbc_buf(off_t pos)
668 {
669 	int i;
670 
671 	for (i = 0; i < mbc_buf_index; i++) {
672 		if (store_prchar(mbc_buf[i], pos))
673 			return (mbc_buf_index - i);
674 	}
675 	return (0);
676 }
677 
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 *
 * c is the next byte of the input, pos its file position.
 * In UTF-8 mode the bytes of a multibyte character are collected in
 * mbc_buf and the character is appended once it is complete; bytes
 * that do not form a well-formed character are appended in their
 * printable representation instead.
 *
 * Returns 0 if ok.  If the input couldn't be fit in the buffer, the
 * return value is not zero and tells how many chars the caller should
 * back up: 1 when not in UTF-8 mode, mbc_buf_index otherwise.
 */
int
pappend(char c, off_t pos)
{
	int r;

	/* First deal with a char that was held back by the previous call. */
	if (pendc) {
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL) {
		if (mbc_buf_len > 0)  /* utf_mode must be on. */ {
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			if (r)
				return (mbc_buf_index);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode) {
		r = do_append((LWCHAR) c, NULL, pos);
	} else {
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0) {
			/* No sequence pending: c starts a new character. */
retry:
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c)) {
				r = do_append((LWCHAR) c, NULL, pos);
			} else if (IS_UTF8_LEAD(c)) {
				/* Wait for the remaining bytes. */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else {
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
			}
		} else if (IS_UTF8_TRAIL(c)) {
			mbc_buf[mbc_buf_index++] = c;
			/* Sequence not complete yet. */
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			if (is_utf8_well_formed(mbc_buf))
				r = do_append(get_wchar(mbc_buf), mbc_buf,
				    mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else {
			/* Flush incomplete (truncated) sequence.  */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char.  */
			if (!r)
				goto retry;
		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2) {
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r) {
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}
774 
/*
 * Append one complete character to the line buffer.
 *
 * ch is the character, rep points to the bytes that represent it (NULL
 * to have ch stored as a single byte) and pos is its file position.
 * Depending on bs_mode, ctldisp and utf_mode, a backspace is stored or
 * starts an overstrike, an overstruck character gets the bold and/or
 * underline attribute, a tab is expanded into spaces, and a
 * non-printable character is stored either as is or in its printable
 * representation.
 *
 * Returns 0 if ok, 1 if the character couldn't be stored.
 */
static int
do_append(LWCHAR ch, char *rep, off_t pos)
{
	wchar_t prev_ch;
	int a;

	a = AT_NORMAL;

	if (ch == '\b') {
		if (bs_mode == BS_CONTROL)
			goto do_control_char;

		/*
		 * A better test is needed here so we don't
		 * backspace over part of the printed
		 * representation of a binary character.
		 */
		if (curr <= lmargin ||
		    column <= lmargin ||
		    (attr[curr - 1] & (AT_ANSI|AT_BINARY))) {
			/* Nothing to back up over: show the backspace. */
			if (store_prchar('\b', pos))
				return (1);
		} else if (bs_mode == BS_NORMAL) {
			if (store_char(ch, AT_NORMAL, NULL, pos))
				return (1);
		} else if (bs_mode == BS_SPECIAL) {
			/* The next char overstrikes if backc() found a char. */
			overstrike = backc();
		}

		return (0);
	}

	if (overstrike > 0) {
		/*
		 * Overstrike the character at the current position
		 * in the line buffer.  This will cause either
		 * underline (if a "_" is overstruck),
		 * bold (if an identical character is overstruck),
		 * or just deletion of the character in the buffer.
		 */
		overstrike = utf_mode ? -1 : 0;
		/* To be correct, this must be a base character.  */
		if (mbtowc(&prev_ch, linebuf + curr, MB_CUR_MAX) == -1) {
			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
			prev_ch = L'\0';
		}
		a = attr[curr];
		if (ch == prev_ch) {
			/*
			 * Overstriking a char with itself means make it bold.
			 * But overstriking an underscore with itself is
			 * ambiguous.  It could mean make it bold, or
			 * it could mean make it underlined.
			 * Use the previous overstrike to resolve it.
			 */
			if (ch == '_') {
				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
					a |= (AT_BOLD|AT_UNDERLINE);
				else if (curr > 0 && attr[curr - 1] & AT_UNDERLINE)
					a |= AT_UNDERLINE;
				else if (curr > 0 && attr[curr - 1] & AT_BOLD)
					a |= AT_BOLD;
				else
					/* Can't tell yet; pdone() resolves it. */
					a |= AT_INDET;
			} else {
				a |= AT_BOLD;
			}
		} else if (ch == '_' && prev_ch != L'\0') {
			/* Keep the overstruck char, and underline it. */
			a |= AT_UNDERLINE;
			ch = prev_ch;
			rep = linebuf + curr;
		} else if (prev_ch == '_') {
			a |= AT_UNDERLINE;
		}
		/* Else we replace prev_ch, but we keep its attributes.  */
	} else if (overstrike < 0) {
		if (wcwidth(ch) == 0) {
			/* Continuation of the same overstrike.  */
			if (curr > 0)
				a = attr[curr - 1] & (AT_UNDERLINE | AT_BOLD);
			else
				a = AT_NORMAL;
		} else
			overstrike = 0;
	}

	if (ch == '\t') {
		/*
		 * Expand a tab into spaces.
		 */
		switch (bs_mode) {
		case BS_CONTROL:
			goto do_control_char;
		case BS_NORMAL:
		case BS_SPECIAL:
			if (store_tab(a, pos))
				return (1);
			break;
		}
	} else if ((!utf_mode || is_ascii_char(ch)) &&
	    !isprint((unsigned char)ch)) {
do_control_char:
		if (ctldisp == OPT_ON ||
		    (ctldisp == OPT_ONPLUS && ch == ESC)) {
			/*
			 * Output as a normal character.
			 */
			if (store_char(ch, AT_NORMAL, rep, pos))
				return (1);
		} else {
			if (store_prchar(ch, pos))
				return (1);
		}
	} else if (utf_mode && ctldisp != OPT_ON && !iswprint(ch)) {
		/*
		 * Non-printable, non-ASCII character: store the
		 * representation produced by prutfchar(), but only
		 * if it fits on this line as a whole.
		 */
		char *s;

		s = prutfchar(ch);

		if (column + (int)strlen(s) - 1 +
		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
			return (1);

		for (; *s != 0; s++) {
			if (store_char(*s, AT_BINARY, NULL, pos))
				return (1);
		}
	} else {
		/* Ordinary printable character. */
		if (store_char(ch, a, rep, pos))
			return (1);
	}
	return (0);
}
907 
908 /*
909  *
910  */
911 int
912 pflushmbc(void)
913 {
914 	int r = 0;
915 
916 	if (mbc_buf_len > 0) {
917 		/* Flush incomplete (truncated) sequence.  */
918 		r = flush_mbc_buf(mbc_pos);
919 		mbc_buf_len = 0;
920 	}
921 	return (r);
922 }
923 
/*
 * Terminate the line in the line buffer.
 *
 * endline is nonzero if this is really the end of the line; forw is
 * nonzero if we are moving forward.  Anything still pending is stored,
 * attributes that were left undetermined are resolved, the line is
 * shifted if that has not been done yet, and the line is terminated
 * with a newline (when needed) and a '\0'.
 *
 * What is appended here goes into the buffer without checking its size.
 */
void
pdone(int endline, int forw)
{
	int i;

	(void) pflushmbc();

	if (pendc && (pendc != '\r' || !endline))
		/*
		 * If we had a pending character, put it in the buffer.
		 * But discard a pending CR if we are at end of line
		 * (that is, discard the CR in a CR/LF sequence).
		 */
		(void) do_append(pendc, NULL, pendpos);

	/*
	 * Resolve each undetermined attribute (AT_INDET): make it bold
	 * if the char after it is bold, underlined otherwise.  Going
	 * backwards, the char after it has been resolved already.
	 */
	for (i = curr - 1; i >= 0; i--) {
		if (attr[i] & AT_INDET) {
			attr[i] &= ~AT_INDET;
			if (i < curr - 1 && attr[i + 1] & AT_BOLD)
				attr[i] |= AT_BOLD;
			else
				attr[i] |= AT_UNDERLINE;
		}
	}

	/*
	 * Make sure we've shifted the line, if we need to.
	 */
	if (cshift < hshift)
		pshift(hshift - cshift);

	if (ctldisp == OPT_ONPLUS && is_ansi_end('m')) {
		/* Switch to normal attribute at end of line. */
		char *p = "\033[m";
		for (; *p != '\0'; p++) {
			linebuf[curr] = *p;
			attr[curr++] = AT_ANSI;
		}
	}

	/*
	 * Add a newline if necessary,
	 * and append a '\0' to the end of the line.
	 * We output a newline if we're not at the right edge of the screen,
	 * or if the terminal doesn't auto wrap,
	 * or if this is really the end of the line AND the terminal ignores
	 * a newline at the right edge.
	 * (In the last case we don't want to output a newline if the terminal
	 * doesn't ignore it since that would produce an extra blank line.
	 * But we do want to output a newline if the terminal ignores it in case
	 * the next line is blank.  In that case the single newline output for
	 * that blank line would be ignored!)
	 */
	if (column < sc_width || !auto_wrap || (endline && ignaw) ||
	    ctldisp == OPT_ON) {
		linebuf[curr] = '\n';
		attr[curr] = AT_NORMAL;
		curr++;
	} else if (ignaw && column >= sc_width && forw) {
		/*
		 * Terminals with "ignaw" don't wrap until they *really* need
		 * to, i.e. when the character *after* the last one to fit on a
		 * line is output. But they are too hard to deal with when they
		 * get in the state where a full screen width of characters
		 * have been output but the cursor is sitting on the right edge
		 * instead of at the start of the next line.
		 * So we nudge them into wrapping by outputting a space
		 * character plus a backspace.  But do this only if moving
		 * forward; if we're moving backward and drawing this line at
		 * the top of the screen, the space would overwrite the first
		 * char on the next line.  We don't need to do this "nudge"
		 * at the top of the screen anyway.
		 */
		linebuf[curr] = ' ';
		attr[curr++] = AT_NORMAL;
		linebuf[curr] = '\b';
		attr[curr++] = AT_NORMAL;
	}
	/* The terminating '\0' is not counted in curr. */
	linebuf[curr] = '\0';
	attr[curr] = AT_NORMAL;
}
1008 
1009 /*
1010  *
1011  */
1012 void
1013 set_status_col(char c)
1014 {
1015 	linebuf[0] = c;
1016 	attr[0] = AT_NORMAL|AT_HILITE;
1017 }
1018 
1019 /*
1020  * Get a character from the current line.
1021  * Return the character as the function return value,
1022  * and the character attribute in *ap.
1023  */
1024 int
1025 gline(int i, int *ap)
1026 {
1027 	if (is_null_line) {
1028 		/*
1029 		 * If there is no current line, we pretend the line is
1030 		 * either "~" or "", depending on the "twiddle" flag.
1031 		 */
1032 		if (twiddle) {
1033 			if (i == 0) {
1034 				*ap = AT_BOLD;
1035 				return ('~');
1036 			}
1037 			--i;
1038 		}
1039 		/* Make sure we're back to AT_NORMAL before the '\n'.  */
1040 		*ap = AT_NORMAL;
1041 		return (i ? '\0' : '\n');
1042 	}
1043 
1044 	*ap = attr[i];
1045 	return (linebuf[i] & 0xFF);
1046 }
1047 
1048 /*
1049  * Indicate that there is no current line.
1050  */
1051 void
1052 null_line(void)
1053 {
1054 	is_null_line = 1;
1055 	cshift = 0;
1056 }
1057 
1058 /*
1059  * Analogous to forw_line(), but deals with "raw lines":
1060  * lines which are not split for screen width.
1061  * {{ This is supposed to be more efficient than forw_line(). }}
1062  */
1063 off_t
1064 forw_raw_line(off_t curr_pos, char **linep, int *line_lenp)
1065 {
1066 	int n;
1067 	int c;
1068 	off_t new_pos;
1069 
1070 	if (curr_pos == -1 || ch_seek(curr_pos) ||
1071 	    (c = ch_forw_get()) == EOI)
1072 		return (-1);
1073 
1074 	n = 0;
1075 	for (;;) {
1076 		if (c == '\n' || c == EOI || ABORT_SIGS()) {
1077 			new_pos = ch_tell();
1078 			break;
1079 		}
1080 		if (n >= size_linebuf-1) {
1081 			if (expand_linebuf()) {
1082 				/*
1083 				 * Overflowed the input buffer.
1084 				 * Pretend the line ended here.
1085 				 */
1086 				new_pos = ch_tell() - 1;
1087 				break;
1088 			}
1089 		}
1090 		linebuf[n++] = (char)c;
1091 		c = ch_forw_get();
1092 	}
1093 	linebuf[n] = '\0';
1094 	if (linep != NULL)
1095 		*linep = linebuf;
1096 	if (line_lenp != NULL)
1097 		*line_lenp = n;
1098 	return (new_pos);
1099 }
1100 
1101 /*
1102  * Analogous to back_line(), but deals with "raw lines".
1103  * {{ This is supposed to be more efficient than back_line(). }}
1104  */
1105 off_t
1106 back_raw_line(off_t curr_pos, char **linep, int *line_lenp)
1107 {
1108 	int n;
1109 	int c;
1110 	off_t new_pos;
1111 
1112 	if (curr_pos == -1 || curr_pos <= ch_zero() || ch_seek(curr_pos - 1))
1113 		return (-1);
1114 
1115 	n = size_linebuf;
1116 	linebuf[--n] = '\0';
1117 	for (;;) {
1118 		c = ch_back_get();
1119 		if (c == '\n' || ABORT_SIGS()) {
1120 			/*
1121 			 * This is the newline ending the previous line.
1122 			 * We have hit the beginning of the line.
1123 			 */
1124 			new_pos = ch_tell() + 1;
1125 			break;
1126 		}
1127 		if (c == EOI) {
1128 			/*
1129 			 * We have hit the beginning of the file.
1130 			 * This must be the first line in the file.
1131 			 * This must, of course, be the beginning of the line.
1132 			 */
1133 			new_pos = ch_zero();
1134 			break;
1135 		}
1136 		if (n <= 0) {
1137 			int old_size_linebuf = size_linebuf;
1138 			if (expand_linebuf()) {
1139 				/*
1140 				 * Overflowed the input buffer.
1141 				 * Pretend the line ended here.
1142 				 */
1143 				new_pos = ch_tell() + 1;
1144 				break;
1145 			}
1146 			/*
1147 			 * Shift the data to the end of the new linebuf.
1148 			 */
1149 			n = size_linebuf - old_size_linebuf;
1150 			memmove(linebuf + n, linebuf, old_size_linebuf);
1151 		}
1152 		linebuf[--n] = c;
1153 	}
1154 	if (linep != NULL)
1155 		*linep = &linebuf[n];
1156 	if (line_lenp != NULL)
1157 		*line_lenp = size_linebuf - 1 - n;
1158 	return (new_pos);
1159 }
1160