xref: /netbsd-src/distrib/utils/more/line.c (revision 326b2259b73e878289ebd80cd9d20bc5aee35e99)
1 /*	$NetBSD: line.c,v 1.4 2003/08/07 09:27:59 agc Exp $	*/
2 
3 /*
4  * Copyright (c) 1988, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1988 Mark Nudleman
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  * 4. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 #include <sys/cdefs.h>
65 #ifndef lint
66 #if 0
67 static char sccsid[] = "@(#)line.c	8.1 (Berkeley) 6/6/93";
68 #else
69 __RCSID("$NetBSD: line.c,v 1.4 2003/08/07 09:27:59 agc Exp $");
70 #endif
71 #endif /* not lint */
72 
73 /*
74  * Routines to manipulate the "line buffer".
75  * The line buffer holds a line of output as it is being built
76  * in preparation for output to the screen.
77  * We keep track of the PRINTABLE length of the line as it is being built.
78  */
79 
80 #include <sys/types.h>
81 #include <ctype.h>
82 
83 #include "less.h"
84 #include "extern.h"
85 
86 static char linebuf[1024];	/* Buffer which holds the current output line */
87 static char *curr;		/* Next write position in linebuf (see pappend) */
88 static int column;		/* Printable length of the line so far, accounting
89 				   for backspaces, tabs, mode changes, etc. */
90 /*
91  * A ridiculously complex state machine takes care of backspaces.  The
92  * complexity arises from the attempt to deal with all cases, especially
93  * involving long lines with underlining, boldfacing or whatever.  There
94  * are still some cases which will break it.
95  *
96  * There are seven states:
97  *	LN_NORMAL is the normal state (not in underline mode).
98  *	LN_UNDERLINE means we are in underline mode.  We expect to get
99  *		either a sequence like "_\bX" or "X\b_" to continue
100  *		underline mode, or anything else to end underline mode.
101  *	LN_BOLDFACE means we are in boldface mode.  We expect to get sequences
102  *		like "X\bX\b...X\bX" to continue boldface mode, or anything
103  *		else to end boldface mode.
104  *	LN_UL_X means we are one character after LN_UNDERLINE
105  *		(we have gotten the '_' in "_\bX" or the 'X' in "X\b_").
106  *	LN_UL_XB means we are one character after LN_UL_X
107  *		(we have gotten the backspace in "_\bX" or "X\b_";
108  *		we expect one more ordinary character,
109  *		which will put us back in state LN_UNDERLINE).
110  *	LN_BO_X means we are one character after LN_BOLDFACE
111  *		(we have gotten the 'X' in "X\bX").
112  *	LN_BO_XB means we are one character after LN_BO_X
113  *		(we have gotten the backspace in "X\bX";
114  *		we expect one more 'X' which will put us back
115  *		in LN_BOLDFACE).
116  */
/*
 * States of the backspace-handling state machine.
 *
 * These are an anonymous enum rather than #defines so that the names are
 * visible to a debugger and cannot collide with other macros.  The numeric
 * values are unchanged; in particular LN_NORMAL must stay 0 because
 * ln_state is tested for truth to mean "in some special mode".
 */
enum {
	LN_NORMAL	= 0,	/* Not in underline, boldface or whatever mode */
	LN_UNDERLINE	= 1,	/* In underline, need next char */
	LN_UL_X		= 2,	/* In underline, got char, need \b */
	LN_UL_XB	= 3,	/* In underline, got char & \b, need one more */
	LN_BOLDFACE	= 4,	/* In boldface, need next char */
	LN_BO_X		= 5,	/* In boldface, got char, need \b */
	LN_BO_XB	= 6	/* In boldface, got char & \b, need same char */
};

static int ln_state;		/* Currently in normal/underline/bold/etc mode? */
125 
126 char *line;			/* Start of the current line: linebuf itself, or (after
127 				   back_raw_line()) a position inside linebuf. */
128 /*
129  * Rewind the line buffer.
130  */
131 void
132 prewind()
133 {
134 	line = curr = linebuf;
135 	ln_state = LN_NORMAL;
136 	column = 0;
137 }
138 
139 /*
140  * Append a character to the line buffer.
141  * Expand tabs into spaces, handle underlining, boldfacing, etc.
142  * Returns 0 if ok, 1 if couldn't fit in buffer.
143  */
144 #define	NEW_COLUMN(addon) \
145 	if (column + addon + (ln_state ? ue_width : 0) > sc_width) \
146 		return(1); \
147 	else \
148 		column += addon
149 
150 int
151 pappend(c)
152 	int c;
153 {
154 	if (c == '\0') {
155 		/*
156 		 * Terminate any special modes, if necessary.
157 		 * Append a '\0' to the end of the line.
158 		 */
159 		switch (ln_state) {
160 		case LN_UL_X:
161 			curr[0] = curr[-1];
162 			curr[-1] = UE_CHAR;
163 			curr++;
164 			break;
165 		case LN_BO_X:
166 			curr[0] = curr[-1];
167 			curr[-1] = BE_CHAR;
168 			curr++;
169 			break;
170 		case LN_UL_XB:
171 		case LN_UNDERLINE:
172 			*curr++ = UE_CHAR;
173 			break;
174 		case LN_BO_XB:
175 		case LN_BOLDFACE:
176 			*curr++ = BE_CHAR;
177 			break;
178 		}
179 		ln_state = LN_NORMAL;
180 		*curr = '\0';
181 		return(0);
182 	}
183 
184 	if (curr > linebuf + sizeof(linebuf) - 12)
185 		/*
186 		 * Almost out of room in the line buffer.
187 		 * Don't take any chances.
188 		 * {{ Linebuf is supposed to be big enough that this
189 		 *    will never happen, but may need to be made
190 		 *    bigger for wide screens or lots of backspaces. }}
191 		 */
192 		return(1);
193 
194 	if (!bs_mode) {
195 		/*
196 		 * Advance the state machine.
197 		 */
198 		switch (ln_state) {
199 		case LN_NORMAL:
200 			if (curr <= linebuf + 1
201 			    || curr[-1] != (char)('H' | 0200))
202 				break;
203 			column -= 2;
204 			if (c == curr[-2])
205 				goto enter_boldface;
206 			if (c == '_' || curr[-2] == '_')
207 				goto enter_underline;
208 			curr -= 2;
209 			break;
210 
211 enter_boldface:
212 			/*
213 			 * We have "X\bX" (including the current char).
214 			 * Switch into boldface mode.
215 			 */
216 			column--;
217 			if (column + bo_width + be_width + 1 >= sc_width)
218 				/*
219 				 * Not enough room left on the screen to
220 				 * enter and exit boldface mode.
221 				 */
222 				return (1);
223 
224 			if (bo_width > 0 && curr > linebuf + 2
225 			    && curr[-3] == ' ') {
226 				/*
227 				 * Special case for magic cookie terminals:
228 				 * if the previous char was a space, replace
229 				 * it with the "enter boldface" sequence.
230 				 */
231 				curr[-3] = BO_CHAR;
232 				column += bo_width-1;
233 			} else {
234 				curr[-1] = curr[-2];
235 				curr[-2] = BO_CHAR;
236 				column += bo_width;
237 				curr++;
238 			}
239 			goto ln_bo_xb_case;
240 
241 enter_underline:
242 			/*
243 			 * We have either "_\bX" or "X\b_" (including
244 			 * the current char).  Switch into underline mode.
245 			 */
246 			column--;
247 			if (column + ul_width + ue_width + 1 >= sc_width)
248 				/*
249 				 * Not enough room left on the screen to
250 				 * enter and exit underline mode.
251 				 */
252 				return (1);
253 
254 			if (ul_width > 0 &&
255 			    curr > linebuf + 2 && curr[-3] == ' ')
256 			{
257 				/*
258 				 * Special case for magic cookie terminals:
259 				 * if the previous char was a space, replace
260 				 * it with the "enter underline" sequence.
261 				 */
262 				curr[-3] = UL_CHAR;
263 				column += ul_width-1;
264 			} else
265 			{
266 				curr[-1] = curr[-2];
267 				curr[-2] = UL_CHAR;
268 				column += ul_width;
269 				curr++;
270 			}
271 			goto ln_ul_xb_case;
272 			/*NOTREACHED*/
273 		case LN_UL_XB:
274 			/*
275 			 * Termination of a sequence "_\bX" or "X\b_".
276 			 */
277 			if (c != '_' && curr[-2] != '_' && c == curr[-2])
278 			{
279 				/*
280 				 * We seem to have run on from underlining
281 				 * into boldfacing - this is a nasty fix, but
282 				 * until this whole routine is rewritten as a
283 				 * real DFA, ...  well ...
284 				 */
285 				curr[0] = curr[-2];
286 				curr[-2] = UE_CHAR;
287 				curr[-1] = BO_CHAR;
288 				curr += 2; /* char & non-existent backspace */
289 				ln_state = LN_BO_XB;
290 				goto ln_bo_xb_case;
291 			}
292 ln_ul_xb_case:
293 			if (c == '_')
294 				c = curr[-2];
295 			curr -= 2;
296 			ln_state = LN_UNDERLINE;
297 			break;
298 		case LN_BO_XB:
299 			/*
300 			 * Termination of a sequnce "X\bX".
301 			 */
302 			if (c != curr[-2] && (c == '_' || curr[-2] == '_'))
303 			{
304 				/*
305 				 * We seem to have run on from
306 				 * boldfacing into underlining.
307 				 */
308 				curr[0] = curr[-2];
309 				curr[-2] = BE_CHAR;
310 				curr[-1] = UL_CHAR;
311 				curr += 2; /* char & non-existent backspace */
312 				ln_state = LN_UL_XB;
313 				goto ln_ul_xb_case;
314 			}
315 ln_bo_xb_case:
316 			curr -= 2;
317 			ln_state = LN_BOLDFACE;
318 			break;
319 		case LN_UNDERLINE:
320 			if (column + ue_width + bo_width + 1 + be_width >= sc_width)
321 				/*
322 				 * We have just barely enough room to
323 				 * exit underline mode and handle a possible
324 				 * underline/boldface run on mixup.
325 				 */
326 				return (1);
327 			ln_state = LN_UL_X;
328 			break;
329 		case LN_BOLDFACE:
330 			if (c == '\b')
331 			{
332 				ln_state = LN_BO_XB;
333 				break;
334 			}
335 			if (column + be_width + ul_width + 1 + ue_width >= sc_width)
336 				/*
337 				 * We have just barely enough room to
338 				 * exit underline mode and handle a possible
339 				 * underline/boldface run on mixup.
340 				 */
341 				return (1);
342 			ln_state = LN_BO_X;
343 			break;
344 		case LN_UL_X:
345 			if (c == '\b')
346 				ln_state = LN_UL_XB;
347 			else
348 			{
349 				/*
350 				 * Exit underline mode.
351 				 * We have to shuffle the chars a bit
352 				 * to make this work.
353 				 */
354 				curr[0] = curr[-1];
355 				curr[-1] = UE_CHAR;
356 				column += ue_width;
357 				if (ue_width > 0 && curr[0] == ' ')
358 					/*
359 					 * Another special case for magic
360 					 * cookie terminals: if the next
361 					 * char is a space, replace it
362 					 * with the "exit underline" sequence.
363 					 */
364 					column--;
365 				else
366 					curr++;
367 				ln_state = LN_NORMAL;
368 			}
369 			break;
370 		case LN_BO_X:
371 			if (c == '\b')
372 				ln_state = LN_BO_XB;
373 			else
374 			{
375 				/*
376 				 * Exit boldface mode.
377 				 * We have to shuffle the chars a bit
378 				 * to make this work.
379 				 */
380 				curr[0] = curr[-1];
381 				curr[-1] = BE_CHAR;
382 				column += be_width;
383 				if (be_width > 0 && curr[0] == ' ')
384 					/*
385 					 * Another special case for magic
386 					 * cookie terminals: if the next
387 					 * char is a space, replace it
388 					 * with the "exit boldface" sequence.
389 					 */
390 					column--;
391 				else
392 					curr++;
393 				ln_state = LN_NORMAL;
394 			}
395 			break;
396 		}
397 	}
398 
399 	if (c == '\t') {
400 		/*
401 		 * Expand a tab into spaces.
402 		 */
403 		do {
404 			NEW_COLUMN(1);
405 		} while ((column % tabstop) != 0);
406 		*curr++ = '\t';
407 		return (0);
408 	}
409 
410 	if (c == '\b') {
411 		if (ln_state == LN_NORMAL)
412 			NEW_COLUMN(2);
413 		else
414 			column--;
415 		*curr++ = ('H' | 0200);
416 		return(0);
417 	}
418 
419 	if (CONTROL_CHAR(c)) {
420 		/*
421 		 * Put a "^X" into the buffer.  The 0200 bit is used to tell
422 		 * put_line() to prefix the char with a ^.  We don't actually
423 		 * put the ^ in the buffer because we sometimes need to move
424 		 * chars around, and such movement might separate the ^ from
425 		 * its following character.
426 		 */
427 		NEW_COLUMN(2);
428 		*curr++ = (CARAT_CHAR(c) | 0200);
429 		return(0);
430 	}
431 
432 	/*
433 	 * Ordinary character.  Just put it in the buffer.
434 	 */
435 	NEW_COLUMN(1);
436 	*curr++ = c;
437 	return (0);
438 }
439 
440 /*
441  * Analogous to forw_line(), but deals with "raw lines":
442  * lines which are not split for screen width.
443  * {{ This is supposed to be more efficient than forw_line(). }}
444  */
445 off_t
446 forw_raw_line(curr_pos)
447 	off_t curr_pos;
448 {
449 	char *p;
450 	int c;
451 	off_t new_pos;
452 
453 	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
454 		(c = ch_forw_get()) == EOI)
455 		return (NULL_POSITION);
456 
457 	p = linebuf;
458 
459 	for (;;)
460 	{
461 		if (c == '\n' || c == EOI)
462 		{
463 			new_pos = ch_tell();
464 			break;
465 		}
466 		if (p >= &linebuf[sizeof(linebuf)-1])
467 		{
468 			/*
469 			 * Overflowed the input buffer.
470 			 * Pretend the line ended here.
471 			 * {{ The line buffer is supposed to be big
472 			 *    enough that this never happens. }}
473 			 */
474 			new_pos = ch_tell() - 1;
475 			break;
476 		}
477 		*p++ = c;
478 		c = ch_forw_get();
479 	}
480 	*p = '\0';
481 	line = linebuf;
482 	return (new_pos);
483 }
484 
485 /*
486  * Analogous to back_line(), but deals with "raw lines".
487  * {{ This is supposed to be more efficient than back_line(). }}
488  */
489 off_t
490 back_raw_line(curr_pos)
491 	off_t curr_pos;
492 {
493 	char *p;
494 	int c;
495 	off_t new_pos;
496 
497 	if (curr_pos == NULL_POSITION || curr_pos <= (off_t)0 ||
498 		ch_seek(curr_pos-1))
499 		return (NULL_POSITION);
500 
501 	p = &linebuf[sizeof(linebuf)];
502 	*--p = '\0';
503 
504 	for (;;)
505 	{
506 		c = ch_back_get();
507 		if (c == '\n')
508 		{
509 			/*
510 			 * This is the newline ending the previous line.
511 			 * We have hit the beginning of the line.
512 			 */
513 			new_pos = ch_tell() + 1;
514 			break;
515 		}
516 		if (c == EOI)
517 		{
518 			/*
519 			 * We have hit the beginning of the file.
520 			 * This must be the first line in the file.
521 			 * This must, of course, be the beginning of the line.
522 			 */
523 			new_pos = (off_t)0;
524 			break;
525 		}
526 		if (p <= linebuf)
527 		{
528 			/*
529 			 * Overflowed the input buffer.
530 			 * Pretend the line ended here.
531 			 */
532 			new_pos = ch_tell() + 1;
533 			break;
534 		}
535 		*--p = c;
536 	}
537 	line = p;
538 	return (new_pos);
539 }
540