xref: /openbsd-src/usr.bin/less/ch.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: ch.c,v 1.2 2001/01/29 01:58:00 niklas Exp $	*/
2 
3 /*
4  * Copyright (c) 1984,1985,1989,1994,1995  Mark Nudelman
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice in the documentation and/or other materials provided with
14  *    the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
22  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
24  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
25  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
26  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 
30 /*
31  * Low level character input from the input file.
32  * We use these special purpose routines which optimize moving
33  * both forward and backward from the current read pointer.
34  */
35 
36 #include "less.h"
37 
/*
 * When nonzero, end of file is not treated as final:
 * ch_length() reports the length as unknown, and fch_get()
 * waits and retries when a read returns no data.
 */
public int ignore_eoi;
39 
40 /*
41  * Pool of buffers holding the most recently used blocks of the input file.
42  * The buffer pool is kept as a doubly-linked circular list,
43  * in order from most- to least-recently used.
44  * The circular list is anchored by the file state "thisfile".
45  */
46 #define LBUFSIZE	1024
47 struct buf {
48 	struct buf *next, *prev;  /* Must be first to match struct filestate */
49 	long block;
50 	unsigned int datasize;
51 	unsigned char data[LBUFSIZE];
52 };
53 
54 /*
55  * The file state is maintained in a filestate structure.
56  * A pointer to the filestate is kept in the ifile structure.
57  */
58 struct filestate {
59 	/* -- Following members must match struct buf */
60 	struct buf *buf_next, *buf_prev;
61 	long buf_block;
62 	/* -- End of struct buf copy */
63 	int file;
64 	int flags;
65 	POSITION fpos;
66 	int nbufs;
67 	long block;
68 	int offset;
69 	POSITION fsize;
70 };
71 
72 
/*
 * Shorthand for the members of the current file's state.
 * END_OF_CHAIN is the filestate itself viewed as a struct buf;
 * it is the sentinel that terminates walks over the buffer list.
 */
#define	END_OF_CHAIN	((struct buf *)thisfile)
#define	ch_bufhead	thisfile->buf_next
#define	ch_buftail	thisfile->buf_prev
#define	ch_nbufs	thisfile->nbufs
#define	ch_block	thisfile->block
#define	ch_offset	thisfile->offset
#define	ch_fpos		thisfile->fpos
#define	ch_fsize	thisfile->fsize
#define	ch_flags	thisfile->flags
#define	ch_file		thisfile->file

/* State of the file currently being read; set by ch_init(). */
static struct filestate *thisfile;
/* Character pushed back by ch_ungetchar(), or -1 if there is none. */
static int ch_ungotchar = -1;

/* Nonzero allows extra buffers to be allocated for non-seekable input. */
extern int autobuf;
/* Not referenced directly here; presumably read by ABORT_SIGS() -- TODO confirm. */
extern int sigs;
/* Maximum number of buffers per file; -1 means no limit. */
extern int cbufs;
/* The ifile whose filestate ch_init() looks up. */
extern IFILE curr_ifile;
#if LOGFILE
/* Descriptor of the log file; negative when no log file is open. */
extern int logfile;
extern char *namelogfile;
#endif

static int ch_addbuf();
97 
98 
99 /*
100  * Get the character pointed to by the read pointer.
101  * ch_get() is a macro which is more efficient to call
102  * than fch_get (the function), in the usual case
103  * that the block desired is at the head of the chain.
104  */
105 #define	ch_get()   ((ch_block == ch_bufhead->block && \
106 		     ch_offset < ch_bufhead->datasize) ? \
107 			ch_bufhead->data[ch_offset] : fch_get())
108 	int
109 fch_get()
110 {
111 	register struct buf *bp;
112 	register int n;
113 	register int slept;
114 	POSITION pos;
115 	POSITION len;
116 
117 	slept = FALSE;
118 
119 	/*
120 	 * Look for a buffer holding the desired block.
121 	 */
122 	for (bp = ch_bufhead;  bp != END_OF_CHAIN;  bp = bp->next)
123 		if (bp->block == ch_block)
124 		{
125 			if (ch_offset >= bp->datasize)
126 				/*
127 				 * Need more data in this buffer.
128 				 */
129 				goto read_more;
130 			goto found;
131 		}
132 	/*
133 	 * Block is not in a buffer.
134 	 * Take the least recently used buffer
135 	 * and read the desired block into it.
136 	 * If the LRU buffer has data in it,
137 	 * then maybe allocate a new buffer.
138 	 */
139 	if (ch_buftail == END_OF_CHAIN || ch_buftail->block != (long)(-1))
140 	{
141 		/*
142 		 * There is no empty buffer to use.
143 		 * Allocate a new buffer if:
144 		 * 1. We can't seek on this file and -b is not in effect; or
145 		 * 2. We haven't allocated the max buffers for this file yet.
146 		 */
147 		if ((autobuf && !(ch_flags & CH_CANSEEK)) ||
148 		    (cbufs == -1 || ch_nbufs < cbufs))
149 			if (ch_addbuf())
150 				/*
151 				 * Allocation failed: turn off autobuf.
152 				 */
153 				autobuf = OPT_OFF;
154 	}
155 	bp = ch_buftail;
156 	bp->block = ch_block;
157 	bp->datasize = 0;
158 
159     read_more:
160 	pos = (ch_block * LBUFSIZE) + bp->datasize;
161 	if ((len = ch_length()) != NULL_POSITION && pos >= len)
162 		/*
163 		 * At end of file.
164 		 */
165 		return (EOI);
166 
167 	if (pos != ch_fpos)
168 	{
169 		/*
170 		 * Not at the correct position: must seek.
171 		 * If input is a pipe, we're in trouble (can't seek on a pipe).
172 		 * Some data has been lost: just return "?".
173 		 */
174 		if (!(ch_flags & CH_CANSEEK))
175 			return ('?');
176 		if (lseek(ch_file, (off_t)pos, 0) == BAD_LSEEK)
177 		{
178  			error("seek error", NULL_PARG);
179 			clear_eol();
180 			return (EOI);
181  		}
182  		ch_fpos = pos;
183  	}
184 
185 	/*
186 	 * Read the block.
187 	 * If we read less than a full block, that's ok.
188 	 * We use partial block and pick up the rest next time.
189 	 */
190 	if (ch_ungotchar == -1)
191 	{
192 		n = iread(ch_file, &bp->data[bp->datasize],
193 			(unsigned int)(LBUFSIZE - bp->datasize));
194 	} else
195 	{
196 		bp->data[bp->datasize] = ch_ungotchar;
197 		n = 1;
198 		ch_ungotchar = -1;
199 	}
200 
201 	if (n == READ_INTR)
202 		return (EOI);
203 	if (n < 0)
204 	{
205 		error("read error", NULL_PARG);
206 		clear_eol();
207 		n = 0;
208 	}
209 
210 #if LOGFILE
211 	/*
212 	 * If we have a log file, write the new data to it.
213 	 */
214 	if (logfile >= 0 && n > 0)
215 		write(logfile, (char *) &bp->data[bp->datasize], n);
216 #endif
217 
218 	ch_fpos += n;
219 	bp->datasize += n;
220 
221 	/*
222 	 * If we have read to end of file, set ch_fsize to indicate
223 	 * the position of the end of file.
224 	 */
225 	if (n == 0)
226 	{
227 		ch_fsize = pos;
228 		if (ignore_eoi)
229 		{
230 			/*
231 			 * We are ignoring EOF.
232 			 * Wait a while, then try again.
233 			 */
234 			if (!slept)
235 				ierror("Waiting for data", NULL_PARG);
236 #if !MSOFTC
237 	 		sleep(1);
238 #endif
239 			slept = TRUE;
240 		}
241 		if (ABORT_SIGS())
242 			return (EOI);
243 	}
244 
245     found:
246 	if (ch_bufhead != bp)
247 	{
248 		/*
249 		 * Move the buffer to the head of the buffer chain.
250 		 * This orders the buffer chain, most- to least-recently used.
251 		 */
252 		bp->next->prev = bp->prev;
253 		bp->prev->next = bp->next;
254 
255 		bp->next = ch_bufhead;
256 		bp->prev = END_OF_CHAIN;
257 		ch_bufhead->prev = bp;
258 		ch_bufhead = bp;
259 	}
260 
261 	if (ch_offset >= bp->datasize)
262 		/*
263 		 * After all that, we still don't have enough data.
264 		 * Go back and try again.
265 		 */
266 		goto read_more;
267 
268 	return (bp->data[ch_offset]);
269 }
270 
271 /*
272  * ch_ungetchar is a rather kludgy and limited way to push
273  * a single char onto an input file descriptor.
274  */
275 	public void
276 ch_ungetchar(c)
277 	int c;
278 {
279 	if (c != -1 && ch_ungotchar != -1)
280 		error("ch_ungetchar overrun", NULL_PARG);
281 	ch_ungotchar = c;
282 }
283 
284 #if LOGFILE
285 /*
286  * Close the logfile.
287  * If we haven't read all of standard input into it, do that now.
288  */
289 	public void
290 end_logfile()
291 {
292 	static int tried = FALSE;
293 
294 	if (logfile < 0)
295 		return;
296 	if (!tried && ch_fsize == NULL_POSITION)
297 	{
298 		tried = TRUE;
299 		ierror("Finishing logfile", NULL_PARG);
300 		while (ch_forw_get() != EOI)
301 			if (ABORT_SIGS())
302 				break;
303 	}
304 	close(logfile);
305 	logfile = -1;
306 	namelogfile = NULL;
307 }
308 
309 /*
310  * Start a log file AFTER less has already been running.
311  * Invoked from the - command; see toggle_option().
312  * Write all the existing buffered data to the log file.
313  */
314 	public void
315 sync_logfile()
316 {
317 	register struct buf *bp;
318 	int warned = FALSE;
319 	long block;
320 	long nblocks;
321 
322 	nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE;
323 	for (block = 0;  block < nblocks;  block++)
324 	{
325 		for (bp = ch_bufhead;  ;  bp = bp->next)
326 		{
327 			if (bp == END_OF_CHAIN)
328 			{
329 				if (!warned)
330 				{
331 					error("Warning: log file is incomplete",
332 						NULL_PARG);
333 					warned = TRUE;
334 				}
335 				break;
336 			}
337 			if (bp->block == block)
338 			{
339 				write(logfile, (char *) bp->data, bp->datasize);
340 				break;
341 			}
342 		}
343 	}
344 }
345 
346 #endif
347 
348 /*
349  * Determine if a specific block is currently in one of the buffers.
350  */
351 	static int
352 buffered(block)
353 	long block;
354 {
355 	register struct buf *bp;
356 
357 	for (bp = ch_bufhead;  bp != END_OF_CHAIN;  bp = bp->next)
358 		if (bp->block == block)
359 			return (TRUE);
360 	return (FALSE);
361 }
362 
363 /*
364  * Seek to a specified position in the file.
365  * Return 0 if successful, non-zero if can't seek there.
366  */
367 	public int
368 ch_seek(pos)
369 	register POSITION pos;
370 {
371 	long new_block;
372 	POSITION len;
373 
374 	len = ch_length();
375 	if (pos < ch_zero() || (len != NULL_POSITION && pos > len))
376 		return (1);
377 
378 	new_block = pos / LBUFSIZE;
379 	if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos && !buffered(new_block))
380 	{
381 		if (ch_fpos > pos)
382 			return (1);
383 		while (ch_fpos < pos)
384 		{
385 			if (ch_forw_get() == EOI)
386 				return (1);
387 			if (ABORT_SIGS())
388 				return (1);
389 		}
390 		return (0);
391 	}
392 	/*
393 	 * Set read pointer.
394 	 */
395 	ch_block = new_block;
396 	ch_offset = pos % LBUFSIZE;
397 	return (0);
398 }
399 
400 /*
401  * Seek to the end of the file.
402  */
403 	public int
404 ch_end_seek()
405 {
406 	POSITION len;
407 
408 	if (ch_flags & CH_CANSEEK)
409 		ch_fsize = filesize(ch_file);
410 
411 	len = ch_length();
412 	if (len != NULL_POSITION)
413 		return (ch_seek(len));
414 
415 	/*
416 	 * Do it the slow way: read till end of data.
417 	 */
418 	while (ch_forw_get() != EOI)
419 		if (ABORT_SIGS())
420 			return (1);
421 	return (0);
422 }
423 
424 /*
425  * Seek to the beginning of the file, or as close to it as we can get.
426  * We may not be able to seek there if input is a pipe and the
427  * beginning of the pipe is no longer buffered.
428  */
429 	public int
430 ch_beg_seek()
431 {
432 	register struct buf *bp, *firstbp;
433 
434 	/*
435 	 * Try a plain ch_seek first.
436 	 */
437 	if (ch_seek(ch_zero()) == 0)
438 		return (0);
439 
440 	/*
441 	 * Can't get to position 0.
442 	 * Look thru the buffers for the one closest to position 0.
443 	 */
444 	firstbp = bp = ch_bufhead;
445 	if (bp == END_OF_CHAIN)
446 		return (1);
447 	while ((bp = bp->next) != END_OF_CHAIN)
448 		if (bp->block < firstbp->block)
449 			firstbp = bp;
450 	ch_block = firstbp->block;
451 	ch_offset = 0;
452 	return (0);
453 }
454 
455 /*
456  * Return the length of the file, if known.
457  */
458 	public POSITION
459 ch_length()
460 {
461 	if (ignore_eoi)
462 		return (NULL_POSITION);
463 	return (ch_fsize);
464 }
465 
466 /*
467  * Return the current position in the file.
468  */
469 #define	tellpos(blk,off)   ((POSITION)((((long)(blk)) * LBUFSIZE) + (off)))
470 
471 	public POSITION
472 ch_tell()
473 {
474 	return (tellpos(ch_block, ch_offset));
475 }
476 
477 /*
478  * Get the current char and post-increment the read pointer.
479  */
480 	public int
481 ch_forw_get()
482 {
483 	register int c;
484 
485 	c = ch_get();
486 	if (c == EOI)
487 		return (EOI);
488 	if (ch_offset < LBUFSIZE-1)
489 		ch_offset++;
490 	else
491 	{
492 		ch_block ++;
493 		ch_offset = 0;
494 	}
495 	return (c);
496 }
497 
498 /*
499  * Pre-decrement the read pointer and get the new current char.
500  */
501 	public int
502 ch_back_get()
503 {
504 	if (ch_offset > 0)
505 		ch_offset --;
506 	else
507 	{
508 		if (ch_block <= 0)
509 			return (EOI);
510 		if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1))
511 			return (EOI);
512 		ch_block--;
513 		ch_offset = LBUFSIZE-1;
514 	}
515 	return (ch_get());
516 }
517 
518 /*
519  * Allocate buffers.
520  * Caller wants us to have a total of at least want_nbufs buffers.
521  */
522 	public int
523 ch_nbuf(want_nbufs)
524 	int want_nbufs;
525 {
526 	PARG parg;
527 
528 	while (ch_nbufs < want_nbufs)
529 	{
530 		if (ch_addbuf())
531 		{
532 			/*
533 			 * Cannot allocate enough buffers.
534 			 * If we don't have ANY, then quit.
535 			 * Otherwise, just report the error and return.
536 			 */
537 			parg.p_int = want_nbufs - ch_nbufs;
538 			error("Cannot allocate %d buffers", &parg);
539 			if (ch_nbufs == 0)
540 				quit(QUIT_ERROR);
541 			break;
542 		}
543 	}
544 	return (ch_nbufs);
545 }
546 
547 /*
548  * Flush (discard) any saved file state, including buffer contents.
549  */
550 	public void
551 ch_flush()
552 {
553 	register struct buf *bp;
554 
555 	if (!(ch_flags & CH_CANSEEK))
556 	{
557 		/*
558 		 * If input is a pipe, we don't flush buffer contents,
559 		 * since the contents can't be recovered.
560 		 */
561 		ch_fsize = NULL_POSITION;
562 		return;
563 	}
564 
565 	/*
566 	 * Initialize all the buffers.
567 	 */
568 	for (bp = ch_bufhead;  bp != END_OF_CHAIN;  bp = bp->next)
569 		bp->block = (long)(-1);
570 
571 	/*
572 	 * Figure out the size of the file, if we can.
573 	 */
574 	ch_fsize = filesize(ch_file);
575 
576 	/*
577 	 * Seek to a known position: the beginning of the file.
578 	 */
579 	ch_fpos = 0;
580 	ch_block = 0; /* ch_fpos / LBUFSIZE; */
581 	ch_offset = 0; /* ch_fpos % LBUFSIZE; */
582 
583 	if (lseek(ch_file, (off_t)0, 0) == BAD_LSEEK)
584 	{
585 		/*
586 		 * Warning only; even if the seek fails for some reason,
587 		 * there's a good chance we're at the beginning anyway.
588 		 * {{ I think this is bogus reasoning. }}
589 		 */
590 		error("seek error to 0", NULL_PARG);
591 	}
592 }
593 
594 /*
595  * Allocate a new buffer.
596  * The buffer is added to the tail of the buffer chain.
597  */
598 	static int
599 ch_addbuf()
600 {
601 	register struct buf *bp;
602 
603 	/*
604 	 * Allocate and initialize a new buffer and link it
605 	 * onto the tail of the buffer list.
606 	 */
607 	bp = (struct buf *) calloc(1, sizeof(struct buf));
608 	if (bp == NULL)
609 		return (1);
610 	ch_nbufs++;
611 	bp->block = (long)(-1);
612 	bp->next = END_OF_CHAIN;
613 	bp->prev = ch_buftail;
614 	ch_buftail->next = bp;
615 	ch_buftail = bp;
616 	return (0);
617 }
618 
619 /*
620  * Delete all buffers for this file.
621  */
622 	static void
623 ch_delbufs()
624 {
625 	register struct buf *bp;
626 
627 	while (ch_bufhead != END_OF_CHAIN)
628 	{
629 		bp = ch_bufhead;
630 		bp->next->prev = bp->prev;;
631 		bp->prev->next = bp->next;
632 		free(bp);
633 	}
634 	ch_nbufs = 0;
635 }
636 
637 /*
638  * Is it possible to seek on a file descriptor?
639  */
640 	public int
641 seekable(f)
642 	int f;
643 {
644 	return (lseek(f, (off_t)1, 0) != BAD_LSEEK);
645 }
646 
647 /*
648  * Initialize file state for a new file.
649  */
650 	public void
651 ch_init(f, flags)
652 	int f;
653 	int flags;
654 {
655 	/*
656 	 * See if we already have a filestate for this file.
657 	 */
658 	thisfile = (struct filestate *) get_filestate(curr_ifile);
659 	if (thisfile == NULL)
660 	{
661 		/*
662 		 * Allocate and initialize a new filestate.
663 		 */
664 		thisfile = (struct filestate *)
665 				calloc(1, sizeof(struct filestate));
666 		thisfile->buf_next = thisfile->buf_prev = END_OF_CHAIN;
667 		thisfile->buf_block = (long)(-1);
668 		thisfile->nbufs = 0;
669 		thisfile->flags = 0;
670 		thisfile->fpos = 0;
671 		thisfile->block = 0;
672 		thisfile->offset = 0;
673 		thisfile->file = -1;
674 		thisfile->fsize = NULL_POSITION;
675 		ch_flags = flags;
676 		/*
677 		 * Try to seek; set CH_CANSEEK if it works.
678 		 */
679 		if (seekable(f))
680 			ch_flags |= CH_CANSEEK;
681 		set_filestate(curr_ifile, (void *) thisfile);
682 	}
683 	if (thisfile->file == -1)
684 		thisfile->file = f;
685 	ch_flush();
686 }
687 
688 /*
689  * Close a filestate.
690  */
691 	public void
692 ch_close()
693 {
694 	int keepstate = FALSE;
695 
696 	if (ch_flags & (CH_CANSEEK|CH_POPENED))
697 	{
698 		/*
699 		 * We can seek or re-open, so we don't need to keep buffers.
700 		 */
701 		ch_delbufs();
702 	} else
703 		keepstate = TRUE;
704 	if (!(ch_flags & CH_KEEPOPEN))
705 	{
706 		/*
707 		 * We don't need to keep the file descriptor open
708 		 * (because we can re-open it.)
709 		 * But don't really close it if it was opened via popen(),
710 		 * because pclose() wants to close it.
711 		 */
712 		if (!(ch_flags & CH_POPENED))
713 			close(ch_file);
714 		ch_file = -1;
715 	} else
716 		keepstate = TRUE;
717 	if (!keepstate)
718 	{
719 		/*
720 		 * We don't even need to keep the filestate structure.
721 		 */
722 		free(thisfile);
723 		thisfile = NULL;
724 		set_filestate(curr_ifile, (void *) NULL);
725 	}
726 }
727 
728 /*
729  * Return ch_flags for the current file.
730  */
731 	public int
732 ch_getflags()
733 {
734 	return (ch_flags);
735 }
736 
#if 0
/*
 * Debugging aid (compiled out): print a filestate and, for each of
 * its buffers, the block number, data size and first 30 data bytes.
 * Non-printable bytes are shown as '.'.
 *
 * Every argument is cast to the type its conversion expects.
 * Positions are printed via unsigned long, so values wider than
 * that are truncated in the output.
 */
	public void
ch_dump(struct filestate *fs)
{
	struct buf *bp;
	unsigned char *s;

	if (fs == NULL)
	{
		printf(" --no filestate\n");
		return;
	}
	printf(" file %d, flags %x, fpos %lx, fsize %lx, blk/off %lx/%x\n",
		fs->file, (unsigned int) fs->flags,
		(unsigned long) fs->fpos, (unsigned long) fs->fsize,
		(unsigned long) fs->block, (unsigned int) fs->offset);
	printf(" %d bufs:\n", fs->nbufs);
	for (bp = fs->buf_next; bp != (struct buf *)fs;  bp = bp->next)
	{
		printf("%p: blk %lx, size %x \"",
			(void *) bp, (unsigned long) bp->block, bp->datasize);
		for (s = bp->data;  s < bp->data + 30;  s++)
			if (*s >= ' ' && *s < 0x7F)
				printf("%c", *s);
			else
				printf(".");
		printf("\"\n");
	}
}
#endif
766