xref: /openbsd-src/usr.bin/less/ch.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*
2  * Copyright (C) 1984-2002  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information about less, or for information on how to
8  * contact the author, see the README file.
9  */
10 
11 
12 /*
13  * Low level character input from the input file.
14  * We use these special purpose routines which optimize moving
15  * both forward and backward from the current read pointer.
16  */
17 
18 #include "less.h"
19 #if MSDOS_COMPILER==WIN32C
20 #include <errno.h>
21 #include <windows.h>
22 #endif
23 
/* Index of an LBUFSIZE-sized block within the input file. */
typedef POSITION BLOCKNUM;

/*
 * When nonzero, end of input is not treated as final: ch_length()
 * reports the length as unknown, and fch_get() waits a while and
 * retries after a read that returns no data.
 */
public int ignore_eoi;
27 
28 /*
29  * Pool of buffers holding the most recently used blocks of the input file.
30  * The buffer pool is kept as a doubly-linked circular list,
31  * in order from most- to least-recently used.
32  * The circular list is anchored by the file state "thisfile".
33  */
34 #define	LBUFSIZE	8192
/*
 * One cached block of the input file.
 * The four leading pointers must keep this order: struct buflist mirrors
 * them, and the chain anchors are accessed through struct buf pointers.
 */
struct buf {
	struct buf *next, *prev;	/* links in the most-/least-recently used chain */
	struct buf *hnext, *hprev;	/* links in the hash chain */
	BLOCKNUM block;			/* block number held here; -1 if the buffer is empty */
	unsigned int datasize;		/* number of valid bytes in data[] */
	unsigned char data[LBUFSIZE];	/* contents of the block */
};
42 
/*
 * Anchor of one hash chain.  END_OF_HCHAIN() casts a pointer to this
 * structure to a struct buf pointer, so the anchor acts as the sentinel
 * node of its circular chain.
 */
struct buflist {
	/* -- Following members must match struct buf */
	struct buf *buf_next, *buf_prev;
	struct buf *buf_hnext, *buf_hprev;
};
48 
49 /*
50  * The file state is maintained in a filestate structure.
51  * A pointer to the filestate is kept in the ifile structure.
52  */
53 #define	BUFHASH_SIZE	64
struct filestate {
	struct buf *buf_next, *buf_prev;	/* head and tail of the buffer chain (see END_OF_CHAIN) */
	struct buflist hashtbl[BUFHASH_SIZE];	/* hash chain anchors, indexed by BUFHASH(block) */
	int file;		/* file descriptor, or -1 if none is attached */
	int flags;		/* CH_* flags for this file */
	POSITION fpos;		/* position the descriptor is currently at */
	int nbufs;		/* number of buffers allocated for this file */
	BLOCKNUM block;		/* read pointer: block number */
	unsigned int offset;	/* read pointer: offset within the block */
	POSITION fsize;		/* size of the file, or NULL_POSITION if unknown */
};
65 
/* Shorthand for the fields of the current file's state (thisfile). */
#define	ch_bufhead	thisfile->buf_next
#define	ch_buftail	thisfile->buf_prev
#define	ch_nbufs	thisfile->nbufs
#define	ch_block	thisfile->block
#define	ch_offset	thisfile->offset
#define	ch_fpos		thisfile->fpos
#define	ch_fsize	thisfile->fsize
#define	ch_flags	thisfile->flags
#define	ch_file		thisfile->file

/*
 * Sentinels of the circular lists: the anchors stored in the filestate,
 * viewed as struct buf pointers.  A walk along a chain is finished when
 * it arrives back at the matching sentinel.
 * BUFHASH picks the hash chain for a block number by masking, which
 * assumes BUFHASH_SIZE is a power of two.
 */
#define	END_OF_CHAIN	((struct buf *)&thisfile->buf_next)
#define	END_OF_HCHAIN(h) ((struct buf *)&thisfile->hashtbl[h])
#define BUFHASH(blk)	((blk) & (BUFHASH_SIZE-1))

/* Loop header that steps bp through every buffer on hash chain h. */
#define	FOR_BUFS_IN_CHAIN(h,bp) \
	for (bp = thisfile->hashtbl[h].buf_hnext;  \
	     bp != END_OF_HCHAIN(h);  bp = bp->hnext)
83 
/*
 * Unlink bp from the hash chain it is currently on.
 * The body is wrapped in do/while(0) so the macro expands to exactly
 * one statement and is safe under an unbraced if/else.
 * Invoke it with a trailing semicolon, like a function call.
 */
#define	HASH_RM(bp) \
	do { \
		(bp)->hnext->hprev = (bp)->hprev; \
		(bp)->hprev->hnext = (bp)->hnext; \
	} while (0)

/*
 * Link bp in at the front of hash chain h of the current file.
 * Like HASH_RM, this expands to a single statement.
 */
#define	HASH_INS(bp,h) \
	do { \
		(bp)->hnext = thisfile->hashtbl[h].buf_hnext; \
		(bp)->hprev = END_OF_HCHAIN(h); \
		thisfile->hashtbl[h].buf_hnext->hprev = (bp); \
		thisfile->hashtbl[h].buf_hnext = (bp); \
	} while (0)
93 
/* State of the file being read; set by ch_init(), reset to NULL when ch_close() frees it. */
static struct filestate *thisfile;
/* Character pushed back by ch_ungetchar(), or -1 when there is none. */
static int ch_ungotchar = -1;
/* Limit on buffers per file set by ch_setbufspace(); -1 means no limit. */
static int maxbufs = -1;
97 
98 extern int autobuf;
99 extern int sigs;
100 extern int secure;
101 extern IFILE curr_ifile;
102 #if LOGFILE
103 extern int logfile;
104 extern char *namelogfile;
105 #endif
106 
107 static int ch_addbuf();
108 
109 
110 /*
111  * Get the character pointed to by the read pointer.
112  * ch_get() is a macro which is more efficient to call
113  * than fch_get (the function), in the usual case
114  * that the block desired is at the head of the chain.
115  */
116 #define	ch_get()   ((ch_block == ch_bufhead->block && \
117 		     ch_offset < ch_bufhead->datasize) ? \
118 			ch_bufhead->data[ch_offset] : fch_get())
119 	int
120 fch_get()
121 {
122 	register struct buf *bp;
123 	register int n;
124 	register int slept;
125 	register int h;
126 	POSITION pos;
127 	POSITION len;
128 
129 	slept = FALSE;
130 
131 	/*
132 	 * Look for a buffer holding the desired block.
133 	 */
134 	h = BUFHASH(ch_block);
135 	FOR_BUFS_IN_CHAIN(h, bp)
136 	{
137 		if (bp->block == ch_block)
138 		{
139 			if (ch_offset >= bp->datasize)
140 				/*
141 				 * Need more data in this buffer.
142 				 */
143 				goto read_more;
144 			goto found;
145 		}
146 	}
147 	/*
148 	 * Block is not in a buffer.
149 	 * Take the least recently used buffer
150 	 * and read the desired block into it.
151 	 * If the LRU buffer has data in it,
152 	 * then maybe allocate a new buffer.
153 	 */
154 	if (ch_buftail == END_OF_CHAIN || ch_buftail->block != -1)
155 	{
156 		/*
157 		 * There is no empty buffer to use.
158 		 * Allocate a new buffer if:
159 		 * 1. We can't seek on this file and -b is not in effect; or
160 		 * 2. We haven't allocated the max buffers for this file yet.
161 		 */
162 		if ((autobuf && !(ch_flags & CH_CANSEEK)) ||
163 		    (maxbufs < 0 || ch_nbufs < maxbufs))
164 			if (ch_addbuf())
165 				/*
166 				 * Allocation failed: turn off autobuf.
167 				 */
168 				autobuf = OPT_OFF;
169 	}
170 	bp = ch_buftail;
171 	HASH_RM(bp); /* Remove from old hash chain. */
172 	bp->block = ch_block;
173 	bp->datasize = 0;
174 	HASH_INS(bp, h); /* Insert into new hash chain. */
175 
176     read_more:
177 	pos = (ch_block * LBUFSIZE) + bp->datasize;
178 	if ((len = ch_length()) != NULL_POSITION && pos >= len)
179 		/*
180 		 * At end of file.
181 		 */
182 		return (EOI);
183 
184 	if (pos != ch_fpos)
185 	{
186 		/*
187 		 * Not at the correct position: must seek.
188 		 * If input is a pipe, we're in trouble (can't seek on a pipe).
189 		 * Some data has been lost: just return "?".
190 		 */
191 		if (!(ch_flags & CH_CANSEEK))
192 			return ('?');
193 		if (lseek(ch_file, (off_t)pos, SEEK_SET) == BAD_LSEEK)
194 		{
195  			error("seek error", NULL_PARG);
196 			clear_eol();
197 			return (EOI);
198  		}
199  		ch_fpos = pos;
200  	}
201 
202 	/*
203 	 * Read the block.
204 	 * If we read less than a full block, that's ok.
205 	 * We use partial block and pick up the rest next time.
206 	 */
207 	if (ch_ungotchar != -1)
208 	{
209 		bp->data[bp->datasize] = ch_ungotchar;
210 		n = 1;
211 		ch_ungotchar = -1;
212 	} else
213 	{
214 		n = iread(ch_file, &bp->data[bp->datasize],
215 			(unsigned int)(LBUFSIZE - bp->datasize));
216 	}
217 
218 	if (n == READ_INTR)
219 		return (EOI);
220 	if (n < 0)
221 	{
222 #if MSDOS_COMPILER==WIN32C
223 		if (errno != EPIPE)
224 #endif
225 		{
226 			error("read error", NULL_PARG);
227 			clear_eol();
228 		}
229 		n = 0;
230 	}
231 
232 #if LOGFILE
233 	/*
234 	 * If we have a log file, write the new data to it.
235 	 */
236 	if (!secure && logfile >= 0 && n > 0)
237 		write(logfile, (char *) &bp->data[bp->datasize], n);
238 #endif
239 
240 	ch_fpos += n;
241 	bp->datasize += n;
242 
243 	/*
244 	 * If we have read to end of file, set ch_fsize to indicate
245 	 * the position of the end of file.
246 	 */
247 	if (n == 0)
248 	{
249 		ch_fsize = pos;
250 		if (ignore_eoi)
251 		{
252 			/*
253 			 * We are ignoring EOF.
254 			 * Wait a while, then try again.
255 			 */
256 			if (!slept)
257 			{
258 				PARG parg;
259 				parg.p_string = wait_message();
260 				ierror("%s", &parg);
261 			}
262 #if !MSDOS_COMPILER
263 	 		sleep(1);
264 #else
265 #if MSDOS_COMPILER==WIN32C
266 			Sleep(1000);
267 #endif
268 #endif
269 			slept = TRUE;
270 		}
271 		if (sigs)
272 			return (EOI);
273 	}
274 
275     found:
276 	if (ch_bufhead != bp)
277 	{
278 		/*
279 		 * Move the buffer to the head of the buffer chain.
280 		 * This orders the buffer chain, most- to least-recently used.
281 		 */
282 		bp->next->prev = bp->prev;
283 		bp->prev->next = bp->next;
284 		bp->next = ch_bufhead;
285 		bp->prev = END_OF_CHAIN;
286 		ch_bufhead->prev = bp;
287 		ch_bufhead = bp;
288 
289 		/*
290 		 * Move to head of hash chain too.
291 		 */
292 		HASH_RM(bp);
293 		HASH_INS(bp, h);
294 	}
295 
296 	if (ch_offset >= bp->datasize)
297 		/*
298 		 * After all that, we still don't have enough data.
299 		 * Go back and try again.
300 		 */
301 		goto read_more;
302 
303 	return (bp->data[ch_offset]);
304 }
305 
306 /*
307  * ch_ungetchar is a rather kludgy and limited way to push
308  * a single char onto an input file descriptor.
309  */
310 	public void
311 ch_ungetchar(c)
312 	int c;
313 {
314 	if (c != -1 && ch_ungotchar != -1)
315 		error("ch_ungetchar overrun", NULL_PARG);
316 	ch_ungotchar = c;
317 }
318 
319 #if LOGFILE
320 /*
321  * Close the logfile.
322  * If we haven't read all of standard input into it, do that now.
323  */
324 	public void
325 end_logfile()
326 {
327 	static int tried = FALSE;
328 
329 	if (logfile < 0)
330 		return;
331 	if (!tried && ch_fsize == NULL_POSITION)
332 	{
333 		tried = TRUE;
334 		ierror("Finishing logfile", NULL_PARG);
335 		while (ch_forw_get() != EOI)
336 			if (ABORT_SIGS())
337 				break;
338 	}
339 	close(logfile);
340 	logfile = -1;
341 	namelogfile = NULL;
342 }
343 
344 /*
345  * Start a log file AFTER less has already been running.
346  * Invoked from the - command; see toggle_option().
347  * Write all the existing buffered data to the log file.
348  */
349 	public void
350 sync_logfile()
351 {
352 	register struct buf *bp;
353 	int warned = FALSE;
354 	BLOCKNUM block;
355 	BLOCKNUM nblocks;
356 
357 	nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE;
358 	for (block = 0;  block < nblocks;  block++)
359 	{
360 		for (bp = ch_bufhead;  ;  bp = bp->next)
361 		{
362 			if (bp == END_OF_CHAIN)
363 			{
364 				if (!warned)
365 				{
366 					error("Warning: log file is incomplete",
367 						NULL_PARG);
368 					warned = TRUE;
369 				}
370 				break;
371 			}
372 			if (bp->block == block)
373 			{
374 				write(logfile, (char *) bp->data, bp->datasize);
375 				break;
376 			}
377 		}
378 	}
379 }
380 
381 #endif
382 
383 /*
384  * Determine if a specific block is currently in one of the buffers.
385  */
386 	static int
387 buffered(block)
388 	BLOCKNUM block;
389 {
390 	register struct buf *bp;
391 	register int h;
392 
393 	h = BUFHASH(block);
394 	FOR_BUFS_IN_CHAIN(h, bp)
395 	{
396 		if (bp->block == block)
397 			return (TRUE);
398 	}
399 	return (FALSE);
400 }
401 
402 /*
403  * Seek to a specified position in the file.
404  * Return 0 if successful, non-zero if can't seek there.
405  */
406 	public int
407 ch_seek(pos)
408 	register POSITION pos;
409 {
410 	BLOCKNUM new_block;
411 	POSITION len;
412 
413 	len = ch_length();
414 	if (pos < ch_zero() || (len != NULL_POSITION && pos > len))
415 		return (1);
416 
417 	new_block = pos / LBUFSIZE;
418 	if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos && !buffered(new_block))
419 	{
420 		if (ch_fpos > pos)
421 			return (1);
422 		while (ch_fpos < pos)
423 		{
424 			if (ch_forw_get() == EOI)
425 				return (1);
426 			if (ABORT_SIGS())
427 				return (1);
428 		}
429 		return (0);
430 	}
431 	/*
432 	 * Set read pointer.
433 	 */
434 	ch_block = new_block;
435 	ch_offset = pos % LBUFSIZE;
436 	return (0);
437 }
438 
439 /*
440  * Seek to the end of the file.
441  */
442 	public int
443 ch_end_seek()
444 {
445 	POSITION len;
446 
447 	if (ch_flags & CH_CANSEEK)
448 		ch_fsize = filesize(ch_file);
449 
450 	len = ch_length();
451 	if (len != NULL_POSITION)
452 		return (ch_seek(len));
453 
454 	/*
455 	 * Do it the slow way: read till end of data.
456 	 */
457 	while (ch_forw_get() != EOI)
458 		if (ABORT_SIGS())
459 			return (1);
460 	return (0);
461 }
462 
463 /*
464  * Seek to the beginning of the file, or as close to it as we can get.
465  * We may not be able to seek there if input is a pipe and the
466  * beginning of the pipe is no longer buffered.
467  */
468 	public int
469 ch_beg_seek()
470 {
471 	register struct buf *bp, *firstbp;
472 
473 	/*
474 	 * Try a plain ch_seek first.
475 	 */
476 	if (ch_seek(ch_zero()) == 0)
477 		return (0);
478 
479 	/*
480 	 * Can't get to position 0.
481 	 * Look thru the buffers for the one closest to position 0.
482 	 */
483 	firstbp = bp = ch_bufhead;
484 	if (bp == END_OF_CHAIN)
485 		return (1);
486 	while ((bp = bp->next) != END_OF_CHAIN)
487 		if (bp->block < firstbp->block)
488 			firstbp = bp;
489 	ch_block = firstbp->block;
490 	ch_offset = 0;
491 	return (0);
492 }
493 
494 /*
495  * Return the length of the file, if known.
496  */
497 	public POSITION
498 ch_length()
499 {
500 	if (ignore_eoi)
501 		return (NULL_POSITION);
502 	return (ch_fsize);
503 }
504 
505 /*
506  * Return the current position in the file.
507  */
508 	public POSITION
509 ch_tell()
510 {
511 	return (ch_block * LBUFSIZE) + ch_offset;
512 }
513 
514 /*
515  * Get the current char and post-increment the read pointer.
516  */
517 	public int
518 ch_forw_get()
519 {
520 	register int c;
521 
522 	c = ch_get();
523 	if (c == EOI)
524 		return (EOI);
525 	if (ch_offset < LBUFSIZE-1)
526 		ch_offset++;
527 	else
528 	{
529 		ch_block ++;
530 		ch_offset = 0;
531 	}
532 	return (c);
533 }
534 
535 /*
536  * Pre-decrement the read pointer and get the new current char.
537  */
538 	public int
539 ch_back_get()
540 {
541 	if (ch_offset > 0)
542 		ch_offset --;
543 	else
544 	{
545 		if (ch_block <= 0)
546 			return (EOI);
547 		if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1))
548 			return (EOI);
549 		ch_block--;
550 		ch_offset = LBUFSIZE-1;
551 	}
552 	return (ch_get());
553 }
554 
555 /*
556  * Set max amount of buffer space.
557  * bufspace is in units of 1024 bytes.  -1 mean no limit.
558  */
559 	public void
560 ch_setbufspace(bufspace)
561 	int bufspace;
562 {
563 	if (bufspace < 0)
564 		maxbufs = -1;
565 	else
566 	{
567 		maxbufs = ((bufspace * 1024) + LBUFSIZE-1) / LBUFSIZE;
568 		if (maxbufs < 1)
569 			maxbufs = 1;
570 	}
571 }
572 
573 /*
574  * Flush (discard) any saved file state, including buffer contents.
575  */
576 	public void
577 ch_flush()
578 {
579 	register struct buf *bp;
580 
581 	if (!(ch_flags & CH_CANSEEK))
582 	{
583 		/*
584 		 * If input is a pipe, we don't flush buffer contents,
585 		 * since the contents can't be recovered.
586 		 */
587 		ch_fsize = NULL_POSITION;
588 		return;
589 	}
590 
591 	/*
592 	 * Initialize all the buffers.
593 	 */
594 	for (bp = ch_bufhead;  bp != END_OF_CHAIN;  bp = bp->next)
595 		bp->block = -1;
596 
597 	/*
598 	 * Figure out the size of the file, if we can.
599 	 */
600 	ch_fsize = filesize(ch_file);
601 
602 	/*
603 	 * Seek to a known position: the beginning of the file.
604 	 */
605 	ch_fpos = 0;
606 	ch_block = 0; /* ch_fpos / LBUFSIZE; */
607 	ch_offset = 0; /* ch_fpos % LBUFSIZE; */
608 
609 #if 1
610 	/*
611 	 * This is a kludge to workaround a Linux kernel bug: files in
612 	 * /proc have a size of 0 according to fstat() but have readable
613 	 * data.  They are sometimes, but not always, seekable.
614 	 * Force them to be non-seekable here.
615 	 */
616 	if (ch_fsize == 0)
617 	{
618 		ch_fsize = NULL_POSITION;
619 		ch_flags &= ~CH_CANSEEK;
620 	}
621 #endif
622 
623 	if (lseek(ch_file, (off_t)0, SEEK_SET) == BAD_LSEEK)
624 	{
625 		/*
626 		 * Warning only; even if the seek fails for some reason,
627 		 * there's a good chance we're at the beginning anyway.
628 		 * {{ I think this is bogus reasoning. }}
629 		 */
630 		error("seek error to 0", NULL_PARG);
631 	}
632 }
633 
634 /*
635  * Allocate a new buffer.
636  * The buffer is added to the tail of the buffer chain.
637  */
638 	static int
639 ch_addbuf()
640 {
641 	register struct buf *bp;
642 
643 	/*
644 	 * Allocate and initialize a new buffer and link it
645 	 * onto the tail of the buffer list.
646 	 */
647 	bp = (struct buf *) calloc(1, sizeof(struct buf));
648 	if (bp == NULL)
649 		return (1);
650 	ch_nbufs++;
651 	bp->block = -1;
652 	bp->next = END_OF_CHAIN;
653 	bp->prev = ch_buftail;
654 	ch_buftail->next = bp;
655 	ch_buftail = bp;
656 	HASH_INS(bp, 0);
657 	return (0);
658 }
659 
660 /*
661  *
662  */
663 	static void
664 init_hashtbl()
665 {
666 	register int h;
667 
668 	for (h = 0;  h < BUFHASH_SIZE;  h++)
669 	{
670 		thisfile->hashtbl[h].buf_hnext = END_OF_HCHAIN(h);
671 		thisfile->hashtbl[h].buf_hprev = END_OF_HCHAIN(h);
672 	}
673 }
674 
675 /*
676  * Delete all buffers for this file.
677  */
678 	static void
679 ch_delbufs()
680 {
681 	register struct buf *bp;
682 
683 	while (ch_bufhead != END_OF_CHAIN)
684 	{
685 		bp = ch_bufhead;
686 		bp->next->prev = bp->prev;
687 		bp->prev->next = bp->next;
688 		free(bp);
689 	}
690 	ch_nbufs = 0;
691 	init_hashtbl();
692 }
693 
694 /*
695  * Is it possible to seek on a file descriptor?
696  */
697 	public int
698 seekable(f)
699 	int f;
700 {
701 #if MSDOS_COMPILER
702 	extern int fd0;
703 	if (f == fd0 && !isatty(fd0))
704 	{
705 		/*
706 		 * In MS-DOS, pipes are seekable.  Check for
707 		 * standard input, and pretend it is not seekable.
708 		 */
709 		return (0);
710 	}
711 #endif
712 	return (lseek(f, (off_t)1, SEEK_SET) != BAD_LSEEK);
713 }
714 
715 /*
716  * Initialize file state for a new file.
717  */
718 	public void
719 ch_init(f, flags)
720 	int f;
721 	int flags;
722 {
723 	/*
724 	 * See if we already have a filestate for this file.
725 	 */
726 	thisfile = (struct filestate *) get_filestate(curr_ifile);
727 	if (thisfile == NULL)
728 	{
729 		/*
730 		 * Allocate and initialize a new filestate.
731 		 */
732 		thisfile = (struct filestate *)
733 				calloc(1, sizeof(struct filestate));
734 		thisfile->buf_next = thisfile->buf_prev = END_OF_CHAIN;
735 		thisfile->nbufs = 0;
736 		thisfile->flags = 0;
737 		thisfile->fpos = 0;
738 		thisfile->block = 0;
739 		thisfile->offset = 0;
740 		thisfile->file = -1;
741 		thisfile->fsize = NULL_POSITION;
742 		ch_flags = flags;
743 		init_hashtbl();
744 		/*
745 		 * Try to seek; set CH_CANSEEK if it works.
746 		 */
747 		if ((flags & CH_CANSEEK) && !seekable(f))
748 			ch_flags &= ~CH_CANSEEK;
749 		set_filestate(curr_ifile, (void *) thisfile);
750 	}
751 	if (thisfile->file == -1)
752 		thisfile->file = f;
753 	ch_flush();
754 }
755 
756 /*
757  * Close a filestate.
758  */
759 	public void
760 ch_close()
761 {
762 	int keepstate = FALSE;
763 
764 	if (ch_flags & (CH_CANSEEK|CH_POPENED))
765 	{
766 		/*
767 		 * We can seek or re-open, so we don't need to keep buffers.
768 		 */
769 		ch_delbufs();
770 	} else
771 		keepstate = TRUE;
772 	if (!(ch_flags & CH_KEEPOPEN))
773 	{
774 		/*
775 		 * We don't need to keep the file descriptor open
776 		 * (because we can re-open it.)
777 		 * But don't really close it if it was opened via popen(),
778 		 * because pclose() wants to close it.
779 		 */
780 		if (!(ch_flags & CH_POPENED))
781 			close(ch_file);
782 		ch_file = -1;
783 	} else
784 		keepstate = TRUE;
785 	if (!keepstate)
786 	{
787 		/*
788 		 * We don't even need to keep the filestate structure.
789 		 */
790 		free(thisfile);
791 		thisfile = NULL;
792 		set_filestate(curr_ifile, (void *) NULL);
793 	}
794 }
795 
796 /*
797  * Return ch_flags for the current file.
798  */
799 	public int
800 ch_getflags()
801 {
802 	return (ch_flags);
803 }
804 
#if 0
/*
 * Debugging aid (compiled out): print a filestate and a short preview
 * of each of its buffers to standard output.
 * Values are cast to match their printf conversions, and the preview
 * shows at most 30 bytes, never more than the buffer actually holds.
 */
	public void
ch_dump(struct filestate *fs)
{
	struct buf *bp;
	unsigned char *s;
	unsigned char *end;

	if (fs == NULL)
	{
		printf(" --no filestate\n");
		return;
	}
	printf(" file %d, flags %x, fpos %lx, fsize %lx, blk/off %lx/%x\n",
		fs->file, (unsigned int) fs->flags,
		(unsigned long) fs->fpos, (unsigned long) fs->fsize,
		(unsigned long) fs->block, fs->offset);
	printf(" %d bufs:\n", fs->nbufs);
	/*
	 * The filestate itself, viewed as a struct buf, ends the chain.
	 */
	for (bp = fs->buf_next; bp != (struct buf *)fs;  bp = bp->next)
	{
		printf("%p: blk %lx, size %x \"",
			(void *) bp, (unsigned long) bp->block, bp->datasize);
		end = bp->data + (bp->datasize < 30 ? bp->datasize : 30);
		for (s = bp->data;  s < end;  s++)
			if (*s >= ' ' && *s < 0x7F)
				printf("%c", *s);
			else
				printf(".");
		printf("\"\n");
	}
}
#endif
834