135209Sbostic /*
235209Sbostic * Copyright (c) 1988 Mark Nudleman
3*62131Sbostic * Copyright (c) 1988, 1993
4*62131Sbostic * The Regents of the University of California. All rights reserved.
535209Sbostic *
642742Sbostic * %sccs.include.redist.c%
735209Sbostic */
835209Sbostic
935209Sbostic #ifndef lint
10*62131Sbostic static char sccsid[] = "@(#)ch.c 8.1 (Berkeley) 06/06/93";
1135209Sbostic #endif /* not lint */
1235209Sbostic
1335209Sbostic /*
1435209Sbostic * Low level character input from the input file.
1535209Sbostic * We use these special purpose routines which optimize moving
1635209Sbostic * both forward and backward from the current read pointer.
1735209Sbostic */
1835209Sbostic
1936251Sbostic #include <sys/types.h>
2036251Sbostic #include <sys/file.h>
2154196Sbostic #include <unistd.h>
2236251Sbostic #include <stdio.h>
2336251Sbostic #include <less.h>
2435209Sbostic
2536251Sbostic int file = -1; /* File descriptor of the input file */
2635209Sbostic
2735209Sbostic /*
2835209Sbostic * Pool of buffers holding the most recently used blocks of the input file.
2935209Sbostic */
3035209Sbostic struct buf {
3135209Sbostic struct buf *next, *prev;
3235209Sbostic long block;
3335209Sbostic int datasize;
3435209Sbostic char data[BUFSIZ];
3535209Sbostic };
3636251Sbostic int nbufs;
3735209Sbostic
3835209Sbostic /*
3936251Sbostic * The buffer pool is kept as a doubly-linked circular list, in order from
4036251Sbostic * most- to least-recently used. The circular list is anchored by buf_anchor.
4135209Sbostic */
4235209Sbostic #define END_OF_CHAIN ((struct buf *)&buf_anchor)
4335209Sbostic #define buf_head buf_anchor.next
4435209Sbostic #define buf_tail buf_anchor.prev
4535209Sbostic
4635209Sbostic static struct {
4735209Sbostic struct buf *next, *prev;
4835209Sbostic } buf_anchor = { END_OF_CHAIN, END_OF_CHAIN };
4935209Sbostic
5036251Sbostic extern int ispipe, cbufs, sigs;
5135209Sbostic
5235209Sbostic /*
5335209Sbostic * Current position in file.
5435209Sbostic * Stored as a block number and an offset into the block.
5535209Sbostic */
5635209Sbostic static long ch_block;
5735209Sbostic static int ch_offset;
5835209Sbostic
5936251Sbostic /* Length of file, needed if input is a pipe. */
6036251Sbostic static off_t ch_fsize;
6135209Sbostic
6236251Sbostic /* Number of bytes read, if input is standard input (a pipe). */
6336251Sbostic static off_t last_piped_pos;
6436251Sbostic
6535209Sbostic /*
6636251Sbostic * Get the character pointed to by the read pointer. ch_get() is a macro
6736251Sbostic * which is more efficient to call than fch_get (the function), in the usual
6836251Sbostic * case that the block desired is at the head of the chain.
6935209Sbostic */
7036251Sbostic #define ch_get() \
7136251Sbostic ((buf_head->block == ch_block && \
7236251Sbostic ch_offset < buf_head->datasize) ? \
7336251Sbostic buf_head->data[ch_offset] : fch_get())
7435209Sbostic
7536251Sbostic static
fch_get()7635209Sbostic fch_get()
7735209Sbostic {
7836262Sbostic extern int bs_mode;
7935209Sbostic register struct buf *bp;
8036262Sbostic register int n, ch;
8136262Sbostic register char *p, *t;
8236251Sbostic off_t pos, lseek();
8335209Sbostic
8436251Sbostic /* look for a buffer holding the desired block. */
8535209Sbostic for (bp = buf_head; bp != END_OF_CHAIN; bp = bp->next)
8636251Sbostic if (bp->block == ch_block) {
8735209Sbostic if (ch_offset >= bp->datasize)
8835209Sbostic /*
8935209Sbostic * Need more data in this buffer.
9035209Sbostic */
9135209Sbostic goto read_more;
9235209Sbostic /*
9335209Sbostic * On a pipe, we don't sort the buffers LRU
9435209Sbostic * because this can cause gaps in the buffers.
9535209Sbostic * For example, suppose we've got 12 1K buffers,
9635209Sbostic * and a 15K input stream. If we read the first 12K
9735209Sbostic * sequentially, then jump to line 1, then jump to
9835209Sbostic * the end, the buffers have blocks 0,4,5,6,..,14.
9935209Sbostic * If we then jump to line 1 again and try to
10035209Sbostic * read sequentially, we're out of luck when we
10135209Sbostic * get to block 1 (we'd get the "pipe error" below).
10235209Sbostic * To avoid this, we only sort buffers on a pipe
10335209Sbostic * when we actually READ the data, not when we
10435209Sbostic * find it already buffered.
10535209Sbostic */
10635209Sbostic if (ispipe)
10736251Sbostic return(bp->data[ch_offset]);
10835209Sbostic goto found;
10935209Sbostic }
11035209Sbostic /*
11136251Sbostic * Block is not in a buffer. Take the least recently used buffer
11236251Sbostic * and read the desired block into it. If the LRU buffer has data
11336251Sbostic * in it, and input is a pipe, then try to allocate a new buffer first.
11435209Sbostic */
11536251Sbostic if (ispipe && buf_tail->block != (long)(-1))
11636251Sbostic (void)ch_addbuf(1);
11735209Sbostic bp = buf_tail;
11835209Sbostic bp->block = ch_block;
11935209Sbostic bp->datasize = 0;
12035209Sbostic
12136251Sbostic read_more:
12235209Sbostic pos = (ch_block * BUFSIZ) + bp->datasize;
12336251Sbostic if (ispipe) {
12435209Sbostic /*
12535209Sbostic * The data requested should be immediately after
12635209Sbostic * the last data read from the pipe.
12735209Sbostic */
12836251Sbostic if (pos != last_piped_pos) {
12935209Sbostic error("pipe error");
13035209Sbostic quit();
13135209Sbostic }
13235209Sbostic } else
13335232Sbostic (void)lseek(file, pos, L_SET);
13435209Sbostic
13535209Sbostic /*
13635209Sbostic * Read the block.
13735209Sbostic * If we read less than a full block, we just return the
13835209Sbostic * partial block and pick up the rest next time.
13935209Sbostic */
14035209Sbostic n = iread(file, &bp->data[bp->datasize], BUFSIZ - bp->datasize);
14135209Sbostic if (n == READ_INTR)
14235209Sbostic return (EOI);
14336251Sbostic if (n < 0) {
14435209Sbostic error("read error");
14535209Sbostic quit();
14635209Sbostic }
14735209Sbostic if (ispipe)
14835209Sbostic last_piped_pos += n;
14935209Sbostic
15036262Sbostic p = &bp->data[bp->datasize];
15135209Sbostic bp->datasize += n;
15235209Sbostic
15335209Sbostic /*
15436251Sbostic * Set an EOI marker in the buffered data itself. Then ensure the
15536251Sbostic * data is "clean": there are no extra EOI chars in the data and
15636262Sbostic * that the "meta" bit (the 0200 bit) is reset in each char;
15736262Sbostic * also translate \r\n sequences to \n if -u flag not set.
15835209Sbostic */
15936251Sbostic if (n == 0) {
16035209Sbostic ch_fsize = pos;
16135209Sbostic bp->data[bp->datasize++] = EOI;
16235209Sbostic }
16335209Sbostic
16436262Sbostic if (bs_mode) {
16536262Sbostic for (p = &bp->data[bp->datasize]; --n >= 0;) {
16636262Sbostic *--p &= 0177;
16736262Sbostic if (*p == EOI)
16836262Sbostic *p = 0200;
16936262Sbostic }
17035209Sbostic }
17136262Sbostic else {
17236262Sbostic for (t = p; --n >= 0; ++p) {
17336262Sbostic ch = *p & 0177;
17436262Sbostic if (ch == '\r' && n && (p[1] & 0177) == '\n') {
17536262Sbostic ++p;
17636262Sbostic *t++ = '\n';
17736262Sbostic }
17836262Sbostic else
17936262Sbostic *t++ = (ch == EOI) ? 0200 : ch;
18036262Sbostic }
18136262Sbostic if (p != t) {
18236262Sbostic bp->datasize -= p - t;
18336262Sbostic if (ispipe)
18436262Sbostic last_piped_pos -= p - t;
18536262Sbostic }
18636262Sbostic }
18735209Sbostic
18836251Sbostic found:
18936251Sbostic if (buf_head != bp) {
19035209Sbostic /*
19135209Sbostic * Move the buffer to the head of the buffer chain.
19235209Sbostic * This orders the buffer chain, most- to least-recently used.
19335209Sbostic */
19435209Sbostic bp->next->prev = bp->prev;
19535209Sbostic bp->prev->next = bp->next;
19635209Sbostic
19735209Sbostic bp->next = buf_head;
19835209Sbostic bp->prev = END_OF_CHAIN;
19935209Sbostic buf_head->prev = bp;
20035209Sbostic buf_head = bp;
20135209Sbostic }
20235209Sbostic
20335209Sbostic if (ch_offset >= bp->datasize)
20435209Sbostic /*
20535209Sbostic * After all that, we still don't have enough data.
20635209Sbostic * Go back and try again.
20735209Sbostic */
20835209Sbostic goto read_more;
20935209Sbostic
21036251Sbostic return(bp->data[ch_offset]);
21135209Sbostic }
21235209Sbostic
21335209Sbostic /*
21435209Sbostic * Determine if a specific block is currently in one of the buffers.
21535209Sbostic */
21636251Sbostic static
buffered(block)21735209Sbostic buffered(block)
21835209Sbostic long block;
21935209Sbostic {
22035209Sbostic register struct buf *bp;
22135209Sbostic
22236251Sbostic for (bp = buf_head; bp != END_OF_CHAIN; bp = bp->next)
22335209Sbostic if (bp->block == block)
22436251Sbostic return(1);
22536251Sbostic return(0);
22635209Sbostic }
22735209Sbostic
22835209Sbostic /*
22935209Sbostic * Seek to a specified position in the file.
23035209Sbostic * Return 0 if successful, non-zero if can't seek there.
23135209Sbostic */
ch_seek(pos)23235209Sbostic ch_seek(pos)
23336251Sbostic register off_t pos;
23435209Sbostic {
23535209Sbostic long new_block;
23635209Sbostic
23735209Sbostic new_block = pos / BUFSIZ;
23836251Sbostic if (!ispipe || pos == last_piped_pos || buffered(new_block)) {
23935209Sbostic /*
24035209Sbostic * Set read pointer.
24135209Sbostic */
24235209Sbostic ch_block = new_block;
24335209Sbostic ch_offset = pos % BUFSIZ;
24436251Sbostic return(0);
24535209Sbostic }
24636251Sbostic return(1);
24735209Sbostic }
24835209Sbostic
24935209Sbostic /*
25035209Sbostic * Seek to the end of the file.
25135209Sbostic */
ch_end_seek()25235209Sbostic ch_end_seek()
25335209Sbostic {
25436251Sbostic off_t ch_length();
25536251Sbostic
25635209Sbostic if (!ispipe)
25736251Sbostic return(ch_seek(ch_length()));
25835209Sbostic
25935209Sbostic /*
26035209Sbostic * Do it the slow way: read till end of data.
26135209Sbostic */
26235209Sbostic while (ch_forw_get() != EOI)
26335209Sbostic if (sigs)
26436251Sbostic return(1);
26536251Sbostic return(0);
26635209Sbostic }
26735209Sbostic
26835209Sbostic /*
26935209Sbostic * Seek to the beginning of the file, or as close to it as we can get.
27035209Sbostic * We may not be able to seek there if input is a pipe and the
27135209Sbostic * beginning of the pipe is no longer buffered.
27235209Sbostic */
ch_beg_seek()27335209Sbostic ch_beg_seek()
27435209Sbostic {
27535209Sbostic register struct buf *bp, *firstbp;
27635209Sbostic
27735209Sbostic /*
27835209Sbostic * Try a plain ch_seek first.
27935209Sbostic */
28036251Sbostic if (ch_seek((off_t)0) == 0)
28136251Sbostic return(0);
28235209Sbostic
28335209Sbostic /*
28435209Sbostic * Can't get to position 0.
28535209Sbostic * Look thru the buffers for the one closest to position 0.
28635209Sbostic */
28735209Sbostic firstbp = bp = buf_head;
28835209Sbostic if (bp == END_OF_CHAIN)
28936251Sbostic return(1);
29035209Sbostic while ((bp = bp->next) != END_OF_CHAIN)
29135209Sbostic if (bp->block < firstbp->block)
29235209Sbostic firstbp = bp;
29335209Sbostic ch_block = firstbp->block;
29435209Sbostic ch_offset = 0;
29536251Sbostic return(0);
29635209Sbostic }
29735209Sbostic
29835209Sbostic /*
29935209Sbostic * Return the length of the file, if known.
30035209Sbostic */
30136251Sbostic off_t
ch_length()30235209Sbostic ch_length()
30335209Sbostic {
30436251Sbostic off_t lseek();
30536251Sbostic
30635209Sbostic if (ispipe)
30736251Sbostic return(ch_fsize);
30836251Sbostic return((off_t)(lseek(file, (off_t)0, L_XTND)));
30935209Sbostic }
31035209Sbostic
31135209Sbostic /*
31235209Sbostic * Return the current position in the file.
31335209Sbostic */
31436251Sbostic off_t
ch_tell()31535209Sbostic ch_tell()
31635209Sbostic {
31736251Sbostic return(ch_block * BUFSIZ + ch_offset);
31835209Sbostic }
31935209Sbostic
32035209Sbostic /*
32135209Sbostic * Get the current char and post-increment the read pointer.
32235209Sbostic */
ch_forw_get()32335209Sbostic ch_forw_get()
32435209Sbostic {
32535209Sbostic register int c;
32635209Sbostic
32735209Sbostic c = ch_get();
32836251Sbostic if (c != EOI && ++ch_offset >= BUFSIZ) {
32935209Sbostic ch_offset = 0;
33036251Sbostic ++ch_block;
33135209Sbostic }
33236251Sbostic return(c);
33335209Sbostic }
33435209Sbostic
33535209Sbostic /*
33635209Sbostic * Pre-decrement the read pointer and get the new current char.
33735209Sbostic */
ch_back_get()33835209Sbostic ch_back_get()
33935209Sbostic {
34036251Sbostic if (--ch_offset < 0) {
34136251Sbostic if (ch_block <= 0 || (ispipe && !buffered(ch_block-1))) {
34235209Sbostic ch_offset = 0;
34336251Sbostic return(EOI);
34435209Sbostic }
34535209Sbostic ch_offset = BUFSIZ - 1;
34635209Sbostic ch_block--;
34735209Sbostic }
34836251Sbostic return(ch_get());
34935209Sbostic }
35035209Sbostic
35135209Sbostic /*
35235209Sbostic * Allocate buffers.
35335209Sbostic * Caller wants us to have a total of at least want_nbufs buffers.
35435209Sbostic * keep==1 means keep the data in the current buffers;
35535209Sbostic * otherwise discard the old data.
35635209Sbostic */
ch_init(want_nbufs,keep)35735209Sbostic ch_init(want_nbufs, keep)
35835209Sbostic int want_nbufs;
35935209Sbostic int keep;
36035209Sbostic {
36135209Sbostic register struct buf *bp;
36235209Sbostic char message[80];
36335209Sbostic
36435209Sbostic cbufs = nbufs;
36536251Sbostic if (nbufs < want_nbufs && ch_addbuf(want_nbufs - nbufs)) {
36635209Sbostic /*
36735209Sbostic * Cannot allocate enough buffers.
36835209Sbostic * If we don't have ANY, then quit.
36935209Sbostic * Otherwise, just report the error and return.
37035209Sbostic */
37135232Sbostic (void)sprintf(message, "cannot allocate %d buffers",
37236251Sbostic want_nbufs - nbufs);
37335209Sbostic error(message);
37435209Sbostic if (nbufs == 0)
37535209Sbostic quit();
37635209Sbostic return;
37735209Sbostic }
37835209Sbostic
37935209Sbostic if (keep)
38035209Sbostic return;
38135209Sbostic
38235209Sbostic /*
38335209Sbostic * We don't want to keep the old data,
38435209Sbostic * so initialize all the buffers now.
38535209Sbostic */
38635209Sbostic for (bp = buf_head; bp != END_OF_CHAIN; bp = bp->next)
38735209Sbostic bp->block = (long)(-1);
38836251Sbostic last_piped_pos = (off_t)0;
38935209Sbostic ch_fsize = NULL_POSITION;
39036251Sbostic (void)ch_seek((off_t)0);
39135209Sbostic }
39235209Sbostic
39335209Sbostic /*
39435209Sbostic * Allocate some new buffers.
39535209Sbostic * The buffers are added to the tail of the buffer chain.
39635209Sbostic */
ch_addbuf(nnew)39735209Sbostic ch_addbuf(nnew)
39835209Sbostic int nnew;
39935209Sbostic {
40035209Sbostic register struct buf *bp;
40135209Sbostic register struct buf *newbufs;
40236251Sbostic char *calloc();
40335209Sbostic
40435209Sbostic /*
40535209Sbostic * We don't have enough buffers.
40635209Sbostic * Allocate some new ones.
40735209Sbostic */
40836251Sbostic newbufs = (struct buf *)calloc((u_int)nnew, sizeof(struct buf));
40935209Sbostic if (newbufs == NULL)
41036251Sbostic return(1);
41135209Sbostic
41235209Sbostic /*
41335209Sbostic * Initialize the new buffers and link them together.
41435209Sbostic * Link them all onto the tail of the buffer list.
41535209Sbostic */
41635209Sbostic nbufs += nnew;
41735209Sbostic cbufs = nbufs;
41836251Sbostic for (bp = &newbufs[0]; bp < &newbufs[nnew]; bp++) {
41935209Sbostic bp->next = bp + 1;
42035209Sbostic bp->prev = bp - 1;
42135209Sbostic bp->block = (long)(-1);
42235209Sbostic }
42335209Sbostic newbufs[nnew-1].next = END_OF_CHAIN;
42435209Sbostic newbufs[0].prev = buf_tail;
42535209Sbostic buf_tail->next = &newbufs[0];
42635209Sbostic buf_tail = &newbufs[nnew-1];
42736251Sbostic return(0);
42835209Sbostic }
429