1 /* 2 * Copyright (C) 1984-2002 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information about less, or for information on how to 8 * contact the author, see the README file. 9 */ 10 11 12 /* 13 * Low level character input from the input file. 14 * We use these special purpose routines which optimize moving 15 * both forward and backward from the current read pointer. 16 */ 17 18 #include "less.h" 19 #if MSDOS_COMPILER==WIN32C 20 #include <errno.h> 21 #include <windows.h> 22 #endif 23 24 typedef POSITION BLOCKNUM; 25 26 public int ignore_eoi; 27 28 /* 29 * Pool of buffers holding the most recently used blocks of the input file. 30 * The buffer pool is kept as a doubly-linked circular list, 31 * in order from most- to least-recently used. 32 * The circular list is anchored by the file state "thisfile". 33 */ 34 #define LBUFSIZE 8192 35 struct buf { 36 struct buf *next, *prev; 37 struct buf *hnext, *hprev; 38 BLOCKNUM block; 39 unsigned int datasize; 40 unsigned char data[LBUFSIZE]; 41 }; 42 43 struct buflist { 44 /* -- Following members must match struct buf */ 45 struct buf *buf_next, *buf_prev; 46 struct buf *buf_hnext, *buf_hprev; 47 }; 48 49 /* 50 * The file state is maintained in a filestate structure. 51 * A pointer to the filestate is kept in the ifile structure. 52 */ 53 #define BUFHASH_SIZE 64 54 struct filestate { 55 struct buf *buf_next, *buf_prev; 56 struct buflist hashtbl[BUFHASH_SIZE]; 57 int file; 58 int flags; 59 POSITION fpos; 60 int nbufs; 61 BLOCKNUM block; 62 unsigned int offset; 63 POSITION fsize; 64 }; 65 66 #define ch_bufhead thisfile->buf_next 67 #define ch_buftail thisfile->buf_prev 68 #define ch_nbufs thisfile->nbufs 69 #define ch_block thisfile->block 70 #define ch_offset thisfile->offset 71 #define ch_fpos thisfile->fpos 72 #define ch_fsize thisfile->fsize 73 #define ch_flags thisfile->flags 74 #define ch_file thisfile->file 75 76 #define END_OF_CHAIN ((struct buf *)&thisfile->buf_next) 77 #define END_OF_HCHAIN(h) ((struct buf *)&thisfile->hashtbl[h]) 78 #define BUFHASH(blk) ((blk) & (BUFHASH_SIZE-1)) 79 80 #define FOR_BUFS_IN_CHAIN(h,bp) \ 81 for (bp = thisfile->hashtbl[h].buf_hnext; \ 82 bp != END_OF_HCHAIN(h); bp = bp->hnext) 83 84 #define HASH_RM(bp) \ 85 (bp)->hnext->hprev = (bp)->hprev; \ 86 (bp)->hprev->hnext = (bp)->hnext; 87 88 #define HASH_INS(bp,h) \ 89 (bp)->hnext = thisfile->hashtbl[h].buf_hnext; \ 90 (bp)->hprev = END_OF_HCHAIN(h); \ 91 thisfile->hashtbl[h].buf_hnext->hprev = (bp); \ 92 thisfile->hashtbl[h].buf_hnext = (bp); 93 94 static struct filestate *thisfile; 95 static int ch_ungotchar = -1; 96 static int maxbufs = -1; 97 98 extern int autobuf; 99 extern int sigs; 100 extern int secure; 101 extern IFILE curr_ifile; 102 #if LOGFILE 103 extern int logfile; 104 extern char *namelogfile; 105 #endif 106 107 static int ch_addbuf(); 108 109 110 /* 111 * Get the character pointed to by the read pointer. 112 * ch_get() is a macro which is more efficient to call 113 * than fch_get (the function), in the usual case 114 * that the block desired is at the head of the chain. 115 */ 116 #define ch_get() ((ch_block == ch_bufhead->block && \ 117 ch_offset < ch_bufhead->datasize) ? \ 118 ch_bufhead->data[ch_offset] : fch_get()) 119 int 120 fch_get() 121 { 122 register struct buf *bp; 123 register int n; 124 register int slept; 125 register int h; 126 POSITION pos; 127 POSITION len; 128 129 slept = FALSE; 130 131 /* 132 * Look for a buffer holding the desired block. 133 */ 134 h = BUFHASH(ch_block); 135 FOR_BUFS_IN_CHAIN(h, bp) 136 { 137 if (bp->block == ch_block) 138 { 139 if (ch_offset >= bp->datasize) 140 /* 141 * Need more data in this buffer. 142 */ 143 goto read_more; 144 goto found; 145 } 146 } 147 /* 148 * Block is not in a buffer. 149 * Take the least recently used buffer 150 * and read the desired block into it. 151 * If the LRU buffer has data in it, 152 * then maybe allocate a new buffer. 153 */ 154 if (ch_buftail == END_OF_CHAIN || ch_buftail->block != -1) 155 { 156 /* 157 * There is no empty buffer to use. 158 * Allocate a new buffer if: 159 * 1. We can't seek on this file and -b is not in effect; or 160 * 2. We haven't allocated the max buffers for this file yet. 161 */ 162 if ((autobuf && !(ch_flags & CH_CANSEEK)) || 163 (maxbufs < 0 || ch_nbufs < maxbufs)) 164 if (ch_addbuf()) 165 /* 166 * Allocation failed: turn off autobuf. 167 */ 168 autobuf = OPT_OFF; 169 } 170 bp = ch_buftail; 171 HASH_RM(bp); /* Remove from old hash chain. */ 172 bp->block = ch_block; 173 bp->datasize = 0; 174 HASH_INS(bp, h); /* Insert into new hash chain. */ 175 176 read_more: 177 pos = (ch_block * LBUFSIZE) + bp->datasize; 178 if ((len = ch_length()) != NULL_POSITION && pos >= len) 179 /* 180 * At end of file. 181 */ 182 return (EOI); 183 184 if (pos != ch_fpos) 185 { 186 /* 187 * Not at the correct position: must seek. 188 * If input is a pipe, we're in trouble (can't seek on a pipe). 189 * Some data has been lost: just return "?". 190 */ 191 if (!(ch_flags & CH_CANSEEK)) 192 return ('?'); 193 if (lseek(ch_file, (off_t)pos, SEEK_SET) == BAD_LSEEK) 194 { 195 error("seek error", NULL_PARG); 196 clear_eol(); 197 return (EOI); 198 } 199 ch_fpos = pos; 200 } 201 202 /* 203 * Read the block. 204 * If we read less than a full block, that's ok. 205 * We use partial block and pick up the rest next time. 206 */ 207 if (ch_ungotchar != -1) 208 { 209 bp->data[bp->datasize] = ch_ungotchar; 210 n = 1; 211 ch_ungotchar = -1; 212 } else 213 { 214 n = iread(ch_file, &bp->data[bp->datasize], 215 (unsigned int)(LBUFSIZE - bp->datasize)); 216 } 217 218 if (n == READ_INTR) 219 return (EOI); 220 if (n < 0) 221 { 222 #if MSDOS_COMPILER==WIN32C 223 if (errno != EPIPE) 224 #endif 225 { 226 error("read error", NULL_PARG); 227 clear_eol(); 228 } 229 n = 0; 230 } 231 232 #if LOGFILE 233 /* 234 * If we have a log file, write the new data to it. 235 */ 236 if (!secure && logfile >= 0 && n > 0) 237 write(logfile, (char *) &bp->data[bp->datasize], n); 238 #endif 239 240 ch_fpos += n; 241 bp->datasize += n; 242 243 /* 244 * If we have read to end of file, set ch_fsize to indicate 245 * the position of the end of file. 246 */ 247 if (n == 0) 248 { 249 ch_fsize = pos; 250 if (ignore_eoi) 251 { 252 /* 253 * We are ignoring EOF. 254 * Wait a while, then try again. 255 */ 256 if (!slept) 257 { 258 PARG parg; 259 parg.p_string = wait_message(); 260 ierror("%s", &parg); 261 } 262 #if !MSDOS_COMPILER 263 sleep(1); 264 #else 265 #if MSDOS_COMPILER==WIN32C 266 Sleep(1000); 267 #endif 268 #endif 269 slept = TRUE; 270 } 271 if (sigs) 272 return (EOI); 273 } 274 275 found: 276 if (ch_bufhead != bp) 277 { 278 /* 279 * Move the buffer to the head of the buffer chain. 280 * This orders the buffer chain, most- to least-recently used. 281 */ 282 bp->next->prev = bp->prev; 283 bp->prev->next = bp->next; 284 bp->next = ch_bufhead; 285 bp->prev = END_OF_CHAIN; 286 ch_bufhead->prev = bp; 287 ch_bufhead = bp; 288 289 /* 290 * Move to head of hash chain too. 291 */ 292 HASH_RM(bp); 293 HASH_INS(bp, h); 294 } 295 296 if (ch_offset >= bp->datasize) 297 /* 298 * After all that, we still don't have enough data. 299 * Go back and try again. 300 */ 301 goto read_more; 302 303 return (bp->data[ch_offset]); 304 } 305 306 /* 307 * ch_ungetchar is a rather kludgy and limited way to push 308 * a single char onto an input file descriptor. 309 */ 310 public void 311 ch_ungetchar(c) 312 int c; 313 { 314 if (c != -1 && ch_ungotchar != -1) 315 error("ch_ungetchar overrun", NULL_PARG); 316 ch_ungotchar = c; 317 } 318 319 #if LOGFILE 320 /* 321 * Close the logfile. 322 * If we haven't read all of standard input into it, do that now. 323 */ 324 public void 325 end_logfile() 326 { 327 static int tried = FALSE; 328 329 if (logfile < 0) 330 return; 331 if (!tried && ch_fsize == NULL_POSITION) 332 { 333 tried = TRUE; 334 ierror("Finishing logfile", NULL_PARG); 335 while (ch_forw_get() != EOI) 336 if (ABORT_SIGS()) 337 break; 338 } 339 close(logfile); 340 logfile = -1; 341 namelogfile = NULL; 342 } 343 344 /* 345 * Start a log file AFTER less has already been running. 346 * Invoked from the - command; see toggle_option(). 347 * Write all the existing buffered data to the log file. 348 */ 349 public void 350 sync_logfile() 351 { 352 register struct buf *bp; 353 int warned = FALSE; 354 BLOCKNUM block; 355 BLOCKNUM nblocks; 356 357 nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE; 358 for (block = 0; block < nblocks; block++) 359 { 360 for (bp = ch_bufhead; ; bp = bp->next) 361 { 362 if (bp == END_OF_CHAIN) 363 { 364 if (!warned) 365 { 366 error("Warning: log file is incomplete", 367 NULL_PARG); 368 warned = TRUE; 369 } 370 break; 371 } 372 if (bp->block == block) 373 { 374 write(logfile, (char *) bp->data, bp->datasize); 375 break; 376 } 377 } 378 } 379 } 380 381 #endif 382 383 /* 384 * Determine if a specific block is currently in one of the buffers. 385 */ 386 static int 387 buffered(block) 388 BLOCKNUM block; 389 { 390 register struct buf *bp; 391 register int h; 392 393 h = BUFHASH(block); 394 FOR_BUFS_IN_CHAIN(h, bp) 395 { 396 if (bp->block == block) 397 return (TRUE); 398 } 399 return (FALSE); 400 } 401 402 /* 403 * Seek to a specified position in the file. 404 * Return 0 if successful, non-zero if can't seek there. 405 */ 406 public int 407 ch_seek(pos) 408 register POSITION pos; 409 { 410 BLOCKNUM new_block; 411 POSITION len; 412 413 len = ch_length(); 414 if (pos < ch_zero() || (len != NULL_POSITION && pos > len)) 415 return (1); 416 417 new_block = pos / LBUFSIZE; 418 if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos && !buffered(new_block)) 419 { 420 if (ch_fpos > pos) 421 return (1); 422 while (ch_fpos < pos) 423 { 424 if (ch_forw_get() == EOI) 425 return (1); 426 if (ABORT_SIGS()) 427 return (1); 428 } 429 return (0); 430 } 431 /* 432 * Set read pointer. 433 */ 434 ch_block = new_block; 435 ch_offset = pos % LBUFSIZE; 436 return (0); 437 } 438 439 /* 440 * Seek to the end of the file. 441 */ 442 public int 443 ch_end_seek() 444 { 445 POSITION len; 446 447 if (ch_flags & CH_CANSEEK) 448 ch_fsize = filesize(ch_file); 449 450 len = ch_length(); 451 if (len != NULL_POSITION) 452 return (ch_seek(len)); 453 454 /* 455 * Do it the slow way: read till end of data. 456 */ 457 while (ch_forw_get() != EOI) 458 if (ABORT_SIGS()) 459 return (1); 460 return (0); 461 } 462 463 /* 464 * Seek to the beginning of the file, or as close to it as we can get. 465 * We may not be able to seek there if input is a pipe and the 466 * beginning of the pipe is no longer buffered. 467 */ 468 public int 469 ch_beg_seek() 470 { 471 register struct buf *bp, *firstbp; 472 473 /* 474 * Try a plain ch_seek first. 475 */ 476 if (ch_seek(ch_zero()) == 0) 477 return (0); 478 479 /* 480 * Can't get to position 0. 481 * Look thru the buffers for the one closest to position 0. 482 */ 483 firstbp = bp = ch_bufhead; 484 if (bp == END_OF_CHAIN) 485 return (1); 486 while ((bp = bp->next) != END_OF_CHAIN) 487 if (bp->block < firstbp->block) 488 firstbp = bp; 489 ch_block = firstbp->block; 490 ch_offset = 0; 491 return (0); 492 } 493 494 /* 495 * Return the length of the file, if known. 496 */ 497 public POSITION 498 ch_length() 499 { 500 if (ignore_eoi) 501 return (NULL_POSITION); 502 return (ch_fsize); 503 } 504 505 /* 506 * Return the current position in the file. 507 */ 508 public POSITION 509 ch_tell() 510 { 511 return (ch_block * LBUFSIZE) + ch_offset; 512 } 513 514 /* 515 * Get the current char and post-increment the read pointer. 516 */ 517 public int 518 ch_forw_get() 519 { 520 register int c; 521 522 c = ch_get(); 523 if (c == EOI) 524 return (EOI); 525 if (ch_offset < LBUFSIZE-1) 526 ch_offset++; 527 else 528 { 529 ch_block ++; 530 ch_offset = 0; 531 } 532 return (c); 533 } 534 535 /* 536 * Pre-decrement the read pointer and get the new current char. 537 */ 538 public int 539 ch_back_get() 540 { 541 if (ch_offset > 0) 542 ch_offset --; 543 else 544 { 545 if (ch_block <= 0) 546 return (EOI); 547 if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1)) 548 return (EOI); 549 ch_block--; 550 ch_offset = LBUFSIZE-1; 551 } 552 return (ch_get()); 553 } 554 555 /* 556 * Set max amount of buffer space. 557 * bufspace is in units of 1024 bytes. -1 mean no limit. 558 */ 559 public void 560 ch_setbufspace(bufspace) 561 int bufspace; 562 { 563 if (bufspace < 0) 564 maxbufs = -1; 565 else 566 { 567 maxbufs = ((bufspace * 1024) + LBUFSIZE-1) / LBUFSIZE; 568 if (maxbufs < 1) 569 maxbufs = 1; 570 } 571 } 572 573 /* 574 * Flush (discard) any saved file state, including buffer contents. 575 */ 576 public void 577 ch_flush() 578 { 579 register struct buf *bp; 580 581 if (!(ch_flags & CH_CANSEEK)) 582 { 583 /* 584 * If input is a pipe, we don't flush buffer contents, 585 * since the contents can't be recovered. 586 */ 587 ch_fsize = NULL_POSITION; 588 return; 589 } 590 591 /* 592 * Initialize all the buffers. 593 */ 594 for (bp = ch_bufhead; bp != END_OF_CHAIN; bp = bp->next) 595 bp->block = -1; 596 597 /* 598 * Figure out the size of the file, if we can. 599 */ 600 ch_fsize = filesize(ch_file); 601 602 /* 603 * Seek to a known position: the beginning of the file. 604 */ 605 ch_fpos = 0; 606 ch_block = 0; /* ch_fpos / LBUFSIZE; */ 607 ch_offset = 0; /* ch_fpos % LBUFSIZE; */ 608 609 #if 1 610 /* 611 * This is a kludge to workaround a Linux kernel bug: files in 612 * /proc have a size of 0 according to fstat() but have readable 613 * data. They are sometimes, but not always, seekable. 614 * Force them to be non-seekable here. 615 */ 616 if (ch_fsize == 0) 617 { 618 ch_fsize = NULL_POSITION; 619 ch_flags &= ~CH_CANSEEK; 620 } 621 #endif 622 623 if (lseek(ch_file, (off_t)0, SEEK_SET) == BAD_LSEEK) 624 { 625 /* 626 * Warning only; even if the seek fails for some reason, 627 * there's a good chance we're at the beginning anyway. 628 * {{ I think this is bogus reasoning. }} 629 */ 630 error("seek error to 0", NULL_PARG); 631 } 632 } 633 634 /* 635 * Allocate a new buffer. 636 * The buffer is added to the tail of the buffer chain. 637 */ 638 static int 639 ch_addbuf() 640 { 641 register struct buf *bp; 642 643 /* 644 * Allocate and initialize a new buffer and link it 645 * onto the tail of the buffer list. 646 */ 647 bp = (struct buf *) calloc(1, sizeof(struct buf)); 648 if (bp == NULL) 649 return (1); 650 ch_nbufs++; 651 bp->block = -1; 652 bp->next = END_OF_CHAIN; 653 bp->prev = ch_buftail; 654 ch_buftail->next = bp; 655 ch_buftail = bp; 656 HASH_INS(bp, 0); 657 return (0); 658 } 659 660 /* 661 * 662 */ 663 static void 664 init_hashtbl() 665 { 666 register int h; 667 668 for (h = 0; h < BUFHASH_SIZE; h++) 669 { 670 thisfile->hashtbl[h].buf_hnext = END_OF_HCHAIN(h); 671 thisfile->hashtbl[h].buf_hprev = END_OF_HCHAIN(h); 672 } 673 } 674 675 /* 676 * Delete all buffers for this file. 677 */ 678 static void 679 ch_delbufs() 680 { 681 register struct buf *bp; 682 683 while (ch_bufhead != END_OF_CHAIN) 684 { 685 bp = ch_bufhead; 686 bp->next->prev = bp->prev; 687 bp->prev->next = bp->next; 688 free(bp); 689 } 690 ch_nbufs = 0; 691 init_hashtbl(); 692 } 693 694 /* 695 * Is it possible to seek on a file descriptor? 696 */ 697 public int 698 seekable(f) 699 int f; 700 { 701 #if MSDOS_COMPILER 702 extern int fd0; 703 if (f == fd0 && !isatty(fd0)) 704 { 705 /* 706 * In MS-DOS, pipes are seekable. Check for 707 * standard input, and pretend it is not seekable. 708 */ 709 return (0); 710 } 711 #endif 712 return (lseek(f, (off_t)1, SEEK_SET) != BAD_LSEEK); 713 } 714 715 /* 716 * Initialize file state for a new file. 717 */ 718 public void 719 ch_init(f, flags) 720 int f; 721 int flags; 722 { 723 /* 724 * See if we already have a filestate for this file. 725 */ 726 thisfile = (struct filestate *) get_filestate(curr_ifile); 727 if (thisfile == NULL) 728 { 729 /* 730 * Allocate and initialize a new filestate. 731 */ 732 thisfile = (struct filestate *) 733 calloc(1, sizeof(struct filestate)); 734 thisfile->buf_next = thisfile->buf_prev = END_OF_CHAIN; 735 thisfile->nbufs = 0; 736 thisfile->flags = 0; 737 thisfile->fpos = 0; 738 thisfile->block = 0; 739 thisfile->offset = 0; 740 thisfile->file = -1; 741 thisfile->fsize = NULL_POSITION; 742 ch_flags = flags; 743 init_hashtbl(); 744 /* 745 * Try to seek; set CH_CANSEEK if it works. 746 */ 747 if ((flags & CH_CANSEEK) && !seekable(f)) 748 ch_flags &= ~CH_CANSEEK; 749 set_filestate(curr_ifile, (void *) thisfile); 750 } 751 if (thisfile->file == -1) 752 thisfile->file = f; 753 ch_flush(); 754 } 755 756 /* 757 * Close a filestate. 758 */ 759 public void 760 ch_close() 761 { 762 int keepstate = FALSE; 763 764 if (ch_flags & (CH_CANSEEK|CH_POPENED)) 765 { 766 /* 767 * We can seek or re-open, so we don't need to keep buffers. 768 */ 769 ch_delbufs(); 770 } else 771 keepstate = TRUE; 772 if (!(ch_flags & CH_KEEPOPEN)) 773 { 774 /* 775 * We don't need to keep the file descriptor open 776 * (because we can re-open it.) 777 * But don't really close it if it was opened via popen(), 778 * because pclose() wants to close it. 779 */ 780 if (!(ch_flags & CH_POPENED)) 781 close(ch_file); 782 ch_file = -1; 783 } else 784 keepstate = TRUE; 785 if (!keepstate) 786 { 787 /* 788 * We don't even need to keep the filestate structure. 789 */ 790 free(thisfile); 791 thisfile = NULL; 792 set_filestate(curr_ifile, (void *) NULL); 793 } 794 } 795 796 /* 797 * Return ch_flags for the current file. 798 */ 799 public int 800 ch_getflags() 801 { 802 return (ch_flags); 803 } 804 805 #if 0 806 public void 807 ch_dump(struct filestate *fs) 808 { 809 struct buf *bp; 810 unsigned char *s; 811 812 if (fs == NULL) 813 { 814 printf(" --no filestate\n"); 815 return; 816 } 817 printf(" file %d, flags %x, fpos %x, fsize %x, blk/off %x/%x\n", 818 fs->file, fs->flags, fs->fpos, 819 fs->fsize, fs->block, fs->offset); 820 printf(" %d bufs:\n", fs->nbufs); 821 for (bp = fs->buf_next; bp != (struct buf *)fs; bp = bp->next) 822 { 823 printf("%x: blk %x, size %x \"", 824 bp, bp->block, bp->datasize); 825 for (s = bp->data; s < bp->data + 30; s++) 826 if (*s >= ' ' && *s < 0x7F) 827 printf("%c", *s); 828 else 829 printf("."); 830 printf("\"\n"); 831 } 832 } 833 #endif 834