1 /* $Id: term.c,v 1.84 2014/04/23 21:06:33 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <string.h> 25 26 #include "mandoc.h" 27 #include "mandoc_aux.h" 28 #include "out.h" 29 #include "term.h" 30 #include "main.h" 31 32 static size_t cond_width(const struct termp *, int, int *); 33 static void adjbuf(struct termp *p, size_t); 34 static void bufferc(struct termp *, char); 35 static void encode(struct termp *, const char *, size_t); 36 static void encode1(struct termp *, int); 37 38 39 void 40 term_free(struct termp *p) 41 { 42 43 if (p->buf) 44 free(p->buf); 45 if (p->symtab) 46 mchars_free(p->symtab); 47 48 free(p); 49 } 50 51 void 52 term_begin(struct termp *p, term_margin head, 53 term_margin foot, const void *arg) 54 { 55 56 p->headf = head; 57 p->footf = foot; 58 p->argf = arg; 59 (*p->begin)(p); 60 } 61 62 void 63 term_end(struct termp *p) 64 { 65 66 (*p->end)(p); 67 } 68 69 /* 70 * Flush a chunk of text. By default, break the output line each time 71 * the right margin is reached, and continue output on the next line 72 * at the same offset as the chunk itself. By default, also break the 73 * output line at the end of the chunk. 74 * The following flags may be specified: 75 * 76 * - TERMP_NOBREAK: Do not break the output line at the right margin, 77 * but only at the max right margin. Also, do not break the output 78 * line at the end of the chunk, such that the next call can pad to 79 * the next column. However, if less than p->trailspace blanks, 80 * which can be 0, 1, or 2, remain to the right margin, the line 81 * will be broken. 82 * - TERMP_BRIND: If the chunk does not fit and the output line has 83 * to be broken, start the next line at the right margin instead 84 * of at the offset. Used together with TERMP_NOBREAK for the tags 85 * in various kinds of tagged lists. 86 * - TERMP_DANGLE: Do not break the output line at the right margin, 87 * append the next chunk after it even if this one is too long. 88 * To be used together with TERMP_NOBREAK. 89 * - TERMP_HANG: Like TERMP_DANGLE, and also suppress padding before 90 * the next chunk if this column is not full. 91 */ 92 void 93 term_flushln(struct termp *p) 94 { 95 size_t i; /* current input position in p->buf */ 96 int ntab; /* number of tabs to prepend */ 97 size_t vis; /* current visual position on output */ 98 size_t vbl; /* number of blanks to prepend to output */ 99 size_t vend; /* end of word visual position on output */ 100 size_t bp; /* visual right border position */ 101 size_t dv; /* temporary for visual pos calculations */ 102 size_t j; /* temporary loop index for p->buf */ 103 size_t jhy; /* last hyph before overflow w/r/t j */ 104 size_t maxvis; /* output position of visible boundary */ 105 size_t mmax; /* used in calculating bp */ 106 107 /* 108 * First, establish the maximum columns of "visible" content. 109 * This is usually the difference between the right-margin and 110 * an indentation, but can be, for tagged lists or columns, a 111 * small set of values. 112 * 113 * The following unsigned-signed subtractions look strange, 114 * but they are actually correct. If the int p->overstep 115 * is negative, it gets sign extended. Subtracting that 116 * very large size_t effectively adds a small number to dv. 117 */ 118 assert (p->rmargin >= p->offset); 119 dv = p->rmargin - p->offset; 120 maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; 121 dv = p->maxrmargin - p->offset; 122 mmax = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; 123 124 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis; 125 126 /* 127 * Calculate the required amount of padding. 128 */ 129 vbl = p->offset + p->overstep > p->viscol ? 130 p->offset + p->overstep - p->viscol : 0; 131 132 vis = vend = 0; 133 i = 0; 134 135 while (i < p->col) { 136 /* 137 * Handle literal tab characters: collapse all 138 * subsequent tabs into a single huge set of spaces. 139 */ 140 ntab = 0; 141 while (i < p->col && '\t' == p->buf[i]) { 142 vend = (vis / p->tabwidth + 1) * p->tabwidth; 143 vbl += vend - vis; 144 vis = vend; 145 ntab++; 146 i++; 147 } 148 149 /* 150 * Count up visible word characters. Control sequences 151 * (starting with the CSI) aren't counted. A space 152 * generates a non-printing word, which is valid (the 153 * space is printed according to regular spacing rules). 154 */ 155 156 for (j = i, jhy = 0; j < p->col; j++) { 157 if (' ' == p->buf[j] || '\t' == p->buf[j]) 158 break; 159 160 /* Back over the the last printed character. */ 161 if (8 == p->buf[j]) { 162 assert(j); 163 vend -= (*p->width)(p, p->buf[j - 1]); 164 continue; 165 } 166 167 /* Regular word. */ 168 /* Break at the hyphen point if we overrun. */ 169 if (vend > vis && vend < bp && 170 (ASCII_HYPH == p->buf[j] || 171 ASCII_BREAK == p->buf[j])) 172 jhy = j; 173 174 /* 175 * Hyphenation now decided, put back a real 176 * hyphen such that we get the correct width. 177 */ 178 if (ASCII_HYPH == p->buf[j]) 179 p->buf[j] = '-'; 180 181 vend += (*p->width)(p, p->buf[j]); 182 } 183 184 /* 185 * Find out whether we would exceed the right margin. 186 * If so, break to the next line. 187 */ 188 if (vend > bp && 0 == jhy && vis > 0) { 189 vend -= vis; 190 (*p->endline)(p); 191 p->viscol = 0; 192 if (TERMP_BRIND & p->flags) { 193 vbl = p->rmargin; 194 vend += p->rmargin - p->offset; 195 } else 196 vbl = p->offset; 197 198 /* use pending tabs on the new line */ 199 200 if (0 < ntab) 201 vbl += ntab * p->tabwidth; 202 203 /* 204 * Remove the p->overstep width. 205 * Again, if p->overstep is negative, 206 * sign extension does the right thing. 207 */ 208 209 bp += (size_t)p->overstep; 210 p->overstep = 0; 211 } 212 213 /* Write out the [remaining] word. */ 214 for ( ; i < p->col; i++) { 215 if (vend > bp && jhy > 0 && i > jhy) 216 break; 217 if ('\t' == p->buf[i]) 218 break; 219 if (' ' == p->buf[i]) { 220 j = i; 221 while (' ' == p->buf[i]) 222 i++; 223 dv = (i - j) * (*p->width)(p, ' '); 224 vbl += dv; 225 vend += dv; 226 break; 227 } 228 if (ASCII_NBRSP == p->buf[i]) { 229 vbl += (*p->width)(p, ' '); 230 continue; 231 } 232 if (ASCII_BREAK == p->buf[i]) 233 continue; 234 235 /* 236 * Now we definitely know there will be 237 * printable characters to output, 238 * so write preceding white space now. 239 */ 240 if (vbl) { 241 (*p->advance)(p, vbl); 242 p->viscol += vbl; 243 vbl = 0; 244 } 245 246 (*p->letter)(p, p->buf[i]); 247 if (8 == p->buf[i]) 248 p->viscol -= (*p->width)(p, p->buf[i-1]); 249 else 250 p->viscol += (*p->width)(p, p->buf[i]); 251 } 252 vis = vend; 253 } 254 255 /* 256 * If there was trailing white space, it was not printed; 257 * so reset the cursor position accordingly. 258 */ 259 if (vis) 260 vis -= vbl; 261 262 p->col = 0; 263 p->overstep = 0; 264 265 if ( ! (TERMP_NOBREAK & p->flags)) { 266 p->viscol = 0; 267 (*p->endline)(p); 268 return; 269 } 270 271 if (TERMP_HANG & p->flags) { 272 p->overstep = (int)(vis - maxvis + 273 p->trailspace * (*p->width)(p, ' ')); 274 275 /* 276 * If we have overstepped the margin, temporarily move 277 * it to the right and flag the rest of the line to be 278 * shorter. 279 * If there is a request to keep the columns together, 280 * allow negative overstep when the column is not full. 281 */ 282 if (p->trailspace && p->overstep < 0) 283 p->overstep = 0; 284 return; 285 286 } else if (TERMP_DANGLE & p->flags) 287 return; 288 289 /* If the column was overrun, break the line. */ 290 if (maxvis < vis + p->trailspace * (*p->width)(p, ' ')) { 291 (*p->endline)(p); 292 p->viscol = 0; 293 } 294 } 295 296 /* 297 * A newline only breaks an existing line; it won't assert vertical 298 * space. All data in the output buffer is flushed prior to the newline 299 * assertion. 300 */ 301 void 302 term_newln(struct termp *p) 303 { 304 305 p->flags |= TERMP_NOSPACE; 306 if (p->col || p->viscol) 307 term_flushln(p); 308 } 309 310 /* 311 * Asserts a vertical space (a full, empty line-break between lines). 312 * Note that if used twice, this will cause two blank spaces and so on. 313 * All data in the output buffer is flushed prior to the newline 314 * assertion. 315 */ 316 void 317 term_vspace(struct termp *p) 318 { 319 320 term_newln(p); 321 p->viscol = 0; 322 if (0 < p->skipvsp) 323 p->skipvsp--; 324 else 325 (*p->endline)(p); 326 } 327 328 void 329 term_fontlast(struct termp *p) 330 { 331 enum termfont f; 332 333 f = p->fontl; 334 p->fontl = p->fontq[p->fonti]; 335 p->fontq[p->fonti] = f; 336 } 337 338 void 339 term_fontrepl(struct termp *p, enum termfont f) 340 { 341 342 p->fontl = p->fontq[p->fonti]; 343 p->fontq[p->fonti] = f; 344 } 345 346 void 347 term_fontpush(struct termp *p, enum termfont f) 348 { 349 350 assert(p->fonti + 1 < 10); 351 p->fontl = p->fontq[p->fonti]; 352 p->fontq[++p->fonti] = f; 353 } 354 355 const void * 356 term_fontq(struct termp *p) 357 { 358 359 return(&p->fontq[p->fonti]); 360 } 361 362 enum termfont 363 term_fonttop(struct termp *p) 364 { 365 366 return(p->fontq[p->fonti]); 367 } 368 369 void 370 term_fontpopq(struct termp *p, const void *key) 371 { 372 373 while (p->fonti >= 0 && key < (void *)(p->fontq + p->fonti)) 374 p->fonti--; 375 assert(p->fonti >= 0); 376 } 377 378 void 379 term_fontpop(struct termp *p) 380 { 381 382 assert(p->fonti); 383 p->fonti--; 384 } 385 386 /* 387 * Handle pwords, partial words, which may be either a single word or a 388 * phrase that cannot be broken down (such as a literal string). This 389 * handles word styling. 390 */ 391 void 392 term_word(struct termp *p, const char *word) 393 { 394 const char nbrsp[2] = { ASCII_NBRSP, 0 }; 395 const char *seq, *cp; 396 char c; 397 int sz, uc; 398 size_t ssz; 399 enum mandoc_esc esc; 400 401 if ( ! (TERMP_NOSPACE & p->flags)) { 402 if ( ! (TERMP_KEEP & p->flags)) { 403 bufferc(p, ' '); 404 if (TERMP_SENTENCE & p->flags) 405 bufferc(p, ' '); 406 } else 407 bufferc(p, ASCII_NBRSP); 408 } 409 if (TERMP_PREKEEP & p->flags) 410 p->flags |= TERMP_KEEP; 411 412 if ( ! (p->flags & TERMP_NONOSPACE)) 413 p->flags &= ~TERMP_NOSPACE; 414 else 415 p->flags |= TERMP_NOSPACE; 416 417 p->flags &= ~TERMP_SENTENCE; 418 419 while ('\0' != *word) { 420 if ('\\' != *word) { 421 if (TERMP_SKIPCHAR & p->flags) { 422 p->flags &= ~TERMP_SKIPCHAR; 423 word++; 424 continue; 425 } 426 if (TERMP_NBRWORD & p->flags) { 427 if (' ' == *word) { 428 encode(p, nbrsp, 1); 429 word++; 430 continue; 431 } 432 ssz = strcspn(word, "\\ "); 433 } else 434 ssz = strcspn(word, "\\"); 435 encode(p, word, ssz); 436 word += (int)ssz; 437 continue; 438 } 439 440 word++; 441 esc = mandoc_escape(&word, &seq, &sz); 442 if (ESCAPE_ERROR == esc) 443 break; 444 445 if (TERMENC_ASCII != p->enc) 446 switch (esc) { 447 case ESCAPE_UNICODE: 448 uc = mchars_num2uc(seq + 1, sz - 1); 449 if ('\0' == uc) 450 break; 451 encode1(p, uc); 452 continue; 453 case ESCAPE_SPECIAL: 454 uc = mchars_spec2cp(p->symtab, seq, sz); 455 if (uc <= 0) 456 break; 457 encode1(p, uc); 458 continue; 459 default: 460 break; 461 } 462 463 switch (esc) { 464 case ESCAPE_UNICODE: 465 encode1(p, '?'); 466 break; 467 case ESCAPE_NUMBERED: 468 c = mchars_num2char(seq, sz); 469 if ('\0' != c) 470 encode(p, &c, 1); 471 break; 472 case ESCAPE_SPECIAL: 473 cp = mchars_spec2str(p->symtab, seq, sz, &ssz); 474 if (NULL != cp) 475 encode(p, cp, ssz); 476 else if (1 == ssz) 477 encode(p, seq, sz); 478 break; 479 case ESCAPE_FONTBOLD: 480 term_fontrepl(p, TERMFONT_BOLD); 481 break; 482 case ESCAPE_FONTITALIC: 483 term_fontrepl(p, TERMFONT_UNDER); 484 break; 485 case ESCAPE_FONTBI: 486 term_fontrepl(p, TERMFONT_BI); 487 break; 488 case ESCAPE_FONT: 489 /* FALLTHROUGH */ 490 case ESCAPE_FONTROMAN: 491 term_fontrepl(p, TERMFONT_NONE); 492 break; 493 case ESCAPE_FONTPREV: 494 term_fontlast(p); 495 break; 496 case ESCAPE_NOSPACE: 497 if (TERMP_SKIPCHAR & p->flags) 498 p->flags &= ~TERMP_SKIPCHAR; 499 else if ('\0' == *word) 500 p->flags |= TERMP_NOSPACE; 501 break; 502 case ESCAPE_SKIPCHAR: 503 p->flags |= TERMP_SKIPCHAR; 504 break; 505 default: 506 break; 507 } 508 } 509 p->flags &= ~TERMP_NBRWORD; 510 } 511 512 static void 513 adjbuf(struct termp *p, size_t sz) 514 { 515 516 if (0 == p->maxcols) 517 p->maxcols = 1024; 518 while (sz >= p->maxcols) 519 p->maxcols <<= 2; 520 521 p->buf = mandoc_reallocarray(p->buf, p->maxcols, sizeof(int)); 522 } 523 524 static void 525 bufferc(struct termp *p, char c) 526 { 527 528 if (p->col + 1 >= p->maxcols) 529 adjbuf(p, p->col + 1); 530 531 p->buf[p->col++] = c; 532 } 533 534 /* 535 * See encode(). 536 * Do this for a single (probably unicode) value. 537 * Does not check for non-decorated glyphs. 538 */ 539 static void 540 encode1(struct termp *p, int c) 541 { 542 enum termfont f; 543 544 if (TERMP_SKIPCHAR & p->flags) { 545 p->flags &= ~TERMP_SKIPCHAR; 546 return; 547 } 548 549 if (p->col + 6 >= p->maxcols) 550 adjbuf(p, p->col + 6); 551 552 f = term_fonttop(p); 553 554 if (TERMFONT_UNDER == f || TERMFONT_BI == f) { 555 p->buf[p->col++] = '_'; 556 p->buf[p->col++] = 8; 557 } 558 if (TERMFONT_BOLD == f || TERMFONT_BI == f) { 559 if (ASCII_HYPH == c) 560 p->buf[p->col++] = '-'; 561 else 562 p->buf[p->col++] = c; 563 p->buf[p->col++] = 8; 564 } 565 p->buf[p->col++] = c; 566 } 567 568 static void 569 encode(struct termp *p, const char *word, size_t sz) 570 { 571 size_t i; 572 573 if (TERMP_SKIPCHAR & p->flags) { 574 p->flags &= ~TERMP_SKIPCHAR; 575 return; 576 } 577 578 /* 579 * Encode and buffer a string of characters. If the current 580 * font mode is unset, buffer directly, else encode then buffer 581 * character by character. 582 */ 583 584 if (TERMFONT_NONE == term_fonttop(p)) { 585 if (p->col + sz >= p->maxcols) 586 adjbuf(p, p->col + sz); 587 for (i = 0; i < sz; i++) 588 p->buf[p->col++] = word[i]; 589 return; 590 } 591 592 /* Pre-buffer, assuming worst-case. */ 593 594 if (p->col + 1 + (sz * 5) >= p->maxcols) 595 adjbuf(p, p->col + 1 + (sz * 5)); 596 597 for (i = 0; i < sz; i++) { 598 if (ASCII_HYPH == word[i] || 599 isgraph((unsigned char)word[i])) 600 encode1(p, word[i]); 601 else 602 p->buf[p->col++] = word[i]; 603 } 604 } 605 606 void 607 term_setwidth(struct termp *p, const char *wstr) 608 { 609 struct roffsu su; 610 size_t width; 611 int iop; 612 613 iop = 0; 614 width = 0; 615 if (NULL != wstr) { 616 switch (*wstr) { 617 case '+': 618 iop = 1; 619 wstr++; 620 break; 621 case '-': 622 iop = -1; 623 wstr++; 624 break; 625 default: 626 break; 627 } 628 if (a2roffsu(wstr, &su, SCALE_MAX)) 629 width = term_hspan(p, &su); 630 else 631 iop = 0; 632 } 633 (*p->setwidth)(p, iop, width); 634 } 635 636 size_t 637 term_len(const struct termp *p, size_t sz) 638 { 639 640 return((*p->width)(p, ' ') * sz); 641 } 642 643 static size_t 644 cond_width(const struct termp *p, int c, int *skip) 645 { 646 647 if (*skip) { 648 (*skip) = 0; 649 return(0); 650 } else 651 return((*p->width)(p, c)); 652 } 653 654 size_t 655 term_strlen(const struct termp *p, const char *cp) 656 { 657 size_t sz, rsz, i; 658 int ssz, skip, c; 659 const char *seq, *rhs; 660 enum mandoc_esc esc; 661 static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH, 662 ASCII_BREAK, '\0' }; 663 664 /* 665 * Account for escaped sequences within string length 666 * calculations. This follows the logic in term_word() as we 667 * must calculate the width of produced strings. 668 */ 669 670 sz = 0; 671 skip = 0; 672 while ('\0' != *cp) { 673 rsz = strcspn(cp, rej); 674 for (i = 0; i < rsz; i++) 675 sz += cond_width(p, *cp++, &skip); 676 677 switch (*cp) { 678 case '\\': 679 cp++; 680 esc = mandoc_escape(&cp, &seq, &ssz); 681 if (ESCAPE_ERROR == esc) 682 return(sz); 683 684 if (TERMENC_ASCII != p->enc) 685 switch (esc) { 686 case ESCAPE_UNICODE: 687 c = mchars_num2uc(seq + 1, 688 ssz - 1); 689 if ('\0' == c) 690 break; 691 sz += cond_width(p, c, &skip); 692 continue; 693 case ESCAPE_SPECIAL: 694 c = mchars_spec2cp(p->symtab, 695 seq, ssz); 696 if (c <= 0) 697 break; 698 sz += cond_width(p, c, &skip); 699 continue; 700 default: 701 break; 702 } 703 704 rhs = NULL; 705 706 switch (esc) { 707 case ESCAPE_UNICODE: 708 sz += cond_width(p, '?', &skip); 709 break; 710 case ESCAPE_NUMBERED: 711 c = mchars_num2char(seq, ssz); 712 if ('\0' != c) 713 sz += cond_width(p, c, &skip); 714 break; 715 case ESCAPE_SPECIAL: 716 rhs = mchars_spec2str(p->symtab, 717 seq, ssz, &rsz); 718 719 if (ssz != 1 || rhs) 720 break; 721 722 rhs = seq; 723 rsz = ssz; 724 break; 725 case ESCAPE_SKIPCHAR: 726 skip = 1; 727 break; 728 default: 729 break; 730 } 731 732 if (NULL == rhs) 733 break; 734 735 if (skip) { 736 skip = 0; 737 break; 738 } 739 740 for (i = 0; i < rsz; i++) 741 sz += (*p->width)(p, *rhs++); 742 break; 743 case ASCII_NBRSP: 744 sz += cond_width(p, ' ', &skip); 745 cp++; 746 break; 747 case ASCII_HYPH: 748 sz += cond_width(p, '-', &skip); 749 cp++; 750 /* FALLTHROUGH */ 751 case ASCII_BREAK: 752 break; 753 default: 754 break; 755 } 756 } 757 758 return(sz); 759 } 760 761 size_t 762 term_vspan(const struct termp *p, const struct roffsu *su) 763 { 764 double r; 765 766 switch (su->unit) { 767 case SCALE_CM: 768 r = su->scale * 2; 769 break; 770 case SCALE_IN: 771 r = su->scale * 6; 772 break; 773 case SCALE_PC: 774 r = su->scale; 775 break; 776 case SCALE_PT: 777 r = su->scale / 8; 778 break; 779 case SCALE_MM: 780 r = su->scale / 1000; 781 break; 782 case SCALE_VS: 783 r = su->scale; 784 break; 785 default: 786 r = su->scale - 1; 787 break; 788 } 789 790 if (r < 0.0) 791 r = 0.0; 792 return((size_t)r); 793 } 794 795 size_t 796 term_hspan(const struct termp *p, const struct roffsu *su) 797 { 798 double v; 799 800 v = ((*p->hspan)(p, su)); 801 if (v < 0.0) 802 v = 0.0; 803 return((size_t)v); 804 } 805