1 /* $OpenBSD: term.c,v 1.151 2022/12/26 19:16:02 jmc Exp $ */ 2 /* 3 * Copyright (c) 2010-2022 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <stdint.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 27 #include "mandoc.h" 28 #include "mandoc_aux.h" 29 #include "out.h" 30 #include "term.h" 31 #include "main.h" 32 33 static size_t cond_width(const struct termp *, int, int *); 34 static void adjbuf(struct termp_col *, size_t); 35 static void bufferc(struct termp *, char); 36 static void encode(struct termp *, const char *, size_t); 37 static void encode1(struct termp *, int); 38 static void endline(struct termp *); 39 static void term_field(struct termp *, size_t, size_t); 40 static void term_fill(struct termp *, size_t *, size_t *, 41 size_t); 42 43 44 void 45 term_setcol(struct termp *p, size_t maxtcol) 46 { 47 if (maxtcol > p->maxtcol) { 48 p->tcols = mandoc_recallocarray(p->tcols, 49 p->maxtcol, maxtcol, sizeof(*p->tcols)); 50 p->maxtcol = maxtcol; 51 } 52 p->lasttcol = maxtcol - 1; 53 p->tcol = p->tcols; 54 } 55 56 void 57 term_free(struct termp *p) 58 { 59 term_tab_free(); 60 for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++) 61 free(p->tcol->buf); 62 free(p->tcols); 63 free(p->fontq); 64 free(p); 65 } 66 67 void 68 term_begin(struct termp *p, term_margin head, 69 term_margin foot, const struct roff_meta *arg) 70 { 71 72 p->headf = head; 73 p->footf = foot; 74 p->argf = arg; 75 (*p->begin)(p); 76 } 77 78 void 79 term_end(struct termp *p) 80 { 81 82 (*p->end)(p); 83 } 84 85 /* 86 * Flush a chunk of text. By default, break the output line each time 87 * the right margin is reached, and continue output on the next line 88 * at the same offset as the chunk itself. By default, also break the 89 * output line at the end of the chunk. There are many flags modifying 90 * this behaviour, see the comments in the body of the function. 91 */ 92 void 93 term_flushln(struct termp *p) 94 { 95 size_t vbl; /* Number of blanks to prepend to the output. */ 96 size_t vbr; /* Actual visual position of the end of field. */ 97 size_t vfield; /* Desired visual field width. */ 98 size_t vtarget; /* Desired visual position of the right margin. */ 99 size_t ic; /* Character position in the input buffer. */ 100 size_t nbr; /* Number of characters to print in this field. */ 101 102 /* 103 * Normally, start writing at the left margin, but with the 104 * NOPAD flag, start writing at the current position instead. 105 */ 106 107 vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ? 108 0 : p->tcol->offset - p->viscol; 109 if (p->minbl && vbl < p->minbl) 110 vbl = p->minbl; 111 112 if ((p->flags & TERMP_MULTICOL) == 0) 113 p->tcol->col = 0; 114 115 /* Loop over output lines. */ 116 117 for (;;) { 118 vfield = p->tcol->rmargin > p->viscol + vbl ? 119 p->tcol->rmargin - p->viscol - vbl : 0; 120 121 /* 122 * Normally, break the line at the the right margin 123 * of the field, but with the NOBREAK flag, only 124 * break it at the max right margin of the screen, 125 * and with the BRNEVER flag, never break it at all. 126 */ 127 128 vtarget = (p->flags & TERMP_NOBREAK) == 0 ? vfield : 129 p->maxrmargin > p->viscol + vbl ? 130 p->maxrmargin - p->viscol - vbl : 0; 131 132 /* 133 * Figure out how much text will fit in the field. 134 * If there is whitespace only, print nothing. 135 */ 136 137 term_fill(p, &nbr, &vbr, 138 p->flags & TERMP_BRNEVER ? SIZE_MAX : vtarget); 139 if (nbr == 0) 140 break; 141 142 /* 143 * With the CENTER or RIGHT flag, increase the indentation 144 * to center the text between the left and right margins 145 * or to adjust it to the right margin, respectively. 146 */ 147 148 if (vbr < vtarget) { 149 if (p->flags & TERMP_CENTER) 150 vbl += (vtarget - vbr) / 2; 151 else if (p->flags & TERMP_RIGHT) 152 vbl += vtarget - vbr; 153 } 154 155 /* Finally, print the field content. */ 156 157 term_field(p, vbl, nbr); 158 if (vbr < vtarget) 159 p->tcol->taboff += vbr; 160 else 161 p->tcol->taboff += vtarget; 162 p->tcol->taboff += (*p->width)(p, ' '); 163 164 /* 165 * If there is no text left in the field, exit the loop. 166 * If the BRTRSP flag is set, consider trailing 167 * whitespace significant when deciding whether 168 * the field fits or not. 169 */ 170 171 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) { 172 switch (p->tcol->buf[ic]) { 173 case '\t': 174 if (p->flags & TERMP_BRTRSP) 175 vbr = term_tab_next(vbr); 176 continue; 177 case ' ': 178 if (p->flags & TERMP_BRTRSP) 179 vbr += (*p->width)(p, ' '); 180 continue; 181 case '\n': 182 case ASCII_NBRZW: 183 case ASCII_BREAK: 184 case ASCII_TABREF: 185 continue; 186 default: 187 break; 188 } 189 break; 190 } 191 if (ic == p->tcol->lastcol) 192 break; 193 194 /* 195 * At the location of an automatic line break, input 196 * space characters are consumed by the line break. 197 */ 198 199 while (p->tcol->col < p->tcol->lastcol && 200 p->tcol->buf[p->tcol->col] == ' ') 201 p->tcol->col++; 202 203 /* 204 * In multi-column mode, leave the rest of the text 205 * in the buffer to be handled by a subsequent 206 * invocation, such that the other columns of the 207 * table can be handled first. 208 * In single-column mode, simply break the line. 209 */ 210 211 if (p->flags & TERMP_MULTICOL) 212 return; 213 214 endline(p); 215 216 /* 217 * Normally, start the next line at the same indentation 218 * as this one, but with the BRIND flag, start it at the 219 * right margin instead. This is used together with 220 * NOBREAK for the tags in various kinds of tagged lists. 221 */ 222 223 vbl = p->flags & TERMP_BRIND ? 224 p->tcol->rmargin : p->tcol->offset; 225 } 226 227 /* Reset output state in preparation for the next field. */ 228 229 p->col = p->tcol->col = p->tcol->lastcol = 0; 230 p->minbl = p->trailspace; 231 p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD); 232 233 if (p->flags & TERMP_MULTICOL) 234 return; 235 236 /* 237 * The HANG flag means that the next field 238 * always follows on the same line. 239 * The NOBREAK flag means that the next field 240 * follows on the same line unless the field was overrun. 241 * Normally, break the line at the end of each field. 242 */ 243 244 if ((p->flags & TERMP_HANG) == 0 && 245 ((p->flags & TERMP_NOBREAK) == 0 || 246 vbr + term_len(p, p->trailspace) > vfield)) 247 endline(p); 248 } 249 250 /* 251 * Store the number of input characters to print in this field in *nbr 252 * and their total visual width to print in *vbr. 253 * If there is only whitespace in the field, both remain zero. 254 * The desired visual width of the field is provided by vtarget. 255 * If the first word is longer, the field will be overrun. 256 */ 257 static void 258 term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget) 259 { 260 size_t ic; /* Character position in the input buffer. */ 261 size_t vis; /* Visual position of the current character. */ 262 size_t vn; /* Visual position of the next character. */ 263 int breakline; /* Break at the end of this word. */ 264 int graph; /* Last character was non-blank. */ 265 int taboff; /* Temporary offset for literal tabs. */ 266 267 *nbr = *vbr = vis = 0; 268 breakline = graph = 0; 269 taboff = p->tcol->taboff; 270 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) { 271 switch (p->tcol->buf[ic]) { 272 case '\b': /* Escape \o (overstrike) or backspace markup. */ 273 assert(ic > 0); 274 vis -= (*p->width)(p, p->tcol->buf[ic - 1]); 275 continue; 276 277 case ' ': 278 case ASCII_BREAK: /* Escape \: (breakpoint). */ 279 vn = vis; 280 if (p->tcol->buf[ic] == ' ') 281 vn += (*p->width)(p, ' '); 282 /* Can break at the end of a word. */ 283 if (breakline || vn > vtarget) 284 break; 285 if (graph) { 286 *nbr = ic; 287 *vbr = vis; 288 graph = 0; 289 } 290 vis = vn; 291 continue; 292 293 case '\n': /* Escape \p (break at the end of the word). */ 294 breakline = 1; 295 continue; 296 297 case ASCII_HYPH: /* Breakable hyphen. */ 298 graph = 1; 299 /* 300 * We are about to decide whether to break the 301 * line or not, so we no longer need this hyphen 302 * to be marked as breakable. Put back a real 303 * hyphen such that we get the correct width. 304 */ 305 p->tcol->buf[ic] = '-'; 306 vis += (*p->width)(p, '-'); 307 if (vis > vtarget) { 308 ic++; 309 break; 310 } 311 *nbr = ic + 1; 312 *vbr = vis; 313 continue; 314 315 case ASCII_TABREF: 316 taboff = -vis - (*p->width)(p, ' '); 317 continue; 318 319 default: 320 switch (p->tcol->buf[ic]) { 321 case '\t': 322 if (taboff < 0 && (size_t)-taboff > vis) 323 vis = 0; 324 else 325 vis += taboff; 326 vis = term_tab_next(vis); 327 vis -= taboff; 328 break; 329 case ASCII_NBRZW: /* Non-breakable zero-width. */ 330 break; 331 case ASCII_NBRSP: /* Non-breakable space. */ 332 p->tcol->buf[ic] = ' '; 333 /* FALLTHROUGH */ 334 default: /* Printable character. */ 335 vis += (*p->width)(p, p->tcol->buf[ic]); 336 break; 337 } 338 graph = 1; 339 if (vis > vtarget && *nbr > 0) 340 return; 341 continue; 342 } 343 break; 344 } 345 346 /* 347 * If the last word extends to the end of the field without any 348 * trailing whitespace, the loop could not check yet whether it 349 * can remain on this line. So do the check now. 350 */ 351 352 if (graph && (vis <= vtarget || *nbr == 0)) { 353 *nbr = ic; 354 *vbr = vis; 355 } 356 } 357 358 /* 359 * Print the contents of one field 360 * with an indentation of vbl visual columns, 361 * and an input string length of nbr characters. 362 */ 363 static void 364 term_field(struct termp *p, size_t vbl, size_t nbr) 365 { 366 size_t ic; /* Character position in the input buffer. */ 367 size_t vis; /* Visual position of the current character. */ 368 size_t vt; /* Visual position including tab offset. */ 369 size_t dv; /* Visual width of the current character. */ 370 int taboff; /* Temporary offset for literal tabs. */ 371 372 vis = 0; 373 taboff = p->tcol->taboff; 374 for (ic = p->tcol->col; ic < nbr; ic++) { 375 376 /* 377 * To avoid the printing of trailing whitespace, 378 * do not print whitespace right away, only count it. 379 */ 380 381 switch (p->tcol->buf[ic]) { 382 case '\n': 383 case ASCII_BREAK: 384 case ASCII_NBRZW: 385 continue; 386 case ASCII_TABREF: 387 taboff = -vis - (*p->width)(p, ' '); 388 continue; 389 case '\t': 390 case ' ': 391 case ASCII_NBRSP: 392 if (p->tcol->buf[ic] == '\t') { 393 if (taboff < 0 && (size_t)-taboff > vis) 394 vt = 0; 395 else 396 vt = vis + taboff; 397 dv = term_tab_next(vt) - vt; 398 } else 399 dv = (*p->width)(p, ' '); 400 vbl += dv; 401 vis += dv; 402 continue; 403 default: 404 break; 405 } 406 407 /* 408 * We found a non-blank character to print, 409 * so write preceding white space now. 410 */ 411 412 if (vbl > 0) { 413 (*p->advance)(p, vbl); 414 p->viscol += vbl; 415 vbl = 0; 416 } 417 418 /* Print the character and adjust the visual position. */ 419 420 (*p->letter)(p, p->tcol->buf[ic]); 421 if (p->tcol->buf[ic] == '\b') { 422 dv = (*p->width)(p, p->tcol->buf[ic - 1]); 423 p->viscol -= dv; 424 vis -= dv; 425 } else { 426 dv = (*p->width)(p, p->tcol->buf[ic]); 427 p->viscol += dv; 428 vis += dv; 429 } 430 } 431 p->tcol->col = nbr; 432 } 433 434 static void 435 endline(struct termp *p) 436 { 437 if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) { 438 p->mc = NULL; 439 p->flags &= ~TERMP_ENDMC; 440 } 441 if (p->mc != NULL) { 442 if (p->viscol && p->maxrmargin >= p->viscol) 443 (*p->advance)(p, p->maxrmargin - p->viscol + 1); 444 p->flags |= TERMP_NOBUF | TERMP_NOSPACE; 445 term_word(p, p->mc); 446 p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC); 447 } 448 p->viscol = 0; 449 p->minbl = 0; 450 (*p->endline)(p); 451 } 452 453 /* 454 * A newline only breaks an existing line; it won't assert vertical 455 * space. All data in the output buffer is flushed prior to the newline 456 * assertion. 457 */ 458 void 459 term_newln(struct termp *p) 460 { 461 p->flags |= TERMP_NOSPACE; 462 if (p->tcol->lastcol || p->viscol) 463 term_flushln(p); 464 p->tcol->taboff = 0; 465 } 466 467 /* 468 * Asserts a vertical space (a full, empty line-break between lines). 469 * Note that if used twice, this will cause two blank spaces and so on. 470 * All data in the output buffer is flushed prior to the newline 471 * assertion. 472 */ 473 void 474 term_vspace(struct termp *p) 475 { 476 477 term_newln(p); 478 p->viscol = 0; 479 p->minbl = 0; 480 if (0 < p->skipvsp) 481 p->skipvsp--; 482 else 483 (*p->endline)(p); 484 } 485 486 /* Swap current and previous font; for \fP and .ft P */ 487 void 488 term_fontlast(struct termp *p) 489 { 490 enum termfont f; 491 492 f = p->fontl; 493 p->fontl = p->fontq[p->fonti]; 494 p->fontq[p->fonti] = f; 495 } 496 497 /* Set font, save current, discard previous; for \f, .ft, .B etc. */ 498 void 499 term_fontrepl(struct termp *p, enum termfont f) 500 { 501 502 p->fontl = p->fontq[p->fonti]; 503 p->fontq[p->fonti] = f; 504 } 505 506 /* Set font, save previous. */ 507 void 508 term_fontpush(struct termp *p, enum termfont f) 509 { 510 511 p->fontl = p->fontq[p->fonti]; 512 if (++p->fonti == p->fontsz) { 513 p->fontsz += 8; 514 p->fontq = mandoc_reallocarray(p->fontq, 515 p->fontsz, sizeof(*p->fontq)); 516 } 517 p->fontq[p->fonti] = f; 518 } 519 520 /* Flush to make the saved pointer current again. */ 521 void 522 term_fontpopq(struct termp *p, int i) 523 { 524 525 assert(i >= 0); 526 if (p->fonti > i) 527 p->fonti = i; 528 } 529 530 /* Pop one font off the stack. */ 531 void 532 term_fontpop(struct termp *p) 533 { 534 535 assert(p->fonti); 536 p->fonti--; 537 } 538 539 /* 540 * Handle pwords, partial words, which may be either a single word or a 541 * phrase that cannot be broken down (such as a literal string). This 542 * handles word styling. 543 */ 544 void 545 term_word(struct termp *p, const char *word) 546 { 547 struct roffsu su; 548 const char nbrsp[2] = { ASCII_NBRSP, 0 }; 549 const char *seq, *cp; 550 int sz, uc; 551 size_t csz, lsz, ssz; 552 enum mandoc_esc esc; 553 554 if ((p->flags & TERMP_NOBUF) == 0) { 555 if ((p->flags & TERMP_NOSPACE) == 0) { 556 if ((p->flags & TERMP_KEEP) == 0) { 557 bufferc(p, ' '); 558 if (p->flags & TERMP_SENTENCE) 559 bufferc(p, ' '); 560 } else 561 bufferc(p, ASCII_NBRSP); 562 } 563 if (p->flags & TERMP_PREKEEP) 564 p->flags |= TERMP_KEEP; 565 if (p->flags & TERMP_NONOSPACE) 566 p->flags |= TERMP_NOSPACE; 567 else 568 p->flags &= ~TERMP_NOSPACE; 569 p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE); 570 p->skipvsp = 0; 571 } 572 573 while ('\0' != *word) { 574 if ('\\' != *word) { 575 if (TERMP_NBRWORD & p->flags) { 576 if (' ' == *word) { 577 encode(p, nbrsp, 1); 578 word++; 579 continue; 580 } 581 ssz = strcspn(word, "\\ "); 582 } else 583 ssz = strcspn(word, "\\"); 584 encode(p, word, ssz); 585 word += (int)ssz; 586 continue; 587 } 588 589 word++; 590 esc = mandoc_escape(&word, &seq, &sz); 591 switch (esc) { 592 case ESCAPE_UNICODE: 593 uc = mchars_num2uc(seq + 1, sz - 1); 594 break; 595 case ESCAPE_NUMBERED: 596 uc = mchars_num2char(seq, sz); 597 if (uc >= 0) 598 break; 599 bufferc(p, ASCII_NBRZW); 600 continue; 601 case ESCAPE_SPECIAL: 602 if (p->enc == TERMENC_ASCII) { 603 cp = mchars_spec2str(seq, sz, &ssz); 604 if (cp != NULL) 605 encode(p, cp, ssz); 606 else 607 bufferc(p, ASCII_NBRZW); 608 } else { 609 uc = mchars_spec2cp(seq, sz); 610 if (uc > 0) 611 encode1(p, uc); 612 else 613 bufferc(p, ASCII_NBRZW); 614 } 615 continue; 616 case ESCAPE_UNDEF: 617 uc = *seq; 618 break; 619 case ESCAPE_FONTBOLD: 620 case ESCAPE_FONTCB: 621 term_fontrepl(p, TERMFONT_BOLD); 622 continue; 623 case ESCAPE_FONTITALIC: 624 case ESCAPE_FONTCI: 625 term_fontrepl(p, TERMFONT_UNDER); 626 continue; 627 case ESCAPE_FONTBI: 628 term_fontrepl(p, TERMFONT_BI); 629 continue; 630 case ESCAPE_FONT: 631 case ESCAPE_FONTCR: 632 case ESCAPE_FONTROMAN: 633 term_fontrepl(p, TERMFONT_NONE); 634 continue; 635 case ESCAPE_FONTPREV: 636 term_fontlast(p); 637 continue; 638 case ESCAPE_BREAK: 639 bufferc(p, '\n'); 640 continue; 641 case ESCAPE_NOSPACE: 642 if (p->flags & TERMP_BACKAFTER) 643 p->flags &= ~TERMP_BACKAFTER; 644 else if (*word == '\0') 645 p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE); 646 continue; 647 case ESCAPE_DEVICE: 648 if (p->type == TERMTYPE_PDF) 649 encode(p, "pdf", 3); 650 else if (p->type == TERMTYPE_PS) 651 encode(p, "ps", 2); 652 else if (p->enc == TERMENC_ASCII) 653 encode(p, "ascii", 5); 654 else 655 encode(p, "utf8", 4); 656 continue; 657 case ESCAPE_HORIZ: 658 if (p->flags & TERMP_BACKAFTER) { 659 p->flags &= ~TERMP_BACKAFTER; 660 continue; 661 } 662 if (*seq == '|') { 663 seq++; 664 uc = -p->col; 665 } else 666 uc = 0; 667 if (a2roffsu(seq, &su, SCALE_EM) == NULL) 668 continue; 669 uc += term_hen(p, &su); 670 if (uc >= 0) { 671 while (uc > 0) { 672 uc -= term_len(p, 1); 673 if (p->flags & TERMP_BACKBEFORE) 674 p->flags &= ~TERMP_BACKBEFORE; 675 else 676 bufferc(p, ASCII_NBRSP); 677 } 678 continue; 679 } 680 if (p->flags & TERMP_BACKBEFORE) { 681 p->flags &= ~TERMP_BACKBEFORE; 682 assert(p->col > 0); 683 p->col--; 684 } 685 if (p->col >= (size_t)(-uc)) { 686 p->col += uc; 687 } else { 688 uc += p->col; 689 p->col = 0; 690 if (p->tcol->offset > (size_t)(-uc)) { 691 p->ti += uc; 692 p->tcol->offset += uc; 693 } else { 694 p->ti -= p->tcol->offset; 695 p->tcol->offset = 0; 696 } 697 } 698 continue; 699 case ESCAPE_HLINE: 700 if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL) 701 continue; 702 uc = term_hen(p, &su); 703 if (uc <= 0) { 704 if (p->tcol->rmargin <= p->tcol->offset) 705 continue; 706 lsz = p->tcol->rmargin - p->tcol->offset; 707 } else 708 lsz = uc; 709 if (*cp == seq[-1]) 710 uc = -1; 711 else if (*cp == '\\') { 712 seq = cp + 1; 713 esc = mandoc_escape(&seq, &cp, &sz); 714 switch (esc) { 715 case ESCAPE_UNICODE: 716 uc = mchars_num2uc(cp + 1, sz - 1); 717 break; 718 case ESCAPE_NUMBERED: 719 uc = mchars_num2char(cp, sz); 720 break; 721 case ESCAPE_SPECIAL: 722 uc = mchars_spec2cp(cp, sz); 723 break; 724 case ESCAPE_UNDEF: 725 uc = *seq; 726 break; 727 default: 728 uc = -1; 729 break; 730 } 731 } else 732 uc = *cp; 733 if (uc < 0x20 || (uc > 0x7E && uc < 0xA0)) 734 uc = '_'; 735 if (p->enc == TERMENC_ASCII) { 736 cp = ascii_uc2str(uc); 737 csz = term_strlen(p, cp); 738 ssz = strlen(cp); 739 } else 740 csz = (*p->width)(p, uc); 741 while (lsz >= csz) { 742 if (p->enc == TERMENC_ASCII) 743 encode(p, cp, ssz); 744 else 745 encode1(p, uc); 746 lsz -= csz; 747 } 748 continue; 749 case ESCAPE_SKIPCHAR: 750 p->flags |= TERMP_BACKAFTER; 751 continue; 752 case ESCAPE_OVERSTRIKE: 753 cp = seq + sz; 754 while (seq < cp) { 755 if (*seq == '\\') { 756 mandoc_escape(&seq, NULL, NULL); 757 continue; 758 } 759 encode1(p, *seq++); 760 if (seq < cp) { 761 if (p->flags & TERMP_BACKBEFORE) 762 p->flags |= TERMP_BACKAFTER; 763 else 764 p->flags |= TERMP_BACKBEFORE; 765 } 766 } 767 /* Trim trailing backspace/blank pair. */ 768 if (p->tcol->lastcol > 2 && 769 (p->tcol->buf[p->tcol->lastcol - 1] == ' ' || 770 p->tcol->buf[p->tcol->lastcol - 1] == '\t')) 771 p->tcol->lastcol -= 2; 772 if (p->col > p->tcol->lastcol) 773 p->col = p->tcol->lastcol; 774 continue; 775 case ESCAPE_IGNORE: 776 bufferc(p, ASCII_NBRZW); 777 continue; 778 default: 779 continue; 780 } 781 782 /* 783 * Common handling for Unicode and numbered 784 * character escape sequences. 785 */ 786 787 if (p->enc == TERMENC_ASCII) { 788 cp = ascii_uc2str(uc); 789 encode(p, cp, strlen(cp)); 790 } else { 791 if ((uc < 0x20 && uc != 0x09) || 792 (uc > 0x7E && uc < 0xA0)) 793 uc = 0xFFFD; 794 encode1(p, uc); 795 } 796 } 797 p->flags &= ~TERMP_NBRWORD; 798 } 799 800 static void 801 adjbuf(struct termp_col *c, size_t sz) 802 { 803 if (c->maxcols == 0) 804 c->maxcols = 1024; 805 while (c->maxcols <= sz) 806 c->maxcols <<= 2; 807 c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf)); 808 } 809 810 static void 811 bufferc(struct termp *p, char c) 812 { 813 if (p->flags & TERMP_NOBUF) { 814 (*p->letter)(p, c); 815 return; 816 } 817 if (p->col + 1 >= p->tcol->maxcols) 818 adjbuf(p->tcol, p->col + 1); 819 if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP)) 820 p->tcol->buf[p->col] = c; 821 if (p->tcol->lastcol < ++p->col) 822 p->tcol->lastcol = p->col; 823 } 824 825 void 826 term_tab_ref(struct termp *p) 827 { 828 if (p->tcol->lastcol && p->tcol->lastcol <= p->col && 829 (p->flags & TERMP_NOBUF) == 0) 830 bufferc(p, ASCII_TABREF); 831 } 832 833 /* 834 * See encode(). 835 * Do this for a single (probably unicode) value. 836 * Does not check for non-decorated glyphs. 837 */ 838 static void 839 encode1(struct termp *p, int c) 840 { 841 enum termfont f; 842 843 if (p->flags & TERMP_NOBUF) { 844 (*p->letter)(p, c); 845 return; 846 } 847 848 if (p->col + 7 >= p->tcol->maxcols) 849 adjbuf(p->tcol, p->col + 7); 850 851 f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ? 852 p->fontq[p->fonti] : TERMFONT_NONE; 853 854 if (p->flags & TERMP_BACKBEFORE) { 855 if (p->tcol->buf[p->col - 1] == ' ' || 856 p->tcol->buf[p->col - 1] == '\t') 857 p->col--; 858 else 859 p->tcol->buf[p->col++] = '\b'; 860 p->flags &= ~TERMP_BACKBEFORE; 861 } 862 if (f == TERMFONT_UNDER || f == TERMFONT_BI) { 863 p->tcol->buf[p->col++] = '_'; 864 p->tcol->buf[p->col++] = '\b'; 865 } 866 if (f == TERMFONT_BOLD || f == TERMFONT_BI) { 867 if (c == ASCII_HYPH) 868 p->tcol->buf[p->col++] = '-'; 869 else 870 p->tcol->buf[p->col++] = c; 871 p->tcol->buf[p->col++] = '\b'; 872 } 873 if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP)) 874 p->tcol->buf[p->col] = c; 875 if (p->tcol->lastcol < ++p->col) 876 p->tcol->lastcol = p->col; 877 if (p->flags & TERMP_BACKAFTER) { 878 p->flags |= TERMP_BACKBEFORE; 879 p->flags &= ~TERMP_BACKAFTER; 880 } 881 } 882 883 static void 884 encode(struct termp *p, const char *word, size_t sz) 885 { 886 size_t i; 887 888 if (p->flags & TERMP_NOBUF) { 889 for (i = 0; i < sz; i++) 890 (*p->letter)(p, word[i]); 891 return; 892 } 893 894 if (p->col + 2 + (sz * 5) >= p->tcol->maxcols) 895 adjbuf(p->tcol, p->col + 2 + (sz * 5)); 896 897 for (i = 0; i < sz; i++) { 898 if (ASCII_HYPH == word[i] || 899 isgraph((unsigned char)word[i])) 900 encode1(p, word[i]); 901 else { 902 if (p->tcol->lastcol <= p->col || 903 (word[i] != ' ' && word[i] != ASCII_NBRSP)) 904 p->tcol->buf[p->col] = word[i]; 905 p->col++; 906 907 /* 908 * Postpone the effect of \z while handling 909 * an overstrike sequence from ascii_uc2str(). 910 */ 911 912 if (word[i] == '\b' && 913 (p->flags & TERMP_BACKBEFORE)) { 914 p->flags &= ~TERMP_BACKBEFORE; 915 p->flags |= TERMP_BACKAFTER; 916 } 917 } 918 } 919 if (p->tcol->lastcol < p->col) 920 p->tcol->lastcol = p->col; 921 } 922 923 void 924 term_setwidth(struct termp *p, const char *wstr) 925 { 926 struct roffsu su; 927 int iop, width; 928 929 iop = 0; 930 width = 0; 931 if (NULL != wstr) { 932 switch (*wstr) { 933 case '+': 934 iop = 1; 935 wstr++; 936 break; 937 case '-': 938 iop = -1; 939 wstr++; 940 break; 941 default: 942 break; 943 } 944 if (a2roffsu(wstr, &su, SCALE_MAX) != NULL) 945 width = term_hspan(p, &su); 946 else 947 iop = 0; 948 } 949 (*p->setwidth)(p, iop, width); 950 } 951 952 size_t 953 term_len(const struct termp *p, size_t sz) 954 { 955 956 return (*p->width)(p, ' ') * sz; 957 } 958 959 static size_t 960 cond_width(const struct termp *p, int c, int *skip) 961 { 962 963 if (*skip) { 964 (*skip) = 0; 965 return 0; 966 } else 967 return (*p->width)(p, c); 968 } 969 970 size_t 971 term_strlen(const struct termp *p, const char *cp) 972 { 973 size_t sz, rsz, i; 974 int ssz, skip, uc; 975 const char *seq, *rhs; 976 enum mandoc_esc esc; 977 static const char rej[] = { '\\', ASCII_NBRSP, ASCII_NBRZW, 978 ASCII_BREAK, ASCII_HYPH, ASCII_TABREF, '\0' }; 979 980 /* 981 * Account for escaped sequences within string length 982 * calculations. This follows the logic in term_word() as we 983 * must calculate the width of produced strings. 984 */ 985 986 sz = 0; 987 skip = 0; 988 while ('\0' != *cp) { 989 rsz = strcspn(cp, rej); 990 for (i = 0; i < rsz; i++) 991 sz += cond_width(p, *cp++, &skip); 992 993 switch (*cp) { 994 case '\\': 995 cp++; 996 rhs = NULL; 997 esc = mandoc_escape(&cp, &seq, &ssz); 998 switch (esc) { 999 case ESCAPE_UNICODE: 1000 uc = mchars_num2uc(seq + 1, ssz - 1); 1001 break; 1002 case ESCAPE_NUMBERED: 1003 uc = mchars_num2char(seq, ssz); 1004 if (uc < 0) 1005 continue; 1006 break; 1007 case ESCAPE_SPECIAL: 1008 if (p->enc == TERMENC_ASCII) { 1009 rhs = mchars_spec2str(seq, ssz, &rsz); 1010 if (rhs != NULL) 1011 break; 1012 } else { 1013 uc = mchars_spec2cp(seq, ssz); 1014 if (uc > 0) 1015 sz += cond_width(p, uc, &skip); 1016 } 1017 continue; 1018 case ESCAPE_UNDEF: 1019 uc = *seq; 1020 break; 1021 case ESCAPE_DEVICE: 1022 if (p->type == TERMTYPE_PDF) { 1023 rhs = "pdf"; 1024 rsz = 3; 1025 } else if (p->type == TERMTYPE_PS) { 1026 rhs = "ps"; 1027 rsz = 2; 1028 } else if (p->enc == TERMENC_ASCII) { 1029 rhs = "ascii"; 1030 rsz = 5; 1031 } else { 1032 rhs = "utf8"; 1033 rsz = 4; 1034 } 1035 break; 1036 case ESCAPE_SKIPCHAR: 1037 skip = 1; 1038 continue; 1039 case ESCAPE_OVERSTRIKE: 1040 rsz = 0; 1041 rhs = seq + ssz; 1042 while (seq < rhs) { 1043 if (*seq == '\\') { 1044 mandoc_escape(&seq, NULL, NULL); 1045 continue; 1046 } 1047 i = (*p->width)(p, *seq++); 1048 if (rsz < i) 1049 rsz = i; 1050 } 1051 sz += rsz; 1052 continue; 1053 default: 1054 continue; 1055 } 1056 1057 /* 1058 * Common handling for Unicode and numbered 1059 * character escape sequences. 1060 */ 1061 1062 if (rhs == NULL) { 1063 if (p->enc == TERMENC_ASCII) { 1064 rhs = ascii_uc2str(uc); 1065 rsz = strlen(rhs); 1066 } else { 1067 if ((uc < 0x20 && uc != 0x09) || 1068 (uc > 0x7E && uc < 0xA0)) 1069 uc = 0xFFFD; 1070 sz += cond_width(p, uc, &skip); 1071 continue; 1072 } 1073 } 1074 1075 if (skip) { 1076 skip = 0; 1077 break; 1078 } 1079 1080 /* 1081 * Common handling for all escape sequences 1082 * printing more than one character. 1083 */ 1084 1085 for (i = 0; i < rsz; i++) 1086 sz += (*p->width)(p, *rhs++); 1087 break; 1088 case ASCII_NBRSP: 1089 sz += cond_width(p, ' ', &skip); 1090 cp++; 1091 break; 1092 case ASCII_HYPH: 1093 sz += cond_width(p, '-', &skip); 1094 cp++; 1095 break; 1096 default: 1097 break; 1098 } 1099 } 1100 1101 return sz; 1102 } 1103 1104 int 1105 term_vspan(const struct termp *p, const struct roffsu *su) 1106 { 1107 double r; 1108 int ri; 1109 1110 switch (su->unit) { 1111 case SCALE_BU: 1112 r = su->scale / 40.0; 1113 break; 1114 case SCALE_CM: 1115 r = su->scale * 6.0 / 2.54; 1116 break; 1117 case SCALE_FS: 1118 r = su->scale * 65536.0 / 40.0; 1119 break; 1120 case SCALE_IN: 1121 r = su->scale * 6.0; 1122 break; 1123 case SCALE_MM: 1124 r = su->scale * 0.006; 1125 break; 1126 case SCALE_PC: 1127 r = su->scale; 1128 break; 1129 case SCALE_PT: 1130 r = su->scale / 12.0; 1131 break; 1132 case SCALE_EN: 1133 case SCALE_EM: 1134 r = su->scale * 0.6; 1135 break; 1136 case SCALE_VS: 1137 r = su->scale; 1138 break; 1139 default: 1140 abort(); 1141 } 1142 ri = r > 0.0 ? r + 0.4995 : r - 0.4995; 1143 return ri < 66 ? ri : 1; 1144 } 1145 1146 /* 1147 * Convert a scaling width to basic units, rounding towards 0. 1148 */ 1149 int 1150 term_hspan(const struct termp *p, const struct roffsu *su) 1151 { 1152 1153 return (*p->hspan)(p, su); 1154 } 1155 1156 /* 1157 * Convert a scaling width to basic units, rounding to closest. 1158 */ 1159 int 1160 term_hen(const struct termp *p, const struct roffsu *su) 1161 { 1162 int bu; 1163 1164 if ((bu = (*p->hspan)(p, su)) >= 0) 1165 return (bu + 11) / 24; 1166 else 1167 return -((-bu + 11) / 24); 1168 } 1169