1 /* $Vendor-Id: term.c,v 1.183 2011/04/04 21:14:12 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 31 #include "mandoc.h" 32 #include "out.h" 33 #include "term.h" 34 #include "main.h" 35 36 static void spec(struct termp *, enum roffdeco, 37 const char *, size_t); 38 static void res(struct termp *, const char *, size_t); 39 static void bufferc(struct termp *, char); 40 static void adjbuf(struct termp *p, size_t); 41 static void encode(struct termp *, const char *, size_t); 42 43 44 void 45 term_free(struct termp *p) 46 { 47 48 if (p->buf) 49 free(p->buf); 50 if (p->symtab) 51 chars_free(p->symtab); 52 53 free(p); 54 } 55 56 57 void 58 term_begin(struct termp *p, term_margin head, 59 term_margin foot, const void *arg) 60 { 61 62 p->headf = head; 63 p->footf = foot; 64 p->argf = arg; 65 (*p->begin)(p); 66 } 67 68 69 void 70 term_end(struct termp *p) 71 { 72 73 (*p->end)(p); 74 } 75 76 77 struct termp * 78 term_alloc(enum termenc enc) 79 { 80 struct termp *p; 81 82 p = mandoc_calloc(1, sizeof(struct termp)); 83 p->enc = enc; 84 return(p); 85 } 86 87 88 /* 89 * Flush a line of text. A "line" is loosely defined as being something 90 * that should be followed by a newline, regardless of whether it's 91 * broken apart by newlines getting there. A line can also be a 92 * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does 93 * not have a trailing newline. 94 * 95 * The following flags may be specified: 96 * 97 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the 98 * offset value. This is useful when doing columnar lists where the 99 * prior column has right-padded. 100 * 101 * - TERMP_NOBREAK: this is the most important and is used when making 102 * columns. In short: don't print a newline and instead pad to the 103 * right margin. Used in conjunction with TERMP_NOLPAD. 104 * 105 * - TERMP_TWOSPACE: when padding, make sure there are at least two 106 * space characters of padding. Otherwise, rather break the line. 107 * 108 * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and 109 * the line is overrun, and don't pad-right if it's underrun. 110 * 111 * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when 112 * overruning, instead save the position and continue at that point 113 * when the next invocation. 114 * 115 * In-line line breaking: 116 * 117 * If TERMP_NOBREAK is specified and the line overruns the right 118 * margin, it will break and pad-right to the right margin after 119 * writing. If maxrmargin is violated, it will break and continue 120 * writing from the right-margin, which will lead to the above scenario 121 * upon exit. Otherwise, the line will break at the right margin. 122 */ 123 void 124 term_flushln(struct termp *p) 125 { 126 int i; /* current input position in p->buf */ 127 size_t vis; /* current visual position on output */ 128 size_t vbl; /* number of blanks to prepend to output */ 129 size_t vend; /* end of word visual position on output */ 130 size_t bp; /* visual right border position */ 131 size_t dv; /* temporary for visual pos calculations */ 132 int j; /* temporary loop index for p->buf */ 133 int jhy; /* last hyph before overflow w/r/t j */ 134 size_t maxvis; /* output position of visible boundary */ 135 size_t mmax; /* used in calculating bp */ 136 137 /* 138 * First, establish the maximum columns of "visible" content. 139 * This is usually the difference between the right-margin and 140 * an indentation, but can be, for tagged lists or columns, a 141 * small set of values. 142 */ 143 assert (p->rmargin >= p->offset); 144 dv = p->rmargin - p->offset; 145 maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; 146 dv = p->maxrmargin - p->offset; 147 mmax = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; 148 149 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis; 150 151 /* 152 * Indent the first line of a paragraph. 153 */ 154 vbl = p->flags & TERMP_NOLPAD ? (size_t)0 : p->offset; 155 156 vis = vend = 0; 157 i = 0; 158 159 while (i < (int)p->col) { 160 /* 161 * Handle literal tab characters: collapse all 162 * subsequent tabs into a single huge set of spaces. 163 */ 164 while (i < (int)p->col && '\t' == p->buf[i]) { 165 vend = (vis / p->tabwidth + 1) * p->tabwidth; 166 vbl += vend - vis; 167 vis = vend; 168 i++; 169 } 170 171 /* 172 * Count up visible word characters. Control sequences 173 * (starting with the CSI) aren't counted. A space 174 * generates a non-printing word, which is valid (the 175 * space is printed according to regular spacing rules). 176 */ 177 178 for (j = i, jhy = 0; j < (int)p->col; j++) { 179 if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j]) 180 break; 181 182 /* Back over the the last printed character. */ 183 if (8 == p->buf[j]) { 184 assert(j); 185 vend -= (*p->width)(p, p->buf[j - 1]); 186 continue; 187 } 188 189 /* Regular word. */ 190 /* Break at the hyphen point if we overrun. */ 191 if (vend > vis && vend < bp && 192 ASCII_HYPH == p->buf[j]) 193 jhy = j; 194 195 vend += (*p->width)(p, p->buf[j]); 196 } 197 198 /* 199 * Find out whether we would exceed the right margin. 200 * If so, break to the next line. 201 */ 202 if (vend > bp && 0 == jhy && vis > 0) { 203 vend -= vis; 204 (*p->endline)(p); 205 if (TERMP_NOBREAK & p->flags) { 206 p->viscol = p->rmargin; 207 (*p->advance)(p, p->rmargin); 208 vend += p->rmargin - p->offset; 209 } else { 210 p->viscol = 0; 211 vbl = p->offset; 212 } 213 214 /* Remove the p->overstep width. */ 215 216 bp += (size_t)p->overstep; 217 p->overstep = 0; 218 } 219 220 /* Write out the [remaining] word. */ 221 for ( ; i < (int)p->col; i++) { 222 if (vend > bp && jhy > 0 && i > jhy) 223 break; 224 if ('\t' == p->buf[i]) 225 break; 226 if (' ' == p->buf[i]) { 227 j = i; 228 while (' ' == p->buf[i]) 229 i++; 230 dv = (size_t)(i - j) * (*p->width)(p, ' '); 231 vbl += dv; 232 vend += dv; 233 break; 234 } 235 if (ASCII_NBRSP == p->buf[i]) { 236 vbl += (*p->width)(p, ' '); 237 continue; 238 } 239 240 /* 241 * Now we definitely know there will be 242 * printable characters to output, 243 * so write preceding white space now. 244 */ 245 if (vbl) { 246 (*p->advance)(p, vbl); 247 p->viscol += vbl; 248 vbl = 0; 249 } 250 251 if (ASCII_HYPH == p->buf[i]) { 252 (*p->letter)(p, '-'); 253 p->viscol += (*p->width)(p, '-'); 254 } else { 255 (*p->letter)(p, p->buf[i]); 256 p->viscol += (*p->width)(p, p->buf[i]); 257 } 258 } 259 vis = vend; 260 } 261 262 /* 263 * If there was trailing white space, it was not printed; 264 * so reset the cursor position accordingly. 265 */ 266 vis -= vbl; 267 268 p->col = 0; 269 p->overstep = 0; 270 271 if ( ! (TERMP_NOBREAK & p->flags)) { 272 p->viscol = 0; 273 (*p->endline)(p); 274 return; 275 } 276 277 if (TERMP_HANG & p->flags) { 278 /* We need one blank after the tag. */ 279 p->overstep = (int)(vis - maxvis + (*p->width)(p, ' ')); 280 281 /* 282 * Behave exactly the same way as groff: 283 * If we have overstepped the margin, temporarily move 284 * it to the right and flag the rest of the line to be 285 * shorter. 286 * If we landed right at the margin, be happy. 287 * If we are one step before the margin, temporarily 288 * move it one step LEFT and flag the rest of the line 289 * to be longer. 290 */ 291 if (p->overstep >= -1) { 292 assert((int)maxvis + p->overstep >= 0); 293 maxvis += (size_t)p->overstep; 294 } else 295 p->overstep = 0; 296 297 } else if (TERMP_DANGLE & p->flags) 298 return; 299 300 /* Right-pad. */ 301 if (maxvis > vis + 302 ((TERMP_TWOSPACE & p->flags) ? (*p->width)(p, ' ') : 0)) { 303 p->viscol += maxvis - vis; 304 (*p->advance)(p, maxvis - vis); 305 vis += (maxvis - vis); 306 } else { /* ...or newline break. */ 307 (*p->endline)(p); 308 p->viscol = p->rmargin; 309 (*p->advance)(p, p->rmargin); 310 } 311 } 312 313 314 /* 315 * A newline only breaks an existing line; it won't assert vertical 316 * space. All data in the output buffer is flushed prior to the newline 317 * assertion. 318 */ 319 void 320 term_newln(struct termp *p) 321 { 322 323 p->flags |= TERMP_NOSPACE; 324 if (0 == p->col && 0 == p->viscol) { 325 p->flags &= ~TERMP_NOLPAD; 326 return; 327 } 328 term_flushln(p); 329 p->flags &= ~TERMP_NOLPAD; 330 } 331 332 333 /* 334 * Asserts a vertical space (a full, empty line-break between lines). 335 * Note that if used twice, this will cause two blank spaces and so on. 336 * All data in the output buffer is flushed prior to the newline 337 * assertion. 338 */ 339 void 340 term_vspace(struct termp *p) 341 { 342 343 term_newln(p); 344 p->viscol = 0; 345 (*p->endline)(p); 346 } 347 348 349 static void 350 numbered(struct termp *p, const char *word, size_t len) 351 { 352 const char *rhs; 353 354 rhs = chars_num2char(word, len); 355 if (rhs) 356 encode(p, rhs, 1); 357 } 358 359 360 static void 361 spec(struct termp *p, enum roffdeco d, const char *word, size_t len) 362 { 363 const char *rhs; 364 size_t sz; 365 366 rhs = chars_spec2str(p->symtab, word, len, &sz); 367 if (rhs) 368 encode(p, rhs, sz); 369 else if (DECO_SSPECIAL == d) 370 encode(p, word, len); 371 } 372 373 374 static void 375 res(struct termp *p, const char *word, size_t len) 376 { 377 const char *rhs; 378 size_t sz; 379 380 rhs = chars_res2str(p->symtab, word, len, &sz); 381 if (rhs) 382 encode(p, rhs, sz); 383 } 384 385 386 void 387 term_fontlast(struct termp *p) 388 { 389 enum termfont f; 390 391 f = p->fontl; 392 p->fontl = p->fontq[p->fonti]; 393 p->fontq[p->fonti] = f; 394 } 395 396 397 void 398 term_fontrepl(struct termp *p, enum termfont f) 399 { 400 401 p->fontl = p->fontq[p->fonti]; 402 p->fontq[p->fonti] = f; 403 } 404 405 406 void 407 term_fontpush(struct termp *p, enum termfont f) 408 { 409 410 assert(p->fonti + 1 < 10); 411 p->fontl = p->fontq[p->fonti]; 412 p->fontq[++p->fonti] = f; 413 } 414 415 416 const void * 417 term_fontq(struct termp *p) 418 { 419 420 return(&p->fontq[p->fonti]); 421 } 422 423 424 enum termfont 425 term_fonttop(struct termp *p) 426 { 427 428 return(p->fontq[p->fonti]); 429 } 430 431 432 void 433 term_fontpopq(struct termp *p, const void *key) 434 { 435 436 while (p->fonti >= 0 && key != &p->fontq[p->fonti]) 437 p->fonti--; 438 assert(p->fonti >= 0); 439 } 440 441 442 void 443 term_fontpop(struct termp *p) 444 { 445 446 assert(p->fonti); 447 p->fonti--; 448 } 449 450 451 /* 452 * Handle pwords, partial words, which may be either a single word or a 453 * phrase that cannot be broken down (such as a literal string). This 454 * handles word styling. 455 */ 456 void 457 term_word(struct termp *p, const char *word) 458 { 459 const char *seq; 460 size_t ssz; 461 enum roffdeco deco; 462 463 if ( ! (TERMP_NOSPACE & p->flags)) { 464 if ( ! (TERMP_KEEP & p->flags)) { 465 if (TERMP_PREKEEP & p->flags) 466 p->flags |= TERMP_KEEP; 467 bufferc(p, ' '); 468 if (TERMP_SENTENCE & p->flags) 469 bufferc(p, ' '); 470 } else 471 bufferc(p, ASCII_NBRSP); 472 } 473 474 if ( ! (p->flags & TERMP_NONOSPACE)) 475 p->flags &= ~TERMP_NOSPACE; 476 else 477 p->flags |= TERMP_NOSPACE; 478 479 p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM); 480 481 while (*word) { 482 if ((ssz = strcspn(word, "\\")) > 0) 483 encode(p, word, ssz); 484 485 word += (int)ssz; 486 if ('\\' != *word) 487 continue; 488 489 seq = ++word; 490 word += a2roffdeco(&deco, &seq, &ssz); 491 492 switch (deco) { 493 case (DECO_NUMBERED): 494 numbered(p, seq, ssz); 495 break; 496 case (DECO_RESERVED): 497 res(p, seq, ssz); 498 break; 499 case (DECO_SPECIAL): 500 /* FALLTHROUGH */ 501 case (DECO_SSPECIAL): 502 spec(p, deco, seq, ssz); 503 break; 504 case (DECO_BOLD): 505 term_fontrepl(p, TERMFONT_BOLD); 506 break; 507 case (DECO_ITALIC): 508 term_fontrepl(p, TERMFONT_UNDER); 509 break; 510 case (DECO_ROMAN): 511 term_fontrepl(p, TERMFONT_NONE); 512 break; 513 case (DECO_PREVIOUS): 514 term_fontlast(p); 515 break; 516 default: 517 break; 518 } 519 520 if (DECO_NOSPACE == deco && '\0' == *word) 521 p->flags |= TERMP_NOSPACE; 522 } 523 } 524 525 526 static void 527 adjbuf(struct termp *p, size_t sz) 528 { 529 530 if (0 == p->maxcols) 531 p->maxcols = 1024; 532 while (sz >= p->maxcols) 533 p->maxcols <<= 2; 534 535 p->buf = mandoc_realloc(p->buf, p->maxcols); 536 } 537 538 539 static void 540 bufferc(struct termp *p, char c) 541 { 542 543 if (p->col + 1 >= p->maxcols) 544 adjbuf(p, p->col + 1); 545 546 p->buf[(int)p->col++] = c; 547 } 548 549 550 static void 551 encode(struct termp *p, const char *word, size_t sz) 552 { 553 enum termfont f; 554 int i; 555 556 /* 557 * Encode and buffer a string of characters. If the current 558 * font mode is unset, buffer directly, else encode then buffer 559 * character by character. 560 */ 561 562 if (TERMFONT_NONE == (f = term_fonttop(p))) { 563 if (p->col + sz >= p->maxcols) 564 adjbuf(p, p->col + sz); 565 memcpy(&p->buf[(int)p->col], word, sz); 566 p->col += sz; 567 return; 568 } 569 570 /* Pre-buffer, assuming worst-case. */ 571 572 if (p->col + 1 + (sz * 3) >= p->maxcols) 573 adjbuf(p, p->col + 1 + (sz * 3)); 574 575 for (i = 0; i < (int)sz; i++) { 576 if ( ! isgraph((u_char)word[i])) { 577 p->buf[(int)p->col++] = word[i]; 578 continue; 579 } 580 581 if (TERMFONT_UNDER == f) 582 p->buf[(int)p->col++] = '_'; 583 else 584 p->buf[(int)p->col++] = word[i]; 585 586 p->buf[(int)p->col++] = 8; 587 p->buf[(int)p->col++] = word[i]; 588 } 589 } 590 591 592 size_t 593 term_len(const struct termp *p, size_t sz) 594 { 595 596 return((*p->width)(p, ' ') * sz); 597 } 598 599 600 size_t 601 term_strlen(const struct termp *p, const char *cp) 602 { 603 size_t sz, ssz, rsz, i; 604 enum roffdeco d; 605 const char *seq, *rhs; 606 607 for (sz = 0; '\0' != *cp; ) 608 /* 609 * Account for escaped sequences within string length 610 * calculations. This follows the logic in term_word() 611 * as we must calculate the width of produced strings. 612 */ 613 if ('\\' == *cp) { 614 seq = ++cp; 615 cp += a2roffdeco(&d, &seq, &ssz); 616 617 switch (d) { 618 case (DECO_RESERVED): 619 rhs = chars_res2str 620 (p->symtab, seq, ssz, &rsz); 621 break; 622 case (DECO_SPECIAL): 623 /* FALLTHROUGH */ 624 case (DECO_SSPECIAL): 625 rhs = chars_spec2str 626 (p->symtab, seq, ssz, &rsz); 627 628 /* Allow for one-char escapes. */ 629 if (DECO_SSPECIAL != d || rhs) 630 break; 631 632 rhs = seq; 633 rsz = ssz; 634 break; 635 default: 636 rhs = NULL; 637 break; 638 } 639 640 if (rhs) 641 for (i = 0; i < rsz; i++) 642 sz += (*p->width)(p, *rhs++); 643 } else if (ASCII_NBRSP == *cp) { 644 sz += (*p->width)(p, ' '); 645 cp++; 646 } else if (ASCII_HYPH == *cp) { 647 sz += (*p->width)(p, '-'); 648 cp++; 649 } else 650 sz += (*p->width)(p, *cp++); 651 652 return(sz); 653 } 654 655 656 /* ARGSUSED */ 657 size_t 658 term_vspan(const struct termp *p, const struct roffsu *su) 659 { 660 double r; 661 662 switch (su->unit) { 663 case (SCALE_CM): 664 r = su->scale * 2; 665 break; 666 case (SCALE_IN): 667 r = su->scale * 6; 668 break; 669 case (SCALE_PC): 670 r = su->scale; 671 break; 672 case (SCALE_PT): 673 r = su->scale / 8; 674 break; 675 case (SCALE_MM): 676 r = su->scale / 1000; 677 break; 678 case (SCALE_VS): 679 r = su->scale; 680 break; 681 default: 682 r = su->scale - 1; 683 break; 684 } 685 686 if (r < 0.0) 687 r = 0.0; 688 return(/* LINTED */(size_t) 689 r); 690 } 691 692 693 size_t 694 term_hspan(const struct termp *p, const struct roffsu *su) 695 { 696 double v; 697 698 v = ((*p->hspan)(p, su)); 699 if (v < 0.0) 700 v = 0.0; 701 return((size_t) /* LINTED */ 702 v); 703 } 704