1.\" $Id: mandoc.3,v 1.1.1.5 2015/12/17 21:58:48 christos Exp $ 2.\" 3.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4.\" Copyright (c) 2010, 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> 5.\" 6.\" Permission to use, copy, modify, and distribute this software for any 7.\" purpose with or without fee is hereby granted, provided that the above 8.\" copyright notice and this permission notice appear in all copies. 9.\" 10.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17.\" 18.Dd $Mdocdate: January 15 2015 $ 19.Dt MANDOC 3 20.Os 21.Sh NAME 22.Nm mandoc , 23.Nm man_deroff , 24.Nm man_meta , 25.Nm man_mparse , 26.Nm man_node , 27.Nm mdoc_deroff , 28.Nm mdoc_meta , 29.Nm mdoc_node , 30.Nm mparse_alloc , 31.Nm mparse_free , 32.Nm mparse_getkeep , 33.Nm mparse_keep , 34.Nm mparse_open , 35.Nm mparse_readfd , 36.Nm mparse_reset , 37.Nm mparse_result , 38.Nm mparse_strerror , 39.Nm mparse_strlevel , 40.Nm mparse_wait 41.Nd mandoc macro compiler library 42.Sh SYNOPSIS 43.In sys/types.h 44.In mandoc.h 45.Pp 46.Fd "#define ASCII_NBRSP" 47.Fd "#define ASCII_HYPH" 48.Fd "#define ASCII_BREAK" 49.Ft struct mparse * 50.Fo mparse_alloc 51.Fa "int options" 52.Fa "enum mandoclevel wlevel" 53.Fa "mandocmsg mmsg" 54.Fa "const struct mchars *mchars" 55.Fa "char *defos" 56.Fc 57.Ft void 58.Fo (*mandocmsg) 59.Fa "enum mandocerr errtype" 60.Fa "enum mandoclevel level" 61.Fa "const char *file" 62.Fa "int line" 63.Fa "int col" 64.Fa "const char *msg" 65.Fc 66.Ft void 67.Fo mparse_free 68.Fa 
"struct mparse *parse" 69.Fc 70.Ft const char * 71.Fo mparse_getkeep 72.Fa "const struct mparse *parse" 73.Fc 74.Ft void 75.Fo mparse_keep 76.Fa "struct mparse *parse" 77.Fc 78.Ft "enum mandoclevel" 79.Fo mparse_open 80.Fa "struct mparse *parse" 81.Fa "int *fd" 82.Fa "const char *fname" 83.Fc 84.Ft "enum mandoclevel" 85.Fo mparse_readfd 86.Fa "struct mparse *parse" 87.Fa "int fd" 88.Fa "const char *fname" 89.Fc 90.Ft void 91.Fo mparse_reset 92.Fa "struct mparse *parse" 93.Fc 94.Ft void 95.Fo mparse_result 96.Fa "struct mparse *parse" 97.Fa "struct mdoc **mdoc" 98.Fa "struct man **man" 99.Fa "char **sodest" 100.Fc 101.Ft "const char *" 102.Fo mparse_strerror 103.Fa "enum mandocerr" 104.Fc 105.Ft "const char *" 106.Fo mparse_strlevel 107.Fa "enum mandoclevel" 108.Fc 109.Ft "enum mandoclevel" 110.Fo mparse_wait 111.Fa "struct mparse *parse" 112.Fc 113.In sys/types.h 114.In mandoc.h 115.In mdoc.h 116.Ft void 117.Fo mdoc_deroff 118.Fa "char **dest" 119.Fa "const struct mdoc_node *node" 120.Fc 121.Ft "const struct mdoc_meta *" 122.Fo mdoc_meta 123.Fa "const struct mdoc *mdoc" 124.Fc 125.Ft "const struct mdoc_node *" 126.Fo mdoc_node 127.Fa "const struct mdoc *mdoc" 128.Fc 129.Vt extern const char * const * mdoc_argnames; 130.Vt extern const char * const * mdoc_macronames; 131.In sys/types.h 132.In mandoc.h 133.In man.h 134.Ft void 135.Fo man_deroff 136.Fa "char **dest" 137.Fa "const struct man_node *node" 138.Fc 139.Ft "const struct man_meta *" 140.Fo man_meta 141.Fa "const struct man *man" 142.Fc 143.Ft "const struct mparse *" 144.Fo man_mparse 145.Fa "const struct man *man" 146.Fc 147.Ft "const struct man_node *" 148.Fo man_node 149.Fa "const struct man *man" 150.Fc 151.Vt extern const char * const * man_macronames; 152.Sh DESCRIPTION 153The 154.Nm mandoc 155library parses a 156.Ux 157manual into an abstract syntax tree (AST). 
158.Ux 159manuals are composed of 160.Xr mdoc 7 161or 162.Xr man 7 , 163and may be mixed with 164.Xr roff 7 , 165.Xr tbl 7 , 166and 167.Xr eqn 7 168invocations. 169.Pp 170The following describes a general parse sequence: 171.Bl -enum 172.It 173initiate a parsing sequence with 174.Xr mchars_alloc 3 175and 176.Fn mparse_alloc ; 177.It 178open a file with 179.Xr open 2 180or 181.Fn mparse_open ; 182.It 183parse it with 184.Fn mparse_readfd ; 185.It 186retrieve the syntax tree with 187.Fn mparse_result ; 188.It 189iterate over parse nodes with 190.Fn mdoc_node 191or 192.Fn man_node ; 193.It 194free all allocated memory with 195.Fn mparse_free 196and 197.Xr mchars_free 3 , 198or invoke 199.Fn mparse_reset 200and parse new files. 201.El 202.Sh REFERENCE 203This section documents the functions, types, and variables available 204via 205.In mandoc.h , 206with the exception of those documented in 207.Xr mandoc_escape 3 208and 209.Xr mchars_alloc 3 . 210.Ss Types 211.Bl -ohang 212.It Vt "enum mandocerr" 213An error or warning message during parsing. 214.It Vt "enum mandoclevel" 215A classification of an 216.Vt "enum mandocerr" 217as regards system operation. 218.It Vt "struct mchars" 219An opaque pointer to a character table. 220Created with 221.Xr mchars_alloc 3 222and freed with 223.Xr mchars_free 3 . 224.It Vt "struct mparse" 225An opaque pointer to a running parse sequence. 226Created with 227.Fn mparse_alloc 228and freed with 229.Fn mparse_free . 230This may be used across parsed input if 231.Fn mparse_reset 232is called between parses. 233.It Vt "mandocmsg" 234A prototype for a function to handle error and warning 235messages emitted by the parser. 236.El 237.Ss Functions 238.Bl -ohang 239.It Fn man_deroff 240Obtain a text-only representation of a 241.Vt struct man_node , 242including text contained in its child nodes. 243To be used on children of the pointer returned from 244.Fn man_node . 
245When it is no longer needed, the pointer returned from 246.Fn man_deroff 247can be passed to 248.Xr free 3 . 249.It Fn man_meta 250Obtain the meta-data of a successful 251.Xr man 7 252parse. 253This may only be used on a pointer returned by 254.Fn mparse_result . 255Declared in 256.In man.h , 257implemented in 258.Pa man.c . 259.It Fn man_mparse 260Get the parser used for the current output. 261Declared in 262.In man.h , 263implemented in 264.Pa man.c . 265.It Fn man_node 266Obtain the root node of a successful 267.Xr man 7 268parse. 269This may only be used on a pointer returned by 270.Fn mparse_result . 271Declared in 272.In man.h , 273implemented in 274.Pa man.c . 275.It Fn mdoc_deroff 276Obtain a text-only representation of a 277.Vt struct mdoc_node , 278including text contained in its child nodes. 279To be used on children of the pointer returned from 280.Fn mdoc_node . 281When it is no longer needed, the pointer returned from 282.Fn mdoc_deroff 283can be passed to 284.Xr free 3 . 285.It Fn mdoc_meta 286Obtain the meta-data of a successful 287.Xr mdoc 7 288parse. 289This may only be used on a pointer returned by 290.Fn mparse_result . 291Declared in 292.In mdoc.h , 293implemented in 294.Pa mdoc.c . 295.It Fn mdoc_node 296Obtain the root node of a successful 297.Xr mdoc 7 298parse. 299This may only be used on a pointer returned by 300.Fn mparse_result . 301Declared in 302.In mdoc.h , 303implemented in 304.Pa mdoc.c . 305.It Fn mparse_alloc 306Allocate a parser. 307The arguments have the following effect: 308.Bl -tag -offset 5n -width inttype 309.It Ar options 310When the 311.Dv MPARSE_MDOC 312or 313.Dv MPARSE_MAN 314bit is set, only that parser is used. 315Otherwise, the document type is automatically detected. 316.Pp 317When the 318.Dv MPARSE_SO 319bit is set, 320.Xr roff 7 321.Ic \&so 322file inclusion requests are always honoured. 
323Otherwise, if the request is the only content in an input file, 324only the file name is remembered, to be returned in the 325.Fa sodest 326argument of 327.Fn mparse_result . 328.Pp 329When the 330.Dv MPARSE_QUICK 331bit is set, parsing is aborted after the NAME section. 332This is for example useful in 333.Xr makewhatis 8 334.Fl Q 335to quickly build minimal databases. 336.It Ar wlevel 337Can be set to 338.Dv MANDOCLEVEL_BADARG , 339.Dv MANDOCLEVEL_ERROR , 340or 341.Dv MANDOCLEVEL_WARNING . 342Messages below the selected level will be suppressed. 343.It Ar mmsg 344A callback function to handle errors and warnings. 345See 346.Pa main.c 347for an example. 348.It Ar mchars 349An opaque pointer to a character table obtained from 350.Xr mchars_alloc 3 . 351.It Ar defos 352A default string for the 353.Xr mdoc 7 354.Sq \&Os 355macro, overriding the 356.Dv OSNAME 357preprocessor definition and the results of 358.Xr uname 3 . 359.El 360.Pp 361The same parser may be used for multiple files so long as 362.Fn mparse_reset 363is called between parses. 364.Fn mparse_free 365must be called to free the memory allocated by this function. 366Declared in 367.In mandoc.h , 368implemented in 369.Pa read.c . 370.It Fn mparse_free 371Free all memory allocated by 372.Fn mparse_alloc . 373Declared in 374.In mandoc.h , 375implemented in 376.Pa read.c . 377.It Fn mparse_getkeep 378Acquire the keep buffer. 379Must follow a call of 380.Fn mparse_keep . 381Declared in 382.In mandoc.h , 383implemented in 384.Pa read.c . 385.It Fn mparse_keep 386Instruct the parser to retain a copy of its parsed input. 387This can be acquired with subsequent 388.Fn mparse_getkeep 389calls. 390Declared in 391.In mandoc.h , 392implemented in 393.Pa read.c . 394.It Fn mparse_open 395If the 396.Fa fname 397ends in 398.Pa .gz , 399open with 400.Xr gunzip 1 ; 401otherwise, with 402.Xr open 2 . 403If 404.Xr open 2 405fails, append 406.Pa .gz 407and try with 408.Xr gunzip 1 . 
409Return a file descriptor open for reading in 410.Fa fd , 411or -1 on failure. 412It can be passed to 413.Fn mparse_readfd 414or used directly. 415Declared in 416.In mandoc.h , 417implemented in 418.Pa read.c . 419.It Fn mparse_readfd 420Parse a file descriptor opened with 421.Xr open 2 422or 423.Fn mparse_open . 424Pass the associated filename in 425.Fa fname . 426Calls 427.Fn mparse_wait 428before returning. 429This function may be called multiple times with different parameters; however, 430.Fn mparse_reset 431should be invoked between parses. 432Declared in 433.In mandoc.h , 434implemented in 435.Pa read.c . 436.It Fn mparse_reset 437Reset a parser so that 438.Fn mparse_readfd 439may be used again. 440Declared in 441.In mandoc.h , 442implemented in 443.Pa read.c . 444.It Fn mparse_result 445Obtain the result of a parse. 446One of the three pointers will be filled in. 447Declared in 448.In mandoc.h , 449implemented in 450.Pa read.c . 451.It Fn mparse_strerror 452Return a statically-allocated string representation of an error code. 453Declared in 454.In mandoc.h , 455implemented in 456.Pa read.c . 457.It Fn mparse_strlevel 458Return a statically-allocated string representation of a level code. 459Declared in 460.In mandoc.h , 461implemented in 462.Pa read.c . 463.It Fn mparse_wait 464Bury a 465.Xr gunzip 1 466child process that was spawned with 467.Fn mparse_open . 468To be called after the parse sequence is complete. 469Not needed after 470.Fn mparse_readfd , 471but does no harm in that case, either. 472Returns 473.Dv MANDOCLEVEL_OK 474on success and 475.Dv MANDOCLEVEL_SYSERR 476on failure, that is, when 477.Xr wait 2 478fails, or when 479.Xr gunzip 1 480died from a signal or exited with non-zero status. 481Declared in 482.In mandoc.h , 483implemented in 484.Pa read.c . 485.El 486.Ss Variables 487.Bl -ohang 488.It Va man_macronames 489The string representation of a man macro as indexed by 490.Vt "enum mant" . 
491.It Va mdoc_argnames 492The string representation of a mdoc macro argument as indexed by 493.Vt "enum mdocargt" . 494.It Va mdoc_macronames 495The string representation of a mdoc macro as indexed by 496.Vt "enum mdoct" . 497.El 498.Sh IMPLEMENTATION NOTES 499This section consists of structural documentation for 500.Xr mdoc 7 501and 502.Xr man 7 503syntax trees and strings. 504.Ss Man and Mdoc Strings 505Strings may be extracted from mdoc and man meta-data, or from text 506nodes (MDOC_TEXT and MAN_TEXT, respectively). 507These strings have special non-printing formatting cues embedded in the 508text itself, as well as 509.Xr roff 7 510escapes preserved from input. 511Implementing systems will need to handle both situations to produce 512human-readable text. 513In general, strings may be assumed to consist of 7-bit ASCII characters. 514.Pp 515The following non-printing characters may be embedded in text strings: 516.Bl -tag -width Ds 517.It Dv ASCII_NBRSP 518A non-breaking space character. 519.It Dv ASCII_HYPH 520A soft hyphen. 521.It Dv ASCII_BREAK 522A breakable zero-width space. 523.El 524.Pp 525Escape characters are also passed verbatim into text strings. 526An escape character is a sequence of characters beginning with the 527backslash 528.Pq Sq \e . 529To construct human-readable text, these should be intercepted with 530.Xr mandoc_escape 3 531and converted with one of the functions described in 532.Xr mchars_alloc 3 . 533.Ss Man Abstract Syntax Tree 534This AST is governed by the ontological rules dictated in 535.Xr man 7 536and derives its terminology accordingly. 537.Pp 538The AST is composed of 539.Vt struct man_node 540nodes with element, root and text types as declared by the 541.Va type 542field. 543Each node also provides its parse point (the 544.Va line , 545.Va sec , 546and 547.Va pos 548fields), its position in the tree (the 549.Va parent , 550.Va child , 551.Va next 552and 553.Va prev 554fields) and some type-specific data. 
555.Pp 556The tree itself is arranged according to the following normal form, 557where capitalised non-terminals represent nodes. 558.Pp 559.Bl -tag -width "ELEMENTXX" -compact 560.It ROOT 561\(<- mnode+ 562.It mnode 563\(<- ELEMENT | TEXT | BLOCK 564.It BLOCK 565\(<- HEAD BODY 566.It HEAD 567\(<- mnode* 568.It BODY 569\(<- mnode* 570.It ELEMENT 571\(<- ELEMENT | TEXT* 572.It TEXT 573\(<- [[:ascii:]]* 574.El 575.Pp 576The only elements capable of nesting other elements are those with 577next-line scope as documented in 578.Xr man 7 . 579.Ss Mdoc Abstract Syntax Tree 580This AST is governed by the ontological 581rules dictated in 582.Xr mdoc 7 583and derives its terminology accordingly. 584.Qq In-line 585elements described in 586.Xr mdoc 7 587are described simply as 588.Qq elements . 589.Pp 590The AST is composed of 591.Vt struct mdoc_node 592nodes with block, head, body, element, root and text types as declared 593by the 594.Va type 595field. 596Each node also provides its parse point (the 597.Va line , 598.Va sec , 599and 600.Va pos 601fields), its position in the tree (the 602.Va parent , 603.Va child , 604.Va nchild , 605.Va next 606and 607.Va prev 608fields) and some type-specific data, in particular, for nodes generated 609from macros, the generating macro in the 610.Va tok 611field. 612.Pp 613The tree itself is arranged according to the following normal form, 614where capitalised non-terminals represent nodes. 615.Pp 616.Bl -tag -width "ELEMENTXX" -compact 617.It ROOT 618\(<- mnode+ 619.It mnode 620\(<- BLOCK | ELEMENT | TEXT 621.It BLOCK 622\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]] 623.It ELEMENT 624\(<- TEXT* 625.It HEAD 626\(<- mnode* 627.It BODY 628\(<- mnode* [ENDBODY mnode*] 629.It TAIL 630\(<- mnode* 631.It TEXT 632\(<- [[:ascii:]]* 633.El 634.Pp 635Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of 636the BLOCK production: these refer to punctuation marks. 
637Furthermore, although a TEXT node will generally have a non-zero-length 638string, in the specific case of 639.Sq \&.Bd \-literal , 640an empty line will produce a zero-length string. 641Multiple body parts are only found in invocations of 642.Sq \&Bl \-column , 643where a new body introduces a new phrase. 644.Pp 645The 646.Xr mdoc 7 647syntax tree accommodates broken block structures as well. 648The ENDBODY node is available to end the formatting associated 649with a given block before the physical end of that block. 650It has a non-null 651.Va end 652field, is of the BODY 653.Va type , 654has the same 655.Va tok 656as the BLOCK it is ending, and has a 657.Va pending 658field pointing to that BLOCK's BODY node. 659It is an indirect child of that BODY node 660and has no children of its own. 661.Pp 662An ENDBODY node is generated when a block ends while one of its child 663blocks is still open, like in the following example: 664.Bd -literal -offset indent 665\&.Ao ao 666\&.Bo bo ac 667\&.Ac bc 668\&.Bc end 669.Ed 670.Pp 671This example results in the following block structure: 672.Bd -literal -offset indent 673BLOCK Ao 674 HEAD Ao 675 BODY Ao 676 TEXT ao 677 BLOCK Bo, pending -> Ao 678 HEAD Bo 679 BODY Bo 680 TEXT bo 681 TEXT ac 682 ENDBODY Ao, pending -> Ao 683 TEXT bc 684TEXT end 685.Ed 686.Pp 687Here, the formatting of the 688.Sq \&Ao 689block extends from TEXT ao to TEXT ac, 690while the formatting of the 691.Sq \&Bo 692block extends from TEXT bo to TEXT bc. 693It renders as follows in 694.Fl T Ns Cm ascii 695mode: 696.Pp 697.Dl <ao [bo ac> bc] end 698.Pp 699Support for badly-nested blocks is only provided for backward 700compatibility with some older 701.Xr mdoc 7 702implementations. 703Using badly-nested blocks is 704.Em strongly discouraged ; 705for example, the 706.Fl T Ns Cm html 707and 708.Fl T Ns Cm xhtml 709front-ends to 710.Xr mandoc 1 711are unable to render them in any meaningful way. 
712Furthermore, behaviour when encountering badly-nested blocks is not 713consistent across troff implementations, especially when using multiple 714levels of badly-nested blocks. 715.Sh SEE ALSO 716.Xr mandoc 1 , 717.Xr mandoc_escape 3 , 718.Xr mandoc_malloc 3 , 719.Xr mchars_alloc 3 , 720.Xr eqn 7 , 721.Xr man 7 , 722.Xr mandoc_char 7 , 723.Xr mdoc 7 , 724.Xr roff 7 , 725.Xr tbl 7 726.Sh AUTHORS 727The 728.Nm 729library was written by 730.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . 731