xref: /netbsd-src/external/bsd/mdocml/dist/mandoc.3 (revision 63aea4bd5b445e491ff0389fe27ec78b3099dba3)
1.\"	$Id: mandoc.3,v 1.1.1.5 2015/12/17 21:58:48 christos Exp $
2.\"
3.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4.\" Copyright (c) 2010, 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5.\"
6.\" Permission to use, copy, modify, and distribute this software for any
7.\" purpose with or without fee is hereby granted, provided that the above
8.\" copyright notice and this permission notice appear in all copies.
9.\"
10.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17.\"
18.Dd $Mdocdate: January 15 2015 $
19.Dt MANDOC 3
20.Os
21.Sh NAME
22.Nm mandoc ,
23.Nm man_deroff ,
24.Nm man_meta ,
25.Nm man_mparse ,
26.Nm man_node ,
27.Nm mdoc_deroff ,
28.Nm mdoc_meta ,
29.Nm mdoc_node ,
30.Nm mparse_alloc ,
31.Nm mparse_free ,
32.Nm mparse_getkeep ,
33.Nm mparse_keep ,
34.Nm mparse_open ,
35.Nm mparse_readfd ,
36.Nm mparse_reset ,
37.Nm mparse_result ,
38.Nm mparse_strerror ,
39.Nm mparse_strlevel ,
40.Nm mparse_wait
41.Nd mandoc macro compiler library
42.Sh SYNOPSIS
43.In sys/types.h
44.In mandoc.h
45.Pp
46.Fd "#define ASCII_NBRSP"
47.Fd "#define ASCII_HYPH"
48.Fd "#define ASCII_BREAK"
49.Ft struct mparse *
50.Fo mparse_alloc
51.Fa "int options"
52.Fa "enum mandoclevel wlevel"
53.Fa "mandocmsg mmsg"
54.Fa "const struct mchars *mchars"
55.Fa "char *defos"
56.Fc
57.Ft void
58.Fo (*mandocmsg)
59.Fa "enum mandocerr errtype"
60.Fa "enum mandoclevel level"
61.Fa "const char *file"
62.Fa "int line"
63.Fa "int col"
64.Fa "const char *msg"
65.Fc
66.Ft void
67.Fo mparse_free
68.Fa "struct mparse *parse"
69.Fc
70.Ft const char *
71.Fo mparse_getkeep
72.Fa "const struct mparse *parse"
73.Fc
74.Ft void
75.Fo mparse_keep
76.Fa "struct mparse *parse"
77.Fc
78.Ft "enum mandoclevel"
79.Fo mparse_open
80.Fa "struct mparse *parse"
81.Fa "int *fd"
82.Fa "const char *fname"
83.Fc
84.Ft "enum mandoclevel"
85.Fo mparse_readfd
86.Fa "struct mparse *parse"
87.Fa "int fd"
88.Fa "const char *fname"
89.Fc
90.Ft void
91.Fo mparse_reset
92.Fa "struct mparse *parse"
93.Fc
94.Ft void
95.Fo mparse_result
96.Fa "struct mparse *parse"
97.Fa "struct mdoc **mdoc"
98.Fa "struct man **man"
99.Fa "char **sodest"
100.Fc
101.Ft "const char *"
102.Fo mparse_strerror
103.Fa "enum mandocerr"
104.Fc
105.Ft "const char *"
106.Fo mparse_strlevel
107.Fa "enum mandoclevel"
108.Fc
109.Ft "enum mandoclevel"
110.Fo mparse_wait
111.Fa "struct mparse *parse"
112.Fc
113.In sys/types.h
114.In mandoc.h
115.In mdoc.h
116.Ft void
117.Fo mdoc_deroff
118.Fa "char **dest"
119.Fa "const struct mdoc_node *node"
120.Fc
121.Ft "const struct mdoc_meta *"
122.Fo mdoc_meta
123.Fa "const struct mdoc *mdoc"
124.Fc
125.Ft "const struct mdoc_node *"
126.Fo mdoc_node
127.Fa "const struct mdoc *mdoc"
128.Fc
129.Vt extern const char * const * mdoc_argnames;
130.Vt extern const char * const * mdoc_macronames;
131.In sys/types.h
132.In mandoc.h
133.In man.h
134.Ft void
135.Fo man_deroff
136.Fa "char **dest"
137.Fa "const struct man_node *node"
138.Fc
139.Ft "const struct man_meta *"
140.Fo man_meta
141.Fa "const struct man *man"
142.Fc
143.Ft "const struct mparse *"
144.Fo man_mparse
145.Fa "const struct man *man"
146.Fc
147.Ft "const struct man_node *"
148.Fo man_node
149.Fa "const struct man *man"
150.Fc
151.Vt extern const char * const * man_macronames;
152.Sh DESCRIPTION
153The
154.Nm mandoc
155library parses a
156.Ux
157manual into an abstract syntax tree (AST).
158.Ux
159manuals are composed of
160.Xr mdoc 7
161or
162.Xr man 7 ,
163and may be mixed with
164.Xr roff 7 ,
165.Xr tbl 7 ,
166and
167.Xr eqn 7
168invocations.
169.Pp
170The following describes a general parse sequence:
171.Bl -enum
172.It
173initiate a parsing sequence with
174.Xr mchars_alloc 3
175and
176.Fn mparse_alloc ;
177.It
178open a file with
179.Xr open 2
180or
181.Fn mparse_open ;
182.It
183parse it with
184.Fn mparse_readfd ;
185.It
186retrieve the syntax tree with
187.Fn mparse_result ;
188.It
189iterate over parse nodes with
190.Fn mdoc_node
191or
192.Fn man_node ;
193.It
194free all allocated memory with
195.Fn mparse_free
196and
197.Xr mchars_free 3 ,
198or invoke
199.Fn mparse_reset
200and parse new files.
201.El
202.Sh REFERENCE
203This section documents the functions, types, and variables available
204via
205.In mandoc.h ,
206with the exception of those documented in
207.Xr mandoc_escape 3
208and
209.Xr mchars_alloc 3 .
210.Ss Types
211.Bl -ohang
212.It Vt "enum mandocerr"
213An error or warning message during parsing.
214.It Vt "enum mandoclevel"
215A classification of an
216.Vt "enum mandocerr"
217as regards system operation.
218.It Vt "struct mchars"
219An opaque pointer to a character table.
220Created with
221.Xr mchars_alloc 3
222and freed with
223.Xr mchars_free 3 .
224.It Vt "struct mparse"
225An opaque pointer to a running parse sequence.
226Created with
227.Fn mparse_alloc
228and freed with
229.Fn mparse_free .
230This may be used across parsed input if
231.Fn mparse_reset
232is called between parses.
233.It Vt "mandocmsg"
234A prototype for a function to handle error and warning
235messages emitted by the parser.
236.El
237.Ss Functions
238.Bl -ohang
239.It Fn man_deroff
240Obtain a text-only representation of a
241.Vt struct man_node ,
242including text contained in its child nodes.
243To be used on children of the pointer returned from
244.Fn man_node .
245When it is no longer needed, the pointer returned from
246.Fn man_deroff
247can be passed to
248.Xr free 3 .
249.It Fn man_meta
250Obtain the meta-data of a successful
251.Xr man 7
252parse.
253This may only be used on a pointer returned by
254.Fn mparse_result .
255Declared in
256.In man.h ,
257implemented in
258.Pa man.c .
259.It Fn man_mparse
260Get the parser used for the current output.
261Declared in
262.In man.h ,
263implemented in
264.Pa man.c .
265.It Fn man_node
266Obtain the root node of a successful
267.Xr man 7
268parse.
269This may only be used on a pointer returned by
270.Fn mparse_result .
271Declared in
272.In man.h ,
273implemented in
274.Pa man.c .
275.It Fn mdoc_deroff
276Obtain a text-only representation of a
277.Vt struct mdoc_node ,
278including text contained in its child nodes.
279To be used on children of the pointer returned from
280.Fn mdoc_node .
281When it is no longer needed, the pointer returned from
282.Fn mdoc_deroff
283can be passed to
284.Xr free 3 .
285.It Fn mdoc_meta
286Obtain the meta-data of a successful
287.Xr mdoc 7
288parse.
289This may only be used on a pointer returned by
290.Fn mparse_result .
291Declared in
292.In mdoc.h ,
293implemented in
294.Pa mdoc.c .
295.It Fn mdoc_node
296Obtain the root node of a successful
297.Xr mdoc 7
298parse.
299This may only be used on a pointer returned by
300.Fn mparse_result .
301Declared in
302.In mdoc.h ,
303implemented in
304.Pa mdoc.c .
305.It Fn mparse_alloc
306Allocate a parser.
307The arguments have the following effect:
308.Bl -tag -offset 5n -width inttype
309.It Ar options
310When the
311.Dv MPARSE_MDOC
312or
313.Dv MPARSE_MAN
314bit is set, only that parser is used.
315Otherwise, the document type is automatically detected.
316.Pp
317When the
318.Dv MPARSE_SO
319bit is set,
320.Xr roff 7
321.Ic \&so
322file inclusion requests are always honoured.
323Otherwise, if the request is the only content in an input file,
324only the file name is remembered, to be returned in the
325.Fa sodest
326argument of
327.Fn mparse_result .
328.Pp
329When the
330.Dv MPARSE_QUICK
331bit is set, parsing is aborted after the NAME section.
332This is for example useful in
333.Xr makewhatis 8
334.Fl Q
335to quickly build minimal databases.
336.It Ar wlevel
337Can be set to
338.Dv MANDOCLEVEL_BADARG ,
339.Dv MANDOCLEVEL_ERROR ,
340or
341.Dv MANDOCLEVEL_WARNING .
342Messages below the selected level will be suppressed.
343.It Ar mmsg
344A callback function to handle errors and warnings.
345See
346.Pa main.c
347for an example.
348.It Ar mchars
349An opaque pointer to a character table obtained from
350.Xr mchars_alloc 3 .
351.It Ar defos
352A default string for the
353.Xr mdoc 7
354.Sq \&Os
355macro, overriding the
356.Dv OSNAME
357preprocessor definition and the results of
358.Xr uname 3 .
359.El
360.Pp
361The same parser may be used for multiple files so long as
362.Fn mparse_reset
363is called between parses.
364.Fn mparse_free
365must be called to free the memory allocated by this function.
366Declared in
367.In mandoc.h ,
368implemented in
369.Pa read.c .
370.It Fn mparse_free
371Free all memory allocated by
372.Fn mparse_alloc .
373Declared in
374.In mandoc.h ,
375implemented in
376.Pa read.c .
377.It Fn mparse_getkeep
378Acquire the keep buffer.
379Must follow a call of
380.Fn mparse_keep .
381Declared in
382.In mandoc.h ,
383implemented in
384.Pa read.c .
385.It Fn mparse_keep
386Instruct the parser to retain a copy of its parsed input.
387This can be acquired with subsequent
388.Fn mparse_getkeep
389calls.
390Declared in
391.In mandoc.h ,
392implemented in
393.Pa read.c .
394.It Fn mparse_open
395If the
396.Fa fname
397ends in
398.Pa .gz ,
399open with
400.Xr gunzip 1 ;
401otherwise, with
402.Xr open 2 .
403If
404.Xr open 2
405fails, append
406.Pa .gz
407and try with
408.Xr gunzip 1 .
409Return a file descriptor open for reading in
410.Fa fd ,
411or -1 on failure.
412It can be passed to
413.Fn mparse_readfd
414or used directly.
415Declared in
416.In mandoc.h ,
417implemented in
418.Pa read.c .
419.It Fn mparse_readfd
420Parse a file descriptor opened with
421.Xr open 2
422or
423.Fn mparse_open .
424Pass the associated filename in
425.Va fname .
426Calls
427.Fn mparse_wait
428before returning.
429This function may be called multiple times with different parameters; however,
430.Fn mparse_reset
431should be invoked between parses.
432Declared in
433.In mandoc.h ,
434implemented in
435.Pa read.c .
436.It Fn mparse_reset
437Reset a parser so that
438.Fn mparse_readfd
439may be used again.
440Declared in
441.In mandoc.h ,
442implemented in
443.Pa read.c .
444.It Fn mparse_result
445Obtain the result of a parse.
446One of the three pointers will be filled in.
447Declared in
448.In mandoc.h ,
449implemented in
450.Pa read.c .
451.It Fn mparse_strerror
452Return a statically-allocated string representation of an error code.
453Declared in
454.In mandoc.h ,
455implemented in
456.Pa read.c .
457.It Fn mparse_strlevel
458Return a statically-allocated string representation of a level code.
459Declared in
460.In mandoc.h ,
461implemented in
462.Pa read.c .
463.It Fn mparse_wait
464Bury a
465.Xr gunzip 1
466child process that was spawned with
467.Fn mparse_open .
468To be called after the parse sequence is complete.
469Not needed after
470.Fn mparse_readfd ,
471but does no harm in that case, either.
472Returns
473.Dv MANDOCLEVEL_OK
474on success and
475.Dv MANDOCLEVEL_SYSERR
476on failure, that is, when
477.Xr wait 2
478fails, or when
479.Xr gunzip 1
480died from a signal or exited with non-zero status.
481Declared in
482.In mandoc.h ,
483implemented in
484.Pa read.c .
485.El
486.Ss Variables
487.Bl -ohang
488.It Va man_macronames
489The string representation of a man macro as indexed by
490.Vt "enum mant" .
491.It Va mdoc_argnames
492The string representation of a mdoc macro argument as indexed by
493.Vt "enum mdocargt" .
494.It Va mdoc_macronames
495The string representation of a mdoc macro as indexed by
496.Vt "enum mdoct" .
497.El
498.Sh IMPLEMENTATION NOTES
499This section consists of structural documentation for
500.Xr mdoc 7
501and
502.Xr man 7
503syntax trees and strings.
504.Ss Man and Mdoc Strings
505Strings may be extracted from mdoc and man meta-data, or from text
506nodes (MDOC_TEXT and MAN_TEXT, respectively).
507These strings have special non-printing formatting cues embedded in the
508text itself, as well as
509.Xr roff 7
510escapes preserved from input.
511Implementing systems will need to handle both situations to produce
512human-readable text.
513In general, strings may be assumed to consist of 7-bit ASCII characters.
514.Pp
515The following non-printing characters may be embedded in text strings:
516.Bl -tag -width Ds
517.It Dv ASCII_NBRSP
518A non-breaking space character.
519.It Dv ASCII_HYPH
520A soft hyphen.
521.It Dv ASCII_BREAK
522A breakable zero-width space.
523.El
524.Pp
525Escape characters are also passed verbatim into text strings.
526An escape character is a sequence of characters beginning with the
527backslash
528.Pq Sq \e .
529To construct human-readable text, these should be intercepted with
530.Xr mandoc_escape 3
531and converted with one of the functions described in
532.Xr mchars_alloc 3 .
533.Ss Man Abstract Syntax Tree
534This AST is governed by the ontological rules dictated in
535.Xr man 7
536and derives its terminology accordingly.
537.Pp
538The AST is composed of
539.Vt struct man_node
540nodes with element, root and text types as declared by the
541.Va type
542field.
543Each node also provides its parse point (the
544.Va line ,
545.Va sec ,
546and
547.Va pos
548fields), its position in the tree (the
549.Va parent ,
550.Va child ,
551.Va next
552and
553.Va prev
554fields) and some type-specific data.
555.Pp
556The tree itself is arranged according to the following normal form,
557where capitalised non-terminals represent nodes.
558.Pp
559.Bl -tag -width "ELEMENTXX" -compact
560.It ROOT
561\(<- mnode+
562.It mnode
563\(<- ELEMENT | TEXT | BLOCK
564.It BLOCK
565\(<- HEAD BODY
566.It HEAD
567\(<- mnode*
568.It BODY
569\(<- mnode*
570.It ELEMENT
571\(<- ELEMENT | TEXT*
572.It TEXT
573\(<- [[:ascii:]]*
574.El
575.Pp
576The only elements capable of nesting other elements are those with
577next-line scope as documented in
578.Xr man 7 .
579.Ss Mdoc Abstract Syntax Tree
580This AST is governed by the ontological
581rules dictated in
582.Xr mdoc 7
583and derives its terminology accordingly.
584.Qq In-line
585elements described in
586.Xr mdoc 7
587are described simply as
588.Qq elements .
589.Pp
590The AST is composed of
591.Vt struct mdoc_node
592nodes with block, head, body, element, root and text types as declared
593by the
594.Va type
595field.
596Each node also provides its parse point (the
597.Va line ,
598.Va sec ,
599and
600.Va pos
601fields), its position in the tree (the
602.Va parent ,
603.Va child ,
604.Va nchild ,
605.Va next
606and
607.Va prev
608fields) and some type-specific data, in particular, for nodes generated
609from macros, the generating macro in the
610.Va tok
611field.
612.Pp
613The tree itself is arranged according to the following normal form,
614where capitalised non-terminals represent nodes.
615.Pp
616.Bl -tag -width "ELEMENTXX" -compact
617.It ROOT
618\(<- mnode+
619.It mnode
620\(<- BLOCK | ELEMENT | TEXT
621.It BLOCK
622\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
623.It ELEMENT
624\(<- TEXT*
625.It HEAD
626\(<- mnode*
627.It BODY
628\(<- mnode* [ENDBODY mnode*]
629.It TAIL
630\(<- mnode*
631.It TEXT
632\(<- [[:ascii:]]*
633.El
634.Pp
635Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
636the BLOCK production: these refer to punctuation marks.
637Furthermore, although a TEXT node will generally have a non-zero-length
638string, in the specific case of
639.Sq \&.Bd \-literal ,
640an empty line will produce a zero-length string.
641Multiple body parts are only found in invocations of
642.Sq \&Bl \-column ,
643where a new body introduces a new phrase.
644.Pp
645The
646.Xr mdoc 7
647syntax tree accommodates broken block structures as well.
648The ENDBODY node is available to end the formatting associated
649with a given block before the physical end of that block.
650It has a non-null
651.Va end
652field, is of the BODY
653.Va type ,
654has the same
655.Va tok
656as the BLOCK it is ending, and has a
657.Va pending
658field pointing to that BLOCK's BODY node.
659It is an indirect child of that BODY node
660and has no children of its own.
661.Pp
662An ENDBODY node is generated when a block ends while one of its child
663blocks is still open, like in the following example:
664.Bd -literal -offset indent
665\&.Ao ao
666\&.Bo bo ac
667\&.Ac bc
668\&.Bc end
669.Ed
670.Pp
671This example results in the following block structure:
672.Bd -literal -offset indent
673BLOCK Ao
674    HEAD Ao
675    BODY Ao
676        TEXT ao
677        BLOCK Bo, pending -> Ao
678            HEAD Bo
679            BODY Bo
680                TEXT bo
681                TEXT ac
682                ENDBODY Ao, pending -> Ao
683                TEXT bc
684TEXT end
685.Ed
686.Pp
687Here, the formatting of the
688.Sq \&Ao
689block extends from TEXT ao to TEXT ac,
690while the formatting of the
691.Sq \&Bo
692block extends from TEXT bo to TEXT bc.
693It renders as follows in
694.Fl T Ns Cm ascii
695mode:
696.Pp
697.Dl <ao [bo ac> bc] end
698.Pp
699Support for badly-nested blocks is only provided for backward
700compatibility with some older
701.Xr mdoc 7
702implementations.
703Using badly-nested blocks is
704.Em strongly discouraged ;
705for example, the
706.Fl T Ns Cm html
707and
708.Fl T Ns Cm xhtml
709front-ends to
710.Xr mandoc 1
711are unable to render them in any meaningful way.
712Furthermore, behaviour when encountering badly-nested blocks is not
713consistent across troff implementations, especially when using multiple
714levels of badly-nested blocks.
715.Sh SEE ALSO
716.Xr mandoc 1 ,
717.Xr mandoc_escape 3 ,
718.Xr mandoc_malloc 3 ,
719.Xr mchars_alloc 3 ,
720.Xr eqn 7 ,
721.Xr man 7 ,
722.Xr mandoc_char 7 ,
723.Xr mdoc 7 ,
724.Xr roff 7 ,
725.Xr tbl 7
726.Sh AUTHORS
727The
728.Nm
729library was written by
730.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .
731