xref: /netbsd-src/external/bsd/mdocml/dist/mandoc.3 (revision af56d1fe9956bd7c616e18c1b7f025f464618471)
1.\"	$Vendor-Id: mandoc.3,v 1.17 2012/01/13 15:27:14 joerg Exp $
2.\"
3.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5.\"
6.\" Permission to use, copy, modify, and distribute this software for any
7.\" purpose with or without fee is hereby granted, provided that the above
8.\" copyright notice and this permission notice appear in all copies.
9.\"
10.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17.\"
18.Dd January 13, 2012
19.Dt MANDOC 3
20.Os
21.Sh NAME
22.Nm mandoc ,
23.Nm mandoc_escape ,
24.Nm man_meta ,
25.Nm man_mparse ,
26.Nm man_node ,
27.Nm mchars_alloc ,
28.Nm mchars_free ,
29.Nm mchars_num2char ,
30.Nm mchars_num2uc ,
31.Nm mchars_spec2cp ,
32.Nm mchars_spec2str ,
33.Nm mdoc_meta ,
34.Nm mdoc_node ,
35.Nm mparse_alloc ,
36.Nm mparse_free ,
37.Nm mparse_getkeep ,
38.Nm mparse_keep ,
39.Nm mparse_readfd ,
40.Nm mparse_reset ,
41.Nm mparse_result ,
42.Nm mparse_strerror ,
43.Nm mparse_strlevel
44.Nd mandoc macro compiler library
45.Sh LIBRARY
46.Lb mandoc
47.Sh SYNOPSIS
48.In man.h
49.In mdoc.h
50.In mandoc.h
51.Ft "enum mandoc_esc"
52.Fo mandoc_escape
53.Fa "const char **end"
54.Fa "const char **start"
55.Fa "int *sz"
56.Fc
57.Ft "const struct man_meta *"
58.Fo man_meta
59.Fa "const struct man *man"
60.Fc
61.Ft "const struct mparse *"
62.Fo man_mparse
63.Fa "const struct man *man"
64.Fc
65.Ft "const struct man_node *"
66.Fo man_node
67.Fa "const struct man *man"
68.Fc
69.Ft "struct mchars *"
70.Fn mchars_alloc
71.Ft void
72.Fn mchars_free "struct mchars *p"
73.Ft char
74.Fn mchars_num2char "const char *cp" "size_t sz"
75.Ft int
76.Fn mchars_num2uc "const char *cp" "size_t sz"
77.Ft "const char *"
78.Fo mchars_spec2str
79.Fa "const struct mchars *p"
80.Fa "const char *cp"
81.Fa "size_t sz"
82.Fa "size_t *rsz"
83.Fc
84.Ft int
85.Fo mchars_spec2cp
86.Fa "const struct mchars *p"
87.Fa "const char *cp"
88.Fa "size_t sz"
90.Fc
91.Ft "const struct mdoc_meta *"
92.Fo mdoc_meta
93.Fa "const struct mdoc *mdoc"
94.Fc
95.Ft "const struct mdoc_node *"
96.Fo mdoc_node
97.Fa "const struct mdoc *mdoc"
98.Fc
99.Ft "struct mparse *"
100.Fo mparse_alloc
101.Fa "enum mparset type"
102.Fa "enum mandoclevel wlevel"
103.Fa "mandocmsg msg"
104.Fa "void *msgarg"
105.Fc
106.Ft void
107.Fo mparse_free
108.Fa "struct mparse *parse"
109.Fc
110.Ft "const char *"
111.Fo mparse_getkeep
112.Fa "const struct mparse *parse"
113.Fc
114.Ft void
115.Fo mparse_keep
116.Fa "struct mparse *parse"
117.Fc
118.Ft "enum mandoclevel"
119.Fo mparse_readfd
120.Fa "struct mparse *parse"
121.Fa "int fd"
122.Fa "const char *fname"
123.Fc
124.Ft void
125.Fo mparse_reset
126.Fa "struct mparse *parse"
127.Fc
128.Ft void
129.Fo mparse_result
130.Fa "struct mparse *parse"
131.Fa "struct mdoc **mdoc"
132.Fa "struct man **man"
133.Fc
134.Ft "const char *"
135.Fo mparse_strerror
136.Fa "enum mandocerr"
137.Fc
138.Ft "const char *"
139.Fo mparse_strlevel
140.Fa "enum mandoclevel"
141.Fc
142.Vt extern const char * const * man_macronames;
143.Vt extern const char * const * mdoc_argnames;
144.Vt extern const char * const * mdoc_macronames;
145.Fd "#define ASCII_NBRSP"
146.Fd "#define ASCII_HYPH"
147.Sh DESCRIPTION
148The
149.Nm mandoc
150library parses a
151.Ux
152manual into an abstract syntax tree (AST).
153.Ux
154manuals are composed of
155.Xr mdoc 7
156or
157.Xr man 7 ,
158and may be mixed with
159.Xr roff 7 ,
160.Xr tbl 7 ,
161and
162.Xr eqn 7
163invocations.
164.Pp
165The following describes a general parse sequence:
166.Bl -enum
167.It
168initiate a parsing sequence with
169.Fn mparse_alloc ;
170.It
171parse files or file descriptors with
172.Fn mparse_readfd ;
173.It
174retrieve a parsed syntax tree, if the parse was successful, with
175.Fn mparse_result ;
176.It
177iterate over parse nodes with
178.Fn mdoc_node
179or
180.Fn man_node ;
181.It
182free all allocated memory with
183.Fn mparse_free ,
184or invoke
185.Fn mparse_reset
186and parse new files.
187.El
188.Pp
189The
190.Nm
191library also contains routines for translating character strings into glyphs
192.Pq see Fn mchars_alloc
193and parsing escape sequences from strings
194.Pq see Fn mandoc_escape .
195.Sh REFERENCE
196This section documents the functions, types, and variables available
197via
198.In mandoc.h .
199.Ss Types
200.Bl -ohang
201.It Vt "enum mandoc_esc"
202An escape sequence classification.
203.It Vt "enum mandocerr"
204A fatal error, error, or warning message during parsing.
205.It Vt "enum mandoclevel"
206A classification of an
207.Vt "enum mandocerr"
208as regards system operation.
209.It Vt "struct mchars"
210An opaque pointer to an object allowing for translation between
211character strings and glyphs.
212See
213.Fn mchars_alloc .
214.It Vt "enum mparset"
215The type of parser when reading input.
216This should usually be
217.Dv MPARSE_AUTO
218for auto-detection.
219.It Vt "struct mparse"
220An opaque pointer to a running parse sequence.
221Created with
222.Fn mparse_alloc
223and freed with
224.Fn mparse_free .
225This may be used across parsed input if
226.Fn mparse_reset
227is called between parses.
228.It Vt "mandocmsg"
229A prototype for a function to handle fatal error, error, and warning
230messages emitted by the parser.
231.El
232.Ss Functions
233.Bl -ohang
234.It Fn mandoc_escape
235Scan an escape sequence, i.e., a character string beginning with
236.Sq \e .
237Pass a pointer to the character after the
238.Sq \e
239as
240.Va end ;
241it will be set to the supremum of the parsed escape sequence unless
242returning
243.Dv ESCAPE_ERROR ,
244in which case the string is bogus and should be
245thrown away.
246If not
247.Dv ESCAPE_ERROR
248or
249.Dv ESCAPE_IGNORE ,
250.Va start
251is set to the first relevant character of the substring (font, glyph,
252whatever) of length
253.Va sz .
254Both
255.Va start
256and
257.Va sz
258may be
259.Dv NULL .
260.It Fn man_meta
261Obtain the meta-data of a successful parse.
262This may only be used on a pointer returned by
263.Fn mparse_result .
264.It Fn man_mparse
265Get the parser used for the current output.
266.It Fn man_node
267Obtain the root node of a successful parse.
268This may only be used on a pointer returned by
269.Fn mparse_result .
270.It Fn mchars_alloc
271Allocate an
272.Vt "struct mchars *"
273object for translating special characters into glyphs.
274See
275.Xr mandoc_char 7
276for an overview of special characters.
277The object must be freed with
278.Fn mchars_free .
279.It Fn mchars_free
280Free an object created with
281.Fn mchars_alloc .
282.It Fn mchars_num2char
283Convert a character index (e.g., the \eN\(aq\(aq escape) into a
284printable ASCII character.
285Returns \e0 (the nil character) if the input sequence is malformed.
286.It Fn mchars_num2uc
287Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
288a Unicode codepoint.
289Returns \e0 (the nil character) if the input sequence is malformed.
290.It Fn mchars_spec2cp
291Convert a special character into a valid Unicode codepoint.
292Returns \-1 on failure or a non-zero Unicode codepoint on success.
293.It Fn mchars_spec2str
294Convert a special character into an ASCII string.
295Returns
296.Dv NULL
297on failure.
298.It Fn mdoc_meta
299Obtain the meta-data of a successful parse.
300This may only be used on a pointer returned by
301.Fn mparse_result .
302.It Fn mdoc_node
303Obtain the root node of a successful parse.
304This may only be used on a pointer returned by
305.Fn mparse_result .
306.It Fn mparse_alloc
307Allocate a parser.
308The same parser may be used for multiple files so long as
309.Fn mparse_reset
310is called between parses.
311.Fn mparse_free
312must be called to free the memory allocated by this function.
313.It Fn mparse_free
314Free all memory allocated by
315.Fn mparse_alloc .
316.It Fn mparse_getkeep
317Acquire the keep buffer.
318Must follow a call of
319.Fn mparse_keep .
320.It Fn mparse_keep
321Instruct the parser to retain a copy of its parsed input.
322This can be acquired with subsequent
323.Fn mparse_getkeep
324calls.
325.It Fn mparse_readfd
326Parse a file or file descriptor.
327If
328.Va fd
329is \-1,
330.Va fname
331is opened for reading.
332Otherwise,
333.Va fname
334is assumed to be the name associated with
335.Va fd .
336This may be called multiple times with different parameters; however,
337.Fn mparse_reset
338should be invoked between parses.
339.It Fn mparse_reset
340Reset a parser so that
341.Fn mparse_readfd
342may be used again.
343.It Fn mparse_result
344Obtain the result of a parse.
345Only successful parses
346.Po
347i.e., those where
348.Fn mparse_readfd
349returned less than MANDOCLEVEL_FATAL
350.Pc
351should invoke this function, in which case one of the two pointers will
352be filled in.
353.It Fn mparse_strerror
354Return a statically-allocated string representation of an error code.
355.It Fn mparse_strlevel
356Return a statically-allocated string representation of a level code.
357.El
358.Ss Variables
359.Bl -ohang
360.It Va man_macronames
361The string representation of a man macro as indexed by
362.Vt "enum mant" .
363.It Va mdoc_argnames
364The string representation of a mdoc macro argument as indexed by
365.Vt "enum mdocargt" .
366.It Va mdoc_macronames
367The string representation of a mdoc macro as indexed by
368.Vt "enum mdoct" .
369.El
370.Sh IMPLEMENTATION NOTES
371This section consists of structural documentation for
372.Xr mdoc 7
373and
374.Xr man 7
375syntax trees and strings.
376.Ss Man and Mdoc Strings
377Strings may be extracted from mdoc and man meta-data, or from text
378nodes (MDOC_TEXT and MAN_TEXT, respectively).
379These strings have special non-printing formatting cues embedded in the
380text itself, as well as
381.Xr roff 7
382escapes preserved from input.
383Implementing systems will need to handle both situations to produce
384human-readable text.
385In general, strings may be assumed to consist of 7-bit ASCII characters.
386.Pp
387The following non-printing characters may be embedded in text strings:
388.Bl -tag -width Ds
389.It Dv ASCII_NBRSP
390A non-breaking space character.
391.It Dv ASCII_HYPH
392A soft hyphen.
393.El
394.Pp
395Escape characters are also passed verbatim into text strings.
396An escape character is a sequence of characters beginning with the
397backslash
398.Pq Sq \e .
399To construct human-readable text, these should be intercepted with
400.Fn mandoc_escape
401and converted with one of
402.Fn mchars_num2char ,
403.Fn mchars_spec2str ,
404and so on.
405.Ss Man Abstract Syntax Tree
406This AST is governed by the ontological rules dictated in
407.Xr man 7
408and derives its terminology accordingly.
409.Pp
410The AST is composed of
411.Vt struct man_node
412nodes with element, root and text types as declared by the
413.Va type
414field.
415Each node also provides its parse point (the
416.Va line ,
417.Va sec ,
418and
419.Va pos
420fields), its position in the tree (the
421.Va parent ,
422.Va child ,
423.Va next
424and
425.Va prev
426fields) and some type-specific data.
427.Pp
428The tree itself is arranged according to the following normal form,
429where capitalised non-terminals represent nodes.
430.Pp
431.Bl -tag -width "ELEMENTXX" -compact
432.It ROOT
433\(<- mnode+
434.It mnode
435\(<- ELEMENT | TEXT | BLOCK
436.It BLOCK
437\(<- HEAD BODY
438.It HEAD
439\(<- mnode*
440.It BODY
441\(<- mnode*
442.It ELEMENT
443\(<- ELEMENT | TEXT*
444.It TEXT
445\(<- [[:ascii:]]*
446.El
447.Pp
448The only elements capable of nesting other elements are those with
449next-line scope as documented in
450.Xr man 7 .
451.Ss Mdoc Abstract Syntax Tree
452This AST is governed by the ontological
453rules dictated in
454.Xr mdoc 7
455and derives its terminology accordingly.
456.Qq In-line
457elements described in
458.Xr mdoc 7
459are described simply as
460.Qq elements .
461.Pp
462The AST is composed of
463.Vt struct mdoc_node
464nodes with block, head, body, element, root and text types as declared
465by the
466.Va type
467field.
468Each node also provides its parse point (the
469.Va line ,
470.Va sec ,
471and
472.Va pos
473fields), its position in the tree (the
474.Va parent ,
475.Va child ,
476.Va nchild ,
477.Va next
478and
479.Va prev
480fields) and some type-specific data, in particular, for nodes generated
481from macros, the generating macro in the
482.Va tok
483field.
484.Pp
485The tree itself is arranged according to the following normal form,
486where capitalised non-terminals represent nodes.
487.Pp
488.Bl -tag -width "ELEMENTXX" -compact
489.It ROOT
490\(<- mnode+
491.It mnode
492\(<- BLOCK | ELEMENT | TEXT
493.It BLOCK
494\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
495.It ELEMENT
496\(<- TEXT*
497.It HEAD
498\(<- mnode*
499.It BODY
500\(<- mnode* [ENDBODY mnode*]
501.It TAIL
502\(<- mnode*
503.It TEXT
504\(<- [[:ascii:]]*
505.El
506.Pp
507Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
508the BLOCK production: these refer to punctuation marks.
509Furthermore, although a TEXT node will generally have a non-zero-length
510string, in the specific case of
511.Sq \&.Bd \-literal ,
512an empty line will produce a zero-length string.
513Multiple body parts are only found in invocations of
514.Sq \&.Bl \-column ,
515where a new body introduces a new phrase.
516.Pp
517The
518.Xr mdoc 7
519syntax tree accommodates broken block structures as well.
520The ENDBODY node is available to end the formatting associated
521with a given block before the physical end of that block.
522It has a non-null
523.Va end
524field, is of the BODY
525.Va type ,
526has the same
527.Va tok
528as the BLOCK it is ending, and has a
529.Va pending
530field pointing to that BLOCK's BODY node.
531It is an indirect child of that BODY node
532and has no children of its own.
533.Pp
534An ENDBODY node is generated when a block ends while one of its child
535blocks is still open, like in the following example:
536.Bd -literal -offset indent
537\&.Ao ao
538\&.Bo bo ac
539\&.Ac bc
540\&.Bc end
541.Ed
542.Pp
543This example results in the following block structure:
544.Bd -literal -offset indent
545BLOCK Ao
546    HEAD Ao
547    BODY Ao
548        TEXT ao
549        BLOCK Bo, pending -> Ao
550            HEAD Bo
551            BODY Bo
552                TEXT bo
553                TEXT ac
554                ENDBODY Ao, pending -> Ao
555                TEXT bc
556TEXT end
557.Ed
558.Pp
559Here, the formatting of the
560.Sq \&Ao
561block extends from TEXT ao to TEXT ac,
562while the formatting of the
563.Sq \&Bo
564block extends from TEXT bo to TEXT bc.
565It renders as follows in
566.Fl T Ns Cm ascii
567mode:
568.Pp
569.Dl <ao [bo ac> bc] end
570.Pp
571Support for badly-nested blocks is only provided for backward
572compatibility with some older
573.Xr mdoc 7
574implementations.
575Using badly-nested blocks is
576.Em strongly discouraged ;
577for example, the
578.Fl T Ns Cm html
579and
580.Fl T Ns Cm xhtml
581front-ends to
582.Xr mandoc 1
583are unable to render them in any meaningful way.
584Furthermore, behaviour when encountering badly-nested blocks is not
585consistent across troff implementations, especially when using multiple
586levels of badly-nested blocks.
587.Sh SEE ALSO
588.Xr mandoc 1 ,
589.Xr eqn 7 ,
590.Xr man 7 ,
591.Xr mandoc_char 7 ,
592.Xr mdoc 7 ,
593.Xr roff 7 ,
594.Xr tbl 7
595.Sh AUTHORS
596The
597.Nm
598library was written by
599.An Kristaps Dzonsons ,
600.Mt kristaps@bsd.lv .
601