xref: /openbsd-src/lib/libc/gen/vis.3 (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1.\"	$OpenBSD: vis.3,v 1.35 2015/07/20 01:52:28 millert Exp $
2.\"
3.\" Copyright (c) 1989, 1991, 1993
4.\"	The Regents of the University of California.  All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\" 3. Neither the name of the University nor the names of its contributors
15.\"    may be used to endorse or promote products derived from this software
16.\"    without specific prior written permission.
17.\"
18.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28.\" SUCH DAMAGE.
29.\"
30.Dd $Mdocdate: July 20 2015 $
31.Dt VIS 3
32.Os
33.Sh NAME
34.Nm vis ,
35.Nm strvis ,
36.Nm strnvis ,
37.Nm strvisx ,
38.Nm stravis
39.Nd visually encode characters
40.Sh SYNOPSIS
41.In stdlib.h
42.In vis.h
43.Ft char *
44.Fn vis "char *dst" "int c" "int flag" "int nextc"
45.Ft int
46.Fn strvis "char *dst" "const char *src" "int flag"
47.Ft int
48.Fn strnvis "char *dst" "const char *src" "size_t dstsize" "int flag"
49.Ft int
50.Fn strvisx "char *dst" "const char *src" "size_t srclen" "int flag"
51.Ft int
52.Fn stravis "char **outp" "const char *src" "int flag"
53.Sh DESCRIPTION
54The
55.Fn vis
56function copies into
57.Fa dst
58a string which represents the character
59.Fa c .
60If
61.Fa c
62needs no encoding, it is copied in unaltered.
63.Fa dst
64will be NUL-terminated and must be at least 5 bytes long
65(maximum encoding requires 4 bytes plus the NUL).
66The additional character,
67.Fa nextc ,
68is only used when selecting the
69.Dv VIS_CSTYLE
70encoding format (explained below).
71.Pp
72The
73.Fn strvis ,
74.Fn strnvis
75and
76.Fn strvisx
77functions copy into
78.Fa dst
79a visual representation of
80the string
81.Fa src .
82.Pp
83The
84.Fn strvis
85function encodes characters from
86.Fa src
87up to the first NUL, into a buffer
88.Fa dst
89(which must be at least 4 * strlen(src) + 1 bytes long).
90.Pp
91The
92.Fn strnvis
93function encodes characters from
94.Fa src
95up to the first NUL or the end of the buffer
96.Fa dst ,
97as indicated by
98.Fa dstsize .
99.Pp
100The
101.Fn strvisx
102function encodes exactly
103.Fa srclen
104characters from
105.Fa src
106into a buffer
107.Fa dst
108(which must be at least 4 * srclen + 1 bytes long).
109This
110is useful for encoding a block of data that may contain NULs.
111.Pp
112The
113.Fn stravis
114function writes a visual representation of the string
115.Fa src
116into a newly allocated string and stores a pointer to it in
117.Pf * Fa outp ;
118it does not attempt to
119.Xr realloc 3
120.Pf * Fa outp .
121.Pf * Fa outp
122should be passed to
123.Xr free 3
124to release the allocated storage when it is no longer needed.
125.Fn stravis
126checks for integer overflow when allocating memory.
127.Pp
128All forms NUL-terminate
129.Fa dst ,
130except for
131.Fn strnvis
132when
133.Fa dstsize
134is zero, in which case
135.Fa dst
136is not touched.
137.Pp
138The
139.Fa flag
140parameter is used for altering the default range of
141characters considered for encoding and for altering the visual
142representation.
143.Ss Encodings
144The encoding is a unique, invertible representation composed entirely of
145graphic characters; it can be decoded back into the original form using
146the
147.Xr unvis 3
148or
149.Xr strunvis 3
150functions.
151.Pp
152There are two parameters that can be controlled: the range of
153characters that are encoded, and the type
154of representation used.
155By default, all non-graphic characters
156except space, tab, and newline are encoded
157(see
158.Xr isgraph 3 ) .
159The following flags
160alter this:
161.Bl -tag -width VIS_WHITEX
162.It Dv VIS_ALL
163Encode all characters, whether visible or not.
164.It Dv VIS_DQ
165Also encode double quote characters
166.Pf ( Ql \&" ) .
167.It Dv VIS_GLOB
168Also encode magic characters recognized by
169.Xr glob 3
170.Pf ( Ql * ,
171.Ql \&? ,
172.Ql \&[ )
173and
174.Ql # .
175.It Dv VIS_SP
176Also encode space.
177.It Dv VIS_TAB
178Also encode tab.
179.It Dv VIS_NL
180Also encode newline.
181.It Dv VIS_WHITE
182Synonym for
183.Dv VIS_SP | VIS_TAB | VIS_NL .
184.It Dv VIS_SAFE
185Only encode
186.Dq unsafe
187characters.
188These are control characters which may cause common terminals to perform
189unexpected functions.
190Currently this form allows space,
191tab, newline, backspace, bell, and return -- in addition
192to all graphic characters -- unencoded.
193.El
194.Pp
195There are three forms of encoding.
196All forms use the backslash
197.Ql \e
198character to introduce a special
199sequence; two backslashes are used to represent a real backslash.
200These are the visual formats:
201.Bl -tag -width VIS_CSTYLE
202.It (default)
203Use an
204.Ql M
205to represent meta characters (characters with the 8th
206bit set), and use a caret
207.Ql ^
208to represent control characters (see
209.Xr iscntrl 3 ) .
210The following formats are used:
211.Bl -tag -width xxxxx
212.It Dv \e^C
213Represents the control character
214.Ql C .
215Spans characters
216.Ql \e000
217through
218.Ql \e037 ,
219and
220.Ql \e177
221(as
222.Ql \e^? ) .
223.It Dv \eM-C
224Represents character
225.Ql C
226with the 8th bit set.
227Spans characters
228.Ql \e241
229through
230.Ql \e376 .
231.It Dv \eM^C
232Represents control character
233.Ql C
234with the 8th bit set.
235Spans characters
236.Ql \e200
237through
238.Ql \e237 ,
239and
240.Ql \e377
241(as
242.Ql \eM^? ) .
243.It Dv \e040
244Represents
245.Tn ASCII
246space.
247.It Dv \e240
248Represents Meta-space.
249.It Dv \e-C
250Represents character
251.Ql C .
252Only used with
253.Dv VIS_ALL .
254.El
255.It Dv VIS_CSTYLE
256Use C-style backslash sequences to represent standard non-printable
257characters.
258The following sequences are used to represent the indicated characters:
259.Bd -unfilled -offset indent
260.Li \ea Tn  - BEL No (007)
261.Li \eb Tn  - BS No (010)
262.Li \ef Tn  - NP No (014)
263.Li \en Tn  - NL No (012)
264.Li \er Tn  - CR No (015)
265.Li \es Tn  - SP No (040)
266.Li \et Tn  - HT No (011)
267.Li \ev Tn  - VT No (013)
268.Li \e0 Tn  - NUL No (000)
269.Ed
270.Pp
271When using this format, the
272.Fa nextc
273parameter is looked at to determine
274if a NUL character can be encoded as
275.Ql \e0
276instead of
277.Ql \e000 .
278If
279.Fa nextc
280is an octal digit, the latter representation is used to
281avoid ambiguity.
282.It Dv VIS_OCTAL
283Use a three-digit octal sequence.
284The form is
285.Ql \eddd
286where
287.Ar d
288represents an octal digit.
289.El
290.Pp
291There is one additional flag,
292.Dv VIS_NOSLASH ,
293which inhibits the
294doubling of backslashes and the backslash before the default
295format (that is, control characters are represented by
296.Ql ^C
297and
298meta characters as
299.Ql M-C ) .
300With this flag set, the encoding is
301ambiguous and non-invertible.
302.Sh RETURN VALUES
303.Fn vis
304returns a pointer to the terminating NUL character of the string
305.Fa dst .
306.Pp
307.Fn strvis
308and
309.Fn strvisx
310return the number of characters in
311.Fa dst
312(not including the trailing NUL).
313.Pp
314.Fn strnvis
315returns the length that
316.Fa dst
317would become if it were of unlimited size (similar to
318.Xr snprintf 3
319or
320.Xr strlcpy 3 ) .
321This can be used to detect truncation, but it also means that
322the return value of
323.Fn strnvis
324must not be used without checking it against
325.Fa dstsize .
326.Pp
327Upon successful completion,
328.Fn stravis
329returns the number of characters in
330.Pf * Fa outp
331(not including the trailing NUL).
332Otherwise,
333.Fn stravis
334returns -1 and sets
335.Va errno
336to
337.Er ENOMEM .
338.Sh EXAMPLES
339.Fn strvis
340has unusual storage requirements that can lead to stack or heap corruption
341if the destination is not carefully constructed.
342A common mistake is to use the same size for the source and destination
343when the destination actually needs up to 4 * strlen(source) + 1 bytes.
344.Pp
345If the length of a string to be encoded is not known at compile time, use
346.Fn stravis :
347.Bd -literal -offset indent
348char *src, *dst;
349
350\&...
351if (stravis(&dst, src, VIS_OCTAL) == -1)
352	err(1, "stravis");
353
354\&...
355free(dst);
356.Ed
357.Pp
358To encode a fixed-size buffer,
359.Fn strnvis
360can be used with a fixed-size target buffer:
361.Bd -literal -offset indent
362char src[MAXPATHLEN];
363char dst[4 * MAXPATHLEN + 1];
364
365\&...
366if (strnvis(dst, src, sizeof(dst), VIS_OCTAL) >= sizeof(dst))
367	errx(1, "strnvis: result truncated");
368.Ed
369.Sh SEE ALSO
370.Xr unvis 1 ,
371.Xr vis 1 ,
372.Xr free 3 ,
373.Xr snprintf 3 ,
374.Xr strlcpy 3 ,
375.Xr unvis 3
376.Sh HISTORY
377The
378.Fn vis ,
379.Fn strvis
380and
381.Fn strvisx
382functions first appeared in
383.Bx 4.4 ,
384.Fn strnvis
385in
386.Ox 2.9
387and
388.Fn stravis
389in
390.Ox 5.7 .
391.Pp
392The
393.Dv VIS_ALL
394flag first appeared in
395.Ox 4.9 .
396