xref: /netbsd-src/lib/libc/gen/ctype.3 (revision f401b5d182a0b1cd7cd81ef6132ec60aea190eb1)
1.\"	$NetBSD: ctype.3,v 1.31 2019/01/15 07:01:01 wiz Exp $
2.\"
3.\" Copyright (c) 1991 Regents of the University of California.
4.\" All rights reserved.
5.\"
6.\"
7.\" Redistribution and use in source and binary forms, with or without
8.\" modification, are permitted provided that the following conditions
9.\" are met:
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. Redistributions in binary form must reproduce the above copyright
13.\"    notice, this list of conditions and the following disclaimer in the
14.\"    documentation and/or other materials provided with the distribution.
15.\" 3. Neither the name of the University nor the names of its contributors
16.\"    may be used to endorse or promote products derived from this software
17.\"    without specific prior written permission.
18.\"
19.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29.\" SUCH DAMAGE.
30.\"
31.\"     @(#)ctype.3	6.5 (Berkeley) 4/19/91
32.\"
33.Dd January 15, 2019
34.Dt CTYPE 3
35.Os
36.Sh NAME
37.Nm ctype
38.Nd character classification and mapping functions
39.Sh LIBRARY
40.Lb libc
41.Sh SYNOPSIS
42.In ctype.h
43.Fn isalpha "int c"
44.Fn isupper "int c"
45.Fn islower "int c"
46.Fn isdigit "int c"
47.Fn isxdigit "int c"
48.Fn isalnum "int c"
49.Fn isspace "int c"
50.Fn ispunct "int c"
51.Fn isprint "int c"
52.Fn isgraph "int c"
53.Fn iscntrl "int c"
54.Fn isblank "int c"
55.Fn toupper "int c"
56.Fn tolower "int c"
57.Sh DESCRIPTION
58The above functions perform character tests and conversions on the integer
59.Ar c .
60.Pp
61See the specific manual pages for information about the
62test or conversion performed by each function.
63.Sh EXAMPLES
64To print an upper-case version of a string to stdout,
65the following code can be used:
66.Bd -literal -offset indent
67const char *s = "xyz";
68
69while (*s != '\e0') {
70    putchar(toupper((unsigned char)*s));
71    s++;
72}
73.Ed
74.Sh SEE ALSO
75.Xr isalnum 3 ,
76.Xr isalpha 3 ,
77.Xr isblank 3 ,
78.Xr iscntrl 3 ,
79.Xr isdigit 3 ,
80.Xr isgraph 3 ,
81.Xr islower 3 ,
82.Xr isprint 3 ,
83.Xr ispunct 3 ,
84.Xr isspace 3 ,
85.Xr isupper 3 ,
86.Xr isxdigit 3 ,
87.Xr tolower 3 ,
88.Xr toupper 3 ,
89.Xr ascii 7
90.Sh STANDARDS
91These functions, with the exception of
92.Fn isblank ,
93conform to
94.St -ansiC .
95All described functions, including
96.Fn isblank ,
97also conform to
98.St -p1003.1-2001 .
99.Sh CAVEATS
100The argument of these functions is of type
101.Vt int ,
102but only a very restricted subset of values is actually valid.
103The argument must either be the value of the macro
104.Dv EOF
105(which has a negative value),
106or must be a non-negative value within the range representable as
107.Vt unsigned char .
108Passing invalid values leads to undefined behavior.
109.Pp
110Values of type
111.Vt int
112that were returned by
113.Xr getc 3 ,
114.Xr fgetc 3 ,
115and similar functions or macros
116are already in the correct range, and may be safely passed to these
117.Nm ctype
118functions without any casts.
119.Pp
120Values of type
121.Vt char
122or
123.Vt signed char
124must first be cast to
125.Vt unsigned char ,
126to ensure that the values are within the correct range.
127Casting a negative-valued
128.Vt char
129or
130.Vt signed char
131directly to
132.Vt int
133will produce a negative-valued
134.Vt int ,
135which will be outside the range of allowed values
136(unless it happens to be equal to
137.Dv EOF ,
138but even that would not give the desired result).
139.Pp
140Because such bugs may manifest as silent misbehavior or as crashes only
141when the program is fed input outside the US-ASCII range, the
142.Nx
143implementation of the
144.Nm
145functions is designed to elicit a compiler warning for code that passes
146inputs of type
147.Vt char ,
148in order to flag code that may pass negative values at runtime that
149would lead to undefined behavior:
150.Bd -literal -offset indent
151#include <ctype.h>
152#include <locale.h>
153#include <stdio.h>
154
155int
156main(int argc, char **argv)
157{
158
159	if (argc < 2)
160		return 1;
161	setlocale(LC_ALL, "");
162	printf("%d %d\en", *argv[1], isprint(*argv[1]));
163	printf("%d %d\en", (int)(unsigned char)*argv[1],
164	    isprint((unsigned char)*argv[1]));
165	return 0;
166}
167.Ed
168.Pp
169When compiling this program, GCC reports a warning for the line that
170passes
171.Vt char .
172At runtime, you may get nonsense answers for some inputs without the
173cast \(em if you're lucky and it doesn't crash:
174.Bd -literal -offset indent
175% gcc -Wall -o test test.c
176test.c: In function 'main':
177test.c:12:2: warning: array subscript has type 'char'
178% LC_CTYPE=C ./test $(printf '\e270')
179-72 5
180184 0
181% LC_CTYPE=C ./test $(printf '\e377')
182-1 0
183255 0
184% LC_CTYPE=fr_FR.ISO8859-1 ./test $(printf '\e377')
185-1 0
186255 2
187.Ed
188.Pp
189Some implementations of libc, such as glibc as of 2018, attempt to
190avoid the worst of the undefined behavior by defining the functions to
191work for all integer inputs representable by either
192.Vt unsigned char
193or
194.Vt char ,
195and suppress the warning.
196However, this is not an excuse for avoiding conversion to
197.Vt unsigned char :
198if
199.Dv EOF
200coincides with any such value, as it does when it is \-1 on platforms
201with signed
202.Vt char ,
203programs that pass
204.Vt char
205will still necessarily confuse the classification and mapping of
206.Dv EOF
207with the classification and mapping of some non-EOF inputs.
208