xref: /openbsd-src/usr.bin/less/charset.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: charset.c,v 1.2 2001/01/29 01:58:00 niklas Exp $	*/
2 
3 /*
4  * Copyright (c) 1984,1985,1989,1994,1995  Mark Nudelman
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice in the documentation and/or other materials provided with
14  *    the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
22  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
24  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
25  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
26  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 
30 /*
31  * Functions to define the character set
32  * and do things specific to the character set.
33  */
34 
35 #include "less.h"
36 #if HAVE_LOCALE
37 #include <locale.h>
38 #include <ctype.h>
39 #endif
40 
/*
 * Table of built-in character sets.
 * Each entry pairs a charset name (matched with strcmp() by icharset())
 * with a description string in the format understood by ichardef().
 * The table is terminated by an entry whose name is NULL.
 */
struct charset {
	char *name;	/* charset name to match */
	char *desc;	/* chardef-style description of the charset */
};

struct charset charsets[] = {
	/* name		description */
	{ "ascii",	"8bcccbcc18b95.b"		},
	{ "latin1",	"8bcccbcc18b95.33b."		},
	{ "dos",	"8bcccbcc12bc5b95.b."		},
	{ "koi8-r",	"8bcccbcc18b95.b128."		},
	{ "next",	"8bcccbcc18b95.bb125.bb"	},
	/* end-of-table marker */
	{ NULL,		NULL				}
};
56 
57 #define	IS_BINARY_CHAR	01
58 #define	IS_CONTROL_CHAR	02
59 
60 static char chardef[256];
61 static char *binfmt = NULL;
62 public int binattr = AT_STANDOUT;
63 
64 
65 /*
66  * Define a charset, given a description string.
67  * The string consists of 256 letters,
68  * one for each character in the charset.
69  * If the string is shorter than 256 letters, missing letters
70  * are taken to be identical to the last one.
71  * A decimal number followed by a letter is taken to be a
72  * repetition of the letter.
73  *
74  * Each letter is one of:
75  *	. normal character
76  *	b binary character
77  *	c control character
78  */
79 	static void
80 ichardef(s)
81 	char *s;
82 {
83 	register char *cp;
84 	register int n;
85 	register char v;
86 
87 	n = 0;
88 	v = 0;
89 	cp = chardef;
90 	while (*s != '\0')
91 	{
92 		switch (*s++)
93 		{
94 		case '.':
95 			v = 0;
96 			break;
97 		case 'c':
98 			v = IS_CONTROL_CHAR;
99 			break;
100 		case 'b':
101 			v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
102 			break;
103 
104 		case '0': case '1': case '2': case '3': case '4':
105 		case '5': case '6': case '7': case '8': case '9':
106 			n = (10 * n) + (s[-1] - '0');
107 			continue;
108 
109 		default:
110 			error("invalid chardef", NULL_PARG);
111 			quit(QUIT_ERROR);
112 			/*NOTREACHED*/
113 		}
114 
115 		do
116 		{
117 			if (cp >= chardef + sizeof(chardef))
118 			{
119 				error("chardef longer than 256", NULL_PARG);
120 				quit(QUIT_ERROR);
121 				/*NOTREACHED*/
122 			}
123 			*cp++ = v;
124 		} while (--n > 0);
125 		n = 0;
126 	}
127 
128 	while (cp < chardef + sizeof(chardef))
129 		*cp++ = v;
130 }
131 
132 /*
133  * Define a charset, given a charset name.
134  * The valid charset names are listed in the "charsets" array.
135  */
136 	static int
137 icharset(name)
138 	register char *name;
139 {
140 	register struct charset *p;
141 
142 	if (name == NULL || *name == '\0')
143 		return (0);
144 
145 	for (p = charsets;  p->name != NULL;  p++)
146 	{
147 		if (strcmp(name, p->name) == 0)
148 		{
149 			ichardef(p->desc);
150 			return (1);
151 		}
152 	}
153 
154 	error("invalid charset name", NULL_PARG);
155 	quit(QUIT_ERROR);
156 	/*NOTREACHED*/
157 }
158 
#if HAVE_LOCALE
/*
 * Define the charset from the locale's LC_CTYPE category.
 * setlocale(LC_CTYPE, "") is called first; every character value
 * is then classified with isprint() and iscntrl():
 * printable -> normal, control -> control, anything else -> binary.
 */
	static void
ilocale()
{
	int ch;
	char flags;

	setlocale(LC_CTYPE, "");
	for (ch = 0;  ch < sizeof(chardef);  ch++)
	{
		if (isprint(ch))
			flags = 0;
		else if (iscntrl(ch))
			flags = IS_CONTROL_CHAR;
		else
			flags = IS_BINARY_CHAR|IS_CONTROL_CHAR;
		chardef[ch] = flags;
	}
}
#endif
180 
181 /*
182  * Define the printing format for control chars.
183  */
184    	public void
185 setbinfmt(s)
186 	char *s;
187 {
188 	if (s == NULL || *s == '\0')
189 		s = "*s<%X>";
190 	/*
191 	 * Select the attributes if it starts with "*".
192 	 */
193 	if (*s == '*')
194 	{
195 		switch (s[1])
196 		{
197 		case 'd':  binattr = AT_BOLD;      break;
198 		case 'k':  binattr = AT_BLINK;     break;
199 		case 's':  binattr = AT_STANDOUT;  break;
200 		case 'u':  binattr = AT_UNDERLINE; break;
201 		default:   binattr = AT_NORMAL;    break;
202 		}
203 		s += 2;
204 	}
205 	binfmt = s;
206 }
207 
208 /*
209  * Initialize charset data structures.
210  */
211 	public void
212 init_charset()
213 {
214 	register char *s;
215 
216 	s = getenv("LESSBINFMT");
217 	setbinfmt(s);
218 
219 	/*
220 	 * See if environment variable LESSCHARSET is defined.
221 	 */
222 	s = getenv("LESSCHARSET");
223 	if (icharset(s))
224 		return;
225 	/*
226 	 * LESSCHARSET is not defined: try LESSCHARDEF.
227 	 */
228 	s = getenv("LESSCHARDEF");
229 	if (s != NULL && *s != '\0')
230 	{
231 		ichardef(s);
232 		return;
233 	}
234 #if HAVE_LOCALE
235 	/*
236 	 * Use setlocale.
237 	 */
238 	ilocale();
239 #else
240 	/*
241 	 * Default to "ascii".
242 	 */
243 	(void) icharset("ascii");
244 #endif
245 }
246 
247 /*
248  * Is a given character a "binary" character?
249  */
250 	public int
251 binary_char(c)
252 	int c;
253 {
254 	c &= 0377;
255 	return (chardef[c] & IS_BINARY_CHAR);
256 }
257 
258 /*
259  * Is a given character a "control" character?
260  */
261 	public int
262 control_char(c)
263 	int c;
264 {
265 	c &= 0377;
266 	return (chardef[c] & IS_CONTROL_CHAR);
267 }
268 
269 /*
270  * Return the printable form of a character.
271  * For example, in the "ascii" charset '\3' is printed as "^C".
272  */
273 	public char *
274 prchar(c)
275 	int c;
276 {
277 	static char buf[8];
278 
279 	c &= 0377;
280 	if (!control_char(c))
281 		sprintf(buf, "%c", c);
282 	else if (c == ESC)
283 		sprintf(buf, "ESC");
284 	else if (c < 128 && !control_char(c ^ 0100))
285 		sprintf(buf, "^%c", c ^ 0100);
286 	else
287 		sprintf(buf, binfmt, c);
288 	return (buf);
289 }
290