1 /* $OpenBSD: charset.c,v 1.2 2001/01/29 01:58:00 niklas Exp $ */ 2 3 /* 4 * Copyright (c) 1984,1985,1989,1994,1995 Mark Nudelman 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice in the documentation and/or other materials provided with 14 * the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY 17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 22 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 23 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 25 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 26 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 30 /* 31 * Functions to define the character set 32 * and do things specific to the character set. 33 */ 34 35 #include "less.h" 36 #if HAVE_LOCALE 37 #include <locale.h> 38 #include <ctype.h> 39 #endif 40 41 /* 42 * Predefined character sets, 43 * selected by the LESSCHARSET environment variable. 44 */ 45 struct charset { 46 char *name; 47 char *desc; 48 } charsets[] = { 49 { "ascii", "8bcccbcc18b95.b" }, 50 { "latin1", "8bcccbcc18b95.33b." }, 51 { "dos", "8bcccbcc12bc5b95.b." }, 52 { "koi8-r", "8bcccbcc18b95.b128." }, 53 { "next", "8bcccbcc18b95.bb125.bb" }, 54 { NULL } 55 }; 56 57 #define IS_BINARY_CHAR 01 58 #define IS_CONTROL_CHAR 02 59 60 static char chardef[256]; 61 static char *binfmt = NULL; 62 public int binattr = AT_STANDOUT; 63 64 65 /* 66 * Define a charset, given a description string. 67 * The string consists of 256 letters, 68 * one for each character in the charset. 69 * If the string is shorter than 256 letters, missing letters 70 * are taken to be identical to the last one. 71 * A decimal number followed by a letter is taken to be a 72 * repetition of the letter. 73 * 74 * Each letter is one of: 75 * . normal character 76 * b binary character 77 * c control character 78 */ 79 static void 80 ichardef(s) 81 char *s; 82 { 83 register char *cp; 84 register int n; 85 register char v; 86 87 n = 0; 88 v = 0; 89 cp = chardef; 90 while (*s != '\0') 91 { 92 switch (*s++) 93 { 94 case '.': 95 v = 0; 96 break; 97 case 'c': 98 v = IS_CONTROL_CHAR; 99 break; 100 case 'b': 101 v = IS_BINARY_CHAR|IS_CONTROL_CHAR; 102 break; 103 104 case '0': case '1': case '2': case '3': case '4': 105 case '5': case '6': case '7': case '8': case '9': 106 n = (10 * n) + (s[-1] - '0'); 107 continue; 108 109 default: 110 error("invalid chardef", NULL_PARG); 111 quit(QUIT_ERROR); 112 /*NOTREACHED*/ 113 } 114 115 do 116 { 117 if (cp >= chardef + sizeof(chardef)) 118 { 119 error("chardef longer than 256", NULL_PARG); 120 quit(QUIT_ERROR); 121 /*NOTREACHED*/ 122 } 123 *cp++ = v; 124 } while (--n > 0); 125 n = 0; 126 } 127 128 while (cp < chardef + sizeof(chardef)) 129 *cp++ = v; 130 } 131 132 /* 133 * Define a charset, given a charset name. 134 * The valid charset names are listed in the "charsets" array. 135 */ 136 static int 137 icharset(name) 138 register char *name; 139 { 140 register struct charset *p; 141 142 if (name == NULL || *name == '\0') 143 return (0); 144 145 for (p = charsets; p->name != NULL; p++) 146 { 147 if (strcmp(name, p->name) == 0) 148 { 149 ichardef(p->desc); 150 return (1); 151 } 152 } 153 154 error("invalid charset name", NULL_PARG); 155 quit(QUIT_ERROR); 156 /*NOTREACHED*/ 157 } 158 159 #if HAVE_LOCALE 160 /* 161 * Define a charset, given a locale name. 162 */ 163 static void 164 ilocale() 165 { 166 register int c; 167 168 setlocale(LC_CTYPE, ""); 169 for (c = 0; c < sizeof(chardef); c++) 170 { 171 if (isprint(c)) 172 chardef[c] = 0; 173 else if (iscntrl(c)) 174 chardef[c] = IS_CONTROL_CHAR; 175 else 176 chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR; 177 } 178 } 179 #endif 180 181 /* 182 * Define the printing format for control chars. 183 */ 184 public void 185 setbinfmt(s) 186 char *s; 187 { 188 if (s == NULL || *s == '\0') 189 s = "*s<%X>"; 190 /* 191 * Select the attributes if it starts with "*". 192 */ 193 if (*s == '*') 194 { 195 switch (s[1]) 196 { 197 case 'd': binattr = AT_BOLD; break; 198 case 'k': binattr = AT_BLINK; break; 199 case 's': binattr = AT_STANDOUT; break; 200 case 'u': binattr = AT_UNDERLINE; break; 201 default: binattr = AT_NORMAL; break; 202 } 203 s += 2; 204 } 205 binfmt = s; 206 } 207 208 /* 209 * Initialize charset data structures. 210 */ 211 public void 212 init_charset() 213 { 214 register char *s; 215 216 s = getenv("LESSBINFMT"); 217 setbinfmt(s); 218 219 /* 220 * See if environment variable LESSCHARSET is defined. 221 */ 222 s = getenv("LESSCHARSET"); 223 if (icharset(s)) 224 return; 225 /* 226 * LESSCHARSET is not defined: try LESSCHARDEF. 227 */ 228 s = getenv("LESSCHARDEF"); 229 if (s != NULL && *s != '\0') 230 { 231 ichardef(s); 232 return; 233 } 234 #if HAVE_LOCALE 235 /* 236 * Use setlocale. 237 */ 238 ilocale(); 239 #else 240 /* 241 * Default to "ascii". 242 */ 243 (void) icharset("ascii"); 244 #endif 245 } 246 247 /* 248 * Is a given character a "binary" character? 249 */ 250 public int 251 binary_char(c) 252 int c; 253 { 254 c &= 0377; 255 return (chardef[c] & IS_BINARY_CHAR); 256 } 257 258 /* 259 * Is a given character a "control" character? 260 */ 261 public int 262 control_char(c) 263 int c; 264 { 265 c &= 0377; 266 return (chardef[c] & IS_CONTROL_CHAR); 267 } 268 269 /* 270 * Return the printable form of a character. 271 * For example, in the "ascii" charset '\3' is printed as "^C". 272 */ 273 public char * 274 prchar(c) 275 int c; 276 { 277 static char buf[8]; 278 279 c &= 0377; 280 if (!control_char(c)) 281 sprintf(buf, "%c", c); 282 else if (c == ESC) 283 sprintf(buf, "ESC"); 284 else if (c < 128 && !control_char(c ^ 0100)) 285 sprintf(buf, "^%c", c ^ 0100); 286 else 287 sprintf(buf, binfmt, c); 288 return (buf); 289 } 290