1 /* $OpenBSD: tokenizer.c,v 1.11 2009/10/27 23:59:28 deraadt Exp $ */ 2 /* $NetBSD: tokenizer.c,v 1.13 2003/10/18 23:48:42 christos Exp $ */ 3 4 /*- 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Christos Zoulas of Cornell University. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include "config.h" 37 /* 38 * tokenize.c: Bourne shell like tokenizer 39 */ 40 #include <string.h> 41 #include <stdlib.h> 42 #include "tokenizer.h" 43 44 typedef enum { 45 Q_none, Q_single, Q_double, Q_one, Q_doubleone 46 } quote_t; 47 48 #define IFS "\t \n" 49 50 #define TOK_KEEP 1 51 #define TOK_EAT 2 52 53 #define WINCR 20 54 #define AINCR 10 55 56 #define tok_strdup(a) strdup(a) 57 #define tok_malloc(a) malloc(a) 58 #define tok_free(a) free(a) 59 #define tok_realloc(a, b) realloc(a, b) 60 61 62 struct tokenizer { 63 char *ifs; /* In field separator */ 64 int argc, amax; /* Current and maximum number of args */ 65 char **argv; /* Argument list */ 66 char *wptr, *wmax; /* Space and limit on the word buffer */ 67 char *wstart; /* Beginning of next word */ 68 char *wspace; /* Space of word buffer */ 69 quote_t quote; /* Quoting state */ 70 int flags; /* flags; */ 71 }; 72 73 74 private void tok_finish(Tokenizer *); 75 76 77 /* tok_finish(): 78 * Finish a word in the tokenizer. 79 */ 80 private void 81 tok_finish(Tokenizer *tok) 82 { 83 84 *tok->wptr = '\0'; 85 if ((tok->flags & TOK_KEEP) || tok->wptr != tok->wstart) { 86 tok->argv[tok->argc++] = tok->wstart; 87 tok->argv[tok->argc] = NULL; 88 tok->wstart = ++tok->wptr; 89 } 90 tok->flags &= ~TOK_KEEP; 91 } 92 93 94 /* tok_init(): 95 * Initialize the tokenizer 96 */ 97 public Tokenizer * 98 tok_init(const char *ifs) 99 { 100 Tokenizer *tok = (Tokenizer *) tok_malloc(sizeof(Tokenizer)); 101 102 if (tok == NULL) 103 return NULL; 104 tok->ifs = tok_strdup(ifs ? ifs : IFS); 105 if (tok->ifs == NULL) { 106 tok_free((ptr_t)tok); 107 return NULL; 108 } 109 tok->argc = 0; 110 tok->amax = AINCR; 111 tok->argv = (char **) tok_malloc(sizeof(char *) * tok->amax); 112 if (tok->argv == NULL) { 113 tok_free((ptr_t)tok->ifs); 114 tok_free((ptr_t)tok); 115 return NULL; 116 } 117 tok->argv[0] = NULL; 118 tok->wspace = (char *) tok_malloc(WINCR); 119 if (tok->wspace == NULL) { 120 tok_free((ptr_t)tok->argv); 121 tok_free((ptr_t)tok->ifs); 122 tok_free((ptr_t)tok); 123 return NULL; 124 } 125 tok->wmax = tok->wspace + WINCR; 126 tok->wstart = tok->wspace; 127 tok->wptr = tok->wspace; 128 tok->flags = 0; 129 tok->quote = Q_none; 130 131 return (tok); 132 } 133 134 135 /* tok_reset(): 136 * Reset the tokenizer 137 */ 138 public void 139 tok_reset(Tokenizer *tok) 140 { 141 142 tok->argc = 0; 143 tok->wstart = tok->wspace; 144 tok->wptr = tok->wspace; 145 tok->flags = 0; 146 tok->quote = Q_none; 147 } 148 149 150 /* tok_end(): 151 * Clean up 152 */ 153 public void 154 tok_end(Tokenizer *tok) 155 { 156 157 tok_free((ptr_t) tok->ifs); 158 tok_free((ptr_t) tok->wspace); 159 tok_free((ptr_t) tok->argv); 160 tok_free((ptr_t) tok); 161 } 162 163 164 165 /* tok_line(): 166 * Bourne shell like tokenizing 167 * Return: 168 * -1: Internal error 169 * 3: Quoted return 170 * 2: Unmatched double quote 171 * 1: Unmatched single quote 172 * 0: Ok 173 */ 174 public int 175 tok_line(Tokenizer *tok, const char *line, int *argc, const char ***argv) 176 { 177 const char *ptr; 178 179 for (;;) { 180 switch (*(ptr = line++)) { 181 case '\'': 182 tok->flags |= TOK_KEEP; 183 tok->flags &= ~TOK_EAT; 184 switch (tok->quote) { 185 case Q_none: 186 tok->quote = Q_single; /* Enter single quote 187 * mode */ 188 break; 189 190 case Q_single: /* Exit single quote mode */ 191 tok->quote = Q_none; 192 break; 193 194 case Q_one: /* Quote this ' */ 195 tok->quote = Q_none; 196 *tok->wptr++ = *ptr; 197 break; 198 199 case Q_double: /* Stay in double quote mode */ 200 *tok->wptr++ = *ptr; 201 break; 202 203 case Q_doubleone: /* Quote this ' */ 204 tok->quote = Q_double; 205 *tok->wptr++ = *ptr; 206 break; 207 208 default: 209 return (-1); 210 } 211 break; 212 213 case '"': 214 tok->flags &= ~TOK_EAT; 215 tok->flags |= TOK_KEEP; 216 switch (tok->quote) { 217 case Q_none: /* Enter double quote mode */ 218 tok->quote = Q_double; 219 break; 220 221 case Q_double: /* Exit double quote mode */ 222 tok->quote = Q_none; 223 break; 224 225 case Q_one: /* Quote this " */ 226 tok->quote = Q_none; 227 *tok->wptr++ = *ptr; 228 break; 229 230 case Q_single: /* Stay in single quote mode */ 231 *tok->wptr++ = *ptr; 232 break; 233 234 case Q_doubleone: /* Quote this " */ 235 tok->quote = Q_double; 236 *tok->wptr++ = *ptr; 237 break; 238 239 default: 240 return (-1); 241 } 242 break; 243 244 case '\\': 245 tok->flags |= TOK_KEEP; 246 tok->flags &= ~TOK_EAT; 247 switch (tok->quote) { 248 case Q_none: /* Quote next character */ 249 tok->quote = Q_one; 250 break; 251 252 case Q_double: /* Quote next character */ 253 tok->quote = Q_doubleone; 254 break; 255 256 case Q_one: /* Quote this, restore state */ 257 *tok->wptr++ = *ptr; 258 tok->quote = Q_none; 259 break; 260 261 case Q_single: /* Stay in single quote mode */ 262 *tok->wptr++ = *ptr; 263 break; 264 265 case Q_doubleone: /* Quote this \ */ 266 tok->quote = Q_double; 267 *tok->wptr++ = *ptr; 268 break; 269 270 default: 271 return (-1); 272 } 273 break; 274 275 case '\n': 276 tok->flags &= ~TOK_EAT; 277 switch (tok->quote) { 278 case Q_none: 279 tok_finish(tok); 280 *argv = (const char **)tok->argv; 281 *argc = tok->argc; 282 return (0); 283 284 case Q_single: 285 case Q_double: 286 *tok->wptr++ = *ptr; /* Add the return */ 287 break; 288 289 case Q_doubleone: /* Back to double, eat the '\n' */ 290 tok->flags |= TOK_EAT; 291 tok->quote = Q_double; 292 break; 293 294 case Q_one: /* No quote, more eat the '\n' */ 295 tok->flags |= TOK_EAT; 296 tok->quote = Q_none; 297 break; 298 299 default: 300 return (0); 301 } 302 break; 303 304 case '\0': 305 switch (tok->quote) { 306 case Q_none: 307 /* Finish word and return */ 308 if (tok->flags & TOK_EAT) { 309 tok->flags &= ~TOK_EAT; 310 return (3); 311 } 312 tok_finish(tok); 313 *argv = (const char **)tok->argv; 314 *argc = tok->argc; 315 return (0); 316 317 case Q_single: 318 return (1); 319 320 case Q_double: 321 return (2); 322 323 case Q_doubleone: 324 tok->quote = Q_double; 325 *tok->wptr++ = *ptr; 326 break; 327 328 case Q_one: 329 tok->quote = Q_none; 330 *tok->wptr++ = *ptr; 331 break; 332 333 default: 334 return (-1); 335 } 336 break; 337 338 default: 339 tok->flags &= ~TOK_EAT; 340 switch (tok->quote) { 341 case Q_none: 342 if (strchr(tok->ifs, *ptr) != NULL) 343 tok_finish(tok); 344 else 345 *tok->wptr++ = *ptr; 346 break; 347 348 case Q_single: 349 case Q_double: 350 *tok->wptr++ = *ptr; 351 break; 352 353 354 case Q_doubleone: 355 *tok->wptr++ = '\\'; 356 tok->quote = Q_double; 357 *tok->wptr++ = *ptr; 358 break; 359 360 case Q_one: 361 tok->quote = Q_none; 362 *tok->wptr++ = *ptr; 363 break; 364 365 default: 366 return (-1); 367 368 } 369 break; 370 } 371 372 if (tok->wptr >= tok->wmax - 4) { 373 size_t size = tok->wmax - tok->wspace + WINCR; 374 char *s = (char *) tok_realloc(tok->wspace, size); 375 if (s == NULL) 376 return (-1); 377 378 if (s != tok->wspace) { 379 int i; 380 for (i = 0; i < tok->argc; i++) { 381 tok->argv[i] = 382 (tok->argv[i] - tok->wspace) + s; 383 } 384 tok->wptr = (tok->wptr - tok->wspace) + s; 385 tok->wstart = (tok->wstart - tok->wspace) + s; 386 tok->wspace = s; 387 } 388 tok->wmax = s + size; 389 } 390 if (tok->argc >= tok->amax - 4) { 391 char **p; 392 tok->amax += AINCR; 393 p = (char **) tok_realloc(tok->argv, 394 tok->amax * sizeof(char *)); 395 if (p == NULL) 396 return (-1); 397 tok->argv = p; 398 } 399 } 400 } 401