1 /* 2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 3 * 4 * Permission to use, copy, modify, and/or distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 10 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 13 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 14 * PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17 /* $Id: lex.h,v 1.6 2020/02/25 05:00:43 jsg Exp $ */ 18 19 #ifndef ISC_LEX_H 20 #define ISC_LEX_H 1 21 22 /***** 23 ***** Module Info 24 *****/ 25 26 /*! \file isc/lex.h 27 * \brief The "lex" module provides a lightweight tokenizer. It can operate 28 * on files or buffers, and can handle "include". It is designed for 29 * parsing of DNS master files and the BIND configuration file, but 30 * should be general enough to tokenize other things, e.g. HTTP. 31 * 32 * \li MP: 33 * No synchronization is provided. Clients must ensure exclusive 34 * access. 35 * 36 * \li Reliability: 37 * No anticipated impact. 38 * 39 * \li Resources: 40 * TBS 41 * 42 * \li Security: 43 * No anticipated impact. 44 * 45 * \li Standards: 46 * None. 47 */ 48 49 /*** 50 *** Imports 51 ***/ 52 53 #include <stdio.h> 54 55 #include <isc/region.h> 56 #include <isc/types.h> 57 58 /*** 59 *** Options 60 ***/ 61 62 /*@{*/ 63 /*! 64 * Various options for isc_lex_gettoken(). 65 */ 66 67 #define ISC_LEXOPT_EOL 0x01 /*%< Want end-of-line token. */ 68 #define ISC_LEXOPT_EOF 0x02 /*%< Want end-of-file token. */ 69 #define ISC_LEXOPT_INITIALWS 0x04 /*%< Want initial whitespace. */ 70 #define ISC_LEXOPT_NUMBER 0x08 /*%< Recognize numbers. */ 71 #define ISC_LEXOPT_QSTRING 0x10 /*%< Recognize qstrings. */ 72 /*@}*/ 73 74 /*@{*/ 75 /*! 76 * The ISC_LEXOPT_DNSMULTILINE option handles the processing of '(' and ')' in 77 * the DNS master file format. If this option is set, then the 78 * ISC_LEXOPT_INITIALWS and ISC_LEXOPT_EOL options will be ignored when 79 * the paren count is > 0. To use this option, '(' and ')' must be special 80 * characters. 81 */ 82 #define ISC_LEXOPT_DNSMULTILINE 0x20 /*%< Handle '(' and ')'. */ 83 #define ISC_LEXOPT_NOMORE 0x40 /*%< Want "no more" token. */ 84 85 #define ISC_LEXOPT_CNUMBER 0x80 /*%< Recognize octal and hex. */ 86 #define ISC_LEXOPT_ESCAPE 0x100 /*%< Recognize escapes. */ 87 #define ISC_LEXOPT_QSTRINGMULTILINE 0x200 /*%< Allow multiline "" strings */ 88 #define ISC_LEXOPT_OCTAL 0x400 /*%< Expect a octal number. */ 89 /*@}*/ 90 /*@{*/ 91 /*! 92 * Various commenting styles, which may be changed at any time with 93 * isc_lex_setcomments(). 94 */ 95 96 #define ISC_LEXCOMMENT_C 0x01 97 #define ISC_LEXCOMMENT_CPLUSPLUS 0x02 98 #define ISC_LEXCOMMENT_SHELL 0x04 99 #define ISC_LEXCOMMENT_DNSMASTERFILE 0x08 100 /*@}*/ 101 102 /*** 103 *** Types 104 ***/ 105 106 /*! Lex */ 107 108 typedef char isc_lexspecials_t[256]; 109 110 /* Tokens */ 111 112 typedef enum { 113 isc_tokentype_unknown = 0, 114 isc_tokentype_string = 1, 115 isc_tokentype_number = 2, 116 isc_tokentype_qstring = 3, 117 isc_tokentype_eol = 4, 118 isc_tokentype_eof = 5, 119 isc_tokentype_initialws = 6, 120 isc_tokentype_special = 7, 121 isc_tokentype_nomore = 8 122 } isc_tokentype_t; 123 124 typedef union { 125 char as_char; 126 unsigned long as_ulong; 127 isc_region_t as_region; 128 isc_textregion_t as_textregion; 129 void * as_pointer; 130 } isc_tokenvalue_t; 131 132 typedef struct isc_token { 133 isc_tokentype_t type; 134 isc_tokenvalue_t value; 135 } isc_token_t; 136 137 /*** 138 *** Functions 139 ***/ 140 141 isc_result_t 142 isc_lex_create(size_t max_token, isc_lex_t **lexp); 143 /*%< 144 * Create a lexer. 145 * 146 * 'max_token' is a hint of the number of bytes in the largest token. 147 * 148 * Requires: 149 *\li '*lexp' is a valid lexer. 150 * 151 * Ensures: 152 *\li On success, *lexp is attached to the newly created lexer. 153 * 154 * Returns: 155 *\li #ISC_R_SUCCESS 156 *\li #ISC_R_NOMEMORY 157 */ 158 159 void 160 isc_lex_destroy(isc_lex_t **lexp); 161 /*%< 162 * Destroy the lexer. 163 * 164 * Requires: 165 *\li '*lexp' is a valid lexer. 166 * 167 * Ensures: 168 *\li *lexp == NULL 169 */ 170 171 void 172 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments); 173 /*%< 174 * Set allowed lexer commenting styles. 175 * 176 * Requires: 177 *\li 'lex' is a valid lexer. 178 * 179 *\li 'comments' has meaningful values. 180 */ 181 182 void 183 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials); 184 /*!< 185 * The characters in 'specials' are returned as tokens. Along with 186 * whitespace, they delimit strings and numbers. 187 * 188 * Note: 189 *\li Comment processing takes precedence over special character 190 * recognition. 191 * 192 * Requires: 193 *\li 'lex' is a valid lexer. 194 */ 195 196 isc_result_t 197 isc_lex_openfile(isc_lex_t *lex, const char *filename); 198 /*%< 199 * Open 'filename' and make it the current input source for 'lex'. 200 * 201 * Requires: 202 *\li 'lex' is a valid lexer. 203 * 204 *\li filename is a valid C string. 205 * 206 * Returns: 207 *\li #ISC_R_SUCCESS 208 *\li #ISC_R_NOMEMORY Out of memory 209 *\li #ISC_R_NOTFOUND File not found 210 *\li #ISC_R_NOPERM No permission to open file 211 *\li #ISC_R_FAILURE Couldn't open file, not sure why 212 *\li #ISC_R_UNEXPECTED 213 */ 214 215 isc_result_t 216 isc_lex_close(isc_lex_t *lex); 217 /*%< 218 * Close the most recently opened object (i.e. file or buffer). 219 * 220 * Returns: 221 *\li #ISC_R_SUCCESS 222 *\li #ISC_R_NOMORE No more input sources 223 */ 224 225 isc_result_t 226 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp); 227 /*%< 228 * Get the next token. 229 * 230 * Requires: 231 *\li 'lex' is a valid lexer. 232 * 233 *\li 'lex' has an input source. 234 * 235 *\li 'options' contains valid options. 236 * 237 *\li '*tokenp' is a valid pointer. 238 * 239 * Returns: 240 *\li #ISC_R_SUCCESS 241 *\li #ISC_R_UNEXPECTEDEND 242 *\li #ISC_R_NOMEMORY 243 * 244 * These two results are returned only if their corresponding lexer 245 * options are not set. 246 * 247 *\li #ISC_R_EOF End of input source 248 *\li #ISC_R_NOMORE No more input sources 249 */ 250 251 void 252 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp); 253 /*%< 254 * Unget the current token. 255 * 256 * Requires: 257 *\li 'lex' is a valid lexer. 258 * 259 *\li 'lex' has an input source. 260 * 261 *\li 'tokenp' points to a valid token. 262 * 263 *\li There is no ungotten token already. 264 */ 265 266 void 267 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r); 268 /*%< 269 * Returns a region containing the text of the last token returned. 270 * 271 * Requires: 272 *\li 'lex' is a valid lexer. 273 * 274 *\li 'lex' has an input source. 275 * 276 *\li 'tokenp' points to a valid token. 277 * 278 *\li A token has been gotten and not ungotten. 279 */ 280 281 char * 282 isc_lex_getsourcename(isc_lex_t *lex); 283 /*%< 284 * Return the input source name. 285 * 286 * Requires: 287 *\li 'lex' is a valid lexer. 288 * 289 * Returns: 290 * \li source name or NULL if no current source. 291 *\li result valid while current input source exists. 292 */ 293 294 unsigned long 295 isc_lex_getsourceline(isc_lex_t *lex); 296 /*%< 297 * Return the input source line number. 298 * 299 * Requires: 300 *\li 'lex' is a valid lexer. 301 * 302 * Returns: 303 *\li Current line number or 0 if no current source. 304 */ 305 306 #endif /* ISC_LEX_H */ 307