xref: /openbsd-src/usr.bin/dig/lib/isc/include/isc/lex.h (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*
2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14  * PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 /* $Id: lex.h,v 1.6 2020/02/25 05:00:43 jsg Exp $ */
18 
19 #ifndef ISC_LEX_H
20 #define ISC_LEX_H 1
21 
22 /*****
23  ***** Module Info
24  *****/
25 
26 /*! \file isc/lex.h
27  * \brief The "lex" module provides a lightweight tokenizer.  It can operate
28  * on files or buffers, and can handle "include".  It is designed for
29  * parsing of DNS master files and the BIND configuration file, but
30  * should be general enough to tokenize other things, e.g. HTTP.
31  *
32  * \li MP:
33  *	No synchronization is provided.  Clients must ensure exclusive
34  *	access.
35  *
36  * \li Reliability:
37  *	No anticipated impact.
38  *
39  * \li Resources:
40  *	TBS
41  *
42  * \li Security:
43  *	No anticipated impact.
44  *
45  * \li Standards:
46  * 	None.
47  */
48 
49 /***
50  *** Imports
51  ***/
52 
53 #include <stdio.h>
54 
55 #include <isc/region.h>
56 #include <isc/types.h>
57 
58 /***
59  *** Options
60  ***/
61 
62 /*@{*/
63 /*!
64  * Various options for isc_lex_gettoken().
65  */
66 
67 #define ISC_LEXOPT_EOL			0x01	/*%< Want end-of-line token (see isc_tokentype_eol). */
68 #define ISC_LEXOPT_EOF			0x02	/*%< Want end-of-file token (see isc_tokentype_eof); if unset, isc_lex_gettoken() returns #ISC_R_EOF instead. */
69 #define ISC_LEXOPT_INITIALWS		0x04	/*%< Want initial whitespace (see isc_tokentype_initialws). */
70 #define ISC_LEXOPT_NUMBER		0x08	/*%< Recognize numbers (see isc_tokentype_number). */
71 #define ISC_LEXOPT_QSTRING		0x10	/*%< Recognize qstrings, i.e. "" strings (see isc_tokentype_qstring). */
72 /*@}*/
73 
74 /*@{*/
75 /*!
76  * The ISC_LEXOPT_DNSMULTILINE option handles the processing of '(' and ')' in
77  * the DNS master file format.  If this option is set, then the
78  * ISC_LEXOPT_INITIALWS and ISC_LEXOPT_EOL options will be ignored when
79  * the paren count is > 0.  To use this option, '(' and ')' must be special
80  * characters.
81  */
82 #define ISC_LEXOPT_DNSMULTILINE		0x20	/*%< Handle '(' and ')'. */
83 #define ISC_LEXOPT_NOMORE		0x40	/*%< Want "no more" token (see isc_tokentype_nomore); if unset, isc_lex_gettoken() returns #ISC_R_NOMORE instead. */
84 
85 #define ISC_LEXOPT_CNUMBER		0x80    /*%< Recognize octal and hex. */
86 #define ISC_LEXOPT_ESCAPE		0x100	/*%< Recognize escapes. */
87 #define ISC_LEXOPT_QSTRINGMULTILINE	0x200	/*%< Allow multiline "" strings. */
88 #define ISC_LEXOPT_OCTAL		0x400	/*%< Expect an octal number. */
89 /*@}*/
90 /*@{*/
91 /*!
92  * Various commenting styles, which may be changed at any time with
93  * isc_lex_setcomments().
94  */
95 
96 #define ISC_LEXCOMMENT_C		0x01	/*%< C-style comments. */
97 #define ISC_LEXCOMMENT_CPLUSPLUS	0x02	/*%< C++-style comments (presumably '//' to end of line; confirm in lex.c). */
98 #define ISC_LEXCOMMENT_SHELL		0x04	/*%< Shell-style comments (presumably '#' to end of line; confirm in lex.c). */
99 #define ISC_LEXCOMMENT_DNSMASTERFILE	0x08	/*%< DNS master file comments (presumably ';' to end of line; confirm in lex.c). */
100 /*@}*/
101 
102 /***
103  *** Types
104  ***/
105 
106 /*! Lex */
107 
108 typedef char isc_lexspecials_t[256];	/*%< Table handed to isc_lex_setspecials(); presumably one entry per character value, nonzero meaning "special" -- TODO confirm in lex.c. */
109 
110 /* Tokens */
111 
112 typedef enum {	/* Kinds of token stored in isc_token_t.type. */
113 	isc_tokentype_unknown = 0,	/*%< Unknown token type. */
114 	isc_tokentype_string = 1,	/*%< String. */
115 	isc_tokentype_number = 2,	/*%< Number; see ISC_LEXOPT_NUMBER. */
116 	isc_tokentype_qstring = 3,	/*%< Qstring ("" string); see ISC_LEXOPT_QSTRING. */
117 	isc_tokentype_eol = 4,	/*%< End of line; see ISC_LEXOPT_EOL. */
118 	isc_tokentype_eof = 5,	/*%< End of file; see ISC_LEXOPT_EOF. */
119 	isc_tokentype_initialws = 6,	/*%< Initial whitespace; see ISC_LEXOPT_INITIALWS. */
120 	isc_tokentype_special = 7,	/*%< Special character; see isc_lex_setspecials(). */
121 	isc_tokentype_nomore = 8	/*%< "No more" token; see ISC_LEXOPT_NOMORE. */
122 } isc_tokentype_t;
123 
124 typedef union {	/* Token payload. NOTE(review): the member in use presumably depends on isc_token_t.type -- confirm in lex.c. */
125 	char				as_char;	/*%< Value as a single character. */
126 	unsigned long			as_ulong;	/*%< Value as an unsigned long. */
127 	isc_region_t			as_region;	/*%< Value as an isc_region_t. */
128 	isc_textregion_t		as_textregion;	/*%< Value as an isc_textregion_t. */
129 	void *				as_pointer;	/*%< Value as an untyped pointer. */
130 } isc_tokenvalue_t;
131 
132 typedef struct isc_token {	/* A token, as passed to isc_lex_gettoken() and isc_lex_ungettoken(). */
133 	isc_tokentype_t			type;	/*%< Kind of token (one of isc_tokentype_*). */
134 	isc_tokenvalue_t		value;	/*%< Payload associated with the token. */
135 } isc_token_t;
136 
137 /***
138  *** Functions
139  ***/
140 
141 isc_result_t
142 isc_lex_create(size_t max_token, isc_lex_t **lexp);
143 /*%<
144  * Create a lexer.
145  *
146  * 'max_token' is a hint of the number of bytes in the largest token.
147  *
148  * Requires:
149  *\li	'lexp' is a valid pointer and '*lexp' == NULL.
150  *
151  * Ensures:
152  *\li	On success, *lexp is attached to the newly created lexer.
153  *
154  * Returns:
155  *\li	#ISC_R_SUCCESS
156  *\li	#ISC_R_NOMEMORY
157  */
158 
159 void
160 isc_lex_destroy(isc_lex_t **lexp);
161 /*%<
162  * Destroy the lexer.
163  *
164  * Requires:
165  *\li	'*lexp' is a valid lexer.
166  *
167  * Ensures:
168  *\li	*lexp == NULL
169  */
170 
171 void
172 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments);
173 /*%<
174  * Set allowed lexer commenting styles.
175  *
176  * Requires:
177  *\li	'lex' is a valid lexer.
178  *
179  *\li	'comments' is a combination of the ISC_LEXCOMMENT_* flags.
180  */
181 
182 void
183 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials);
184 /*%<
185  * The characters in 'specials' are returned as tokens.  Along with
186  * whitespace, they delimit strings and numbers.
187  *
188  * Note:
189  *\li	Comment processing takes precedence over special character
190  *	recognition.
191  *
192  * Requires:
193  *\li	'lex' is a valid lexer.
194  */
195 
196 isc_result_t
197 isc_lex_openfile(isc_lex_t *lex, const char *filename);
198 /*%<
199  * Open 'filename' and make it the current input source for 'lex'.
200  *
201  * Requires:
202  *\li	'lex' is a valid lexer.
203  *
204  *\li	'filename' is a valid C string.
205  *
206  * Returns:
207  *\li	#ISC_R_SUCCESS
208  *\li	#ISC_R_NOMEMORY			Out of memory
209  *\li	#ISC_R_NOTFOUND			File not found
210  *\li	#ISC_R_NOPERM			No permission to open file
211  *\li	#ISC_R_FAILURE			Couldn't open file, not sure why
212  *\li	#ISC_R_UNEXPECTED
213  */
214 
215 isc_result_t
216 isc_lex_close(isc_lex_t *lex);
217 /*%<
218  * Close the most recently opened object (i.e. file or buffer).
219  *
220  * Returns:
221  *\li	#ISC_R_SUCCESS
222  *\li	#ISC_R_NOMORE			No more input sources
223  */
224 
225 isc_result_t
226 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp);
227 /*%<
228  * Get the next token.
229  *
230  * Requires:
231  *\li	'lex' is a valid lexer.
232  *
233  *\li	'lex' has an input source.
234  *
235  *\li	'options' contains valid options.
236  *
237  *\li	'tokenp' is a valid pointer.
238  *
239  * Returns:
240  *\li	#ISC_R_SUCCESS
241  *\li	#ISC_R_UNEXPECTEDEND
242  *\li	#ISC_R_NOMEMORY
243  *
244  *	The following two results are returned only if their corresponding
245  *	lexer options (ISC_LEXOPT_EOF, ISC_LEXOPT_NOMORE) are not set.
246  *
247  *\li	#ISC_R_EOF			End of input source
248  *\li	#ISC_R_NOMORE			No more input sources
249  */
250 
251 void
252 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp);
253 /*%<
254  * Unget the current token.
255  *
256  * Requires:
257  *\li	'lex' is a valid lexer.
258  *
259  *\li	'lex' has an input source.
260  *
261  *\li	'tokenp' points to a valid token.
262  *
263  *\li	There is no ungotten token already.
264  */
265 
266 void
267 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r);
268 /*%<
269  * Set '*r' to a region containing the text of the last token returned.
270  *
271  * Requires:
272  *\li	'lex' is a valid lexer.
273  *
274  *\li	'lex' has an input source.
275  *
276  *\li	'tokenp' points to a valid token.
277  *
278  *\li	A token has been gotten and not ungotten.
279  */
280 
281 char *
282 isc_lex_getsourcename(isc_lex_t *lex);
283 /*%<
284  * Return the input source name.
285  *
286  * Requires:
287  *\li	'lex' is a valid lexer.
288  *
289  * Returns:
290  *\li	Source name, or NULL if there is no current source.
291  *\li	result valid while current input source exists.
292  */
293 
294 unsigned long
295 isc_lex_getsourceline(isc_lex_t *lex);
296 /*%<
297  * Return the input source line number.
298  *
299  * Requires:
300  *\li	'lex' is a valid lexer.
301  *
302  * Returns:
303  *\li 	Current line number or 0 if no current source.
304  */
305 
306 #endif /* ISC_LEX_H */
307