xref: /netbsd-src/sys/external/bsd/sljit/dist/regex_src/regexJIT.h (revision 06eb4e7bdb1e14f0c368bf8554cee763517c4736)
177d68377Salnsn /*
277d68377Salnsn  *    Stack-less Just-In-Time compiler
377d68377Salnsn  *
4*06eb4e7bSalnsn  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
577d68377Salnsn  *
677d68377Salnsn  * Redistribution and use in source and binary forms, with or without modification, are
777d68377Salnsn  * permitted provided that the following conditions are met:
877d68377Salnsn  *
977d68377Salnsn  *   1. Redistributions of source code must retain the above copyright notice, this list of
1077d68377Salnsn  *      conditions and the following disclaimer.
1177d68377Salnsn  *
1277d68377Salnsn  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
1377d68377Salnsn  *      of conditions and the following disclaimer in the documentation and/or other materials
1477d68377Salnsn  *      provided with the distribution.
1577d68377Salnsn  *
1677d68377Salnsn  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
1777d68377Salnsn  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1877d68377Salnsn  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
1977d68377Salnsn  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2077d68377Salnsn  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
2177d68377Salnsn  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
2277d68377Salnsn  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2377d68377Salnsn  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
2477d68377Salnsn  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2577d68377Salnsn  */
2677d68377Salnsn 
2777d68377Salnsn #ifndef _REGEX_JIT_H_
2877d68377Salnsn #define _REGEX_JIT_H_
2977d68377Salnsn 
3077d68377Salnsn /* Character type config.
   regex_char_t is the element type of the pattern and input strings taken by
   regex_compile and regex_continue_match. It is char while
   REGEX_USE_8BIT_CHARS is defined and wchar_t otherwise.
   NOTE(review): the wchar_t branch relies on wchar_t already being declared
   (e.g. via <stddef.h> or <wchar.h>); this header includes neither. */
3177d68377Salnsn #define REGEX_USE_8BIT_CHARS
3277d68377Salnsn 
3377d68377Salnsn #ifdef REGEX_USE_8BIT_CHARS
3477d68377Salnsn typedef char regex_char_t;
3577d68377Salnsn #else
3677d68377Salnsn typedef wchar_t regex_char_t;
3777d68377Salnsn #endif
3877d68377Salnsn 
3977d68377Salnsn /* Error codes.
   These are the values regex_compile reports through its error argument.
   NOTE(review): presumably REGEX_NO_ERROR is stored on success -- confirm
   in regexJIT.c. */
4077d68377Salnsn #define REGEX_NO_ERROR		0
4177d68377Salnsn #define REGEX_MEMORY_ERROR	1
4277d68377Salnsn #define REGEX_INVALID_REGEX	2
4377d68377Salnsn 
4477d68377Salnsn /* Note: large, nested {a,b} iterations can blow up the memory consumption
4577d68377Salnsn    a{n,m} is replaced by aa...aaa?a?a?a?a? (n >= 0, m > 0)
4677d68377Salnsn                          \__n__/\____m___/
4777d68377Salnsn    a{n,}  is replaced by aa...aaa+ (n > 0)
4877d68377Salnsn                          \_n-1_/
4977d68377Salnsn */
5077d68377Salnsn 
5177d68377Salnsn /* The value returned by regex_compile. Can be used for multiple matching.
   Only declared (not defined) in this header, so users of the header can
   hold pointers to it but cannot access its members. */
5277d68377Salnsn struct regex_machine;
5377d68377Salnsn 
5477d68377Salnsn /* A matching state. Returned by regex_begin_match; like regex_machine it is
   only declared in this header. */
5577d68377Salnsn struct regex_match;
5677d68377Salnsn 
5777d68377Salnsn /* Note: REGEX_MATCH_BEGIN and REGEX_MATCH_END do not change the parsing
5877d68377Salnsn      (Hence ^ and $ are parsed normally).
5977d68377Salnsn    Force matching to start from the beginning of the string (same as ^). */
6077d68377Salnsn #define REGEX_MATCH_BEGIN	0x01
6177d68377Salnsn /* Force matching to continue until the last character (same as $). */
6277d68377Salnsn #define REGEX_MATCH_END		0x02
6377d68377Salnsn /* Changes . to [^\r\n]
6477d68377Salnsn      Note: [...] and [^...] are NOT affected at all (as other regex engines do). */
6577d68377Salnsn #define REGEX_NEWLINE		0x04
6677d68377Salnsn /* Non greedy matching. In case of Thompson (non-recursive) algorithm,
6777d68377Salnsn    it (usually) does not have a significant speed gain. */
6877d68377Salnsn #define REGEX_MATCH_NON_GREEDY	0x08
6977d68377Salnsn /* Verbose. This define can be commented out, which disables all verbose features. */
7077d68377Salnsn #define REGEX_MATCH_VERBOSE	0x10
7177d68377Salnsn 
7277d68377Salnsn /* If an error occurs the function returns NULL, and the error code is returned in the error variable.
7377d68377Salnsn    You can pass NULL to error if you don't care about the error code.
7477d68377Salnsn    The re_flags argument contains the default REGEX_MATCH flags. See above.
   NOTE(review): length is presumably the number of regex_char_t elements in
   regex_string, and regex_free_machine presumably releases a machine
   returned by regex_compile -- confirm both in regexJIT.c. */
7577d68377Salnsn struct regex_machine* regex_compile(const regex_char_t *regex_string, int length, int re_flags, int *error);
7677d68377Salnsn void regex_free_machine(struct regex_machine *machine);
7777d68377Salnsn 
7877d68377Salnsn /* Create and init match structure for a given machine.
   NOTE(review): regex_reset_match presumably returns an existing match to
   its initial state and regex_free_match presumably releases it; the failure
   return of regex_begin_match is not described here -- confirm in regexJIT.c. */
7977d68377Salnsn struct regex_match* regex_begin_match(struct regex_machine *machine);
8077d68377Salnsn void regex_reset_match(struct regex_match *match);
8177d68377Salnsn void regex_free_match(struct regex_match *match);
8277d68377Salnsn 
8377d68377Salnsn /* Pattern matching.
8477d68377Salnsn    regex_continue_match does not support REGEX_MATCH_VERBOSE flag.
   NOTE(review): end and id of regex_get_result look like output parameters;
   their meaning and the function's return value are not described in this
   header -- confirm in regexJIT.c. */
8577d68377Salnsn void regex_continue_match(struct regex_match *match, const regex_char_t *input_string, int length);
8677d68377Salnsn int regex_get_result(struct regex_match *match, int *end, int *id);
8777d68377Salnsn /* Returns true, if the best match has already been found. */
8877d68377Salnsn int regex_is_match_finished(struct regex_match *match);
8977d68377Salnsn 
9077d68377Salnsn /* Only exists if VERBOSE is defined in regexJIT.c
9177d68377Salnsn    Does both sanity checking and verbose output.
9277d68377Salnsn    (The latter only if REGEX_MATCH_VERBOSE was passed to regex_compile) */
9377d68377Salnsn void regex_continue_match_debug(struct regex_match *match, const regex_char_t *input_string, int length);
9477d68377Salnsn 
9577d68377Salnsn /* Misc.
   NOTE(review): presumably returns a name for the platform the JIT compiler
   targets; the lifetime of the returned string is not stated here --
   confirm in regexJIT.c. */
9677d68377Salnsn const char* regex_get_platform_name(void);
9777d68377Salnsn 
9877d68377Salnsn #endif
99