xref: /openbsd-src/usr.bin/lex/regex.c (revision 4c01f2087f360d7ff4690e4abe72ad131a02f6e9)
1*4c01f208Stedu /* $OpenBSD: regex.c,v 1.3 2015/11/19 23:20:34 tedu Exp $ */
2a58c1ecbStedu 
3a58c1ecbStedu /** regex - regular expression functions related to POSIX regex lib. */
4a58c1ecbStedu 
5a58c1ecbStedu /*  This file is part of flex. */
6a58c1ecbStedu 
7a58c1ecbStedu /*  Redistribution and use in source and binary forms, with or without */
8a58c1ecbStedu /*  modification, are permitted provided that the following conditions */
9a58c1ecbStedu /*  are met: */
10a58c1ecbStedu 
11a58c1ecbStedu /*  1. Redistributions of source code must retain the above copyright */
12a58c1ecbStedu /*     notice, this list of conditions and the following disclaimer. */
13a58c1ecbStedu /*  2. Redistributions in binary form must reproduce the above copyright */
14a58c1ecbStedu /*     notice, this list of conditions and the following disclaimer in the */
15a58c1ecbStedu /*     documentation and/or other materials provided with the distribution. */
16a58c1ecbStedu 
17a58c1ecbStedu /*  Neither the name of the University nor the names of its contributors */
18a58c1ecbStedu /*  may be used to endorse or promote products derived from this software */
19a58c1ecbStedu /*  without specific prior written permission. */
20a58c1ecbStedu 
21a58c1ecbStedu /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
22a58c1ecbStedu /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
23a58c1ecbStedu /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
24a58c1ecbStedu /*  PURPOSE. */
25a58c1ecbStedu 
26a58c1ecbStedu #include "flexdef.h"
27a58c1ecbStedu 
28a58c1ecbStedu 
/*
 * POSIX extended regular expression sources.  The pointers are const as
 * well as the characters: nothing in this file reassigns them.
 */

/* A "#line" directive: group 1 is the line number, group 2 the quoted name. */
static const char *const REGEXP_LINEDIR = "^#line ([[:digit:]]+) \"(.*)\"";
/* A line that is empty or contains only whitespace. */
static const char *const REGEXP_BLANK_LINE = "^[[:space:]]*$";

regex_t regex_linedir; /**< matches line directives */
regex_t regex_blank_line; /**< matches blank lines */
34a58c1ecbStedu 
35a58c1ecbStedu 
36a58c1ecbStedu /** Initialize the regular expressions.
37a58c1ecbStedu  * @return true upon success.
38a58c1ecbStedu  */
flex_init_regex(void)39a58c1ecbStedu bool flex_init_regex(void)
40a58c1ecbStedu {
41a58c1ecbStedu     flex_regcomp(&regex_linedir, REGEXP_LINEDIR, REG_EXTENDED);
42a58c1ecbStedu     flex_regcomp(&regex_blank_line, REGEXP_BLANK_LINE, REG_EXTENDED);
43a58c1ecbStedu 
44a58c1ecbStedu     return true;
45a58c1ecbStedu }
46a58c1ecbStedu 
/** Compiles a regular expression or dies trying.
 * On failure the regerror() text is wrapped in a message naming the
 * offending pattern and handed to flexfatal().
 * @param preg  Same as for regcomp(). Zeroed before compilation.
 * @param regex Same as for regcomp().
 * @param cflags Same as for regcomp().
 */
void flex_regcomp(regex_t *preg, const char *regex, int cflags)
{
	/* Capacity of each message buffer, terminator included. */
	enum { ERRBUF_SZ = 200 };
	int err;

	memset(preg, 0, sizeof(regex_t));

	err = regcomp(preg, regex, cflags);
	if (err != 0) {
		/*
		 * Fixed-size automatic buffers: nothing can fail while the
		 * report is being built and nothing needs freeing afterwards.
		 * NOTE(review): flexfatal() is presumably non-returning;
		 * confirm against its definition.  It must in any case be
		 * done with the message by the time it returns, as the
		 * buffer has only ever been valid for the call itself.
		 */
		char rxerr[ERRBUF_SZ];
		char errbuf[ERRBUF_SZ];

		regerror(err, preg, rxerr, sizeof(rxerr));
		snprintf(errbuf, sizeof(errbuf),
		    "regcomp for \"%s\" failed: %s", regex, rxerr);

		flexfatal(errbuf);
	}
}
76a58c1ecbStedu 
/** Extract a copy of the match, or NULL if no match.
 * @param m A match as returned by regexec().
 * @param src The source string that was passed to regexec().
 * @return A newly allocated, NUL-terminated copy of the matched text, which
 *         the caller must free(); NULL if m or src is NULL, the group did
 *         not match (rm_so < 0) or the offsets are inverted.
 *         Allocation failure is reported through flexfatal().
 */
char *regmatch_dup(regmatch_t *m, const char *src)
{
	char *str;
	size_t len;

	/*
	 * Reject inverted offsets up front: a negative length would turn
	 * into an enormous unsigned copy count.
	 */
	if (m == NULL || src == NULL || m->rm_so < 0 || m->rm_eo < m->rm_so)
		return NULL;

	len = (size_t)(m->rm_eo - m->rm_so);
	str = malloc(len + 1);
	if (str == NULL)
		flexfatal(_("Unable to allocate a copy of the match"));

	/* The length is known exactly, so copy it and terminate by hand. */
	memcpy(str, src + m->rm_so, len);
	str[len] = '\0';
	return str;
}
97a58c1ecbStedu 
/** Copy the match.
 * Writes the matched text followed by a terminating NUL, so dest must have
 * room for (rm_eo - rm_so) + 1 bytes.  A missing, empty or inverted match
 * (or a NULL src) yields the empty string.
 * @param m A match as returned by regexec().
 * @param dest The destination buffer. If NULL, nothing is written.
 * @param src The source string that was passed to regexec().
 * @return dest
 */
char *regmatch_cpy(regmatch_t *m, char *dest, const char *src)
{
	size_t len;

	if (dest == NULL)
		return dest;

	if (m == NULL || src == NULL || m->rm_so < 0 || m->rm_eo <= m->rm_so) {
		dest[0] = '\0';
		return dest;
	}

	/*
	 * Copy exactly the matched bytes.  A size argument to snprintf()
	 * counts the terminator, so passing the bare match length there
	 * would drop the final character of the match.
	 */
	len = (size_t)(m->rm_eo - m->rm_so);
	memcpy(dest, src + m->rm_so, len);
	dest[len] = '\0';
	return dest;
}
115a58c1ecbStedu 
/** Get the length in characters of the match.
 * @param m A match as returned by regexec().
 * @return rm_eo - rm_so narrowed to int, or 0 if m is NULL or the group
 *         did not match (rm_so < 0).
 */
int regmatch_len(regmatch_t *m)
{
	if (m == NULL || m->rm_so < 0)
		return 0;

	return (int)(m->rm_eo - m->rm_so);
}
129a58c1ecbStedu 
130a58c1ecbStedu 
131a58c1ecbStedu 
/** Convert a regmatch_t object to an integer using the strtol() function.
 * The matched text is copied into a NUL-terminated scratch buffer first so
 * that strtol() cannot read past the end of the match.
 * @param m A match as returned by regexec().
 * @param src The source string that was passed to regexec().
 * @param endptr If non-NULL and a conversion is attempted, receives a
 *               pointer into src at the first character strtol() did not
 *               consume. Left untouched on the early-return paths.
 * @param base   Same as the third argument to strtol().
 * @return The strtol() result narrowed to int, or 0 if m or src is NULL,
 *         the group did not match (rm_so < 0) or the offsets are inverted.
 */
int regmatch_strtol(regmatch_t *m, const char *src, char **endptr,
		     int base)
{
	/* Matches shorter than this are converted from a stack buffer. */
	enum { BUFSZ = 20 };
	char buf[BUFSZ];
	char *s;
	char *end;
	size_t len;
	long value;

	if (m == NULL || m->rm_so < 0)
		return 0;
	/* An inverted range would otherwise become a huge copy length. */
	if (src == NULL || m->rm_eo < m->rm_so)
		return 0;

	len = (size_t)(m->rm_eo - m->rm_so);
	if (len < BUFSZ) {
		memcpy(buf, src + m->rm_so, len);
		buf[len] = '\0';
		s = buf;
	} else {
		s = regmatch_dup(m, src);
	}

	value = strtol(s, &end, base);

	/*
	 * The scratch copy does not outlive this call, so report the stop
	 * position as the equivalent offset inside the caller's string.
	 */
	if (endptr != NULL)
		*endptr = (char *)src + m->rm_so + (end - s);

	if (s != buf)
		free(s);

	return (int)value;
}
163a58c1ecbStedu 
/** Check for empty or non-existent match.
 * @param m A match as returned by regexec().
 * @return false if match length is non-zero.
 * Note that regmatch_empty returns true even if match did not occur at all
 * (m is NULL or rm_so is negative).
 */
bool regmatch_empty(regmatch_t *m)
{
	if (m == NULL || m->rm_so < 0)
		return true;

	return m->rm_so == m->rm_eo;
}
173