1 /*
2 tre_regcomp.c - TRE POSIX compatible regex compilation functions.
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif /* HAVE_CONFIG_H */
12
13 #include <string.h>
14 #include <errno.h>
15 #include <stdlib.h>
16
17 #include "tre.h"
18 #include "tre-internal.h"
19 #include "xmalloc.h"
20
21 int
tre_regncomp(regex_t * preg,const char * regex,size_t n,int cflags)22 tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
23 {
24 int ret;
25 #if TRE_WCHAR
26 tre_char_t *wregex;
27 size_t wlen;
28
29 wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
30 if (wregex == NULL)
31 return REG_ESPACE;
32
33 /* If the current locale uses the standard single byte encoding of
34 characters, we don't do a multibyte string conversion. If we did,
35 many applications which use the default locale would break since
36 the default "C" locale uses the 7-bit ASCII character set, and
37 all characters with the eighth bit set would be considered invalid. */
38 #if TRE_MULTIBYTE
39 if (TRE_MB_CUR_MAX == 1)
40 #endif /* TRE_MULTIBYTE */
41 {
42 unsigned int i;
43 const unsigned char *str = (const unsigned char *)regex;
44 tre_char_t *wstr = wregex;
45
46 for (i = 0; i < n; i++)
47 *(wstr++) = *(str++);
48 wlen = n;
49 }
50 #if TRE_MULTIBYTE
51 else
52 {
53 size_t consumed;
54 tre_char_t *wcptr = wregex;
55 #ifdef HAVE_MBSTATE_T
56 mbstate_t state;
57 memset(&state, '\0', sizeof(state));
58 #endif /* HAVE_MBSTATE_T */
59 while (n > 0)
60 {
61 consumed = tre_mbrtowc(wcptr, regex, n, &state);
62
63 switch (consumed)
64 {
65 case 0:
66 if (*regex == '\0')
67 consumed = 1;
68 else
69 {
70 xfree(wregex);
71 return REG_BADPAT;
72 }
73 break;
74 case (size_t)-1:
75 DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
76 xfree(wregex);
77 return REG_BADPAT;
78 case (size_t)-2:
79 /* The last character wasn't complete. Let's not call it a
80 fatal error. */
81 consumed = n;
82 break;
83 }
84 regex += consumed;
85 n -= consumed;
86 wcptr++;
87 }
88 wlen = wcptr - wregex;
89 }
90 #endif /* TRE_MULTIBYTE */
91
92 wregex[wlen] = L'\0';
93 ret = tre_compile(preg, wregex, wlen, cflags);
94 xfree(wregex);
95 #else /* !TRE_WCHAR */
96 ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags);
97 #endif /* !TRE_WCHAR */
98
99 return ret;
100 }
101
102 #ifdef REG_USEBYTES
103 /* this version takes bytes literally, to be used with raw vectors */
104 int
tre_regncompb(regex_t * preg,const char * regex,size_t n,int cflags)105 tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags)
106 {
107 int ret;
108 #if TRE_WCHAR /* wide chars = we need to convert it all to the wide format */
109 tre_char_t *wregex;
110 size_t i;
111
112 wregex = xmalloc(sizeof(tre_char_t) * n);
113 if (wregex == NULL)
114 return REG_ESPACE;
115
116 for (i = 0; i < n; i++)
117 wregex[i] = (tre_char_t) ((unsigned char) regex[i]);
118
119 ret = tre_compile(preg, wregex, n, cflags | REG_USEBYTES);
120 xfree(wregex);
121 #else /* !TRE_WCHAR */
122 ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags | REG_USEBYTES);
123 #endif /* !TRE_WCHAR */
124
125 return ret;
126 }
127 #endif /* REG_USEBYTES */
128
129 int
tre_regcomp(regex_t * preg,const char * regex,int cflags)130 tre_regcomp(regex_t *preg, const char *regex, int cflags)
131 {
132 return tre_regncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
133 }
134
135 #ifdef REG_USEBYTES
136 int
tre_regcompb(regex_t * preg,const char * regex,int cflags)137 tre_regcompb(regex_t *preg, const char *regex, int cflags)
138 {
139 int ret;
140 tre_char_t *wregex;
141 size_t wlen, n = strlen(regex);
142 unsigned int i;
143 const unsigned char *str = (const unsigned char *)regex;
144 tre_char_t *wstr;
145
146 wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
147 if (wregex == NULL) return REG_ESPACE;
148 wstr = wregex;
149
150 for (i = 0; i < n; i++) *(wstr++) = *(str++);
151 wlen = n;
152 wregex[wlen] = L'\0';
153 ret = tre_compile(preg, wregex, wlen, cflags | REG_USEBYTES);
154 xfree(wregex);
155 return ret;
156 }
157 #endif /* REG_USEBYTES */
158
159
160 #ifdef TRE_WCHAR
161 int
tre_regwncomp(regex_t * preg,const wchar_t * regex,size_t n,int cflags)162 tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags)
163 {
164 return tre_compile(preg, regex, n, cflags);
165 }
166
167 int
tre_regwcomp(regex_t * preg,const wchar_t * regex,int cflags)168 tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags)
169 {
170 return tre_compile(preg, regex, regex ? wcslen(regex) : 0, cflags);
171 }
172 #endif /* TRE_WCHAR */
173
174 void
tre_regfree(regex_t * preg)175 tre_regfree(regex_t *preg)
176 {
177 tre_free(preg);
178 }
179
180 /* EOF */
181