xref: /netbsd-src/external/bsd/tre/dist/lib/regcomp.c (revision d8dd61809f2028de0516694feeda43054b6a7cb4)
1 /*
2   tre_regcomp.c - TRE POSIX compatible regex compilation functions.
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif /* HAVE_CONFIG_H */
12 
13 #include <string.h>
14 #include <errno.h>
15 #include <stdlib.h>
16 
17 #include "tre.h"
18 #include "tre-internal.h"
19 #include "xmalloc.h"
20 
21 int
tre_regncomp(regex_t * preg,const char * regex,size_t n,int cflags)22 tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
23 {
24   int ret;
25 #if TRE_WCHAR
26   tre_char_t *wregex;
27   size_t wlen;
28 
29   wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
30   if (wregex == NULL)
31     return REG_ESPACE;
32 
33   /* If the current locale uses the standard single byte encoding of
34      characters, we don't do a multibyte string conversion.  If we did,
35      many applications which use the default locale would break since
36      the default "C" locale uses the 7-bit ASCII character set, and
37      all characters with the eighth bit set would be considered invalid. */
38 #if TRE_MULTIBYTE
39   if (TRE_MB_CUR_MAX == 1)
40 #endif /* TRE_MULTIBYTE */
41     {
42       unsigned int i;
43       const unsigned char *str = (const unsigned char *)regex;
44       tre_char_t *wstr = wregex;
45 
46       for (i = 0; i < n; i++)
47 	*(wstr++) = *(str++);
48       wlen = n;
49     }
50 #if TRE_MULTIBYTE
51   else
52     {
53       size_t consumed;
54       tre_char_t *wcptr = wregex;
55 #ifdef HAVE_MBSTATE_T
56       mbstate_t state;
57       memset(&state, '\0', sizeof(state));
58 #endif /* HAVE_MBSTATE_T */
59       while (n > 0)
60 	{
61 	  consumed = tre_mbrtowc(wcptr, regex, n, &state);
62 
63 	  switch (consumed)
64 	    {
65 	    case 0:
66 	      if (*regex == '\0')
67 		consumed = 1;
68 	      else
69 		{
70 		  xfree(wregex);
71 		  return REG_BADPAT;
72 		}
73 	      break;
74 	    case (size_t)-1:
75 	      DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
76 	      xfree(wregex);
77 	      return REG_BADPAT;
78 	    case (size_t)-2:
79 	      /* The last character wasn't complete.  Let's not call it a
80 		 fatal error. */
81 	      consumed = n;
82 	      break;
83 	    }
84 	  regex += consumed;
85 	  n -= consumed;
86 	  wcptr++;
87 	}
88       wlen = wcptr - wregex;
89     }
90 #endif /* TRE_MULTIBYTE */
91 
92   wregex[wlen] = L'\0';
93   ret = tre_compile(preg, wregex, wlen, cflags);
94   xfree(wregex);
95 #else /* !TRE_WCHAR */
96   ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags);
97 #endif /* !TRE_WCHAR */
98 
99   return ret;
100 }
101 
102 #ifdef REG_USEBYTES
103 /* this version takes bytes literally, to be used with raw vectors */
104 int
tre_regncompb(regex_t * preg,const char * regex,size_t n,int cflags)105 tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags)
106 {
107   int ret;
108 #if TRE_WCHAR /* wide chars = we need to convert it all to the wide format */
109   tre_char_t *wregex;
110   size_t i;
111 
112   wregex = xmalloc(sizeof(tre_char_t) * n);
113   if (wregex == NULL)
114     return REG_ESPACE;
115 
116   for (i = 0; i < n; i++)
117     wregex[i] = (tre_char_t) ((unsigned char) regex[i]);
118 
119   ret = tre_compile(preg, wregex, n, cflags | REG_USEBYTES);
120   xfree(wregex);
121 #else /* !TRE_WCHAR */
122   ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags | REG_USEBYTES);
123 #endif /* !TRE_WCHAR */
124 
125   return ret;
126 }
127 #endif /* REG_USEBYTES */
128 
129 int
tre_regcomp(regex_t * preg,const char * regex,int cflags)130 tre_regcomp(regex_t *preg, const char *regex, int cflags)
131 {
132   return tre_regncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
133 }
134 
135 #ifdef REG_USEBYTES
136 int
tre_regcompb(regex_t * preg,const char * regex,int cflags)137 tre_regcompb(regex_t *preg, const char *regex, int cflags)
138 {
139   int ret;
140   tre_char_t *wregex;
141   size_t wlen, n = strlen(regex);
142   unsigned int i;
143   const unsigned char *str = (const unsigned char *)regex;
144   tre_char_t *wstr;
145 
146   wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
147   if (wregex == NULL) return REG_ESPACE;
148   wstr = wregex;
149 
150   for (i = 0; i < n; i++) *(wstr++) = *(str++);
151   wlen = n;
152   wregex[wlen] = L'\0';
153   ret = tre_compile(preg, wregex, wlen, cflags | REG_USEBYTES);
154   xfree(wregex);
155   return ret;
156 }
157 #endif /* REG_USEBYTES */
158 
159 
160 #ifdef TRE_WCHAR
161 int
tre_regwncomp(regex_t * preg,const wchar_t * regex,size_t n,int cflags)162 tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags)
163 {
164   return tre_compile(preg, regex, n, cflags);
165 }
166 
167 int
tre_regwcomp(regex_t * preg,const wchar_t * regex,int cflags)168 tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags)
169 {
170   return tre_compile(preg, regex, regex ? wcslen(regex) : 0, cflags);
171 }
172 #endif /* TRE_WCHAR */
173 
174 void
tre_regfree(regex_t * preg)175 tre_regfree(regex_t *preg)
176 {
177   tre_free(preg);
178 }
179 
180 /* EOF */
181