1 /* 2 tre_regcomp.c - TRE POSIX compatible regex compilation functions. 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 */ 8 9 #ifdef HAVE_CONFIG_H 10 #include <config.h> 11 #endif /* HAVE_CONFIG_H */ 12 13 #include <string.h> 14 #include <errno.h> 15 #include <stdlib.h> 16 17 #include "tre.h" 18 #include "tre-internal.h" 19 #include "xmalloc.h" 20 21 int 22 tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags) 23 { 24 int ret; 25 #if TRE_WCHAR 26 tre_char_t *wregex; 27 size_t wlen; 28 29 wregex = xmalloc(sizeof(tre_char_t) * (n + 1)); 30 if (wregex == NULL) 31 return REG_ESPACE; 32 33 /* If the current locale uses the standard single byte encoding of 34 characters, we don't do a multibyte string conversion. If we did, 35 many applications which use the default locale would break since 36 the default "C" locale uses the 7-bit ASCII character set, and 37 all characters with the eighth bit set would be considered invalid. */ 38 #if TRE_MULTIBYTE 39 if (TRE_MB_CUR_MAX == 1) 40 #endif /* TRE_MULTIBYTE */ 41 { 42 unsigned int i; 43 const unsigned char *str = (const unsigned char *)regex; 44 tre_char_t *wstr = wregex; 45 46 for (i = 0; i < n; i++) 47 *(wstr++) = *(str++); 48 wlen = n; 49 } 50 #if TRE_MULTIBYTE 51 else 52 { 53 size_t consumed; 54 tre_char_t *wcptr = wregex; 55 #ifdef HAVE_MBSTATE_T 56 mbstate_t state; 57 memset(&state, '\0', sizeof(state)); 58 #endif /* HAVE_MBSTATE_T */ 59 while (n > 0) 60 { 61 consumed = tre_mbrtowc(wcptr, regex, n, &state); 62 63 switch (consumed) 64 { 65 case 0: 66 if (*regex == '\0') 67 consumed = 1; 68 else 69 { 70 xfree(wregex); 71 return REG_BADPAT; 72 } 73 break; 74 case (size_t)-1: 75 DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno))); 76 xfree(wregex); 77 return REG_BADPAT; 78 case (size_t)-2: 79 /* The last character wasn't complete. Let's not call it a 80 fatal error. */ 81 consumed = n; 82 break; 83 } 84 regex += consumed; 85 n -= consumed; 86 wcptr++; 87 } 88 wlen = wcptr - wregex; 89 } 90 #endif /* TRE_MULTIBYTE */ 91 92 wregex[wlen] = L'\0'; 93 ret = tre_compile(preg, wregex, wlen, cflags); 94 xfree(wregex); 95 #else /* !TRE_WCHAR */ 96 ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags); 97 #endif /* !TRE_WCHAR */ 98 99 return ret; 100 } 101 102 #ifdef REG_USEBYTES 103 /* this version takes bytes literally, to be used with raw vectors */ 104 int 105 tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags) 106 { 107 int ret; 108 #if TRE_WCHAR /* wide chars = we need to convert it all to the wide format */ 109 tre_char_t *wregex; 110 size_t i; 111 112 wregex = xmalloc(sizeof(tre_char_t) * n); 113 if (wregex == NULL) 114 return REG_ESPACE; 115 116 for (i = 0; i < n; i++) 117 wregex[i] = (tre_char_t) ((unsigned char) regex[i]); 118 119 ret = tre_compile(preg, wregex, n, cflags | REG_USEBYTES); 120 xfree(wregex); 121 #else /* !TRE_WCHAR */ 122 ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags | REG_USEBYTES); 123 #endif /* !TRE_WCHAR */ 124 125 return ret; 126 } 127 #endif /* REG_USEBYTES */ 128 129 int 130 tre_regcomp(regex_t *preg, const char *regex, int cflags) 131 { 132 return tre_regncomp(preg, regex, regex ? strlen(regex) : 0, cflags); 133 } 134 135 #ifdef REG_USEBYTES 136 int 137 tre_regcompb(regex_t *preg, const char *regex, int cflags) 138 { 139 int ret; 140 tre_char_t *wregex; 141 size_t wlen, n = strlen(regex); 142 unsigned int i; 143 const unsigned char *str = (const unsigned char *)regex; 144 tre_char_t *wstr; 145 146 wregex = xmalloc(sizeof(tre_char_t) * (n + 1)); 147 if (wregex == NULL) return REG_ESPACE; 148 wstr = wregex; 149 150 for (i = 0; i < n; i++) *(wstr++) = *(str++); 151 wlen = n; 152 wregex[wlen] = L'\0'; 153 ret = tre_compile(preg, wregex, wlen, cflags | REG_USEBYTES); 154 xfree(wregex); 155 return ret; 156 } 157 #endif /* REG_USEBYTES */ 158 159 160 #ifdef TRE_WCHAR 161 int 162 tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags) 163 { 164 return tre_compile(preg, regex, n, cflags); 165 } 166 167 int 168 tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags) 169 { 170 return tre_compile(preg, regex, regex ? wcslen(regex) : 0, cflags); 171 } 172 #endif /* TRE_WCHAR */ 173 174 void 175 tre_regfree(regex_t *preg) 176 { 177 tre_free(preg); 178 } 179 180 /* EOF */ 181