xref: /dflybsd-src/contrib/tre/lib/regcomp.c (revision d33005aaee6af52c80428b59b52aee522c002492)
/*
  tre_regcomp.c - TRE POSIX compatible regex compilation functions.

  This software is released under a BSD-style license.
  See the file LICENSE for details and copyright.

*/
85f2eab64SJohn Marino 
95f2eab64SJohn Marino #ifdef HAVE_CONFIG_H
105f2eab64SJohn Marino #include <config.h>
115f2eab64SJohn Marino #endif /* HAVE_CONFIG_H */
125f2eab64SJohn Marino 
135f2eab64SJohn Marino #include <string.h>
145f2eab64SJohn Marino #include <errno.h>
155f2eab64SJohn Marino #include <stdlib.h>
165f2eab64SJohn Marino 
175f2eab64SJohn Marino #include "tre.h"
185f2eab64SJohn Marino #include "tre-internal.h"
195f2eab64SJohn Marino #include "xmalloc.h"
205f2eab64SJohn Marino 
215f2eab64SJohn Marino int
tre_regncomp_l(regex_t * preg,const char * regex,size_t n,int cflags,locale_t loc)22d5f8dde1SJohn Marino tre_regncomp_l(regex_t *preg, const char *regex, size_t n, int cflags,
23d5f8dde1SJohn Marino     locale_t loc)
245f2eab64SJohn Marino {
255f2eab64SJohn Marino   int ret;
265f2eab64SJohn Marino #if TRE_WCHAR
275f2eab64SJohn Marino   tre_char_t *wregex;
285f2eab64SJohn Marino   size_t wlen;
295f2eab64SJohn Marino 
305f2eab64SJohn Marino   wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
315f2eab64SJohn Marino   if (wregex == NULL)
325f2eab64SJohn Marino     return REG_ESPACE;
335f2eab64SJohn Marino 
34d5f8dde1SJohn Marino   FIX_LOCALE(loc);
35d5f8dde1SJohn Marino 
365f2eab64SJohn Marino   /* If the current locale uses the standard single byte encoding of
375f2eab64SJohn Marino      characters, we don't do a multibyte string conversion.  If we did,
385f2eab64SJohn Marino      many applications which use the default locale would break since
395f2eab64SJohn Marino      the default "C" locale uses the 7-bit ASCII character set, and
405f2eab64SJohn Marino      all characters with the eighth bit set would be considered invalid. */
415f2eab64SJohn Marino #if TRE_MULTIBYTE
42d5f8dde1SJohn Marino   if (TRE_MB_CUR_MAX_L(loc) == 1)
435f2eab64SJohn Marino #endif /* TRE_MULTIBYTE */
445f2eab64SJohn Marino     {
455f2eab64SJohn Marino       unsigned int i;
465f2eab64SJohn Marino       const unsigned char *str = (const unsigned char *)regex;
475f2eab64SJohn Marino       tre_char_t *wstr = wregex;
485f2eab64SJohn Marino 
495f2eab64SJohn Marino       for (i = 0; i < n; i++)
505f2eab64SJohn Marino 	*(wstr++) = *(str++);
515f2eab64SJohn Marino       wlen = n;
525f2eab64SJohn Marino     }
535f2eab64SJohn Marino #if TRE_MULTIBYTE
545f2eab64SJohn Marino   else
555f2eab64SJohn Marino     {
56d5f8dde1SJohn Marino       size_t consumed;
575f2eab64SJohn Marino       tre_char_t *wcptr = wregex;
585f2eab64SJohn Marino #ifdef HAVE_MBSTATE_T
595f2eab64SJohn Marino       mbstate_t state;
605f2eab64SJohn Marino       memset(&state, '\0', sizeof(state));
615f2eab64SJohn Marino #endif /* HAVE_MBSTATE_T */
625f2eab64SJohn Marino       while (n > 0)
635f2eab64SJohn Marino 	{
64d5f8dde1SJohn Marino 	  consumed = tre_mbrtowc_l(wcptr, regex, n, &state, loc);
655f2eab64SJohn Marino 
665f2eab64SJohn Marino 	  switch (consumed)
675f2eab64SJohn Marino 	    {
685f2eab64SJohn Marino 	    case 0:
695f2eab64SJohn Marino 	      if (*regex == '\0')
705f2eab64SJohn Marino 		consumed = 1;
715f2eab64SJohn Marino 	      else
725f2eab64SJohn Marino 		{
735f2eab64SJohn Marino 		  xfree(wregex);
745f2eab64SJohn Marino 		  return REG_BADPAT;
755f2eab64SJohn Marino 		}
765f2eab64SJohn Marino 	      break;
77d5f8dde1SJohn Marino 	    case (size_t)-1:
78d5f8dde1SJohn Marino 	    case (size_t)-2:
795f2eab64SJohn Marino 	      DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
805f2eab64SJohn Marino 	      xfree(wregex);
81d5f8dde1SJohn Marino 	      return REG_ILLSEQ;
825f2eab64SJohn Marino 	    }
835f2eab64SJohn Marino 	  regex += consumed;
845f2eab64SJohn Marino 	  n -= consumed;
855f2eab64SJohn Marino 	  wcptr++;
865f2eab64SJohn Marino 	}
875f2eab64SJohn Marino       wlen = wcptr - wregex;
885f2eab64SJohn Marino     }
895f2eab64SJohn Marino #endif /* TRE_MULTIBYTE */
905f2eab64SJohn Marino 
915f2eab64SJohn Marino   wregex[wlen] = L'\0';
92d5f8dde1SJohn Marino   ret = tre_compile(preg, wregex, wlen, cflags, loc);
935f2eab64SJohn Marino   xfree(wregex);
945f2eab64SJohn Marino #else /* !TRE_WCHAR */
95d5f8dde1SJohn Marino   FIX_LOCALE(loc);
96d5f8dde1SJohn Marino   ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags, loc);
975f2eab64SJohn Marino #endif /* !TRE_WCHAR */
985f2eab64SJohn Marino 
995f2eab64SJohn Marino   return ret;
1005f2eab64SJohn Marino }
1015f2eab64SJohn Marino 
1025f2eab64SJohn Marino int
tre_regncomp(regex_t * preg,const char * regex,size_t n,int cflags)103d5f8dde1SJohn Marino tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
1045f2eab64SJohn Marino {
105d5f8dde1SJohn Marino   return tre_regncomp_l(preg, regex, n, cflags, __get_locale());
1065f2eab64SJohn Marino }
1075f2eab64SJohn Marino 
108d5f8dde1SJohn Marino int
tre_regcomp_l(regex_t * preg,const char * regex,int cflags,locale_t loc)109d5f8dde1SJohn Marino tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t loc)
110d5f8dde1SJohn Marino {
111d5f8dde1SJohn Marino   size_t len;
112d5f8dde1SJohn Marino 
113d5f8dde1SJohn Marino   if (cflags & REG_PEND)
114d5f8dde1SJohn Marino     {
115d5f8dde1SJohn Marino       if ((const char *)(preg->re_endp) < regex)
116d5f8dde1SJohn Marino 	return REG_INVARG;
117d5f8dde1SJohn Marino       len = (const char *)(preg->re_endp) - regex;
118d5f8dde1SJohn Marino     }
119d5f8dde1SJohn Marino   else
120d5f8dde1SJohn Marino     len = strlen(regex);
121d5f8dde1SJohn Marino   return tre_regncomp_l(preg, regex, len, cflags, loc);
122d5f8dde1SJohn Marino }
123d5f8dde1SJohn Marino 
124d5f8dde1SJohn Marino int
tre_regcomp(regex_t * __restrict preg,const char * __restrict regex,int cflags)125*d33005aaSSascha Wildner tre_regcomp(regex_t * __restrict preg, const char * __restrict regex,
126*d33005aaSSascha Wildner     int cflags)
127d5f8dde1SJohn Marino {
128d5f8dde1SJohn Marino   return tre_regcomp_l(preg, regex, cflags, __get_locale());
129d5f8dde1SJohn Marino }
1305f2eab64SJohn Marino 
1315f2eab64SJohn Marino #ifdef TRE_WCHAR
1325f2eab64SJohn Marino int
tre_regwncomp_l(regex_t * preg,const wchar_t * regex,size_t n,int cflags,locale_t loc)133d5f8dde1SJohn Marino tre_regwncomp_l(regex_t *preg, const wchar_t *regex, size_t n, int cflags,
134d5f8dde1SJohn Marino     locale_t loc)
135d5f8dde1SJohn Marino {
136d5f8dde1SJohn Marino   FIX_LOCALE(loc);
137d5f8dde1SJohn Marino   return tre_compile(preg, regex, n, cflags, loc);
138d5f8dde1SJohn Marino }
139d5f8dde1SJohn Marino 
140d5f8dde1SJohn Marino int
tre_regwncomp(regex_t * preg,const wchar_t * regex,size_t n,int cflags)1415f2eab64SJohn Marino tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags)
1425f2eab64SJohn Marino {
143d5f8dde1SJohn Marino   return tre_compile(preg, regex, n, cflags, __get_locale());
144d5f8dde1SJohn Marino }
145d5f8dde1SJohn Marino 
146d5f8dde1SJohn Marino int
tre_regwcomp_l(regex_t * preg,const wchar_t * regex,int cflags,locale_t loc)147d5f8dde1SJohn Marino tre_regwcomp_l(regex_t *preg, const wchar_t *regex, int cflags, locale_t loc)
148d5f8dde1SJohn Marino {
149d5f8dde1SJohn Marino   FIX_LOCALE(loc);
150d5f8dde1SJohn Marino   return tre_compile(preg, regex, wcslen(regex), cflags, loc);
1515f2eab64SJohn Marino }
1525f2eab64SJohn Marino 
1535f2eab64SJohn Marino int
tre_regwcomp(regex_t * preg,const wchar_t * regex,int cflags)1545f2eab64SJohn Marino tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags)
1555f2eab64SJohn Marino {
156d5f8dde1SJohn Marino   return tre_regwncomp(preg, regex, wcslen(regex), cflags);
1575f2eab64SJohn Marino }
1585f2eab64SJohn Marino #endif /* TRE_WCHAR */
1595f2eab64SJohn Marino 
1605f2eab64SJohn Marino void
tre_regfree(regex_t * preg)1615f2eab64SJohn Marino tre_regfree(regex_t *preg)
1625f2eab64SJohn Marino {
1635f2eab64SJohn Marino   tre_free(preg);
1645f2eab64SJohn Marino }
1655f2eab64SJohn Marino 
1665f2eab64SJohn Marino /* EOF */
167