/* xref: /netbsd-src/external/bsd/tre/dist/lib/tre-match-utils.h (revision 27d137ae33c432de2caf57d165d3327bf822b152) */
/*
  tre-match-utils.h - TRE matcher helper definitions

  This software is released under a BSD-style license.
  See the file LICENSE for details and copyright.

*/

/* View the variable `string' of the enclosing scope as a pointer to a
   const tre_str_source.  The STR_USER case of GET_NEXT_WCHAR() goes through
   this to reach the get_next_char callback and its context. */
#define str_source ((const tre_str_source*)string)

#ifdef TRE_WCHAR

#ifdef TRE_MULTIBYTE

/* Wide character and multibyte support. */

/* GET_NEXT_WCHAR() - step to the next input character (wide character and
   multibyte build).

   Statement macro; it works on variables of the enclosing function:
     prev_c        receives the previous value of next_c
     next_c        receives the character just read; forced to 0 once pos
                   reaches a non-negative len
     pos           advanced by 1 (STR_BYTE, STR_WIDE) or by pos_add_next
                   (STR_MBS, STR_USER)
     len           input length; a negative value disables the length check
     str_byte,
     str_wide      read cursors, moved past what was consumed
     pos_add_next  set to the amount pos must advance on the following call
     mbstate       conversion state passed to tre_mbrtowc()
     str_user_end  receives the result of the user get_next_char callback

   STR_MBS error path: when tre_mbrtowc() returns (size_t)-1 or (size_t)-2
   the macro sets ret = REG_NOMATCH and jumps to the label error_exit; the
   enclosing function must provide both.  With a negative len at most 32
   bytes are offered to tre_mbrtowc() per character (presumably an upper
   bound on the length of one multibyte character -- confirm against the
   supported encodings).

   Any other value of `type' only updates prev_c. */
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    switch (type) {                                                           \
      case STR_BYTE:                                                          \
        pos++;                                                                \
        if (len >= 0 && pos >= len)                                           \
          next_c = '\0';                                                      \
        else                                                                  \
          next_c = (unsigned char)(*str_byte++);                              \
        break;                                                                \
      case STR_WIDE:                                                          \
        pos++;                                                                \
        if (len >= 0 && pos >= len)                                           \
          next_c = L'\0';                                                     \
        else                                                                  \
          next_c = *str_wide++;                                               \
        break;                                                                \
      case STR_MBS:                                                           \
        pos += pos_add_next;                                                  \
        if (str_byte == NULL)                                                 \
          next_c = L'\0';                                                     \
        else                                                                  \
          {                                                                   \
            size_t w;                                                         \
            long max;                                                         \
            if (len >= 0)                                                     \
              max = len - pos;                                                \
            else                                                              \
              max = 32;                                                       \
            if (max <= 0)                                                     \
              {                                                               \
                next_c = L'\0';                                               \
                pos_add_next = 1;                                             \
              }                                                               \
            else                                                              \
              {                                                               \
                w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate);    \
                if (w == (size_t)-1 || w == (size_t)-2) {                     \
                  ret = REG_NOMATCH;                                          \
                  goto error_exit;                                            \
                }                                                             \
                if (w == 0 && len >= 0)                                       \
                  {                                                           \
                    /* NUL inside a counted string: consume one byte. */      \
                    pos_add_next = 1;                                         \
                    next_c = 0;                                               \
                    str_byte++;                                               \
                  }                                                           \
                else                                                          \
                  {                                                           \
                    pos_add_next = (unsigned int)w;                           \
                    str_byte += w;                                            \
                  }                                                           \
              }                                                               \
          }                                                                   \
        break;                                                                \
      case STR_USER:                                                          \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
        break;                                                                \
      default:                                                                \
        break;                                                                \
      }                                                                       \
  } while(/*CONSTCOND*/(void)0,0)

#else /* !TRE_MULTIBYTE */

/* Wide character support, no multibyte support. */

/* GET_NEXT_WCHAR() - step to the next input character (wide character
   build without multibyte support).

   Statement macro; it works on variables of the enclosing function:
     prev_c        receives the previous value of next_c
     next_c        receives the character just read; forced to 0 once pos
                   reaches a non-negative len
     pos           advanced by 1 (STR_BYTE, STR_WIDE) or by pos_add_next
                   (STR_USER)
     len           input length; a negative value disables the length check
     str_byte,
     str_wide      read cursors, moved past the character consumed
     pos_add_next  updated by the user callback (STR_USER only)
     str_user_end  receives the result of the user get_next_char callback

   Any other value of `type' only updates prev_c. */
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    switch (type) {                                                           \
      case STR_BYTE:                                                          \
        pos++;                                                                \
        if (len >= 0 && pos >= len)                                           \
          next_c = '\0';                                                      \
        else                                                                  \
          next_c = (unsigned char)(*str_byte++);                              \
        break;                                                                \
      case STR_WIDE:                                                          \
        pos++;                                                                \
        if (len >= 0 && pos >= len)                                           \
          next_c = L'\0';                                                     \
        else                                                                  \
          next_c = *str_wide++;                                               \
        break;                                                                \
      case STR_USER:                                                          \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
        break;                                                                \
      default:                                                                \
        break;                                                                \
      }                                                                       \
  } while(/*CONSTCOND*/(void)0,0)

#endif /* !TRE_MULTIBYTE */

#else /* !TRE_WCHAR */

/* No wide character or multibyte support. */

/* GET_NEXT_WCHAR() - step to the next input character (build without wide
   character or multibyte support).

   Statement macro; it works on variables of the enclosing function:
     prev_c        receives the previous value of next_c
     next_c        receives the byte just read (as unsigned char); forced
                   to 0 once pos reaches a non-negative len
     pos           advanced by 1 (STR_BYTE) or by pos_add_next (STR_USER)
     len           input length; a negative value disables the length check
     str_byte      read cursor, moved past the byte consumed
     pos_add_next  updated by the user callback (STR_USER only)
     str_user_end  receives the result of the user get_next_char callback

   Any other value of `type' only updates prev_c. */
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    switch (type) {                                                           \
      case STR_BYTE:                                                          \
        pos++;                                                                \
        if (len >= 0 && pos >= len)                                           \
          next_c = '\0';                                                      \
        else                                                                  \
          next_c = (unsigned char)(*str_byte++);                              \
        break;                                                                \
      case STR_USER:                                                          \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
        break;                                                                \
      default:                                                                \
        break;                                                                \
      }                                                                       \
  } while(/*CONSTCOND*/(void)0,0)

#endif /* !TRE_WCHAR */



/* Nonzero if `c' is an underscore or satisfies tre_isalnum().  The argument
   may be evaluated twice, so it must not have side effects. */
#define IS_WORD_CHAR(c)	 ((c) == L'_' || tre_isalnum(c))
/* CHECK_ASSERTIONS(assertions) - nonzero if at least one of the positional
   assertions requested in the bit mask `assertions' does NOT hold at the
   current position; zero if all of them hold.

   Reads these variables from the enclosing scope: pos, prev_c, next_c,
   reg_notbol, reg_noteol and reg_newline.

     ASSERT_AT_BOL     fails unless pos == 0 with reg_notbol clear, or
                       prev_c is a newline while reg_newline is set
     ASSERT_AT_EOL     fails unless next_c is NUL with reg_noteol clear, or
                       next_c is a newline while reg_newline is set
     ASSERT_AT_BOW     fails if prev_c is a word character or next_c is not
     ASSERT_AT_EOW     fails if prev_c is not a word character or next_c is
     ASSERT_AT_WB      fails away from both ends of the input when prev_c
                       and next_c agree on being word characters
     ASSERT_AT_WB_NEG  fails at either end of the input, or when prev_c and
                       next_c disagree on being word characters

   The argument is parenthesized so that an expression such as `a | b' is
   masked as a whole. */
#define CHECK_ASSERTIONS(assertions)					      \
  ((((assertions) & ASSERT_AT_BOL)					      \
    && (pos > 0 || reg_notbol)						      \
    && (prev_c != L'\n' || !reg_newline))				      \
   || (((assertions) & ASSERT_AT_EOL)					      \
       && (next_c != L'\0' || reg_noteol)				      \
       && (next_c != L'\n' || !reg_newline))				      \
   || (((assertions) & ASSERT_AT_BOW)					      \
       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))		      \
   || (((assertions) & ASSERT_AT_EOW)					      \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))		      \
   || (((assertions) & ASSERT_AT_WB)					      \
       && (pos != 0 && next_c != L'\0'					      \
	   && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))		      \
   || (((assertions) & ASSERT_AT_WB_NEG)				      \
       && (pos == 0 || next_c == L'\0'					      \
	   || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
/* CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) - nonzero if the character in
   prev_c (taken from the enclosing scope) violates a character class
   assertion attached to transition `trans_i'; zero otherwise.

     ASSERT_CHAR_CLASS      violated when prev_c is not in trans_i->u.class;
                            with REG_ICASE set in tnfa->cflags, only when
                            neither its lower-case nor its upper-case form
                            is in the class
     ASSERT_CHAR_CLASS_NEG  violated when tre_neg_char_classes_match()
                            reports a match against trans_i->neg_classes

   `eflags' is accepted but not used by the expansion.  `trans_i' and `tnfa'
   are parenthesized so that any pointer expression (for example `&obj') may
   be passed. */
#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
  ((((trans_i)->assertions & ASSERT_CHAR_CLASS)                               \
       && !((tnfa)->cflags & REG_ICASE)                                       \
       && !tre_isctype((tre_cint_t)prev_c, (trans_i)->u.class))               \
    || (((trans_i)->assertions & ASSERT_CHAR_CLASS)                           \
        && ((tnfa)->cflags & REG_ICASE)                                       \
        && !tre_isctype(tre_tolower((tre_cint_t)prev_c),(trans_i)->u.class)   \
	&& !tre_isctype(tre_toupper((tre_cint_t)prev_c),(trans_i)->u.class))  \
    || (((trans_i)->assertions & ASSERT_CHAR_CLASS_NEG)                       \
        && tre_neg_char_classes_match((trans_i)->neg_classes,                 \
                                      (tre_cint_t)prev_c,                     \
                                      (tnfa)->cflags & REG_ICASE)))



17263d4abf0Sagc /* Returns 1 if `t1' wins `t2', 0 otherwise. */
17363d4abf0Sagc inline static int
tre_tag_order(size_t num_tags,tre_tag_direction_t * tag_directions,int * t1,int * t2)174f2a3d147Schristos tre_tag_order(size_t num_tags, tre_tag_direction_t *tag_directions,
17563d4abf0Sagc 	      int *t1, int *t2)
17663d4abf0Sagc {
177f2a3d147Schristos   size_t i;
17863d4abf0Sagc   for (i = 0; i < num_tags; i++)
17963d4abf0Sagc     {
18063d4abf0Sagc       if (tag_directions[i] == TRE_TAG_MINIMIZE)
18163d4abf0Sagc 	{
18263d4abf0Sagc 	  if (t1[i] < t2[i])
18363d4abf0Sagc 	    return 1;
18463d4abf0Sagc 	  if (t1[i] > t2[i])
18563d4abf0Sagc 	    return 0;
18663d4abf0Sagc 	}
18763d4abf0Sagc       else
18863d4abf0Sagc 	{
18963d4abf0Sagc 	  if (t1[i] > t2[i])
19063d4abf0Sagc 	    return 1;
19163d4abf0Sagc 	  if (t1[i] < t2[i])
19263d4abf0Sagc 	    return 0;
19363d4abf0Sagc 	}
19463d4abf0Sagc     }
19563d4abf0Sagc   /*  assert(0);*/
19663d4abf0Sagc   return 0;
19763d4abf0Sagc }
19963d4abf0Sagc inline static int
tre_neg_char_classes_match(tre_ctype_t * classes,tre_cint_t wc,int icase)20063d4abf0Sagc tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
20163d4abf0Sagc {
20263d4abf0Sagc   DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
20363d4abf0Sagc   while (*classes != (tre_ctype_t)0)
20463d4abf0Sagc     if ((!icase && tre_isctype(wc, *classes))
20563d4abf0Sagc 	|| (icase && (tre_isctype(tre_toupper(wc), *classes)
20663d4abf0Sagc 		      || tre_isctype(tre_tolower(wc), *classes))))
20763d4abf0Sagc       return 1; /* Match. */
20863d4abf0Sagc     else
20963d4abf0Sagc       classes++;
21063d4abf0Sagc   return 0; /* No match. */
21163d4abf0Sagc }