xref: /netbsd-src/external/bsd/tre/dist/lib/tre-match-utils.h (revision 27d137ae33c432de2caf57d165d3327bf822b152)
1 /*
2   tre-match-utils.h - TRE matcher helper definitions
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 
/* Views the expanding function's `string' variable as a pointer to a
   read-only tre_str_source. */
#define str_source ((const tre_str_source *)(string))
10 
11 #ifdef TRE_WCHAR
12 
13 #ifdef TRE_MULTIBYTE
14 
15 /* Wide character and multibyte support. */
16 
/*
   GET_NEXT_WCHAR() -- step the matcher input forward by one character
   (build with wide character and multibyte support).

   This is a statement macro.  It reads and writes variables of the function
   it is expanded in: `type', `len', `pos', `pos_add_next', `prev_c',
   `next_c', `str_byte', `str_wide', `mbstate', `ret', `str_user_end' and
   (through str_source) `string'.  The expanding function must also define
   an `error_exit' label.

   On every expansion the old `next_c' is saved in `prev_c'.  Then, by
   `type':

     STR_BYTE, STR_WIDE
	`pos' is incremented.  If `len' is non-negative and `pos' has
	reached it, `next_c' becomes NUL; otherwise one unit is fetched
	through `str_byte' / `str_wide' and that pointer is advanced.

     STR_MBS
	`pos' is advanced by `pos_add_next'.  A NULL `str_byte' or an
	exhausted counted string yields NUL.  Otherwise tre_mbrtowc() is
	asked to decode at most `len - pos' bytes (32 when `len' is
	negative).  A result of (size_t)-1 or (size_t)-2 sets `ret' to
	REG_NOMATCH and jumps to `error_exit'.  A result of 0 in a counted
	string stores NUL and steps over exactly one byte; any other result
	is stored in `pos_add_next' and added to `str_byte'.

     STR_USER
	`pos' is advanced by `pos_add_next' and the source's get_next_char()
	callback is called; its return value is stored in `str_user_end'.

   Any other `type' value changes nothing beyond `prev_c'.
*/
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    switch (type) {                                                           \
      case STR_BYTE:                                                          \
        pos++;                                                                \
        if (len >= 0 && pos >= len)                                           \
          next_c = '\0';                                                      \
        else                                                                  \
          next_c = (unsigned char)(*str_byte++);                              \
        break;                                                                \
      case STR_WIDE:                                                          \
        pos++;                                                                \
        if (len >= 0 && pos >= len)                                           \
          next_c = L'\0';                                                     \
        else                                                                  \
          next_c = *str_wide++;                                               \
        break;                                                                \
      case STR_MBS:                                                           \
        pos += pos_add_next;                                                  \
        if (str_byte == NULL)                                                 \
          next_c = L'\0';                                                     \
        else                                                                  \
          {                                                                   \
            size_t w;                                                         \
            long max;                                                         \
            /* Byte budget handed to the decoder. */                          \
            if (len >= 0)                                                     \
              max = len - pos;                                                \
            else                                                              \
              max = 32;                                                       \
            if (max <= 0)                                                     \
              {                                                               \
                next_c = L'\0';                                               \
                pos_add_next = 1;                                             \
              }                                                               \
            else                                                              \
              {                                                               \
                w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate);    \
                if (w == (size_t)-1 || w == (size_t)-2) {                     \
                  ret = REG_NOMATCH;                                          \
                  goto error_exit;                                            \
                }                                                             \
                if (w == 0 && len >= 0)                                       \
                  {                                                           \
                    /* Counted string: keep going past this position. */      \
                    pos_add_next = 1;                                         \
                    next_c = 0;                                               \
                    str_byte++;                                               \
                  }                                                           \
                else                                                          \
                  {                                                           \
                    pos_add_next = (unsigned int)w;                           \
                    str_byte += w;                                            \
                  }                                                           \
              }                                                               \
          }                                                                   \
        break;                                                                \
      case STR_USER:                                                          \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
        break;                                                                \
      default:                                                                \
        /* Unhandled source type: only prev_c was updated. */                 \
        break;                                                                \
      }                                                                       \
  } while(/*CONSTCOND*/(void)0,0)
79 
80 #else /* !TRE_MULTIBYTE */
81 
82 /* Wide character support, no multibyte support. */
83 
/*
   GET_NEXT_WCHAR() -- step the matcher input forward by one character
   (build with wide character support but no multibyte support).

   This is a statement macro.  It reads and writes variables of the function
   it is expanded in: `type', `len', `pos', `pos_add_next', `prev_c',
   `next_c', `str_byte', `str_wide', `str_user_end' and (through
   str_source) `string'.

   On every expansion the old `next_c' is saved in `prev_c'.  Then, by
   `type':

     STR_BYTE, STR_WIDE
	`pos' is incremented.  If `len' is non-negative and `pos' has
	reached it, `next_c' becomes NUL; otherwise one unit is fetched
	through `str_byte' / `str_wide' and that pointer is advanced.

     STR_USER
	`pos' is advanced by `pos_add_next' and the source's get_next_char()
	callback is called; its return value is stored in `str_user_end'.

   Any other `type' value changes nothing beyond `prev_c'.
*/
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    switch (type) {                                                           \
      case STR_BYTE:                                                          \
        pos++;                                                                \
        if (len >= 0 && pos >= len)                                           \
          next_c = '\0';                                                      \
        else                                                                  \
          next_c = (unsigned char)(*str_byte++);                              \
        break;                                                                \
      case STR_WIDE:                                                          \
        pos++;                                                                \
        if (len >= 0 && pos >= len)                                           \
          next_c = L'\0';                                                     \
        else                                                                  \
          next_c = *str_wide++;                                               \
        break;                                                                \
      case STR_USER:                                                          \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
        break;                                                                \
      default:                                                                \
        /* Unhandled source type: only prev_c was updated. */                 \
        break;                                                                \
      }                                                                       \
  } while(/*CONSTCOND*/(void)0,0)
108 
109 #endif /* !TRE_MULTIBYTE */
110 
111 #else /* !TRE_WCHAR */
112 
113 /* No wide character or multibyte support. */
114 
/*
   GET_NEXT_WCHAR() -- step the matcher input forward by one character
   (build without wide character or multibyte support).

   This is a statement macro.  It reads and writes variables of the function
   it is expanded in: `type', `len', `pos', `pos_add_next', `prev_c',
   `next_c', `str_byte', `str_user_end' and (through str_source) `string'.

   On every expansion the old `next_c' is saved in `prev_c'.  Then, by
   `type':

     STR_BYTE
	`pos' is incremented.  If `len' is non-negative and `pos' has
	reached it, `next_c' becomes NUL; otherwise one byte is fetched
	through `str_byte' (as unsigned char) and the pointer is advanced.

     STR_USER
	`pos' is advanced by `pos_add_next' and the source's get_next_char()
	callback is called; its return value is stored in `str_user_end'.

   Any other `type' value changes nothing beyond `prev_c'.
*/
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    switch (type) {                                                           \
      case STR_BYTE:                                                          \
        pos++;                                                                \
        if (len >= 0 && pos >= len)                                           \
          next_c = '\0';                                                      \
        else                                                                  \
          next_c = (unsigned char)(*str_byte++);                              \
        break;                                                                \
      case STR_USER:                                                          \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
        break;                                                                \
      default:                                                                \
        /* Unhandled source type: only prev_c was updated. */                 \
        break;                                                                \
      }                                                                       \
  } while(/*CONSTCOND*/(void)0,0)
132 
133 #endif /* !TRE_WCHAR */
134 
135 
136 
/* Evaluates to 1 when `c' is an underscore or is accepted by tre_isalnum(),
   and to 0 otherwise.  `c' is evaluated once for an underscore and twice
   for anything else, so it must be free of side effects. */
#define IS_WORD_CHAR(c)	 (!((c) != L'_' && !tre_isalnum(c)))
138 
/*
   CHECK_ASSERTIONS(assertions) -- evaluates to 1 when at least one of the
   assertion bits set in `assertions' is violated at the current input
   position, and to 0 when every requested assertion holds.

   The macro reads `pos', `prev_c', `next_c', `reg_notbol', `reg_noteol' and
   `reg_newline' from the expanding function.  The checks, per bit:

     ASSERT_AT_BOL     violated unless pos is 0 with reg_notbol clear, or
		       reg_newline is set and prev_c is a newline.
     ASSERT_AT_EOL     violated unless next_c is NUL with reg_noteol clear,
		       or reg_newline is set and next_c is a newline.
     ASSERT_AT_BOW     violated if prev_c is a word character or next_c is
		       not.
     ASSERT_AT_EOW     violated if prev_c is not a word character or next_c
		       is.
     ASSERT_AT_WB      violated if, away from both ends of the input, prev_c
		       and next_c agree on being word characters.
     ASSERT_AT_WB_NEG  violated at either end of the input, or where prev_c
		       and next_c disagree on being word characters.

   `assertions' is parenthesized at every use so that compound arguments
   such as `a | b' are masked as a whole.  It is evaluated several times and
   must therefore be free of side effects.
*/
#define CHECK_ASSERTIONS(assertions)					      \
  ((((assertions) & ASSERT_AT_BOL)					      \
    && (pos > 0 || reg_notbol)						      \
    && (prev_c != L'\n' || !reg_newline))				      \
   || (((assertions) & ASSERT_AT_EOL)					      \
       && (next_c != L'\0' || reg_noteol)				      \
       && (next_c != L'\n' || !reg_newline))				      \
   || (((assertions) & ASSERT_AT_BOW)					      \
       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))		      \
   || (((assertions) & ASSERT_AT_EOW)					      \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))		      \
   || (((assertions) & ASSERT_AT_WB)					      \
       && (pos != 0 && next_c != L'\0'					      \
	   && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))		      \
   || (((assertions) & ASSERT_AT_WB_NEG)				      \
       && (pos == 0 || next_c == L'\0'					      \
	   || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
156 
/*
   CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) -- evaluates to 1 when the
   character class constraints attached to transition `trans_i' reject
   `prev_c' (a variable of the expanding function), and to 0 otherwise.

   Rejection happens when:
     - ASSERT_CHAR_CLASS is set, REG_ICASE is clear in tnfa->cflags, and
       prev_c is not in trans_i->u.class;
     - ASSERT_CHAR_CLASS is set, REG_ICASE is set, and neither the lower
       case nor the upper case form of prev_c is in trans_i->u.class;
     - ASSERT_CHAR_CLASS_NEG is set and tre_neg_char_classes_match() reports
       a match against trans_i->neg_classes.

   `trans_i' and `tnfa' are parenthesized so that any pointer expression
   (for example `&trans') can be passed.  Both are evaluated more than once.
   `eflags' is accepted but not used by the expansion.
*/
#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
  ((((trans_i)->assertions & ASSERT_CHAR_CLASS)                               \
       && !((tnfa)->cflags & REG_ICASE)                                       \
       && !tre_isctype((tre_cint_t)prev_c, (trans_i)->u.class))               \
    || (((trans_i)->assertions & ASSERT_CHAR_CLASS)                           \
        && ((tnfa)->cflags & REG_ICASE)                                       \
        && !tre_isctype(tre_tolower((tre_cint_t)prev_c),                      \
                        (trans_i)->u.class)                                   \
        && !tre_isctype(tre_toupper((tre_cint_t)prev_c),                      \
                        (trans_i)->u.class))                                  \
    || (((trans_i)->assertions & ASSERT_CHAR_CLASS_NEG)                       \
        && tre_neg_char_classes_match((trans_i)->neg_classes,                 \
                                      (tre_cint_t)prev_c,                     \
                                      (tnfa)->cflags & REG_ICASE)))
168 
169 
170 
171 
172 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
173 inline static int
tre_tag_order(size_t num_tags,tre_tag_direction_t * tag_directions,int * t1,int * t2)174 tre_tag_order(size_t num_tags, tre_tag_direction_t *tag_directions,
175 	      int *t1, int *t2)
176 {
177   size_t i;
178   for (i = 0; i < num_tags; i++)
179     {
180       if (tag_directions[i] == TRE_TAG_MINIMIZE)
181 	{
182 	  if (t1[i] < t2[i])
183 	    return 1;
184 	  if (t1[i] > t2[i])
185 	    return 0;
186 	}
187       else
188 	{
189 	  if (t1[i] > t2[i])
190 	    return 1;
191 	  if (t1[i] < t2[i])
192 	    return 0;
193 	}
194     }
195   /*  assert(0);*/
196   return 0;
197 }
198 
199 inline static int
tre_neg_char_classes_match(tre_ctype_t * classes,tre_cint_t wc,int icase)200 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
201 {
202   DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
203   while (*classes != (tre_ctype_t)0)
204     if ((!icase && tre_isctype(wc, *classes))
205 	|| (icase && (tre_isctype(tre_toupper(wc), *classes)
206 		      || tre_isctype(tre_tolower(wc), *classes))))
207       return 1; /* Match. */
208     else
209       classes++;
210   return 0; /* No match. */
211 }
212