1 /*
2 tre-match-utils.h - TRE matcher helper definitions
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
/* View the identifier `string' as a pointer to a read-only tre_str_source.
   This is a plain textual macro: a variable named `string' must be in scope
   wherever it is expanded.  GET_NEXT_WCHAR() uses it in its STR_USER case to
   reach the `get_next_char' callback and its `context' argument. */
#define str_source ((const tre_str_source *)string)
10
11 #ifdef TRE_WCHAR
12
13 #ifdef TRE_MULTIBYTE
14
15 /* Wide character and multibyte support. */
16
/* Fetch the next input character into `next_c', first saving the current
   one in `prev_c'.  Variant for builds with wide character and multibyte
   support.

   The macro has no parameters; it reads and writes these names, which must
   all be in scope at the point of expansion:
     type          - selects the input representation (STR_* constant)
     pos, len      - current position and input length; a negative `len'
                     disables the length check
     str_byte      - cursor for STR_BYTE and STR_MBS input
     str_wide      - cursor for STR_WIDE input
     pos_add_next  - amount added to `pos' on the next call (STR_MBS, STR_USER)
     mbstate       - conversion state handed to tre_mbrtowc()
     ret           - receives REG_NOMATCH on a decoding failure
     error_exit    - label jumped to on a decoding failure
     str_user_end  - receives the return value of the user callback
     str_source    - callback record used for STR_USER input

   Per representation:
     STR_BYTE / STR_WIDE: advance `pos' by one; yield NUL once `pos' reaches a
       non-negative `len', otherwise read one unit and advance the cursor.
       Bytes are read as unsigned char.
     STR_MBS: advance `pos' by `pos_add_next', then decode one character with
       tre_mbrtowc(), offering at most `len - pos' bytes (32 when `len' is
       negative).  A NULL `str_byte' or an exhausted length yields NUL.  A
       result of (size_t)-1 or (size_t)-2 sets `ret' to REG_NOMATCH and jumps
       to `error_exit'.  A zero result with an explicit length is treated as a
       one-byte NUL that is stepped over; otherwise the cursor and
       `pos_add_next' move by the returned count (which is 0 when a zero
       result occurs with a negative `len').
     STR_USER: advance `pos' by `pos_add_next' and ask the callback for the
       next character and the next position increment. */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    switch (type) { \
    case STR_BYTE: \
      pos++; \
      if (len >= 0 && pos >= len) \
        next_c = '\0'; \
      else \
        next_c = (unsigned char)(*str_byte++); \
      break; \
    case STR_WIDE: \
      pos++; \
      if (len >= 0 && pos >= len) \
        next_c = L'\0'; \
      else \
        next_c = *str_wide++; \
      break; \
    case STR_MBS: \
      pos += pos_add_next; \
      if (str_byte == NULL) \
        next_c = L'\0'; \
      else \
        { \
          size_t w; \
          long max; \
          if (len >= 0) \
            max = len - pos; \
          else \
            max = 32; \
          if (max <= 0) \
            { \
              next_c = L'\0'; \
              pos_add_next = 1; \
            } \
          else \
            { \
              w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate); \
              if (w == (size_t)-1 || w == (size_t)-2) \
                { \
                  ret = REG_NOMATCH; \
                  goto error_exit; \
                } \
              if (w == 0 && len >= 0) \
                { \
                  pos_add_next = 1; \
                  next_c = 0; \
                  str_byte++; \
                } \
              else \
                { \
                  pos_add_next = (unsigned int)w; \
                  str_byte += w; \
                } \
            } \
        } \
      break; \
    case STR_USER: \
      pos += pos_add_next; \
      str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                               str_source->context); \
      break; \
    } \
  } while (/*CONSTCOND*/(void)0,0)
79
80 #else /* !TRE_MULTIBYTE */
81
82 /* Wide character support, no multibyte support. */
83
/* Fetch the next input character into `next_c', first saving the current
   one in `prev_c'.  Variant for builds with wide character support but no
   multibyte support.

   The macro has no parameters; it reads and writes these names, which must
   all be in scope at the point of expansion: `type', `pos', `len',
   `str_byte', `str_wide', `pos_add_next', `str_user_end' and `str_source'.

   STR_BYTE / STR_WIDE: advance `pos' by one; yield NUL once `pos' reaches a
     non-negative `len' (a negative `len' disables the check), otherwise read
     one unit and advance the cursor.  Bytes are read as unsigned char.
   STR_USER: advance `pos' by `pos_add_next', then call the user callback,
     which stores the next character and the next position increment; its
     return value is kept in `str_user_end'. */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    switch (type) { \
    case STR_BYTE: \
      pos++; \
      if (len >= 0 && pos >= len) \
        next_c = '\0'; \
      else \
        next_c = (unsigned char)(*str_byte++); \
      break; \
    case STR_WIDE: \
      pos++; \
      if (len >= 0 && pos >= len) \
        next_c = L'\0'; \
      else \
        next_c = *str_wide++; \
      break; \
    case STR_USER: \
      pos += pos_add_next; \
      str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                               str_source->context); \
      break; \
    } \
  } while (/*CONSTCOND*/(void)0,0)
108
109 #endif /* !TRE_MULTIBYTE */
110
111 #else /* !TRE_WCHAR */
112
113 /* No wide character or multibyte support. */
114
/* Fetch the next input character into `next_c', first saving the current
   one in `prev_c'.  Variant for builds without wide character or multibyte
   support.

   The macro has no parameters; it reads and writes these names, which must
   all be in scope at the point of expansion: `type', `pos', `len',
   `str_byte', `pos_add_next', `str_user_end' and `str_source'.

   STR_BYTE: advance `pos' by one; yield NUL once `pos' reaches a
     non-negative `len' (a negative `len' disables the check), otherwise read
     one byte as unsigned char and advance `str_byte'.
   STR_USER: advance `pos' by `pos_add_next', then call the user callback,
     which stores the next character and the next position increment; its
     return value is kept in `str_user_end'. */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    switch (type) { \
    case STR_BYTE: \
      pos++; \
      if (len >= 0 && pos >= len) \
        next_c = '\0'; \
      else \
        next_c = (unsigned char)(*str_byte++); \
      break; \
    case STR_USER: \
      pos += pos_add_next; \
      str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                               str_source->context); \
      break; \
    } \
  } while (/*CONSTCOND*/(void)0,0)
132
133 #endif /* !TRE_WCHAR */
134
135
136
/* Nonzero (exactly 1) when `c' is an underscore or tre_isalnum(c) is true,
   0 otherwise.  The argument is evaluated twice when it is not an
   underscore, so it must not have side effects. */
#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c))
138
/* Evaluates to nonzero when at least one assertion requested in the
   `assertions' bit mask does NOT hold at the current input position, and to
   0 when every requested assertion holds.

   Besides its argument the macro reads these names from the expansion site:
   `pos', `prev_c', `next_c', `reg_notbol', `reg_noteol' and `reg_newline'.

   A requested assertion is reported as violated when:
     ASSERT_AT_BOL    - `pos' is past 0 or `reg_notbol' is set, and it is not
                        the case that `prev_c' is a newline with
                        `reg_newline' set.
     ASSERT_AT_EOL    - `next_c' is not NUL or `reg_noteol' is set, and it is
                        not the case that `next_c' is a newline with
                        `reg_newline' set.
     ASSERT_AT_BOW    - `prev_c' is a word character or `next_c' is not.
     ASSERT_AT_EOW    - `prev_c' is not a word character or `next_c' is.
     ASSERT_AT_WB     - not at either end of the input and both neighbours
                        have the same word-character status.
     ASSERT_AT_WB_NEG - at either end of the input, or the neighbours differ
                        in word-character status.

   The argument is parenthesized at every use so that a compound expression
   such as `a | b' is masked as a whole; without that, `a | b & FLAG' would
   parse as `a | (b & FLAG)'.  It is still evaluated several times, so it
   must not have side effects. */
#define CHECK_ASSERTIONS(assertions) \
  ((((assertions) & ASSERT_AT_BOL) \
    && (pos > 0 || reg_notbol) \
    && (prev_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_EOL) \
       && (next_c != L'\0' || reg_noteol) \
       && (next_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_BOW) \
       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_EOW) \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB) \
       && (pos != 0 && next_c != L'\0' \
           && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB_NEG) \
       && (pos == 0 || next_c == L'\0' \
           || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
156
/* Evaluates to nonzero when the character class constraints attached to the
   transition `trans_i' reject `prev_c', and to 0 otherwise.

   trans_i - pointer to a transition; its `assertions', `u.class' and
             `neg_classes' members are read.
   tnfa    - pointer to the automaton; only `cflags & REG_ICASE' is read.
   eflags  - accepted but not used by this macro.

   `prev_c' is taken from the expansion site.

   The constraint is reported as violated when:
     - ASSERT_CHAR_CLASS is set, REG_ICASE is clear and `prev_c' is not in
       `u.class'; or
     - ASSERT_CHAR_CLASS is set, REG_ICASE is set and neither the lower-case
       nor the upper-case form of `prev_c' is in `u.class'; or
     - ASSERT_CHAR_CLASS_NEG is set and tre_neg_char_classes_match() reports
       that `prev_c' belongs to one of `neg_classes'.

   `trans_i' and `tnfa' are parenthesized at every use so that arguments such
   as `&obj' or `ptr + 1' bind before `->'.  Both are evaluated several
   times and must not have side effects. */
#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
  ((((trans_i)->assertions & ASSERT_CHAR_CLASS) \
    && !((tnfa)->cflags & REG_ICASE) \
    && !tre_isctype((tre_cint_t)prev_c, (trans_i)->u.class)) \
   || (((trans_i)->assertions & ASSERT_CHAR_CLASS) \
       && ((tnfa)->cflags & REG_ICASE) \
       && !tre_isctype(tre_tolower((tre_cint_t)prev_c), (trans_i)->u.class) \
       && !tre_isctype(tre_toupper((tre_cint_t)prev_c), (trans_i)->u.class)) \
   || (((trans_i)->assertions & ASSERT_CHAR_CLASS_NEG) \
       && tre_neg_char_classes_match((trans_i)->neg_classes, \
                                     (tre_cint_t)prev_c, \
                                     (tnfa)->cflags & REG_ICASE)))
168
169
170
171
172 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
173 inline static int
tre_tag_order(size_t num_tags,tre_tag_direction_t * tag_directions,int * t1,int * t2)174 tre_tag_order(size_t num_tags, tre_tag_direction_t *tag_directions,
175 int *t1, int *t2)
176 {
177 size_t i;
178 for (i = 0; i < num_tags; i++)
179 {
180 if (tag_directions[i] == TRE_TAG_MINIMIZE)
181 {
182 if (t1[i] < t2[i])
183 return 1;
184 if (t1[i] > t2[i])
185 return 0;
186 }
187 else
188 {
189 if (t1[i] > t2[i])
190 return 1;
191 if (t1[i] < t2[i])
192 return 0;
193 }
194 }
195 /* assert(0);*/
196 return 0;
197 }
198
199 inline static int
tre_neg_char_classes_match(tre_ctype_t * classes,tre_cint_t wc,int icase)200 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
201 {
202 DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
203 while (*classes != (tre_ctype_t)0)
204 if ((!icase && tre_isctype(wc, *classes))
205 || (icase && (tre_isctype(tre_toupper(wc), *classes)
206 || tre_isctype(tre_tolower(wc), *classes))))
207 return 1; /* Match. */
208 else
209 classes++;
210 return 0; /* No match. */
211 }
212