/*
  tre-match-utils.h - TRE matcher helper definitions

  This software is released under a BSD-style license.
  See the file LICENSE for details and copyright.

*/

/* The expanding function's `string' variable viewed as a
   `const tre_str_source *'; GET_NEXT_WCHAR() uses it on the STR_USER path. */
#define str_source ((const tre_str_source *)string)

#ifdef TRE_WCHAR

#ifdef TRE_MULTIBYTE

/* Wide character and multibyte support. */

/*
 * GET_NEXT_WCHAR() - step the matcher forward by one input character.
 *
 * The macro takes no arguments; it works directly on variables of the
 * function it is expanded in: `type', `len', `pos', `pos_add_next',
 * `prev_c', `next_c', `str_byte', `str_wide', `str_user_end', `mbstate',
 * `ret', and `string' (through `str_source').
 *
 * The current `next_c' is first saved in `prev_c'.  A new `next_c' is then
 * fetched according to `type':
 *
 *   STR_BYTE  `pos' is incremented and one byte is read from `str_byte'
 *             as an unsigned char.
 *   STR_WIDE  `pos' is incremented and one element is read from `str_wide'.
 *   STR_MBS   `pos' grows by `pos_add_next', then one multibyte sequence
 *             is decoded from `str_byte' with tre_mbrtowc().  At most
 *             `len - pos' bytes are examined when `len' is non-negative,
 *             and at most 32 bytes otherwise.  `pos_add_next' is set to
 *             the number of bytes consumed (1 for a decoded NUL when `len'
 *             is non-negative, and 1 when no bytes are left).
 *   STR_USER  `pos' grows by `pos_add_next', then the get_next_char()
 *             callback of `str_source' fills in `next_c' and
 *             `pos_add_next'; its return value is kept in `str_user_end'.
 *
 * For STR_BYTE and STR_WIDE, `next_c' becomes NUL without touching the
 * string once `len' is non-negative and `pos' has reached it.  For STR_MBS
 * the same happens when `str_byte' is NULL or no bytes remain before `len'.
 *
 * If tre_mbrtowc() returns (size_t)-1 or (size_t)-2, the macro stores
 * REG_NOMATCH in `ret' and jumps to the label `error_exit'; both must
 * exist in the expanding function.
 */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    switch (type) { \
    case STR_BYTE: \
      pos++; \
      if (len < 0 || pos < len) \
        next_c = (unsigned char)(*str_byte++); \
      else \
        next_c = '\0'; \
      break; \
    case STR_WIDE: \
      pos++; \
      if (len < 0 || pos < len) \
        next_c = *str_wide++; \
      else \
        next_c = L'\0'; \
      break; \
    case STR_MBS: \
      pos += pos_add_next; \
      if (str_byte != NULL) \
        { \
          size_t nconv; \
          long avail; \
          if (len < 0) \
            avail = 32; \
          else \
            avail = len - pos; \
          if (avail > 0) \
            { \
              nconv = tre_mbrtowc(&next_c, str_byte, (size_t)avail, \
                                  &mbstate); \
              if (nconv == (size_t)-2 || nconv == (size_t)-1) \
                { \
                  ret = REG_NOMATCH; \
                  goto error_exit; \
                } \
              if (nconv != 0 || len < 0) \
                { \
                  pos_add_next = (unsigned int)nconv; \
                  str_byte += nconv; \
                } \
              else \
                { \
                  next_c = 0; \
                  pos_add_next = 1; \
                  str_byte++; \
                } \
            } \
          else \
            { \
              pos_add_next = 1; \
              next_c = L'\0'; \
            } \
        } \
      else \
        next_c = L'\0'; \
      break; \
    case STR_USER: \
      pos += pos_add_next; \
      str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                               str_source->context); \
    } \
  } while (/*CONSTCOND*/(void)0, 0)

#else /* !TRE_MULTIBYTE */

/* Wide character support, no multibyte support. */

/*
 * GET_NEXT_WCHAR() - step the matcher forward by one input character.
 *
 * The macro takes no arguments; it works directly on variables of the
 * function it is expanded in: `type', `len', `pos', `pos_add_next',
 * `prev_c', `next_c', `str_byte', `str_wide', `str_user_end', and
 * `string' (through `str_source').
 *
 * The current `next_c' is first saved in `prev_c'.  A new `next_c' is then
 * fetched according to `type':
 *
 *   STR_BYTE  `pos' is incremented and one byte is read from `str_byte'
 *             as an unsigned char.
 *   STR_WIDE  `pos' is incremented and one element is read from `str_wide'.
 *   STR_USER  `pos' grows by `pos_add_next', then the get_next_char()
 *             callback of `str_source' fills in `next_c' and
 *             `pos_add_next'; its return value is kept in `str_user_end'.
 *
 * For STR_BYTE and STR_WIDE, `next_c' becomes NUL without touching the
 * string once `len' is non-negative and `pos' has reached it.
 */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    switch (type) { \
    case STR_BYTE: \
      pos++; \
      if (len < 0 || pos < len) \
        next_c = (unsigned char)(*str_byte++); \
      else \
        next_c = '\0'; \
      break; \
    case STR_WIDE: \
      pos++; \
      if (len < 0 || pos < len) \
        next_c = *str_wide++; \
      else \
        next_c = L'\0'; \
      break; \
    case STR_USER: \
      pos += pos_add_next; \
      str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                               str_source->context); \
    } \
  } while (/*CONSTCOND*/(void)0, 0)

#endif /* !TRE_MULTIBYTE */

#else /* !TRE_WCHAR */

/* No wide character or multibyte support. */

/*
 * GET_NEXT_WCHAR() - step the matcher forward by one input character.
 *
 * The macro takes no arguments; it works directly on variables of the
 * function it is expanded in: `type', `len', `pos', `pos_add_next',
 * `prev_c', `next_c', `str_byte', `str_user_end', and `string' (through
 * `str_source').
 *
 * The current `next_c' is first saved in `prev_c'.  A new `next_c' is then
 * fetched according to `type':
 *
 *   STR_BYTE  `pos' is incremented and one byte is read from `str_byte'
 *             as an unsigned char; once `len' is non-negative and `pos'
 *             has reached it, `next_c' becomes NUL and the string is not
 *             touched.
 *   STR_USER  `pos' grows by `pos_add_next', then the get_next_char()
 *             callback of `str_source' fills in `next_c' and
 *             `pos_add_next'; its return value is kept in `str_user_end'.
 */
#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    switch (type) { \
    case STR_BYTE: \
      pos++; \
      if (len < 0 || pos < len) \
        next_c = (unsigned char)(*str_byte++); \
      else \
        next_c = '\0'; \
      break; \
    case STR_USER: \
      pos += pos_add_next; \
      str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                               str_source->context); \
    } \
  } while (/*CONSTCOND*/(void)0, 0)

#endif /* !TRE_WCHAR */



/* Evaluates to 1 when `c' is an underscore or tre_isalnum(c) is nonzero,
   and to 0 otherwise.  `c' is evaluated twice when it is not an
   underscore, so it should be free of side effects. */
#define IS_WORD_CHAR(c) (((c) == L'_') || tre_isalnum(c))

/*
 * CHECK_ASSERTIONS(assertions) - test position assertions at the current
 * match position.
 *
 * Evaluates to nonzero when at least one assertion whose bit is set in
 * `assertions' does NOT hold, and to 0 when all of them hold.  Besides its
 * argument it reads `pos', `prev_c', `next_c', `reg_notbol', `reg_noteol'
 * and `reg_newline' from the expanding function.
 *
 * An assertion is reported as failed under these conditions:
 *   ASSERT_AT_BOL     (pos > 0 or reg_notbol) and
 *                     (prev_c is not '\n' or reg_newline is off)
 *   ASSERT_AT_EOL     (next_c is not NUL or reg_noteol) and
 *                     (next_c is not '\n' or reg_newline is off)
 *   ASSERT_AT_BOW     prev_c is a word character, or next_c is not
 *   ASSERT_AT_EOW     prev_c is not a word character, or next_c is
 *   ASSERT_AT_WB      pos != 0, next_c is not NUL, and prev_c and next_c
 *                     are both word characters or both non-word characters
 *   ASSERT_AT_WB_NEG  pos == 0, or next_c is NUL, or exactly one of prev_c
 *                     and next_c is a word character
 *
 * The argument is parenthesized at every use so that an expression such as
 * `a | b' is masked as a whole rather than being split by the higher
 * precedence of `&'.
 */
#define CHECK_ASSERTIONS(assertions) \
  ((((assertions) & ASSERT_AT_BOL) \
    && (pos > 0 || reg_notbol) \
    && (prev_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_EOL) \
       && (next_c != L'\0' || reg_noteol) \
       && (next_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_BOW) \
       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_EOW) \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB) \
       && (pos != 0 && next_c != L'\0' \
           && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB_NEG) \
       && (pos == 0 || next_c == L'\0' \
           || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))

/*
 * CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) - test the character class
 * assertions of a transition against `prev_c'.
 *
 * `trans_i' and `tnfa' are pointers; `trans_i' supplies `assertions',
 * `u.class' and `neg_classes', and `tnfa' supplies `cflags'.  `eflags' is
 * accepted but not used.  `prev_c' is read from the expanding function.
 *
 * Evaluates to nonzero when an assertion FAILS, i.e. when either
 *   - ASSERT_CHAR_CLASS is set and `prev_c' is not in `u.class' (with
 *     REG_ICASE set in `cflags', neither its lower-case nor its upper-case
 *     form is in the class), or
 *   - ASSERT_CHAR_CLASS_NEG is set and tre_neg_char_classes_match()
 *     reports that `prev_c' belongs to one of `neg_classes'.
 * Evaluates to 0 otherwise.
 *
 * Both pointer arguments are parenthesized before `->' so that expressions
 * such as `&obj' or `base + i' can be passed safely.
 */
#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
  ((((trans_i)->assertions & ASSERT_CHAR_CLASS) \
    && !((tnfa)->cflags & REG_ICASE) \
    && !tre_isctype((tre_cint_t)prev_c, (trans_i)->u.class)) \
   || (((trans_i)->assertions & ASSERT_CHAR_CLASS) \
       && ((tnfa)->cflags & REG_ICASE) \
       && !tre_isctype(tre_tolower((tre_cint_t)prev_c), (trans_i)->u.class) \
       && !tre_isctype(tre_toupper((tre_cint_t)prev_c), (trans_i)->u.class)) \
   || (((trans_i)->assertions & ASSERT_CHAR_CLASS_NEG) \
       && tre_neg_char_classes_match((trans_i)->neg_classes, \
                                     (tre_cint_t)prev_c, \
                                     (tnfa)->cflags & REG_ICASE)))




17263d4abf0Sagc /* Returns 1 if `t1' wins `t2', 0 otherwise. */
17363d4abf0Sagc inline static int
tre_tag_order(size_t num_tags,tre_tag_direction_t * tag_directions,int * t1,int * t2)174f2a3d147Schristos tre_tag_order(size_t num_tags, tre_tag_direction_t *tag_directions,
17563d4abf0Sagc int *t1, int *t2)
17663d4abf0Sagc {
177f2a3d147Schristos size_t i;
17863d4abf0Sagc for (i = 0; i < num_tags; i++)
17963d4abf0Sagc {
18063d4abf0Sagc if (tag_directions[i] == TRE_TAG_MINIMIZE)
18163d4abf0Sagc {
18263d4abf0Sagc if (t1[i] < t2[i])
18363d4abf0Sagc return 1;
18463d4abf0Sagc if (t1[i] > t2[i])
18563d4abf0Sagc return 0;
18663d4abf0Sagc }
18763d4abf0Sagc else
18863d4abf0Sagc {
18963d4abf0Sagc if (t1[i] > t2[i])
19063d4abf0Sagc return 1;
19163d4abf0Sagc if (t1[i] < t2[i])
19263d4abf0Sagc return 0;
19363d4abf0Sagc }
19463d4abf0Sagc }
19563d4abf0Sagc /* assert(0);*/
19663d4abf0Sagc return 0;
19763d4abf0Sagc }
19863d4abf0Sagc
19963d4abf0Sagc inline static int
tre_neg_char_classes_match(tre_ctype_t * classes,tre_cint_t wc,int icase)20063d4abf0Sagc tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
20163d4abf0Sagc {
20263d4abf0Sagc DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
20363d4abf0Sagc while (*classes != (tre_ctype_t)0)
20463d4abf0Sagc if ((!icase && tre_isctype(wc, *classes))
20563d4abf0Sagc || (icase && (tre_isctype(tre_toupper(wc), *classes)
20663d4abf0Sagc || tre_isctype(tre_tolower(wc), *classes))))
20763d4abf0Sagc return 1; /* Match. */
20863d4abf0Sagc else
20963d4abf0Sagc classes++;
21063d4abf0Sagc return 0; /* No match. */
21163d4abf0Sagc }
212