/*
  tre-match-utils.h - TRE matcher helper definitions

  This software is released under a BSD-style license.
  See the file LICENSE for details and copyright.

*/

/* Views the `string' variable of the expanding scope as a pointer to a
   tre_str_source.  Only used by the STR_USER case of GET_NEXT_WCHAR(). */
#define str_source ((const tre_str_source*)string)

/*
  GET_NEXT_WCHAR() -- step the input forward by one character.

  This is a statement macro that expands in the scope of its user and reads
  and writes that scope's variables directly:

    prev_c, next_c   previous / next character; prev_c takes the old next_c
    type             input kind selecting the case (STR_BYTE, STR_WIDE,
                     STR_MBS, STR_USER, depending on the build)
    pos, len         current position and input length; the length check is
                     skipped when len is negative
    str_byte         read cursor for STR_BYTE and STR_MBS input
    str_wide         read cursor for STR_WIDE input (TRE_WCHAR builds)
    pos_add_next     amount added to pos on the next step (STR_MBS, STR_USER)
    mbstate          conversion state handed to tre_mbrtowc() (STR_MBS)
    str_user_end     receives the result of get_next_char() (STR_USER)
    string           see str_source above (STR_USER)

  Once the position reaches len, next_c is set to the NUL character instead
  of reading further input.

  NOTE: in the TRE_WCHAR + TRE_MULTIBYTE variant the STR_MBS case executes
  `return REG_NOMATCH;' in the enclosing function when tre_mbrtowc() reports
  (size_t)-1 or (size_t)-2, so the macro may only be used inside a function
  whose return type accepts REG_NOMATCH.

  A `type' value without a matching case leaves everything except prev_c
  untouched.
*/

#ifdef TRE_WCHAR

#ifdef TRE_MULTIBYTE

/* Wide character and multibyte support. */

#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    switch (type) \
      { \
      case STR_BYTE: \
        pos++; \
        if (len >= 0 && pos >= len) \
          next_c = '\0'; \
        else \
          next_c = (unsigned char)(*str_byte++); \
        break; \
      case STR_WIDE: \
        pos++; \
        if (len >= 0 && pos >= len) \
          next_c = L'\0'; \
        else \
          next_c = *str_wide++; \
        break; \
      case STR_MBS: \
        pos += pos_add_next; \
        if (str_byte == NULL) \
          next_c = L'\0'; \
        else \
          { \
            size_t w; \
            long max; \
            /* Bytes the decoder may look at: what is left of a counted \
               string, or 32 when no length was given. */ \
            if (len >= 0) \
              max = len - pos; \
            else \
              max = 32; \
            if (max <= 0) \
              { \
                next_c = L'\0'; \
                pos_add_next = 1; \
              } \
            else \
              { \
                w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate); \
                if (w == (size_t)-1 || w == (size_t)-2) \
                  return REG_NOMATCH; \
                if (w == 0 && len >= 0) \
                  { \
                    /* NUL inside a counted string: step over one byte. */ \
                    pos_add_next = 1; \
                    next_c = 0; \
                    str_byte++; \
                  } \
                else \
                  { \
                    pos_add_next = (unsigned int)w; \
                    str_byte += w; \
                  } \
              } \
          } \
        break; \
      case STR_USER: \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
        break; \
      default: \
        break; \
      } \
  } while (/*CONSTCOND*/0)

#else /* !TRE_MULTIBYTE */

/* Wide character support, no multibyte support. */

#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    switch (type) \
      { \
      case STR_BYTE: \
        pos++; \
        if (len >= 0 && pos >= len) \
          next_c = '\0'; \
        else \
          next_c = (unsigned char)(*str_byte++); \
        break; \
      case STR_WIDE: \
        pos++; \
        if (len >= 0 && pos >= len) \
          next_c = L'\0'; \
        else \
          next_c = *str_wide++; \
        break; \
      case STR_USER: \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
        break; \
      default: \
        break; \
      } \
  } while (/*CONSTCOND*/0)

#endif /* !TRE_MULTIBYTE */

#else /* !TRE_WCHAR */

/* No wide character or multibyte support. */

#define GET_NEXT_WCHAR() \
  do { \
    prev_c = next_c; \
    switch (type) \
      { \
      case STR_BYTE: \
        pos++; \
        if (len >= 0 && pos >= len) \
          next_c = '\0'; \
        else \
          next_c = (unsigned char)(*str_byte++); \
        break; \
      case STR_USER: \
        pos += pos_add_next; \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
                                                 str_source->context); \
        break; \
      default: \
        break; \
      } \
  } while (/*CONSTCOND*/0)

#endif /* !TRE_WCHAR */



/* Nonzero when `c' is an underscore or satisfies tre_isalnum().  The result
   is always 0 or 1, so two results can be compared with == and !=.
   `c' is evaluated more than once; do not pass an expression with side
   effects. */
#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c))

/*
  CHECK_ASSERTIONS(assertions) evaluates to nonzero when at least one of the
  assertion bits set in `assertions' does NOT hold at the current position,
  and to zero when every requested assertion holds.

  It reads pos, prev_c, next_c, reg_notbol, reg_noteol and reg_newline from
  the expanding scope:

    ASSERT_AT_BOL     holds at pos 0 (unless reg_notbol), or after '\n' when
                      reg_newline is set
    ASSERT_AT_EOL     holds when next_c is NUL (unless reg_noteol), or before
                      '\n' when reg_newline is set
    ASSERT_AT_BOW     holds when prev_c is not a word character and next_c is
    ASSERT_AT_EOW     holds when prev_c is a word character and next_c is not
    ASSERT_AT_WB      holds at pos 0, when next_c is NUL, or when exactly one
                      of prev_c / next_c is a word character
    ASSERT_AT_WB_NEG  holds when ASSERT_AT_WB does not

  The argument is parenthesized on every use so that a compound expression
  such as `a | b' is tested as a whole.
*/
#define CHECK_ASSERTIONS(assertions) \
  ((((assertions) & ASSERT_AT_BOL) \
    && (pos > 0 || reg_notbol) \
    && (prev_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_EOL) \
       && (next_c != L'\0' || reg_noteol) \
       && (next_c != L'\n' || !reg_newline)) \
   || (((assertions) & ASSERT_AT_BOW) \
       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_EOW) \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB) \
       && (pos != 0 && next_c != L'\0' \
           && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
   || (((assertions) & ASSERT_AT_WB_NEG) \
       && (pos == 0 || next_c == L'\0' \
           || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))

#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \ 159 (((trans_i->assertions & ASSERT_CHAR_CLASS) \ 160 && !(tnfa->cflags & REG_ICASE) \ 161 && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \ 162 || ((trans_i->assertions & ASSERT_CHAR_CLASS) \ 163 && (tnfa->cflags & REG_ICASE) \ 164 && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \ 165 && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \ 166 || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \ 167 && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\ 168 tnfa->cflags & REG_ICASE))) 169 170 171 172 173 /* Returns 1 if `t1' wins `t2', 0 otherwise. */ 174 inline static int 175 tre_tag_order(size_t num_tags, tre_tag_direction_t *tag_directions, 176 int *t1, int *t2) 177 { 178 size_t i; 179 for (i = 0; i < num_tags; i++) 180 { 181 if (tag_directions[i] == TRE_TAG_MINIMIZE) 182 { 183 if (t1[i] < t2[i]) 184 return 1; 185 if (t1[i] > t2[i]) 186 return 0; 187 } 188 else 189 { 190 if (t1[i] > t2[i]) 191 return 1; 192 if (t1[i] < t2[i]) 193 return 0; 194 } 195 } 196 /* assert(0);*/ 197 return 0; 198 } 199 200 inline static int 201 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase) 202 { 203 DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase)); 204 while (*classes != (tre_ctype_t)0) 205 if ((!icase && tre_isctype(wc, *classes)) 206 || (icase && (tre_isctype(tre_toupper(wc), *classes) 207 || tre_isctype(tre_tolower(wc), *classes)))) 208 return 1; /* Match. */ 209 else 210 classes++; 211 return 0; /* No match. */ 212 } 213