1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28*0Sstevel@tonic-gate /* All Rights Reserved */ 29*0Sstevel@tonic-gate 30*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 31*0Sstevel@tonic-gate 32*0Sstevel@tonic-gate /* 33*0Sstevel@tonic-gate * IMPORTANT NOTE: 34*0Sstevel@tonic-gate * 35*0Sstevel@tonic-gate * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS. 36*0Sstevel@tonic-gate * IT IS **NOT** CHARACTER SET INDEPENDENT. 
37*0Sstevel@tonic-gate * 38*0Sstevel@tonic-gate */ 39*0Sstevel@tonic-gate 40*0Sstevel@tonic-gate #pragma weak regex = _regex 41*0Sstevel@tonic-gate 42*0Sstevel@tonic-gate /* CONSTANTS SHARED WITH regcmp() */ 43*0Sstevel@tonic-gate #include "regex.h" 44*0Sstevel@tonic-gate 45*0Sstevel@tonic-gate #include "lint.h" 46*0Sstevel@tonic-gate #include "mtlib.h" 47*0Sstevel@tonic-gate #include <limits.h> 48*0Sstevel@tonic-gate #include <stdarg.h> 49*0Sstevel@tonic-gate #include <stdlib.h> 50*0Sstevel@tonic-gate #include <thread.h> 51*0Sstevel@tonic-gate #include <widec.h> 52*0Sstevel@tonic-gate #include "tsd.h" 53*0Sstevel@tonic-gate 54*0Sstevel@tonic-gate 55*0Sstevel@tonic-gate /* PRIVATE CONSTANTS */ 56*0Sstevel@tonic-gate 57*0Sstevel@tonic-gate #define ADD_256_TO_GROUP_LENGTH 0x1 58*0Sstevel@tonic-gate #define ADD_512_TO_GROUP_LENGTH 0x2 59*0Sstevel@tonic-gate #define ADD_768_TO_GROUP_LENGTH 0x3 60*0Sstevel@tonic-gate #define ADDED_LENGTH_BITS 0x3 61*0Sstevel@tonic-gate #define SINGLE_BYTE_MASK 0xff 62*0Sstevel@tonic-gate #define STRINGP_STACK_SIZE 50 63*0Sstevel@tonic-gate 64*0Sstevel@tonic-gate 65*0Sstevel@tonic-gate /* PRIVATE TYPE DEFINITIONS */ 66*0Sstevel@tonic-gate 67*0Sstevel@tonic-gate typedef enum { 68*0Sstevel@tonic-gate NOT_IN_CLASS = 0, 69*0Sstevel@tonic-gate IN_CLASS 70*0Sstevel@tonic-gate } char_test_condition_t; 71*0Sstevel@tonic-gate 72*0Sstevel@tonic-gate typedef enum { 73*0Sstevel@tonic-gate TESTING_CHAR = 0, 74*0Sstevel@tonic-gate CONDITION_TRUE, 75*0Sstevel@tonic-gate CONDITION_FALSE, 76*0Sstevel@tonic-gate CHAR_TEST_ERROR 77*0Sstevel@tonic-gate } char_test_result_t; 78*0Sstevel@tonic-gate 79*0Sstevel@tonic-gate 80*0Sstevel@tonic-gate /* PRIVATE GLOBAL VARIABLES */ 81*0Sstevel@tonic-gate 82*0Sstevel@tonic-gate static mutex_t regex_lock = DEFAULTMUTEX; 83*0Sstevel@tonic-gate static int return_arg_number[NSUBSTRINGS]; 84*0Sstevel@tonic-gate static const char *substring_endp[NSUBSTRINGS]; 85*0Sstevel@tonic-gate static const char 
*substring_startp[NSUBSTRINGS]; 86*0Sstevel@tonic-gate static const char *stringp_stack[STRINGP_STACK_SIZE]; 87*0Sstevel@tonic-gate static const char **stringp_stackp; 88*0Sstevel@tonic-gate 89*0Sstevel@tonic-gate 90*0Sstevel@tonic-gate /* DECLARATIONS OF PRIVATE FUNCTIONS */ 91*0Sstevel@tonic-gate 92*0Sstevel@tonic-gate static int 93*0Sstevel@tonic-gate get_wchar(wchar_t *wcharp, 94*0Sstevel@tonic-gate const char *stringp); 95*0Sstevel@tonic-gate 96*0Sstevel@tonic-gate static void 97*0Sstevel@tonic-gate get_match_counts(int *nmust_matchp, 98*0Sstevel@tonic-gate int *nextra_matches_allowedp, 99*0Sstevel@tonic-gate const char *count_stringp); 100*0Sstevel@tonic-gate 101*0Sstevel@tonic-gate static boolean_t 102*0Sstevel@tonic-gate in_wchar_range(wchar_t test_char, 103*0Sstevel@tonic-gate wchar_t lower_char, 104*0Sstevel@tonic-gate wchar_t upper_char); 105*0Sstevel@tonic-gate 106*0Sstevel@tonic-gate static const char * 107*0Sstevel@tonic-gate pop_stringp(void); 108*0Sstevel@tonic-gate 109*0Sstevel@tonic-gate static const char * 110*0Sstevel@tonic-gate previous_charp(const char *current_charp); 111*0Sstevel@tonic-gate 112*0Sstevel@tonic-gate static const char * 113*0Sstevel@tonic-gate push_stringp(const char *stringp); 114*0Sstevel@tonic-gate 115*0Sstevel@tonic-gate static char_test_result_t 116*0Sstevel@tonic-gate test_char_against_ascii_class(char test_char, 117*0Sstevel@tonic-gate const char *classp, 118*0Sstevel@tonic-gate char_test_condition_t test_condition); 119*0Sstevel@tonic-gate 120*0Sstevel@tonic-gate static char_test_result_t 121*0Sstevel@tonic-gate test_char_against_multibyte_class(wchar_t test_char, 122*0Sstevel@tonic-gate const char *classp, 123*0Sstevel@tonic-gate char_test_condition_t test_condition); 124*0Sstevel@tonic-gate 125*0Sstevel@tonic-gate 126*0Sstevel@tonic-gate /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 127*0Sstevel@tonic-gate 128*0Sstevel@tonic-gate static char_test_result_t 129*0Sstevel@tonic-gate 
test_char_against_old_ascii_class(char test_char, 130*0Sstevel@tonic-gate const char *classp, 131*0Sstevel@tonic-gate char_test_condition_t test_condition); 132*0Sstevel@tonic-gate 133*0Sstevel@tonic-gate static const char * 134*0Sstevel@tonic-gate test_repeated_ascii_char(const char *repeat_startp, 135*0Sstevel@tonic-gate const char *stringp, 136*0Sstevel@tonic-gate const char *regexp); 137*0Sstevel@tonic-gate 138*0Sstevel@tonic-gate static const char * 139*0Sstevel@tonic-gate test_repeated_multibyte_char(const char *repeat_startp, 140*0Sstevel@tonic-gate const char *stringp, 141*0Sstevel@tonic-gate const char *regexp); 142*0Sstevel@tonic-gate 143*0Sstevel@tonic-gate static const char * 144*0Sstevel@tonic-gate test_repeated_group(const char *repeat_startp, 145*0Sstevel@tonic-gate const char *stringp, 146*0Sstevel@tonic-gate const char *regexp); 147*0Sstevel@tonic-gate 148*0Sstevel@tonic-gate static const char * 149*0Sstevel@tonic-gate test_string(const char *stringp, 150*0Sstevel@tonic-gate const char *regexp); 151*0Sstevel@tonic-gate 152*0Sstevel@tonic-gate 153*0Sstevel@tonic-gate /* DEFINITIONS OF PUBLIC VARIABLES */ 154*0Sstevel@tonic-gate 155*0Sstevel@tonic-gate char *__loc1; 156*0Sstevel@tonic-gate 157*0Sstevel@tonic-gate /* 158*0Sstevel@tonic-gate * reserve thread-specific storage for __loc1 159*0Sstevel@tonic-gate */ 160*0Sstevel@tonic-gate char ** 161*0Sstevel@tonic-gate ____loc1(void) 162*0Sstevel@tonic-gate { 163*0Sstevel@tonic-gate if (_thr_main()) 164*0Sstevel@tonic-gate return (&__loc1); 165*0Sstevel@tonic-gate return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL)); 166*0Sstevel@tonic-gate } 167*0Sstevel@tonic-gate 168*0Sstevel@tonic-gate #define __loc1 (*(____loc1())) 169*0Sstevel@tonic-gate 170*0Sstevel@tonic-gate /* DEFINITION OF regex() */ 171*0Sstevel@tonic-gate 172*0Sstevel@tonic-gate extern char * 173*0Sstevel@tonic-gate _regex(const char *regexp, 174*0Sstevel@tonic-gate const char *stringp, ...) 
175*0Sstevel@tonic-gate { 176*0Sstevel@tonic-gate va_list arg_listp; 177*0Sstevel@tonic-gate int char_size; 178*0Sstevel@tonic-gate const char *end_of_matchp; 179*0Sstevel@tonic-gate wchar_t regex_wchar; 180*0Sstevel@tonic-gate char *return_argp[NSUBSTRINGS]; 181*0Sstevel@tonic-gate char *returned_substringp; 182*0Sstevel@tonic-gate int substringn; 183*0Sstevel@tonic-gate const char *substringp; 184*0Sstevel@tonic-gate wchar_t string_wchar; 185*0Sstevel@tonic-gate 186*0Sstevel@tonic-gate if (____loc1() == (char **)0) { 187*0Sstevel@tonic-gate return ((char *)0); 188*0Sstevel@tonic-gate } else { 189*0Sstevel@tonic-gate lmutex_lock(®ex_lock); 190*0Sstevel@tonic-gate __loc1 = (char *)0; 191*0Sstevel@tonic-gate } 192*0Sstevel@tonic-gate 193*0Sstevel@tonic-gate if ((stringp == (char *)0) || (regexp == (char *)0)) { 194*0Sstevel@tonic-gate lmutex_unlock(®ex_lock); 195*0Sstevel@tonic-gate return ((char *)0); 196*0Sstevel@tonic-gate } 197*0Sstevel@tonic-gate 198*0Sstevel@tonic-gate 199*0Sstevel@tonic-gate /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */ 200*0Sstevel@tonic-gate 201*0Sstevel@tonic-gate substringn = 0; 202*0Sstevel@tonic-gate va_start(arg_listp, stringp); 203*0Sstevel@tonic-gate while (substringn < NSUBSTRINGS) { 204*0Sstevel@tonic-gate return_argp[substringn] = va_arg(arg_listp, char *); 205*0Sstevel@tonic-gate substring_startp[substringn] = (char *)0; 206*0Sstevel@tonic-gate return_arg_number[substringn] = -1; 207*0Sstevel@tonic-gate substringn++; 208*0Sstevel@tonic-gate } 209*0Sstevel@tonic-gate va_end(arg_listp); 210*0Sstevel@tonic-gate 211*0Sstevel@tonic-gate 212*0Sstevel@tonic-gate /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */ 213*0Sstevel@tonic-gate 214*0Sstevel@tonic-gate end_of_matchp = (char *)0; 215*0Sstevel@tonic-gate stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE]; 216*0Sstevel@tonic-gate 217*0Sstevel@tonic-gate if ((int)*regexp == (int)START_OF_STRING_MARK) { 218*0Sstevel@tonic-gate 219*0Sstevel@tonic-gate /* 
220*0Sstevel@tonic-gate * the match must start at the beginning of the string 221*0Sstevel@tonic-gate */ 222*0Sstevel@tonic-gate 223*0Sstevel@tonic-gate __loc1 = (char *)stringp; 224*0Sstevel@tonic-gate regexp++; 225*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 226*0Sstevel@tonic-gate 227*0Sstevel@tonic-gate } else if ((int)*regexp == (int)ASCII_CHAR) { 228*0Sstevel@tonic-gate 229*0Sstevel@tonic-gate /* 230*0Sstevel@tonic-gate * test a string against a regular expression 231*0Sstevel@tonic-gate * that starts with a single ASCII character: 232*0Sstevel@tonic-gate * 233*0Sstevel@tonic-gate * move to each character in the string that matches 234*0Sstevel@tonic-gate * the first character in the regular expression 235*0Sstevel@tonic-gate * and test the remaining string 236*0Sstevel@tonic-gate */ 237*0Sstevel@tonic-gate 238*0Sstevel@tonic-gate while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { 239*0Sstevel@tonic-gate stringp++; 240*0Sstevel@tonic-gate } 241*0Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { 242*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 243*0Sstevel@tonic-gate if (end_of_matchp != (char *)0) { 244*0Sstevel@tonic-gate __loc1 = (char *)stringp; 245*0Sstevel@tonic-gate } else { 246*0Sstevel@tonic-gate stringp++; 247*0Sstevel@tonic-gate while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { 248*0Sstevel@tonic-gate stringp++; 249*0Sstevel@tonic-gate } 250*0Sstevel@tonic-gate } 251*0Sstevel@tonic-gate } 252*0Sstevel@tonic-gate 253*0Sstevel@tonic-gate } else if (!multibyte) { 254*0Sstevel@tonic-gate 255*0Sstevel@tonic-gate /* 256*0Sstevel@tonic-gate * if the value of the "multibyte" macro defined in <euc.h> 257*0Sstevel@tonic-gate * is false, regex() is running in an ASCII locale; 258*0Sstevel@tonic-gate * test an ASCII string against an ASCII regular expression 259*0Sstevel@tonic-gate * that doesn't start with a single ASCII character: 260*0Sstevel@tonic-gate * 
261*0Sstevel@tonic-gate * move forward in the string one byte at a time, testing 262*0Sstevel@tonic-gate * the remaining string against the regular expression 263*0Sstevel@tonic-gate */ 264*0Sstevel@tonic-gate 265*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 266*0Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { 267*0Sstevel@tonic-gate stringp++; 268*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 269*0Sstevel@tonic-gate } 270*0Sstevel@tonic-gate if (end_of_matchp != (char *)0) { 271*0Sstevel@tonic-gate __loc1 = (char *)stringp; 272*0Sstevel@tonic-gate } 273*0Sstevel@tonic-gate 274*0Sstevel@tonic-gate } else if ((int)*regexp == (int)MULTIBYTE_CHAR) { 275*0Sstevel@tonic-gate 276*0Sstevel@tonic-gate /* 277*0Sstevel@tonic-gate * test a multibyte string against a multibyte regular expression 278*0Sstevel@tonic-gate * that starts with a single multibyte character: 279*0Sstevel@tonic-gate * 280*0Sstevel@tonic-gate * move to each character in the string that matches 281*0Sstevel@tonic-gate * the first character in the regular expression 282*0Sstevel@tonic-gate * and test the remaining string 283*0Sstevel@tonic-gate */ 284*0Sstevel@tonic-gate 285*0Sstevel@tonic-gate (void) get_wchar(®ex_wchar, regexp + 1); 286*0Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp); 287*0Sstevel@tonic-gate while ((string_wchar != regex_wchar) && (char_size > 0)) { 288*0Sstevel@tonic-gate stringp += char_size; 289*0Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp); 290*0Sstevel@tonic-gate } 291*0Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && (char_size > 0)) { 292*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 293*0Sstevel@tonic-gate if (end_of_matchp != (char *)0) { 294*0Sstevel@tonic-gate __loc1 = (char *)stringp; 295*0Sstevel@tonic-gate } else { 296*0Sstevel@tonic-gate stringp += char_size; 297*0Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp); 
298*0Sstevel@tonic-gate while ((string_wchar != regex_wchar) && (char_size > 0)) { 299*0Sstevel@tonic-gate stringp += char_size; 300*0Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp); 301*0Sstevel@tonic-gate } 302*0Sstevel@tonic-gate } 303*0Sstevel@tonic-gate } 304*0Sstevel@tonic-gate 305*0Sstevel@tonic-gate } else { 306*0Sstevel@tonic-gate 307*0Sstevel@tonic-gate /* 308*0Sstevel@tonic-gate * test a multibyte string against a multibyte regular expression 309*0Sstevel@tonic-gate * that doesn't start with a single multibyte character 310*0Sstevel@tonic-gate * 311*0Sstevel@tonic-gate * move forward in the string one multibyte character at a time, 312*0Sstevel@tonic-gate * testing the remaining string against the regular expression 313*0Sstevel@tonic-gate */ 314*0Sstevel@tonic-gate 315*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 316*0Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp); 317*0Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && (char_size > 0)) { 318*0Sstevel@tonic-gate stringp += char_size; 319*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 320*0Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp); 321*0Sstevel@tonic-gate } 322*0Sstevel@tonic-gate if (end_of_matchp != (char *)0) { 323*0Sstevel@tonic-gate __loc1 = (char *)stringp; 324*0Sstevel@tonic-gate } 325*0Sstevel@tonic-gate } 326*0Sstevel@tonic-gate 327*0Sstevel@tonic-gate /* 328*0Sstevel@tonic-gate * Return substrings that matched subexpressions for which 329*0Sstevel@tonic-gate * matching substrings are to be returned. 330*0Sstevel@tonic-gate * 331*0Sstevel@tonic-gate * NOTE: 332*0Sstevel@tonic-gate * 333*0Sstevel@tonic-gate * According to manual page regcmp(3G), regex() returns substrings 334*0Sstevel@tonic-gate * that match subexpressions even when no substring matches the 335*0Sstevel@tonic-gate * entire regular expression. 
336*0Sstevel@tonic-gate */ 337*0Sstevel@tonic-gate 338*0Sstevel@tonic-gate substringn = 0; 339*0Sstevel@tonic-gate while (substringn < NSUBSTRINGS) { 340*0Sstevel@tonic-gate substringp = substring_startp[substringn]; 341*0Sstevel@tonic-gate if ((substringp != (char *)0) && 342*0Sstevel@tonic-gate (return_arg_number[substringn] >= 0)) { 343*0Sstevel@tonic-gate returned_substringp = 344*0Sstevel@tonic-gate return_argp[return_arg_number[substringn]]; 345*0Sstevel@tonic-gate if (returned_substringp != (char *)0) { 346*0Sstevel@tonic-gate while (substringp < substring_endp[substringn]) { 347*0Sstevel@tonic-gate *returned_substringp = (char)*substringp; 348*0Sstevel@tonic-gate returned_substringp++; 349*0Sstevel@tonic-gate substringp++; 350*0Sstevel@tonic-gate } 351*0Sstevel@tonic-gate *returned_substringp = '\0'; 352*0Sstevel@tonic-gate } 353*0Sstevel@tonic-gate } 354*0Sstevel@tonic-gate substringn++; 355*0Sstevel@tonic-gate } 356*0Sstevel@tonic-gate lmutex_unlock(®ex_lock); 357*0Sstevel@tonic-gate return ((char *)end_of_matchp); 358*0Sstevel@tonic-gate } /* regex() */ 359*0Sstevel@tonic-gate 360*0Sstevel@tonic-gate 361*0Sstevel@tonic-gate /* DEFINITIONS OF PRIVATE FUNCTIONS */ 362*0Sstevel@tonic-gate 363*0Sstevel@tonic-gate static int 364*0Sstevel@tonic-gate get_wchar(wchar_t *wcharp, 365*0Sstevel@tonic-gate const char *stringp) 366*0Sstevel@tonic-gate { 367*0Sstevel@tonic-gate int char_size; 368*0Sstevel@tonic-gate 369*0Sstevel@tonic-gate if (stringp == (char *)0) { 370*0Sstevel@tonic-gate char_size = 0; 371*0Sstevel@tonic-gate *wcharp = (wchar_t)((unsigned int)'\0'); 372*0Sstevel@tonic-gate } else if (*stringp == '\0') { 373*0Sstevel@tonic-gate char_size = 0; 374*0Sstevel@tonic-gate *wcharp = (wchar_t)((unsigned int)*stringp); 375*0Sstevel@tonic-gate } else if ((unsigned char)*stringp <= (unsigned char)0x7f) { 376*0Sstevel@tonic-gate char_size = 1; 377*0Sstevel@tonic-gate *wcharp = (wchar_t)((unsigned int)*stringp); 378*0Sstevel@tonic-gate } else { 
379*0Sstevel@tonic-gate char_size = mbtowc(wcharp, stringp, MB_LEN_MAX); 380*0Sstevel@tonic-gate } 381*0Sstevel@tonic-gate return (char_size); 382*0Sstevel@tonic-gate } 383*0Sstevel@tonic-gate 384*0Sstevel@tonic-gate static void 385*0Sstevel@tonic-gate get_match_counts(int *nmust_matchp, 386*0Sstevel@tonic-gate int *nextra_matches_allowedp, 387*0Sstevel@tonic-gate const char *count_stringp) 388*0Sstevel@tonic-gate { 389*0Sstevel@tonic-gate int minimum_match_count; 390*0Sstevel@tonic-gate int maximum_match_count; 391*0Sstevel@tonic-gate 392*0Sstevel@tonic-gate minimum_match_count = 393*0Sstevel@tonic-gate (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); 394*0Sstevel@tonic-gate *nmust_matchp = minimum_match_count; 395*0Sstevel@tonic-gate 396*0Sstevel@tonic-gate count_stringp++; 397*0Sstevel@tonic-gate maximum_match_count = 398*0Sstevel@tonic-gate (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); 399*0Sstevel@tonic-gate if (maximum_match_count == (int)UNLIMITED) { 400*0Sstevel@tonic-gate *nextra_matches_allowedp = (int)UNLIMITED; 401*0Sstevel@tonic-gate } else { 402*0Sstevel@tonic-gate *nextra_matches_allowedp = 403*0Sstevel@tonic-gate maximum_match_count - minimum_match_count; 404*0Sstevel@tonic-gate } 405*0Sstevel@tonic-gate return; 406*0Sstevel@tonic-gate 407*0Sstevel@tonic-gate } /* get_match_counts() */ 408*0Sstevel@tonic-gate 409*0Sstevel@tonic-gate static boolean_t 410*0Sstevel@tonic-gate in_wchar_range(wchar_t test_char, 411*0Sstevel@tonic-gate wchar_t lower_char, 412*0Sstevel@tonic-gate wchar_t upper_char) 413*0Sstevel@tonic-gate { 414*0Sstevel@tonic-gate return (((lower_char <= 0x7f) && (upper_char <= 0x7f) && 415*0Sstevel@tonic-gate (lower_char <= test_char) && (test_char <= upper_char)) || 416*0Sstevel@tonic-gate (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) && 417*0Sstevel@tonic-gate ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) && 418*0Sstevel@tonic-gate (lower_char <= test_char) && (test_char <= upper_char))); 
419*0Sstevel@tonic-gate 420*0Sstevel@tonic-gate } /* in_wchar_range() */ 421*0Sstevel@tonic-gate 422*0Sstevel@tonic-gate static const char * 423*0Sstevel@tonic-gate pop_stringp(void) 424*0Sstevel@tonic-gate { 425*0Sstevel@tonic-gate const char *stringp; 426*0Sstevel@tonic-gate 427*0Sstevel@tonic-gate if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) { 428*0Sstevel@tonic-gate return ((char *)0); 429*0Sstevel@tonic-gate } else { 430*0Sstevel@tonic-gate stringp = *stringp_stackp; 431*0Sstevel@tonic-gate stringp_stackp++; 432*0Sstevel@tonic-gate return (stringp); 433*0Sstevel@tonic-gate } 434*0Sstevel@tonic-gate } 435*0Sstevel@tonic-gate 436*0Sstevel@tonic-gate 437*0Sstevel@tonic-gate static const char * 438*0Sstevel@tonic-gate previous_charp(const char *current_charp) 439*0Sstevel@tonic-gate { 440*0Sstevel@tonic-gate /* 441*0Sstevel@tonic-gate * returns the pointer to the previous character in 442*0Sstevel@tonic-gate * a string of multibyte characters 443*0Sstevel@tonic-gate */ 444*0Sstevel@tonic-gate 445*0Sstevel@tonic-gate const char *prev_cs0 = current_charp - 1; 446*0Sstevel@tonic-gate const char *prev_cs1 = current_charp - eucw1; 447*0Sstevel@tonic-gate const char *prev_cs2 = current_charp - eucw2 - 1; 448*0Sstevel@tonic-gate const char *prev_cs3 = current_charp - eucw3 - 1; 449*0Sstevel@tonic-gate const char *prev_charp; 450*0Sstevel@tonic-gate 451*0Sstevel@tonic-gate if ((unsigned char)*prev_cs0 <= 0x7f) { 452*0Sstevel@tonic-gate prev_charp = prev_cs0; 453*0Sstevel@tonic-gate } else if ((unsigned char)*prev_cs2 == SS2) { 454*0Sstevel@tonic-gate prev_charp = prev_cs2; 455*0Sstevel@tonic-gate } else if ((unsigned char)*prev_cs3 == SS3) { 456*0Sstevel@tonic-gate prev_charp = prev_cs3; 457*0Sstevel@tonic-gate } else { 458*0Sstevel@tonic-gate prev_charp = prev_cs1; 459*0Sstevel@tonic-gate } 460*0Sstevel@tonic-gate return (prev_charp); 461*0Sstevel@tonic-gate 462*0Sstevel@tonic-gate } /* previous_charp() */ 463*0Sstevel@tonic-gate 464*0Sstevel@tonic-gate 
static const char * 465*0Sstevel@tonic-gate push_stringp(const char *stringp) 466*0Sstevel@tonic-gate { 467*0Sstevel@tonic-gate if (stringp_stackp <= &stringp_stack[0]) { 468*0Sstevel@tonic-gate return ((char *)0); 469*0Sstevel@tonic-gate } else { 470*0Sstevel@tonic-gate stringp_stackp--; 471*0Sstevel@tonic-gate *stringp_stackp = stringp; 472*0Sstevel@tonic-gate return (stringp); 473*0Sstevel@tonic-gate } 474*0Sstevel@tonic-gate } 475*0Sstevel@tonic-gate 476*0Sstevel@tonic-gate 477*0Sstevel@tonic-gate static char_test_result_t 478*0Sstevel@tonic-gate test_char_against_ascii_class(char test_char, 479*0Sstevel@tonic-gate const char *classp, 480*0Sstevel@tonic-gate char_test_condition_t test_condition) 481*0Sstevel@tonic-gate { 482*0Sstevel@tonic-gate /* 483*0Sstevel@tonic-gate * tests a character for membership in an ASCII character class compiled 484*0Sstevel@tonic-gate * by the internationalized version of regcmp(); 485*0Sstevel@tonic-gate * 486*0Sstevel@tonic-gate * NOTE: The internationalized version of regcmp() compiles 487*0Sstevel@tonic-gate * the range a-z in an ASCII character class to aTHRUz. 
488*0Sstevel@tonic-gate */ 489*0Sstevel@tonic-gate 490*0Sstevel@tonic-gate int nbytes_to_check; 491*0Sstevel@tonic-gate 492*0Sstevel@tonic-gate nbytes_to_check = (int)*classp; 493*0Sstevel@tonic-gate classp++; 494*0Sstevel@tonic-gate nbytes_to_check--; 495*0Sstevel@tonic-gate 496*0Sstevel@tonic-gate while (nbytes_to_check > 0) { 497*0Sstevel@tonic-gate if (test_char == *classp) { 498*0Sstevel@tonic-gate if (test_condition == IN_CLASS) 499*0Sstevel@tonic-gate return (CONDITION_TRUE); 500*0Sstevel@tonic-gate else 501*0Sstevel@tonic-gate return (CONDITION_FALSE); 502*0Sstevel@tonic-gate } else if (*classp == THRU) { 503*0Sstevel@tonic-gate if ((*(classp - 1) <= test_char) && 504*0Sstevel@tonic-gate (test_char <= *(classp + 1))) { 505*0Sstevel@tonic-gate if (test_condition == IN_CLASS) 506*0Sstevel@tonic-gate return (CONDITION_TRUE); 507*0Sstevel@tonic-gate else 508*0Sstevel@tonic-gate return (CONDITION_FALSE); 509*0Sstevel@tonic-gate } else { 510*0Sstevel@tonic-gate classp += 2; 511*0Sstevel@tonic-gate nbytes_to_check -= 2; 512*0Sstevel@tonic-gate } 513*0Sstevel@tonic-gate } else { 514*0Sstevel@tonic-gate classp++; 515*0Sstevel@tonic-gate nbytes_to_check--; 516*0Sstevel@tonic-gate } 517*0Sstevel@tonic-gate } 518*0Sstevel@tonic-gate if (test_condition == NOT_IN_CLASS) { 519*0Sstevel@tonic-gate return (CONDITION_TRUE); 520*0Sstevel@tonic-gate } else { 521*0Sstevel@tonic-gate return (CONDITION_FALSE); 522*0Sstevel@tonic-gate } 523*0Sstevel@tonic-gate } /* test_char_against_ascii_class() */ 524*0Sstevel@tonic-gate 525*0Sstevel@tonic-gate static char_test_result_t 526*0Sstevel@tonic-gate test_char_against_multibyte_class(wchar_t test_char, 527*0Sstevel@tonic-gate const char *classp, 528*0Sstevel@tonic-gate char_test_condition_t test_condition) 529*0Sstevel@tonic-gate { 530*0Sstevel@tonic-gate /* 531*0Sstevel@tonic-gate * tests a character for membership in a multibyte character class; 532*0Sstevel@tonic-gate * 533*0Sstevel@tonic-gate * NOTE: The range a-z in a multibyte 
character class compiles to 534*0Sstevel@tonic-gate * aTHRUz. 535*0Sstevel@tonic-gate */ 536*0Sstevel@tonic-gate 537*0Sstevel@tonic-gate int char_size; 538*0Sstevel@tonic-gate wchar_t current_char; 539*0Sstevel@tonic-gate int nbytes_to_check; 540*0Sstevel@tonic-gate wchar_t previous_char; 541*0Sstevel@tonic-gate 542*0Sstevel@tonic-gate nbytes_to_check = (int)*classp; 543*0Sstevel@tonic-gate classp++; 544*0Sstevel@tonic-gate nbytes_to_check--; 545*0Sstevel@tonic-gate 546*0Sstevel@tonic-gate char_size = get_wchar(¤t_char, classp); 547*0Sstevel@tonic-gate if (char_size <= 0) { 548*0Sstevel@tonic-gate return (CHAR_TEST_ERROR); 549*0Sstevel@tonic-gate } else if (test_char == current_char) { 550*0Sstevel@tonic-gate if (test_condition == IN_CLASS) { 551*0Sstevel@tonic-gate return (CONDITION_TRUE); 552*0Sstevel@tonic-gate } else { 553*0Sstevel@tonic-gate return (CONDITION_FALSE); 554*0Sstevel@tonic-gate } 555*0Sstevel@tonic-gate } else { 556*0Sstevel@tonic-gate classp += char_size; 557*0Sstevel@tonic-gate nbytes_to_check -= char_size; 558*0Sstevel@tonic-gate } 559*0Sstevel@tonic-gate 560*0Sstevel@tonic-gate while (nbytes_to_check > 0) { 561*0Sstevel@tonic-gate previous_char = current_char; 562*0Sstevel@tonic-gate char_size = get_wchar(¤t_char, classp); 563*0Sstevel@tonic-gate if (char_size <= 0) { 564*0Sstevel@tonic-gate return (CHAR_TEST_ERROR); 565*0Sstevel@tonic-gate } else if (test_char == current_char) { 566*0Sstevel@tonic-gate if (test_condition == IN_CLASS) { 567*0Sstevel@tonic-gate return (CONDITION_TRUE); 568*0Sstevel@tonic-gate } else { 569*0Sstevel@tonic-gate return (CONDITION_FALSE); 570*0Sstevel@tonic-gate } 571*0Sstevel@tonic-gate } else if (current_char == THRU) { 572*0Sstevel@tonic-gate classp += char_size; 573*0Sstevel@tonic-gate nbytes_to_check -= char_size; 574*0Sstevel@tonic-gate char_size = get_wchar(¤t_char, classp); 575*0Sstevel@tonic-gate if (char_size <= 0) { 576*0Sstevel@tonic-gate return (CHAR_TEST_ERROR); 577*0Sstevel@tonic-gate } else if 
(in_wchar_range(test_char, previous_char, 578*0Sstevel@tonic-gate current_char)) { 579*0Sstevel@tonic-gate if (test_condition == IN_CLASS) { 580*0Sstevel@tonic-gate return (CONDITION_TRUE); 581*0Sstevel@tonic-gate } else { 582*0Sstevel@tonic-gate return (CONDITION_FALSE); 583*0Sstevel@tonic-gate } 584*0Sstevel@tonic-gate } else { 585*0Sstevel@tonic-gate classp += char_size; 586*0Sstevel@tonic-gate nbytes_to_check -= char_size; 587*0Sstevel@tonic-gate } 588*0Sstevel@tonic-gate } else { 589*0Sstevel@tonic-gate classp += char_size; 590*0Sstevel@tonic-gate nbytes_to_check -= char_size; 591*0Sstevel@tonic-gate } 592*0Sstevel@tonic-gate } 593*0Sstevel@tonic-gate if (test_condition == NOT_IN_CLASS) { 594*0Sstevel@tonic-gate return (CONDITION_TRUE); 595*0Sstevel@tonic-gate } else { 596*0Sstevel@tonic-gate return (CONDITION_FALSE); 597*0Sstevel@tonic-gate } 598*0Sstevel@tonic-gate } /* test_char_against_multibyte_class() */ 599*0Sstevel@tonic-gate 600*0Sstevel@tonic-gate 601*0Sstevel@tonic-gate /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 602*0Sstevel@tonic-gate 603*0Sstevel@tonic-gate static char_test_result_t 604*0Sstevel@tonic-gate test_char_against_old_ascii_class(char test_char, 605*0Sstevel@tonic-gate const char *classp, 606*0Sstevel@tonic-gate char_test_condition_t test_condition) 607*0Sstevel@tonic-gate { 608*0Sstevel@tonic-gate /* 609*0Sstevel@tonic-gate * tests a character for membership in an ASCII character class compiled 610*0Sstevel@tonic-gate * by the ASCII version of regcmp(); 611*0Sstevel@tonic-gate * 612*0Sstevel@tonic-gate * NOTE: ASCII versions of regcmp() compile the range a-z in an 613*0Sstevel@tonic-gate * ASCII character class to THRUaz. The internationalized 614*0Sstevel@tonic-gate * version compiles the same range to aTHRUz. 
615*0Sstevel@tonic-gate */ 616*0Sstevel@tonic-gate 617*0Sstevel@tonic-gate int nbytes_to_check; 618*0Sstevel@tonic-gate 619*0Sstevel@tonic-gate nbytes_to_check = (int)*classp; 620*0Sstevel@tonic-gate classp++; 621*0Sstevel@tonic-gate nbytes_to_check--; 622*0Sstevel@tonic-gate 623*0Sstevel@tonic-gate while (nbytes_to_check > 0) { 624*0Sstevel@tonic-gate if (test_char == *classp) { 625*0Sstevel@tonic-gate if (test_condition == IN_CLASS) { 626*0Sstevel@tonic-gate return (CONDITION_TRUE); 627*0Sstevel@tonic-gate } else { 628*0Sstevel@tonic-gate return (CONDITION_FALSE); 629*0Sstevel@tonic-gate } 630*0Sstevel@tonic-gate } else if (*classp == THRU) { 631*0Sstevel@tonic-gate if ((*(classp + 1) <= test_char) && 632*0Sstevel@tonic-gate (test_char <= *(classp + 2))) { 633*0Sstevel@tonic-gate if (test_condition == IN_CLASS) { 634*0Sstevel@tonic-gate return (CONDITION_TRUE); 635*0Sstevel@tonic-gate } else { 636*0Sstevel@tonic-gate return (CONDITION_FALSE); 637*0Sstevel@tonic-gate } 638*0Sstevel@tonic-gate } else { 639*0Sstevel@tonic-gate classp += 3; 640*0Sstevel@tonic-gate nbytes_to_check -= 3; 641*0Sstevel@tonic-gate } 642*0Sstevel@tonic-gate } else { 643*0Sstevel@tonic-gate classp++; 644*0Sstevel@tonic-gate nbytes_to_check--; 645*0Sstevel@tonic-gate } 646*0Sstevel@tonic-gate } 647*0Sstevel@tonic-gate if (test_condition == NOT_IN_CLASS) { 648*0Sstevel@tonic-gate return (CONDITION_TRUE); 649*0Sstevel@tonic-gate } else { 650*0Sstevel@tonic-gate return (CONDITION_FALSE); 651*0Sstevel@tonic-gate } 652*0Sstevel@tonic-gate } /* test_char_against_old_ascii_class() */ 653*0Sstevel@tonic-gate 654*0Sstevel@tonic-gate static const char * 655*0Sstevel@tonic-gate test_repeated_ascii_char(const char *repeat_startp, 656*0Sstevel@tonic-gate const char *stringp, 657*0Sstevel@tonic-gate const char *regexp) 658*0Sstevel@tonic-gate { 659*0Sstevel@tonic-gate const char *end_of_matchp; 660*0Sstevel@tonic-gate 661*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 
662*0Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && 663*0Sstevel@tonic-gate (stringp > repeat_startp)) { 664*0Sstevel@tonic-gate stringp--; 665*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 666*0Sstevel@tonic-gate } 667*0Sstevel@tonic-gate return (end_of_matchp); 668*0Sstevel@tonic-gate } 669*0Sstevel@tonic-gate 670*0Sstevel@tonic-gate static const char * 671*0Sstevel@tonic-gate test_repeated_multibyte_char(const char *repeat_startp, 672*0Sstevel@tonic-gate const char *stringp, 673*0Sstevel@tonic-gate const char *regexp) 674*0Sstevel@tonic-gate { 675*0Sstevel@tonic-gate const char *end_of_matchp; 676*0Sstevel@tonic-gate 677*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 678*0Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && 679*0Sstevel@tonic-gate (stringp > repeat_startp)) { 680*0Sstevel@tonic-gate stringp = previous_charp(stringp); 681*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 682*0Sstevel@tonic-gate } 683*0Sstevel@tonic-gate return (end_of_matchp); 684*0Sstevel@tonic-gate } 685*0Sstevel@tonic-gate 686*0Sstevel@tonic-gate static const char * 687*0Sstevel@tonic-gate test_repeated_group(const char *repeat_startp, 688*0Sstevel@tonic-gate const char *stringp, 689*0Sstevel@tonic-gate const char *regexp) 690*0Sstevel@tonic-gate { 691*0Sstevel@tonic-gate const char *end_of_matchp; 692*0Sstevel@tonic-gate 693*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 694*0Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && 695*0Sstevel@tonic-gate (stringp > repeat_startp)) { 696*0Sstevel@tonic-gate stringp = pop_stringp(); 697*0Sstevel@tonic-gate if (stringp == (char *)0) { 698*0Sstevel@tonic-gate return ((char *)0); 699*0Sstevel@tonic-gate } 700*0Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 701*0Sstevel@tonic-gate } 702*0Sstevel@tonic-gate return (end_of_matchp); 703*0Sstevel@tonic-gate } 704*0Sstevel@tonic-gate 705*0Sstevel@tonic-gate static const char * 
706*0Sstevel@tonic-gate test_string(const char *stringp, 707*0Sstevel@tonic-gate const char *regexp) 708*0Sstevel@tonic-gate { 709*0Sstevel@tonic-gate /* 710*0Sstevel@tonic-gate * returns a pointer to the first character following the first 711*0Sstevel@tonic-gate * substring of the string addressed by stringp that matches 712*0Sstevel@tonic-gate * the compiled regular expression addressed by regexp 713*0Sstevel@tonic-gate */ 714*0Sstevel@tonic-gate 715*0Sstevel@tonic-gate unsigned int group_length; 716*0Sstevel@tonic-gate int nextra_matches_allowed; 717*0Sstevel@tonic-gate int nmust_match; 718*0Sstevel@tonic-gate wchar_t regex_wchar; 719*0Sstevel@tonic-gate int regex_char_size; 720*0Sstevel@tonic-gate const char *repeat_startp; 721*0Sstevel@tonic-gate unsigned int return_argn; 722*0Sstevel@tonic-gate wchar_t string_wchar; 723*0Sstevel@tonic-gate int string_char_size; 724*0Sstevel@tonic-gate unsigned int substringn; 725*0Sstevel@tonic-gate char_test_condition_t test_condition; 726*0Sstevel@tonic-gate const char *test_stringp; 727*0Sstevel@tonic-gate 728*0Sstevel@tonic-gate for (;;) { 729*0Sstevel@tonic-gate 730*0Sstevel@tonic-gate /* 731*0Sstevel@tonic-gate * Exit the loop via a return whenever there's a match 732*0Sstevel@tonic-gate * or it's clear that there can be no match. 733*0Sstevel@tonic-gate */ 734*0Sstevel@tonic-gate 735*0Sstevel@tonic-gate switch ((int)*regexp) { 736*0Sstevel@tonic-gate 737*0Sstevel@tonic-gate /* 738*0Sstevel@tonic-gate * No fall-through. 
739*0Sstevel@tonic-gate * Each case ends with either a return or with stringp 740*0Sstevel@tonic-gate * addressing the next character to be tested and regexp 741*0Sstevel@tonic-gate * addressing the next compiled regular expression 742*0Sstevel@tonic-gate * 743*0Sstevel@tonic-gate * NOTE: The comments for each case give the meaning 744*0Sstevel@tonic-gate * of the compiled regular expression decoded by the case 745*0Sstevel@tonic-gate * and the character string that the compiled regular 746*0Sstevel@tonic-gate * expression uses to encode the case. Each single 747*0Sstevel@tonic-gate * character encoded in the compiled regular expression 748*0Sstevel@tonic-gate * is shown enclosed in angle brackets (<>). Each 749*0Sstevel@tonic-gate * compiled regular expression begins with a marker 750*0Sstevel@tonic-gate * character which is shown as a named constant 751*0Sstevel@tonic-gate * (e.g. <ASCII_CHAR>). Character constants are shown 752*0Sstevel@tonic-gate * enclosed in single quotes (e.g. <'$'>). All other 753*0Sstevel@tonic-gate * single characters encoded in the compiled regular 754*0Sstevel@tonic-gate * expression are shown as lower case variable names 755*0Sstevel@tonic-gate * (e.g. <ascii_char> or <multibyte_char>). Multicharacter 756*0Sstevel@tonic-gate * strings encoded in the compiled regular expression 757*0Sstevel@tonic-gate * are shown as variable names followed by ellipses 758*0Sstevel@tonic-gate * (e.g. <compiled_regex...>). 
759*0Sstevel@tonic-gate */ 760*0Sstevel@tonic-gate 761*0Sstevel@tonic-gate case ASCII_CHAR: /* single ASCII char */ 762*0Sstevel@tonic-gate 763*0Sstevel@tonic-gate /* encoded as <ASCII_CHAR><ascii_char> */ 764*0Sstevel@tonic-gate 765*0Sstevel@tonic-gate regexp++; 766*0Sstevel@tonic-gate if (*regexp == *stringp) { 767*0Sstevel@tonic-gate regexp++; 768*0Sstevel@tonic-gate stringp++; 769*0Sstevel@tonic-gate } else { 770*0Sstevel@tonic-gate return ((char *)0); 771*0Sstevel@tonic-gate } 772*0Sstevel@tonic-gate break; /* end case ASCII_CHAR */ 773*0Sstevel@tonic-gate 774*0Sstevel@tonic-gate case MULTIBYTE_CHAR: /* single multibyte char */ 775*0Sstevel@tonic-gate 776*0Sstevel@tonic-gate /* encoded as <MULTIBYTE_CHAR><multibyte_char> */ 777*0Sstevel@tonic-gate 778*0Sstevel@tonic-gate regexp++; 779*0Sstevel@tonic-gate regex_char_size = get_wchar(®ex_wchar, regexp); 780*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 781*0Sstevel@tonic-gate if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { 782*0Sstevel@tonic-gate return ((char *)0); 783*0Sstevel@tonic-gate } else { 784*0Sstevel@tonic-gate regexp += regex_char_size; 785*0Sstevel@tonic-gate stringp += string_char_size; 786*0Sstevel@tonic-gate } 787*0Sstevel@tonic-gate break; /* end case MULTIBYTE_CHAR */ 788*0Sstevel@tonic-gate 789*0Sstevel@tonic-gate case ANY_CHAR: /* any single ASCII or multibyte char */ 790*0Sstevel@tonic-gate 791*0Sstevel@tonic-gate /* encoded as <ANY_CHAR> */ 792*0Sstevel@tonic-gate 793*0Sstevel@tonic-gate if (!multibyte) { 794*0Sstevel@tonic-gate if (*stringp == '\0') { 795*0Sstevel@tonic-gate return ((char *)0); 796*0Sstevel@tonic-gate } else { 797*0Sstevel@tonic-gate regexp++; 798*0Sstevel@tonic-gate stringp++; 799*0Sstevel@tonic-gate } 800*0Sstevel@tonic-gate } else { 801*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 802*0Sstevel@tonic-gate if (string_char_size <= 0) { 803*0Sstevel@tonic-gate return ((char *)0); 
804*0Sstevel@tonic-gate } else { 805*0Sstevel@tonic-gate regexp++; 806*0Sstevel@tonic-gate stringp += string_char_size; 807*0Sstevel@tonic-gate } 808*0Sstevel@tonic-gate } 809*0Sstevel@tonic-gate break; /* end case ANY_CHAR */ 810*0Sstevel@tonic-gate 811*0Sstevel@tonic-gate case IN_ASCII_CHAR_CLASS: /* [.....] */ 812*0Sstevel@tonic-gate case NOT_IN_ASCII_CHAR_CLASS: 813*0Sstevel@tonic-gate 814*0Sstevel@tonic-gate /* 815*0Sstevel@tonic-gate * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...> 816*0Sstevel@tonic-gate * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...> 817*0Sstevel@tonic-gate * 818*0Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 819*0Sstevel@tonic-gate */ 820*0Sstevel@tonic-gate 821*0Sstevel@tonic-gate if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) { 822*0Sstevel@tonic-gate test_condition = IN_CLASS; 823*0Sstevel@tonic-gate } else { 824*0Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 825*0Sstevel@tonic-gate } 826*0Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 827*0Sstevel@tonic-gate 828*0Sstevel@tonic-gate if ((*stringp != '\0') && 829*0Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 830*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 831*0Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 832*0Sstevel@tonic-gate stringp++; 833*0Sstevel@tonic-gate } else { 834*0Sstevel@tonic-gate return ((char *)0); 835*0Sstevel@tonic-gate } 836*0Sstevel@tonic-gate break; /* end case IN_ASCII_CHAR_CLASS */ 837*0Sstevel@tonic-gate 838*0Sstevel@tonic-gate case IN_MULTIBYTE_CHAR_CLASS: /* [....] 
*/ 839*0Sstevel@tonic-gate case NOT_IN_MULTIBYTE_CHAR_CLASS: 840*0Sstevel@tonic-gate 841*0Sstevel@tonic-gate /* 842*0Sstevel@tonic-gate * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 843*0Sstevel@tonic-gate * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 844*0Sstevel@tonic-gate * 845*0Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 846*0Sstevel@tonic-gate */ 847*0Sstevel@tonic-gate 848*0Sstevel@tonic-gate if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) { 849*0Sstevel@tonic-gate test_condition = IN_CLASS; 850*0Sstevel@tonic-gate } else { 851*0Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 852*0Sstevel@tonic-gate } 853*0Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 854*0Sstevel@tonic-gate 855*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 856*0Sstevel@tonic-gate if ((string_char_size > 0) && 857*0Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, regexp, 858*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 859*0Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 860*0Sstevel@tonic-gate stringp += string_char_size; 861*0Sstevel@tonic-gate } else { 862*0Sstevel@tonic-gate return ((char *)0); 863*0Sstevel@tonic-gate } 864*0Sstevel@tonic-gate break; /* end case IN_MULTIBYTE_CHAR_CLASS */ 865*0Sstevel@tonic-gate 866*0Sstevel@tonic-gate case IN_OLD_ASCII_CHAR_CLASS: /* [...] 
*/ 867*0Sstevel@tonic-gate case NOT_IN_OLD_ASCII_CHAR_CLASS: 868*0Sstevel@tonic-gate 869*0Sstevel@tonic-gate /* 870*0Sstevel@tonic-gate * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 871*0Sstevel@tonic-gate * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 872*0Sstevel@tonic-gate * 873*0Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 874*0Sstevel@tonic-gate */ 875*0Sstevel@tonic-gate 876*0Sstevel@tonic-gate if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) { 877*0Sstevel@tonic-gate test_condition = IN_CLASS; 878*0Sstevel@tonic-gate } else { 879*0Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 880*0Sstevel@tonic-gate } 881*0Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 882*0Sstevel@tonic-gate 883*0Sstevel@tonic-gate if ((*stringp != '\0') && 884*0Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 885*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 886*0Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 887*0Sstevel@tonic-gate stringp++; 888*0Sstevel@tonic-gate } else { 889*0Sstevel@tonic-gate return ((char *)0); 890*0Sstevel@tonic-gate } 891*0Sstevel@tonic-gate break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */ 892*0Sstevel@tonic-gate 893*0Sstevel@tonic-gate case SIMPLE_GROUP: /* (.....) */ 894*0Sstevel@tonic-gate 895*0Sstevel@tonic-gate /* encoded as <SIMPLE_GROUP><group_length> */ 896*0Sstevel@tonic-gate 897*0Sstevel@tonic-gate regexp += 2; 898*0Sstevel@tonic-gate break; /* end case SIMPLE_GROUP */ 899*0Sstevel@tonic-gate 900*0Sstevel@tonic-gate case END_GROUP: /* (.....) 
*/ 901*0Sstevel@tonic-gate 902*0Sstevel@tonic-gate /* encoded as <END_GROUP><groupn> */ 903*0Sstevel@tonic-gate 904*0Sstevel@tonic-gate regexp += 2; 905*0Sstevel@tonic-gate break; /* end case END_GROUP */ 906*0Sstevel@tonic-gate 907*0Sstevel@tonic-gate case SAVED_GROUP: /* (.....)$0-9 */ 908*0Sstevel@tonic-gate 909*0Sstevel@tonic-gate /* encoded as <SAVED_GROUP><substringn> */ 910*0Sstevel@tonic-gate 911*0Sstevel@tonic-gate regexp++; 912*0Sstevel@tonic-gate substringn = (unsigned int)*regexp; 913*0Sstevel@tonic-gate if (substringn >= NSUBSTRINGS) 914*0Sstevel@tonic-gate return ((char *)0); 915*0Sstevel@tonic-gate substring_startp[substringn] = stringp; 916*0Sstevel@tonic-gate regexp++; 917*0Sstevel@tonic-gate break; /* end case SAVED_GROUP */ 918*0Sstevel@tonic-gate 919*0Sstevel@tonic-gate case END_SAVED_GROUP: /* (.....)$0-9 */ 920*0Sstevel@tonic-gate 921*0Sstevel@tonic-gate /* 922*0Sstevel@tonic-gate * encoded as <END_SAVED_GROUP><substringn>\ 923*0Sstevel@tonic-gate * <return_arg_number[substringn]> 924*0Sstevel@tonic-gate */ 925*0Sstevel@tonic-gate 926*0Sstevel@tonic-gate regexp++; 927*0Sstevel@tonic-gate substringn = (unsigned int)*regexp; 928*0Sstevel@tonic-gate if (substringn >= NSUBSTRINGS) 929*0Sstevel@tonic-gate return ((char *)0); 930*0Sstevel@tonic-gate substring_endp[substringn] = stringp; 931*0Sstevel@tonic-gate regexp++; 932*0Sstevel@tonic-gate return_argn = (unsigned int)*regexp; 933*0Sstevel@tonic-gate if (return_argn >= NSUBSTRINGS) 934*0Sstevel@tonic-gate return ((char *)0); 935*0Sstevel@tonic-gate return_arg_number[substringn] = return_argn; 936*0Sstevel@tonic-gate regexp++; 937*0Sstevel@tonic-gate break; /* end case END_SAVED_GROUP */ 938*0Sstevel@tonic-gate 939*0Sstevel@tonic-gate case ASCII_CHAR|ZERO_OR_MORE: /* char* */ 940*0Sstevel@tonic-gate 941*0Sstevel@tonic-gate /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */ 942*0Sstevel@tonic-gate 943*0Sstevel@tonic-gate regexp++; 944*0Sstevel@tonic-gate repeat_startp = stringp; 
945*0Sstevel@tonic-gate while (*stringp == *regexp) { 946*0Sstevel@tonic-gate stringp++; 947*0Sstevel@tonic-gate } 948*0Sstevel@tonic-gate regexp++; 949*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, 950*0Sstevel@tonic-gate stringp, regexp)); 951*0Sstevel@tonic-gate 952*0Sstevel@tonic-gate /* end case ASCII_CHAR|ZERO_OR_MORE */ 953*0Sstevel@tonic-gate 954*0Sstevel@tonic-gate case ASCII_CHAR|ONE_OR_MORE: /* char+ */ 955*0Sstevel@tonic-gate 956*0Sstevel@tonic-gate /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */ 957*0Sstevel@tonic-gate 958*0Sstevel@tonic-gate regexp++; 959*0Sstevel@tonic-gate if (*stringp != *regexp) { 960*0Sstevel@tonic-gate return ((char *)0); 961*0Sstevel@tonic-gate } else { 962*0Sstevel@tonic-gate stringp++; 963*0Sstevel@tonic-gate repeat_startp = stringp; 964*0Sstevel@tonic-gate while (*stringp == *regexp) { 965*0Sstevel@tonic-gate stringp++; 966*0Sstevel@tonic-gate } 967*0Sstevel@tonic-gate regexp++; 968*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 969*0Sstevel@tonic-gate regexp)); 970*0Sstevel@tonic-gate } 971*0Sstevel@tonic-gate /* end case ASCII_CHAR|ONE_OR_MORE */ 972*0Sstevel@tonic-gate 973*0Sstevel@tonic-gate case ASCII_CHAR|COUNT: /* char{min_count,max_count} */ 974*0Sstevel@tonic-gate 975*0Sstevel@tonic-gate /* 976*0Sstevel@tonic-gate * encoded as <ASCII_CHAR|COUNT><ascii_char>\ 977*0Sstevel@tonic-gate * <minimum_match_count><maximum_match_count> 978*0Sstevel@tonic-gate */ 979*0Sstevel@tonic-gate 980*0Sstevel@tonic-gate regexp++; 981*0Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 982*0Sstevel@tonic-gate regexp + 1); 983*0Sstevel@tonic-gate while ((*stringp == *regexp) && (nmust_match > 0)) { 984*0Sstevel@tonic-gate nmust_match--; 985*0Sstevel@tonic-gate stringp++; 986*0Sstevel@tonic-gate } 987*0Sstevel@tonic-gate if (nmust_match > 0) { 988*0Sstevel@tonic-gate return ((char *)0); 989*0Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 
990*0Sstevel@tonic-gate repeat_startp = stringp; 991*0Sstevel@tonic-gate while (*stringp == *regexp) { 992*0Sstevel@tonic-gate stringp++; 993*0Sstevel@tonic-gate } 994*0Sstevel@tonic-gate regexp += 3; 995*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 996*0Sstevel@tonic-gate regexp)); 997*0Sstevel@tonic-gate } else { 998*0Sstevel@tonic-gate repeat_startp = stringp; 999*0Sstevel@tonic-gate while ((*stringp == *regexp) && 1000*0Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 1001*0Sstevel@tonic-gate nextra_matches_allowed--; 1002*0Sstevel@tonic-gate stringp++; 1003*0Sstevel@tonic-gate } 1004*0Sstevel@tonic-gate regexp += 3; 1005*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 1006*0Sstevel@tonic-gate regexp)); 1007*0Sstevel@tonic-gate } 1008*0Sstevel@tonic-gate /* end case ASCII_CHAR|COUNT */ 1009*0Sstevel@tonic-gate 1010*0Sstevel@tonic-gate case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */ 1011*0Sstevel@tonic-gate 1012*0Sstevel@tonic-gate /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */ 1013*0Sstevel@tonic-gate 1014*0Sstevel@tonic-gate regexp++; 1015*0Sstevel@tonic-gate regex_char_size = get_wchar(®ex_wchar, regexp); 1016*0Sstevel@tonic-gate repeat_startp = stringp; 1017*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1018*0Sstevel@tonic-gate while ((string_char_size > 0) && 1019*0Sstevel@tonic-gate (string_wchar == regex_wchar)) { 1020*0Sstevel@tonic-gate stringp += string_char_size; 1021*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1022*0Sstevel@tonic-gate } 1023*0Sstevel@tonic-gate regexp += regex_char_size; 1024*0Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 1025*0Sstevel@tonic-gate regexp)); 1026*0Sstevel@tonic-gate 1027*0Sstevel@tonic-gate /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */ 1028*0Sstevel@tonic-gate 1029*0Sstevel@tonic-gate case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */ 1030*0Sstevel@tonic-gate 
1031*0Sstevel@tonic-gate /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */ 1032*0Sstevel@tonic-gate 1033*0Sstevel@tonic-gate regexp++; 1034*0Sstevel@tonic-gate regex_char_size = get_wchar(®ex_wchar, regexp); 1035*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1036*0Sstevel@tonic-gate if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { 1037*0Sstevel@tonic-gate return ((char *)0); 1038*0Sstevel@tonic-gate } else { 1039*0Sstevel@tonic-gate stringp += string_char_size; 1040*0Sstevel@tonic-gate repeat_startp = stringp; 1041*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1042*0Sstevel@tonic-gate while ((string_char_size > 0) && 1043*0Sstevel@tonic-gate (string_wchar == regex_wchar)) { 1044*0Sstevel@tonic-gate stringp += string_char_size; 1045*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1046*0Sstevel@tonic-gate } 1047*0Sstevel@tonic-gate regexp += regex_char_size; 1048*0Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 1049*0Sstevel@tonic-gate regexp)); 1050*0Sstevel@tonic-gate } 1051*0Sstevel@tonic-gate /* end case MULTIBYTE_CHAR|ONE_OR_MORE */ 1052*0Sstevel@tonic-gate 1053*0Sstevel@tonic-gate case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */ 1054*0Sstevel@tonic-gate 1055*0Sstevel@tonic-gate /* 1056*0Sstevel@tonic-gate * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\ 1057*0Sstevel@tonic-gate * <minimum_match_count><maximum_match_count> 1058*0Sstevel@tonic-gate */ 1059*0Sstevel@tonic-gate 1060*0Sstevel@tonic-gate regexp++; 1061*0Sstevel@tonic-gate regex_char_size = get_wchar(®ex_wchar, regexp); 1062*0Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 1063*0Sstevel@tonic-gate regexp + regex_char_size); 1064*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1065*0Sstevel@tonic-gate while ((string_char_size > 0) && 1066*0Sstevel@tonic-gate (string_wchar == regex_wchar) && 
1067*0Sstevel@tonic-gate (nmust_match > 0)) { 1068*0Sstevel@tonic-gate 1069*0Sstevel@tonic-gate nmust_match--; 1070*0Sstevel@tonic-gate stringp += string_char_size; 1071*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1072*0Sstevel@tonic-gate } 1073*0Sstevel@tonic-gate if (nmust_match > 0) { 1074*0Sstevel@tonic-gate return ((char *)0); 1075*0Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 1076*0Sstevel@tonic-gate repeat_startp = stringp; 1077*0Sstevel@tonic-gate while ((string_char_size > 0) && 1078*0Sstevel@tonic-gate (string_wchar == regex_wchar)) { 1079*0Sstevel@tonic-gate stringp += string_char_size; 1080*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1081*0Sstevel@tonic-gate } 1082*0Sstevel@tonic-gate regexp += regex_char_size + 2; 1083*0Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 1084*0Sstevel@tonic-gate regexp)); 1085*0Sstevel@tonic-gate } else { 1086*0Sstevel@tonic-gate repeat_startp = stringp; 1087*0Sstevel@tonic-gate while ((string_char_size > 0) && 1088*0Sstevel@tonic-gate (string_wchar == regex_wchar) && 1089*0Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 1090*0Sstevel@tonic-gate nextra_matches_allowed--; 1091*0Sstevel@tonic-gate stringp += string_char_size; 1092*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1093*0Sstevel@tonic-gate } 1094*0Sstevel@tonic-gate regexp += regex_char_size + 2; 1095*0Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 1096*0Sstevel@tonic-gate regexp)); 1097*0Sstevel@tonic-gate } 1098*0Sstevel@tonic-gate /* end case MULTIBYTE_CHAR|COUNT */ 1099*0Sstevel@tonic-gate 1100*0Sstevel@tonic-gate case ANY_CHAR|ZERO_OR_MORE: /* .* */ 1101*0Sstevel@tonic-gate 1102*0Sstevel@tonic-gate /* encoded as <ANY_CHAR|ZERO_OR_MORE> */ 1103*0Sstevel@tonic-gate 1104*0Sstevel@tonic-gate repeat_startp = stringp; 1105*0Sstevel@tonic-gate if (!multibyte) { 1106*0Sstevel@tonic-gate 
while (*stringp != '\0') { 1107*0Sstevel@tonic-gate stringp++; 1108*0Sstevel@tonic-gate } 1109*0Sstevel@tonic-gate regexp++; 1110*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 1111*0Sstevel@tonic-gate regexp)); 1112*0Sstevel@tonic-gate } else { 1113*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1114*0Sstevel@tonic-gate while (string_char_size > 0) { 1115*0Sstevel@tonic-gate stringp += string_char_size; 1116*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1117*0Sstevel@tonic-gate } 1118*0Sstevel@tonic-gate regexp++; 1119*0Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 1120*0Sstevel@tonic-gate regexp)); 1121*0Sstevel@tonic-gate } 1122*0Sstevel@tonic-gate /* end case <ANY_CHAR|ZERO_OR_MORE> */ 1123*0Sstevel@tonic-gate 1124*0Sstevel@tonic-gate case ANY_CHAR|ONE_OR_MORE: /* .+ */ 1125*0Sstevel@tonic-gate 1126*0Sstevel@tonic-gate /* encoded as <ANY_CHAR|ONE_OR_MORE> */ 1127*0Sstevel@tonic-gate 1128*0Sstevel@tonic-gate if (!multibyte) { 1129*0Sstevel@tonic-gate if (*stringp == '\0') { 1130*0Sstevel@tonic-gate return ((char *)0); 1131*0Sstevel@tonic-gate } else { 1132*0Sstevel@tonic-gate stringp++; 1133*0Sstevel@tonic-gate repeat_startp = stringp; 1134*0Sstevel@tonic-gate while (*stringp != '\0') { 1135*0Sstevel@tonic-gate stringp++; 1136*0Sstevel@tonic-gate } 1137*0Sstevel@tonic-gate regexp++; 1138*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 1139*0Sstevel@tonic-gate regexp)); 1140*0Sstevel@tonic-gate } 1141*0Sstevel@tonic-gate } else { 1142*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1143*0Sstevel@tonic-gate if (string_char_size <= 0) { 1144*0Sstevel@tonic-gate return ((char *)0); 1145*0Sstevel@tonic-gate } else { 1146*0Sstevel@tonic-gate stringp += string_char_size; 1147*0Sstevel@tonic-gate repeat_startp = stringp; 1148*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 
1149*0Sstevel@tonic-gate while (string_char_size > 0) { 1150*0Sstevel@tonic-gate stringp += string_char_size; 1151*0Sstevel@tonic-gate string_char_size = 1152*0Sstevel@tonic-gate get_wchar(&string_wchar, stringp); 1153*0Sstevel@tonic-gate } 1154*0Sstevel@tonic-gate regexp++; 1155*0Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, 1156*0Sstevel@tonic-gate stringp, regexp)); 1157*0Sstevel@tonic-gate } 1158*0Sstevel@tonic-gate } 1159*0Sstevel@tonic-gate /* end case <ANY_CHAR|ONE_OR_MORE> */ 1160*0Sstevel@tonic-gate 1161*0Sstevel@tonic-gate case ANY_CHAR|COUNT: /* .{min_count,max_count} */ 1162*0Sstevel@tonic-gate 1163*0Sstevel@tonic-gate /* 1164*0Sstevel@tonic-gate * encoded as <ANY_CHAR|COUNT>\ 1165*0Sstevel@tonic-gate * <minimum_match_count><maximum_match_count> 1166*0Sstevel@tonic-gate */ 1167*0Sstevel@tonic-gate 1168*0Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 1169*0Sstevel@tonic-gate regexp + 1); 1170*0Sstevel@tonic-gate if (!multibyte) { 1171*0Sstevel@tonic-gate while ((*stringp != '\0') && (nmust_match > 0)) { 1172*0Sstevel@tonic-gate nmust_match--; 1173*0Sstevel@tonic-gate stringp++; 1174*0Sstevel@tonic-gate } 1175*0Sstevel@tonic-gate if (nmust_match > 0) { 1176*0Sstevel@tonic-gate return ((char *)0); 1177*0Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 1178*0Sstevel@tonic-gate repeat_startp = stringp; 1179*0Sstevel@tonic-gate while (*stringp != '\0') { 1180*0Sstevel@tonic-gate stringp++; 1181*0Sstevel@tonic-gate } 1182*0Sstevel@tonic-gate regexp += 3; 1183*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 1184*0Sstevel@tonic-gate regexp)); 1185*0Sstevel@tonic-gate } else { 1186*0Sstevel@tonic-gate repeat_startp = stringp; 1187*0Sstevel@tonic-gate while ((*stringp != '\0') && 1188*0Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 1189*0Sstevel@tonic-gate nextra_matches_allowed--; 1190*0Sstevel@tonic-gate stringp++; 1191*0Sstevel@tonic-gate } 
1192*0Sstevel@tonic-gate regexp += 3; 1193*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 1194*0Sstevel@tonic-gate regexp)); 1195*0Sstevel@tonic-gate } 1196*0Sstevel@tonic-gate } else { /* multibyte character */ 1197*0Sstevel@tonic-gate 1198*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1199*0Sstevel@tonic-gate while ((string_char_size > 0) && (nmust_match > 0)) { 1200*0Sstevel@tonic-gate nmust_match--; 1201*0Sstevel@tonic-gate stringp += string_char_size; 1202*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1203*0Sstevel@tonic-gate } 1204*0Sstevel@tonic-gate if (nmust_match > 0) { 1205*0Sstevel@tonic-gate return ((char *)0); 1206*0Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 1207*0Sstevel@tonic-gate repeat_startp = stringp; 1208*0Sstevel@tonic-gate while (string_char_size > 0) { 1209*0Sstevel@tonic-gate stringp += string_char_size; 1210*0Sstevel@tonic-gate string_char_size = 1211*0Sstevel@tonic-gate get_wchar(&string_wchar, stringp); 1212*0Sstevel@tonic-gate } 1213*0Sstevel@tonic-gate regexp += 3; 1214*0Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, 1215*0Sstevel@tonic-gate stringp, regexp)); 1216*0Sstevel@tonic-gate } else { 1217*0Sstevel@tonic-gate repeat_startp = stringp; 1218*0Sstevel@tonic-gate while ((string_char_size > 0) && 1219*0Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 1220*0Sstevel@tonic-gate nextra_matches_allowed--; 1221*0Sstevel@tonic-gate stringp += string_char_size; 1222*0Sstevel@tonic-gate string_char_size = 1223*0Sstevel@tonic-gate get_wchar(&string_wchar, stringp); 1224*0Sstevel@tonic-gate } 1225*0Sstevel@tonic-gate regexp += 3; 1226*0Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, 1227*0Sstevel@tonic-gate stringp, regexp)); 1228*0Sstevel@tonic-gate } 1229*0Sstevel@tonic-gate } /* end case ANY_CHAR|COUNT */ 1230*0Sstevel@tonic-gate 1231*0Sstevel@tonic-gate case 
IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1232*0Sstevel@tonic-gate case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: 1233*0Sstevel@tonic-gate 1234*0Sstevel@tonic-gate /* 1235*0Sstevel@tonic-gate * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1236*0Sstevel@tonic-gate * <class_length><class ...> 1237*0Sstevel@tonic-gate * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1238*0Sstevel@tonic-gate * <class_length><class ...> 1239*0Sstevel@tonic-gate * 1240*0Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 1241*0Sstevel@tonic-gate */ 1242*0Sstevel@tonic-gate 1243*0Sstevel@tonic-gate if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { 1244*0Sstevel@tonic-gate test_condition = IN_CLASS; 1245*0Sstevel@tonic-gate } else { 1246*0Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 1247*0Sstevel@tonic-gate } 1248*0Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 1249*0Sstevel@tonic-gate 1250*0Sstevel@tonic-gate repeat_startp = stringp; 1251*0Sstevel@tonic-gate while ((*stringp != '\0') && 1252*0Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 1253*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 1254*0Sstevel@tonic-gate stringp++; 1255*0Sstevel@tonic-gate } 1256*0Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 1257*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 1258*0Sstevel@tonic-gate regexp)); 1259*0Sstevel@tonic-gate 1260*0Sstevel@tonic-gate /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 1261*0Sstevel@tonic-gate 1262*0Sstevel@tonic-gate case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1263*0Sstevel@tonic-gate case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE: 1264*0Sstevel@tonic-gate 1265*0Sstevel@tonic-gate /* 1266*0Sstevel@tonic-gate * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1267*0Sstevel@tonic-gate * <class_length><class ...> 1268*0Sstevel@tonic-gate * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1269*0Sstevel@tonic-gate * 
<class_length><class ...> 1270*0Sstevel@tonic-gate * 1271*0Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 1272*0Sstevel@tonic-gate */ 1273*0Sstevel@tonic-gate 1274*0Sstevel@tonic-gate if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) { 1275*0Sstevel@tonic-gate test_condition = IN_CLASS; 1276*0Sstevel@tonic-gate } else { 1277*0Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 1278*0Sstevel@tonic-gate } 1279*0Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 1280*0Sstevel@tonic-gate 1281*0Sstevel@tonic-gate if ((*stringp == '\0') || 1282*0Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 1283*0Sstevel@tonic-gate test_condition) != CONDITION_TRUE)) { 1284*0Sstevel@tonic-gate return ((char *)0); 1285*0Sstevel@tonic-gate } else { 1286*0Sstevel@tonic-gate stringp++; 1287*0Sstevel@tonic-gate repeat_startp = stringp; 1288*0Sstevel@tonic-gate while ((*stringp != '\0') && 1289*0Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 1290*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 1291*0Sstevel@tonic-gate stringp++; 1292*0Sstevel@tonic-gate } 1293*0Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 1294*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 1295*0Sstevel@tonic-gate regexp)); 1296*0Sstevel@tonic-gate } 1297*0Sstevel@tonic-gate /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */ 1298*0Sstevel@tonic-gate 1299*0Sstevel@tonic-gate case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */ 1300*0Sstevel@tonic-gate case NOT_IN_ASCII_CHAR_CLASS | COUNT: 1301*0Sstevel@tonic-gate 1302*0Sstevel@tonic-gate /* 1303*0Sstevel@tonic-gate * endoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 1304*0Sstevel@tonic-gate * <class ...><minimum_match_count>\ 1305*0Sstevel@tonic-gate * <maximum_match_count> 1306*0Sstevel@tonic-gate * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 1307*0Sstevel@tonic-gate * <class 
...><minimum_match_count>\ 1308*0Sstevel@tonic-gate * <maximum_match_count> 1309*0Sstevel@tonic-gate * 1310*0Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte, 1311*0Sstevel@tonic-gate * but not the <minimum_match_count> or 1312*0Sstevel@tonic-gate * <maximum_match_count> bytes 1313*0Sstevel@tonic-gate */ 1314*0Sstevel@tonic-gate 1315*0Sstevel@tonic-gate if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) { 1316*0Sstevel@tonic-gate test_condition = IN_CLASS; 1317*0Sstevel@tonic-gate } else { 1318*0Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 1319*0Sstevel@tonic-gate } 1320*0Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 1321*0Sstevel@tonic-gate 1322*0Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 1323*0Sstevel@tonic-gate regexp + (int)*regexp); 1324*0Sstevel@tonic-gate while ((*stringp != '\0') && 1325*0Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 1326*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE) && 1327*0Sstevel@tonic-gate (nmust_match > 0)) { 1328*0Sstevel@tonic-gate nmust_match--; 1329*0Sstevel@tonic-gate stringp++; 1330*0Sstevel@tonic-gate } 1331*0Sstevel@tonic-gate if (nmust_match > 0) { 1332*0Sstevel@tonic-gate return ((char *)0); 1333*0Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 1334*0Sstevel@tonic-gate repeat_startp = stringp; 1335*0Sstevel@tonic-gate while ((*stringp != '\0') && 1336*0Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 1337*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 1338*0Sstevel@tonic-gate stringp++; 1339*0Sstevel@tonic-gate } 1340*0Sstevel@tonic-gate regexp += (int)*regexp + 2; 1341*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 1342*0Sstevel@tonic-gate regexp)); 1343*0Sstevel@tonic-gate } else { 1344*0Sstevel@tonic-gate repeat_startp = stringp; 1345*0Sstevel@tonic-gate while ((*stringp != '\0') && 1346*0Sstevel@tonic-gate 
(test_char_against_ascii_class(*stringp, regexp, 1347*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE) && 1348*0Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 1349*0Sstevel@tonic-gate nextra_matches_allowed--; 1350*0Sstevel@tonic-gate stringp++; 1351*0Sstevel@tonic-gate } 1352*0Sstevel@tonic-gate regexp += (int)*regexp + 2; 1353*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 1354*0Sstevel@tonic-gate regexp)); 1355*0Sstevel@tonic-gate } 1356*0Sstevel@tonic-gate /* end case IN_ASCII_CHAR_CLASS|COUNT */ 1357*0Sstevel@tonic-gate 1358*0Sstevel@tonic-gate case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1359*0Sstevel@tonic-gate case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: 1360*0Sstevel@tonic-gate 1361*0Sstevel@tonic-gate /* 1362*0Sstevel@tonic-gate * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 1363*0Sstevel@tonic-gate * <class_length><class ...> 1364*0Sstevel@tonic-gate * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 1365*0Sstevel@tonic-gate * <class_length><class ...> 1366*0Sstevel@tonic-gate * 1367*0Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 1368*0Sstevel@tonic-gate */ 1369*0Sstevel@tonic-gate 1370*0Sstevel@tonic-gate if ((int)*regexp == 1371*0Sstevel@tonic-gate (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) { 1372*0Sstevel@tonic-gate test_condition = IN_CLASS; 1373*0Sstevel@tonic-gate } else { 1374*0Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 1375*0Sstevel@tonic-gate } 1376*0Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 1377*0Sstevel@tonic-gate 1378*0Sstevel@tonic-gate repeat_startp = stringp; 1379*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1380*0Sstevel@tonic-gate while ((string_char_size > 0) && 1381*0Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, regexp, 1382*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 1383*0Sstevel@tonic-gate stringp += string_char_size; 1384*0Sstevel@tonic-gate 
string_char_size = get_wchar(&string_wchar, stringp); 1385*0Sstevel@tonic-gate } 1386*0Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 1387*0Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 1388*0Sstevel@tonic-gate regexp)); 1389*0Sstevel@tonic-gate 1390*0Sstevel@tonic-gate /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */ 1391*0Sstevel@tonic-gate 1392*0Sstevel@tonic-gate case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1393*0Sstevel@tonic-gate case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: 1394*0Sstevel@tonic-gate 1395*0Sstevel@tonic-gate /* 1396*0Sstevel@tonic-gate * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 1397*0Sstevel@tonic-gate * <class_length><class ...> 1398*0Sstevel@tonic-gate * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 1399*0Sstevel@tonic-gate * <class_length><class ...> 1400*0Sstevel@tonic-gate * 1401*0Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 1402*0Sstevel@tonic-gate */ 1403*0Sstevel@tonic-gate 1404*0Sstevel@tonic-gate if ((int)*regexp == 1405*0Sstevel@tonic-gate (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) { 1406*0Sstevel@tonic-gate test_condition = IN_CLASS; 1407*0Sstevel@tonic-gate } else { 1408*0Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 1409*0Sstevel@tonic-gate } 1410*0Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 1411*0Sstevel@tonic-gate 1412*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1413*0Sstevel@tonic-gate if ((string_char_size <= 0) || 1414*0Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, regexp, 1415*0Sstevel@tonic-gate test_condition) != CONDITION_TRUE)) { 1416*0Sstevel@tonic-gate return ((char *)0); 1417*0Sstevel@tonic-gate } else { 1418*0Sstevel@tonic-gate stringp += string_char_size; 1419*0Sstevel@tonic-gate repeat_startp = stringp; 1420*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1421*0Sstevel@tonic-gate while 
((string_char_size > 0) && 1422*0Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, 1423*0Sstevel@tonic-gate regexp, test_condition) == CONDITION_TRUE)) { 1424*0Sstevel@tonic-gate stringp += string_char_size; 1425*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1426*0Sstevel@tonic-gate } 1427*0Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 1428*0Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 1429*0Sstevel@tonic-gate regexp)); 1430*0Sstevel@tonic-gate } 1431*0Sstevel@tonic-gate /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */ 1432*0Sstevel@tonic-gate 1433*0Sstevel@tonic-gate case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ 1434*0Sstevel@tonic-gate case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT: 1435*0Sstevel@tonic-gate 1436*0Sstevel@tonic-gate /* 1437*0Sstevel@tonic-gate * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 1438*0Sstevel@tonic-gate * <class_length><class ...><min_count><max_count> 1439*0Sstevel@tonic-gate * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 1440*0Sstevel@tonic-gate * <class_length><class ...><min_count><max_count> 1441*0Sstevel@tonic-gate * 1442*0Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 1443*0Sstevel@tonic-gate * but not the <minimum_match_count> or 1444*0Sstevel@tonic-gate * <maximum_match_count> bytes 1445*0Sstevel@tonic-gate */ 1446*0Sstevel@tonic-gate 1447*0Sstevel@tonic-gate if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) { 1448*0Sstevel@tonic-gate test_condition = IN_CLASS; 1449*0Sstevel@tonic-gate } else { 1450*0Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 1451*0Sstevel@tonic-gate } 1452*0Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 1453*0Sstevel@tonic-gate 1454*0Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 1455*0Sstevel@tonic-gate regexp + (int)*regexp); 1456*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, 
stringp); 1457*0Sstevel@tonic-gate while ((string_char_size > 0) && 1458*0Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, regexp, 1459*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE) && 1460*0Sstevel@tonic-gate (nmust_match > 0)) { 1461*0Sstevel@tonic-gate nmust_match--; 1462*0Sstevel@tonic-gate stringp += string_char_size; 1463*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1464*0Sstevel@tonic-gate } 1465*0Sstevel@tonic-gate if (nmust_match > 0) { 1466*0Sstevel@tonic-gate return ((char *)0); 1467*0Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 1468*0Sstevel@tonic-gate repeat_startp = stringp; 1469*0Sstevel@tonic-gate while ((string_char_size > 0) && 1470*0Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, 1471*0Sstevel@tonic-gate regexp, test_condition) == CONDITION_TRUE)) { 1472*0Sstevel@tonic-gate stringp += string_char_size; 1473*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1474*0Sstevel@tonic-gate } 1475*0Sstevel@tonic-gate regexp += (int)*regexp + 2; 1476*0Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 1477*0Sstevel@tonic-gate regexp)); 1478*0Sstevel@tonic-gate } else { 1479*0Sstevel@tonic-gate repeat_startp = stringp; 1480*0Sstevel@tonic-gate while ((string_char_size > 0) && 1481*0Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, 1482*0Sstevel@tonic-gate regexp, test_condition) == CONDITION_TRUE) && 1483*0Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 1484*0Sstevel@tonic-gate nextra_matches_allowed--; 1485*0Sstevel@tonic-gate stringp += string_char_size; 1486*0Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 1487*0Sstevel@tonic-gate } 1488*0Sstevel@tonic-gate regexp += (int)*regexp + 2; 1489*0Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 1490*0Sstevel@tonic-gate regexp)); 1491*0Sstevel@tonic-gate } 1492*0Sstevel@tonic-gate /* 
end case IN_MULTIBYTE_CHAR_CLASS|COUNT */ 1493*0Sstevel@tonic-gate 1494*0Sstevel@tonic-gate case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1495*0Sstevel@tonic-gate case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: 1496*0Sstevel@tonic-gate 1497*0Sstevel@tonic-gate /* 1498*0Sstevel@tonic-gate * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1499*0Sstevel@tonic-gate * <class_length><class ...> 1500*0Sstevel@tonic-gate * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1501*0Sstevel@tonic-gate * <class_length><class ...> 1502*0Sstevel@tonic-gate * 1503*0Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 1504*0Sstevel@tonic-gate */ 1505*0Sstevel@tonic-gate 1506*0Sstevel@tonic-gate if ((int)*regexp == 1507*0Sstevel@tonic-gate (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { 1508*0Sstevel@tonic-gate test_condition = IN_CLASS; 1509*0Sstevel@tonic-gate } else { 1510*0Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 1511*0Sstevel@tonic-gate } 1512*0Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 1513*0Sstevel@tonic-gate 1514*0Sstevel@tonic-gate repeat_startp = stringp; 1515*0Sstevel@tonic-gate while ((*stringp != '\0') && 1516*0Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 1517*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 1518*0Sstevel@tonic-gate stringp++; 1519*0Sstevel@tonic-gate } 1520*0Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 1521*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 1522*0Sstevel@tonic-gate regexp)); 1523*0Sstevel@tonic-gate 1524*0Sstevel@tonic-gate /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 1525*0Sstevel@tonic-gate 1526*0Sstevel@tonic-gate case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1527*0Sstevel@tonic-gate case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: 1528*0Sstevel@tonic-gate 1529*0Sstevel@tonic-gate /* 1530*0Sstevel@tonic-gate * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 
1531*0Sstevel@tonic-gate * <class_length><class ...> 1532*0Sstevel@tonic-gate * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1533*0Sstevel@tonic-gate * <class_length><class ...> 1534*0Sstevel@tonic-gate * 1535*0Sstevel@tonic-gate * NOTE: <class length> includes the <class_length> byte 1536*0Sstevel@tonic-gate */ 1537*0Sstevel@tonic-gate 1538*0Sstevel@tonic-gate if ((int)*regexp == 1539*0Sstevel@tonic-gate (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) { 1540*0Sstevel@tonic-gate test_condition = IN_CLASS; 1541*0Sstevel@tonic-gate } else { 1542*0Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 1543*0Sstevel@tonic-gate } 1544*0Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 1545*0Sstevel@tonic-gate 1546*0Sstevel@tonic-gate if ((*stringp == '\0') || 1547*0Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 1548*0Sstevel@tonic-gate test_condition) != CONDITION_TRUE)) { 1549*0Sstevel@tonic-gate return ((char *)0); 1550*0Sstevel@tonic-gate } else { 1551*0Sstevel@tonic-gate stringp++; 1552*0Sstevel@tonic-gate repeat_startp = stringp; 1553*0Sstevel@tonic-gate while ((*stringp != '\0') && 1554*0Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 1555*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 1556*0Sstevel@tonic-gate stringp++; 1557*0Sstevel@tonic-gate } 1558*0Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 1559*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 1560*0Sstevel@tonic-gate regexp)); 1561*0Sstevel@tonic-gate } 1562*0Sstevel@tonic-gate /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */ 1563*0Sstevel@tonic-gate 1564*0Sstevel@tonic-gate case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ 1565*0Sstevel@tonic-gate case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT: 1566*0Sstevel@tonic-gate 1567*0Sstevel@tonic-gate /* 1568*0Sstevel@tonic-gate * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\ 1569*0Sstevel@tonic-gate * 
<class ...><minimum_match_count>\ 1570*0Sstevel@tonic-gate * <maximum_match_count> 1571*0Sstevel@tonic-gate * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\ 1572*0Sstevel@tonic-gate * <class_length><class ...><minimum_match_count>\ 1573*0Sstevel@tonic-gate * <maximum_match_count> 1574*0Sstevel@tonic-gate * 1575*0Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 1576*0Sstevel@tonic-gate * but not the <minimum_match_count> or 1577*0Sstevel@tonic-gate * <maximum_match_count> bytes 1578*0Sstevel@tonic-gate */ 1579*0Sstevel@tonic-gate 1580*0Sstevel@tonic-gate if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) { 1581*0Sstevel@tonic-gate test_condition = IN_CLASS; 1582*0Sstevel@tonic-gate } else { 1583*0Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 1584*0Sstevel@tonic-gate } 1585*0Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 1586*0Sstevel@tonic-gate 1587*0Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 1588*0Sstevel@tonic-gate regexp + (int)*regexp); 1589*0Sstevel@tonic-gate while ((*stringp != '\0') && 1590*0Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 1591*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE) && 1592*0Sstevel@tonic-gate (nmust_match > 0)) { 1593*0Sstevel@tonic-gate nmust_match--; 1594*0Sstevel@tonic-gate stringp++; 1595*0Sstevel@tonic-gate } 1596*0Sstevel@tonic-gate if (nmust_match > 0) { 1597*0Sstevel@tonic-gate return ((char *)0); 1598*0Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 1599*0Sstevel@tonic-gate repeat_startp = stringp; 1600*0Sstevel@tonic-gate while ((*stringp != '\0') && 1601*0Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 1602*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 1603*0Sstevel@tonic-gate stringp++; 1604*0Sstevel@tonic-gate } 1605*0Sstevel@tonic-gate regexp += (int)*regexp + 2; 1606*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 
1607*0Sstevel@tonic-gate regexp)); 1608*0Sstevel@tonic-gate } else { 1609*0Sstevel@tonic-gate repeat_startp = stringp; 1610*0Sstevel@tonic-gate while ((*stringp != '\0') && 1611*0Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 1612*0Sstevel@tonic-gate test_condition) == CONDITION_TRUE) && 1613*0Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 1614*0Sstevel@tonic-gate nextra_matches_allowed--; 1615*0Sstevel@tonic-gate stringp++; 1616*0Sstevel@tonic-gate } 1617*0Sstevel@tonic-gate regexp += (int)*regexp + 2; 1618*0Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 1619*0Sstevel@tonic-gate regexp)); 1620*0Sstevel@tonic-gate } 1621*0Sstevel@tonic-gate /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */ 1622*0Sstevel@tonic-gate 1623*0Sstevel@tonic-gate case ZERO_OR_MORE_GROUP: /* (.....)* */ 1624*0Sstevel@tonic-gate case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: 1625*0Sstevel@tonic-gate case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: 1626*0Sstevel@tonic-gate case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: 1627*0Sstevel@tonic-gate 1628*0Sstevel@tonic-gate /* 1629*0Sstevel@tonic-gate * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 1630*0Sstevel@tonic-gate * <group_length><compiled_regex...>\ 1631*0Sstevel@tonic-gate * <END_GROUP|ZERO_OR_MORE><groupn> 1632*0Sstevel@tonic-gate * 1633*0Sstevel@tonic-gate * NOTE: 1634*0Sstevel@tonic-gate * 1635*0Sstevel@tonic-gate * group_length + (256 * ADDED_LENGTH_BITS) == 1636*0Sstevel@tonic-gate * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\ 1637*0Sstevel@tonic-gate * <groupn>) 1638*0Sstevel@tonic-gate * 1639*0Sstevel@tonic-gate */ 1640*0Sstevel@tonic-gate 1641*0Sstevel@tonic-gate group_length = 1642*0Sstevel@tonic-gate (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1643*0Sstevel@tonic-gate TIMES_256_SHIFT); 1644*0Sstevel@tonic-gate regexp++; 1645*0Sstevel@tonic-gate group_length += (unsigned int)*regexp; 1646*0Sstevel@tonic-gate regexp++; 1647*0Sstevel@tonic-gate 
repeat_startp = stringp; 1648*0Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 1649*0Sstevel@tonic-gate while (test_stringp != (char *)0) { 1650*0Sstevel@tonic-gate if (push_stringp(stringp) == (char *)0) 1651*0Sstevel@tonic-gate return ((char *)0); 1652*0Sstevel@tonic-gate stringp = test_stringp; 1653*0Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 1654*0Sstevel@tonic-gate } 1655*0Sstevel@tonic-gate regexp += group_length; 1656*0Sstevel@tonic-gate return (test_repeated_group(repeat_startp, stringp, regexp)); 1657*0Sstevel@tonic-gate 1658*0Sstevel@tonic-gate /* end case ZERO_OR_MORE_GROUP */ 1659*0Sstevel@tonic-gate 1660*0Sstevel@tonic-gate case END_GROUP|ZERO_OR_MORE: /* (.....)* */ 1661*0Sstevel@tonic-gate 1662*0Sstevel@tonic-gate /* encoded as <END_GROUP|ZERO_OR_MORE> */ 1663*0Sstevel@tonic-gate 1664*0Sstevel@tonic-gate /* return from recursive call to test_string() */ 1665*0Sstevel@tonic-gate 1666*0Sstevel@tonic-gate return ((char *)stringp); 1667*0Sstevel@tonic-gate 1668*0Sstevel@tonic-gate /* end case END_GROUP|ZERO_OR_MORE */ 1669*0Sstevel@tonic-gate 1670*0Sstevel@tonic-gate case ONE_OR_MORE_GROUP: /* (.....)+ */ 1671*0Sstevel@tonic-gate case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: 1672*0Sstevel@tonic-gate case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: 1673*0Sstevel@tonic-gate case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: 1674*0Sstevel@tonic-gate 1675*0Sstevel@tonic-gate /* 1676*0Sstevel@tonic-gate * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 1677*0Sstevel@tonic-gate * <group_length><compiled_regex...>\ 1678*0Sstevel@tonic-gate * <END_GROUP|ONE_OR_MORE><groupn> 1679*0Sstevel@tonic-gate * 1680*0Sstevel@tonic-gate * NOTE: 1681*0Sstevel@tonic-gate * 1682*0Sstevel@tonic-gate * group_length + (256 * ADDED_LENGTH_BITS) == 1683*0Sstevel@tonic-gate * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\ 1684*0Sstevel@tonic-gate * <groupn>) 1685*0Sstevel@tonic-gate */ 1686*0Sstevel@tonic-gate 1687*0Sstevel@tonic-gate 
group_length = 1688*0Sstevel@tonic-gate (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1689*0Sstevel@tonic-gate TIMES_256_SHIFT); 1690*0Sstevel@tonic-gate regexp++; 1691*0Sstevel@tonic-gate group_length += (unsigned int)*regexp; 1692*0Sstevel@tonic-gate regexp++; 1693*0Sstevel@tonic-gate stringp = test_string(stringp, regexp); 1694*0Sstevel@tonic-gate if (stringp == (char *)0) 1695*0Sstevel@tonic-gate return ((char *)0); 1696*0Sstevel@tonic-gate repeat_startp = stringp; 1697*0Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 1698*0Sstevel@tonic-gate while (test_stringp != (char *)0) { 1699*0Sstevel@tonic-gate if (push_stringp(stringp) == (char *)0) 1700*0Sstevel@tonic-gate return ((char *)0); 1701*0Sstevel@tonic-gate stringp = test_stringp; 1702*0Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 1703*0Sstevel@tonic-gate } 1704*0Sstevel@tonic-gate regexp += group_length; 1705*0Sstevel@tonic-gate return (test_repeated_group(repeat_startp, stringp, regexp)); 1706*0Sstevel@tonic-gate 1707*0Sstevel@tonic-gate /* end case ONE_OR_MORE_GROUP */ 1708*0Sstevel@tonic-gate 1709*0Sstevel@tonic-gate case END_GROUP|ONE_OR_MORE: /* (.....)+ */ 1710*0Sstevel@tonic-gate 1711*0Sstevel@tonic-gate /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */ 1712*0Sstevel@tonic-gate 1713*0Sstevel@tonic-gate /* return from recursive call to test_string() */ 1714*0Sstevel@tonic-gate 1715*0Sstevel@tonic-gate return ((char *)stringp); 1716*0Sstevel@tonic-gate 1717*0Sstevel@tonic-gate /* end case END_GROUP|ONE_OR_MORE */ 1718*0Sstevel@tonic-gate 1719*0Sstevel@tonic-gate case COUNTED_GROUP: /* (.....){max_count,min_count} */ 1720*0Sstevel@tonic-gate case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH: 1721*0Sstevel@tonic-gate case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH: 1722*0Sstevel@tonic-gate case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH: 1723*0Sstevel@tonic-gate 1724*0Sstevel@tonic-gate /* 1725*0Sstevel@tonic-gate * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ 
1726*0Sstevel@tonic-gate * <compiled_regex...>\<END_GROUP|COUNT><groupn>\ 1727*0Sstevel@tonic-gate * <minimum_match_count><maximum_match_count> 1728*0Sstevel@tonic-gate * 1729*0Sstevel@tonic-gate * NOTE: 1730*0Sstevel@tonic-gate * 1731*0Sstevel@tonic-gate * group_length + (256 * ADDED_LENGTH_BITS) == 1732*0Sstevel@tonic-gate * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>) 1733*0Sstevel@tonic-gate * 1734*0Sstevel@tonic-gate * but does not include the <minimum_match_count> or 1735*0Sstevel@tonic-gate * <maximum_match_count> bytes 1736*0Sstevel@tonic-gate */ 1737*0Sstevel@tonic-gate 1738*0Sstevel@tonic-gate group_length = 1739*0Sstevel@tonic-gate (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1740*0Sstevel@tonic-gate TIMES_256_SHIFT); 1741*0Sstevel@tonic-gate regexp++; 1742*0Sstevel@tonic-gate group_length += (unsigned int)*regexp; 1743*0Sstevel@tonic-gate regexp++; 1744*0Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 1745*0Sstevel@tonic-gate regexp + group_length); 1746*0Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 1747*0Sstevel@tonic-gate while ((test_stringp != (char *)0) && (nmust_match > 0)) { 1748*0Sstevel@tonic-gate stringp = test_stringp; 1749*0Sstevel@tonic-gate nmust_match--; 1750*0Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 1751*0Sstevel@tonic-gate } 1752*0Sstevel@tonic-gate if (nmust_match > 0) { 1753*0Sstevel@tonic-gate return ((char *)0); 1754*0Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 1755*0Sstevel@tonic-gate repeat_startp = stringp; 1756*0Sstevel@tonic-gate while (test_stringp != (char *)0) { 1757*0Sstevel@tonic-gate if (push_stringp(stringp) == (char *)0) 1758*0Sstevel@tonic-gate return ((char *)0); 1759*0Sstevel@tonic-gate stringp = test_stringp; 1760*0Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 1761*0Sstevel@tonic-gate } 1762*0Sstevel@tonic-gate regexp += group_length + 2; 1763*0Sstevel@tonic-gate return 
(test_repeated_group(repeat_startp, stringp, 1764*0Sstevel@tonic-gate regexp)); 1765*0Sstevel@tonic-gate } else { 1766*0Sstevel@tonic-gate repeat_startp = stringp; 1767*0Sstevel@tonic-gate while ((test_stringp != (char *)0) && 1768*0Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 1769*0Sstevel@tonic-gate nextra_matches_allowed--; 1770*0Sstevel@tonic-gate if (push_stringp(stringp) == (char *)0) 1771*0Sstevel@tonic-gate return ((char *)0); 1772*0Sstevel@tonic-gate stringp = test_stringp; 1773*0Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 1774*0Sstevel@tonic-gate } 1775*0Sstevel@tonic-gate regexp += group_length + 2; 1776*0Sstevel@tonic-gate return (test_repeated_group(repeat_startp, stringp, 1777*0Sstevel@tonic-gate regexp)); 1778*0Sstevel@tonic-gate } 1779*0Sstevel@tonic-gate /* end case COUNTED_GROUP */ 1780*0Sstevel@tonic-gate 1781*0Sstevel@tonic-gate case END_GROUP|COUNT: /* (.....){max_count,min_count} */ 1782*0Sstevel@tonic-gate 1783*0Sstevel@tonic-gate /* encoded as <END_GROUP|COUNT> */ 1784*0Sstevel@tonic-gate 1785*0Sstevel@tonic-gate /* return from recursive call to test_string() */ 1786*0Sstevel@tonic-gate 1787*0Sstevel@tonic-gate return (stringp); 1788*0Sstevel@tonic-gate 1789*0Sstevel@tonic-gate /* end case END_GROUP|COUNT */ 1790*0Sstevel@tonic-gate 1791*0Sstevel@tonic-gate case END_OF_STRING_MARK: 1792*0Sstevel@tonic-gate 1793*0Sstevel@tonic-gate /* encoded as <END_OF_STRING_MARK><END_REGEX> */ 1794*0Sstevel@tonic-gate 1795*0Sstevel@tonic-gate if (*stringp == '\0') { 1796*0Sstevel@tonic-gate regexp++; 1797*0Sstevel@tonic-gate } else { 1798*0Sstevel@tonic-gate return ((char *)0); 1799*0Sstevel@tonic-gate } 1800*0Sstevel@tonic-gate break; /* end case END_OF_STRING_MARK */ 1801*0Sstevel@tonic-gate 1802*0Sstevel@tonic-gate case END_REGEX: /* end of the compiled regular expression */ 1803*0Sstevel@tonic-gate 1804*0Sstevel@tonic-gate /* encoded as <END_REGEX> */ 1805*0Sstevel@tonic-gate 1806*0Sstevel@tonic-gate return (stringp); 
1807*0Sstevel@tonic-gate 1808*0Sstevel@tonic-gate /* end case END_REGEX */ 1809*0Sstevel@tonic-gate 1810*0Sstevel@tonic-gate default: 1811*0Sstevel@tonic-gate 1812*0Sstevel@tonic-gate return ((char *)0); 1813*0Sstevel@tonic-gate 1814*0Sstevel@tonic-gate } /* end switch (*regexp) */ 1815*0Sstevel@tonic-gate 1816*0Sstevel@tonic-gate } /* end for (;;) */ 1817*0Sstevel@tonic-gate 1818*0Sstevel@tonic-gate } /* test_string() */ 1819