/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

300Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
310Sstevel@tonic-gate
320Sstevel@tonic-gate /*
330Sstevel@tonic-gate * IMPORTANT NOTE:
340Sstevel@tonic-gate *
350Sstevel@tonic-gate * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
360Sstevel@tonic-gate * IT IS **NOT** CHARACTER SET INDEPENDENT.
370Sstevel@tonic-gate *
380Sstevel@tonic-gate */
390Sstevel@tonic-gate
40*6812Sraf #pragma weak _regex = regex
410Sstevel@tonic-gate
42*6812Sraf #include "lint.h"
430Sstevel@tonic-gate /* CONSTANTS SHARED WITH regcmp() */
440Sstevel@tonic-gate #include "regex.h"
450Sstevel@tonic-gate #include "mtlib.h"
460Sstevel@tonic-gate #include <limits.h>
470Sstevel@tonic-gate #include <stdarg.h>
480Sstevel@tonic-gate #include <stdlib.h>
490Sstevel@tonic-gate #include <thread.h>
500Sstevel@tonic-gate #include <widec.h>
510Sstevel@tonic-gate #include "tsd.h"
520Sstevel@tonic-gate
530Sstevel@tonic-gate
540Sstevel@tonic-gate /* PRIVATE CONSTANTS */
550Sstevel@tonic-gate
560Sstevel@tonic-gate #define ADD_256_TO_GROUP_LENGTH 0x1
570Sstevel@tonic-gate #define ADD_512_TO_GROUP_LENGTH 0x2
580Sstevel@tonic-gate #define ADD_768_TO_GROUP_LENGTH 0x3
590Sstevel@tonic-gate #define ADDED_LENGTH_BITS 0x3
600Sstevel@tonic-gate #define SINGLE_BYTE_MASK 0xff
610Sstevel@tonic-gate #define STRINGP_STACK_SIZE 50
620Sstevel@tonic-gate
630Sstevel@tonic-gate
640Sstevel@tonic-gate /* PRIVATE TYPE DEFINITIONS */
650Sstevel@tonic-gate
660Sstevel@tonic-gate typedef enum {
670Sstevel@tonic-gate NOT_IN_CLASS = 0,
680Sstevel@tonic-gate IN_CLASS
690Sstevel@tonic-gate } char_test_condition_t;
700Sstevel@tonic-gate
710Sstevel@tonic-gate typedef enum {
720Sstevel@tonic-gate TESTING_CHAR = 0,
730Sstevel@tonic-gate CONDITION_TRUE,
740Sstevel@tonic-gate CONDITION_FALSE,
750Sstevel@tonic-gate CHAR_TEST_ERROR
760Sstevel@tonic-gate } char_test_result_t;
770Sstevel@tonic-gate
780Sstevel@tonic-gate
790Sstevel@tonic-gate /* PRIVATE GLOBAL VARIABLES */
800Sstevel@tonic-gate
810Sstevel@tonic-gate static mutex_t regex_lock = DEFAULTMUTEX;
820Sstevel@tonic-gate static int return_arg_number[NSUBSTRINGS];
830Sstevel@tonic-gate static const char *substring_endp[NSUBSTRINGS];
840Sstevel@tonic-gate static const char *substring_startp[NSUBSTRINGS];
850Sstevel@tonic-gate static const char *stringp_stack[STRINGP_STACK_SIZE];
860Sstevel@tonic-gate static const char **stringp_stackp;
870Sstevel@tonic-gate
880Sstevel@tonic-gate
890Sstevel@tonic-gate /* DECLARATIONS OF PRIVATE FUNCTIONS */
900Sstevel@tonic-gate
910Sstevel@tonic-gate static int
920Sstevel@tonic-gate get_wchar(wchar_t *wcharp,
930Sstevel@tonic-gate const char *stringp);
940Sstevel@tonic-gate
950Sstevel@tonic-gate static void
960Sstevel@tonic-gate get_match_counts(int *nmust_matchp,
970Sstevel@tonic-gate int *nextra_matches_allowedp,
980Sstevel@tonic-gate const char *count_stringp);
990Sstevel@tonic-gate
1000Sstevel@tonic-gate static boolean_t
1010Sstevel@tonic-gate in_wchar_range(wchar_t test_char,
1020Sstevel@tonic-gate wchar_t lower_char,
1030Sstevel@tonic-gate wchar_t upper_char);
1040Sstevel@tonic-gate
1050Sstevel@tonic-gate static const char *
1060Sstevel@tonic-gate pop_stringp(void);
1070Sstevel@tonic-gate
1080Sstevel@tonic-gate static const char *
1090Sstevel@tonic-gate previous_charp(const char *current_charp);
1100Sstevel@tonic-gate
1110Sstevel@tonic-gate static const char *
1120Sstevel@tonic-gate push_stringp(const char *stringp);
1130Sstevel@tonic-gate
1140Sstevel@tonic-gate static char_test_result_t
1150Sstevel@tonic-gate test_char_against_ascii_class(char test_char,
1160Sstevel@tonic-gate const char *classp,
1170Sstevel@tonic-gate char_test_condition_t test_condition);
1180Sstevel@tonic-gate
1190Sstevel@tonic-gate static char_test_result_t
1200Sstevel@tonic-gate test_char_against_multibyte_class(wchar_t test_char,
1210Sstevel@tonic-gate const char *classp,
1220Sstevel@tonic-gate char_test_condition_t test_condition);
1230Sstevel@tonic-gate
1240Sstevel@tonic-gate
1250Sstevel@tonic-gate /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
1260Sstevel@tonic-gate
1270Sstevel@tonic-gate static char_test_result_t
1280Sstevel@tonic-gate test_char_against_old_ascii_class(char test_char,
1290Sstevel@tonic-gate const char *classp,
1300Sstevel@tonic-gate char_test_condition_t test_condition);
1310Sstevel@tonic-gate
1320Sstevel@tonic-gate static const char *
1330Sstevel@tonic-gate test_repeated_ascii_char(const char *repeat_startp,
1340Sstevel@tonic-gate const char *stringp,
1350Sstevel@tonic-gate const char *regexp);
1360Sstevel@tonic-gate
1370Sstevel@tonic-gate static const char *
1380Sstevel@tonic-gate test_repeated_multibyte_char(const char *repeat_startp,
1390Sstevel@tonic-gate const char *stringp,
1400Sstevel@tonic-gate const char *regexp);
1410Sstevel@tonic-gate
1420Sstevel@tonic-gate static const char *
1430Sstevel@tonic-gate test_repeated_group(const char *repeat_startp,
1440Sstevel@tonic-gate const char *stringp,
1450Sstevel@tonic-gate const char *regexp);
1460Sstevel@tonic-gate
1470Sstevel@tonic-gate static const char *
1480Sstevel@tonic-gate test_string(const char *stringp,
1490Sstevel@tonic-gate const char *regexp);
1500Sstevel@tonic-gate
1510Sstevel@tonic-gate
1520Sstevel@tonic-gate /* DEFINITIONS OF PUBLIC VARIABLES */
1530Sstevel@tonic-gate
1540Sstevel@tonic-gate char *__loc1;
1550Sstevel@tonic-gate
1560Sstevel@tonic-gate /*
1570Sstevel@tonic-gate * reserve thread-specific storage for __loc1
1580Sstevel@tonic-gate */
1590Sstevel@tonic-gate char **
____loc1(void)1600Sstevel@tonic-gate ____loc1(void)
1610Sstevel@tonic-gate {
162*6812Sraf if (thr_main())
1630Sstevel@tonic-gate return (&__loc1);
1640Sstevel@tonic-gate return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL));
1650Sstevel@tonic-gate }
1660Sstevel@tonic-gate
1670Sstevel@tonic-gate #define __loc1 (*(____loc1()))
1680Sstevel@tonic-gate
1690Sstevel@tonic-gate /* DEFINITION OF regex() */
1700Sstevel@tonic-gate
1710Sstevel@tonic-gate extern char *
regex(const char * regexp,const char * stringp,...)172*6812Sraf regex(const char *regexp, const char *stringp, ...)
1730Sstevel@tonic-gate {
1740Sstevel@tonic-gate va_list arg_listp;
1750Sstevel@tonic-gate int char_size;
1760Sstevel@tonic-gate const char *end_of_matchp;
1770Sstevel@tonic-gate wchar_t regex_wchar;
1780Sstevel@tonic-gate char *return_argp[NSUBSTRINGS];
1790Sstevel@tonic-gate char *returned_substringp;
1800Sstevel@tonic-gate int substringn;
1810Sstevel@tonic-gate const char *substringp;
1820Sstevel@tonic-gate wchar_t string_wchar;
1830Sstevel@tonic-gate
1840Sstevel@tonic-gate if (____loc1() == (char **)0) {
1850Sstevel@tonic-gate return ((char *)0);
1860Sstevel@tonic-gate } else {
1870Sstevel@tonic-gate lmutex_lock(®ex_lock);
1880Sstevel@tonic-gate __loc1 = (char *)0;
1890Sstevel@tonic-gate }
1900Sstevel@tonic-gate
1910Sstevel@tonic-gate if ((stringp == (char *)0) || (regexp == (char *)0)) {
1920Sstevel@tonic-gate lmutex_unlock(®ex_lock);
1930Sstevel@tonic-gate return ((char *)0);
1940Sstevel@tonic-gate }
1950Sstevel@tonic-gate
1960Sstevel@tonic-gate
1970Sstevel@tonic-gate /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */
1980Sstevel@tonic-gate
1990Sstevel@tonic-gate substringn = 0;
2000Sstevel@tonic-gate va_start(arg_listp, stringp);
2010Sstevel@tonic-gate while (substringn < NSUBSTRINGS) {
2020Sstevel@tonic-gate return_argp[substringn] = va_arg(arg_listp, char *);
2030Sstevel@tonic-gate substring_startp[substringn] = (char *)0;
2040Sstevel@tonic-gate return_arg_number[substringn] = -1;
2050Sstevel@tonic-gate substringn++;
2060Sstevel@tonic-gate }
2070Sstevel@tonic-gate va_end(arg_listp);
2080Sstevel@tonic-gate
2090Sstevel@tonic-gate
2100Sstevel@tonic-gate /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */
2110Sstevel@tonic-gate
2120Sstevel@tonic-gate end_of_matchp = (char *)0;
2130Sstevel@tonic-gate stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE];
2140Sstevel@tonic-gate
2150Sstevel@tonic-gate if ((int)*regexp == (int)START_OF_STRING_MARK) {
2160Sstevel@tonic-gate
2170Sstevel@tonic-gate /*
2180Sstevel@tonic-gate * the match must start at the beginning of the string
2190Sstevel@tonic-gate */
2200Sstevel@tonic-gate
2210Sstevel@tonic-gate __loc1 = (char *)stringp;
2220Sstevel@tonic-gate regexp++;
2230Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp);
2240Sstevel@tonic-gate
2250Sstevel@tonic-gate } else if ((int)*regexp == (int)ASCII_CHAR) {
2260Sstevel@tonic-gate
2270Sstevel@tonic-gate /*
2280Sstevel@tonic-gate * test a string against a regular expression
2290Sstevel@tonic-gate * that starts with a single ASCII character:
2300Sstevel@tonic-gate *
2310Sstevel@tonic-gate * move to each character in the string that matches
2320Sstevel@tonic-gate * the first character in the regular expression
2330Sstevel@tonic-gate * and test the remaining string
2340Sstevel@tonic-gate */
2350Sstevel@tonic-gate
2360Sstevel@tonic-gate while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
2370Sstevel@tonic-gate stringp++;
2380Sstevel@tonic-gate }
2390Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
2400Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp);
2410Sstevel@tonic-gate if (end_of_matchp != (char *)0) {
2420Sstevel@tonic-gate __loc1 = (char *)stringp;
2430Sstevel@tonic-gate } else {
2440Sstevel@tonic-gate stringp++;
2450Sstevel@tonic-gate while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
2460Sstevel@tonic-gate stringp++;
2470Sstevel@tonic-gate }
2480Sstevel@tonic-gate }
2490Sstevel@tonic-gate }
2500Sstevel@tonic-gate
2510Sstevel@tonic-gate } else if (!multibyte) {
2520Sstevel@tonic-gate
2530Sstevel@tonic-gate /*
2540Sstevel@tonic-gate * if the value of the "multibyte" macro defined in <euc.h>
2550Sstevel@tonic-gate * is false, regex() is running in an ASCII locale;
2560Sstevel@tonic-gate * test an ASCII string against an ASCII regular expression
2570Sstevel@tonic-gate * that doesn't start with a single ASCII character:
2580Sstevel@tonic-gate *
2590Sstevel@tonic-gate * move forward in the string one byte at a time, testing
2600Sstevel@tonic-gate * the remaining string against the regular expression
2610Sstevel@tonic-gate */
2620Sstevel@tonic-gate
2630Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp);
2640Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
2650Sstevel@tonic-gate stringp++;
2660Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp);
2670Sstevel@tonic-gate }
2680Sstevel@tonic-gate if (end_of_matchp != (char *)0) {
2690Sstevel@tonic-gate __loc1 = (char *)stringp;
2700Sstevel@tonic-gate }
2710Sstevel@tonic-gate
2720Sstevel@tonic-gate } else if ((int)*regexp == (int)MULTIBYTE_CHAR) {
2730Sstevel@tonic-gate
2740Sstevel@tonic-gate /*
2750Sstevel@tonic-gate * test a multibyte string against a multibyte regular expression
2760Sstevel@tonic-gate * that starts with a single multibyte character:
2770Sstevel@tonic-gate *
2780Sstevel@tonic-gate * move to each character in the string that matches
2790Sstevel@tonic-gate * the first character in the regular expression
2800Sstevel@tonic-gate * and test the remaining string
2810Sstevel@tonic-gate */
2820Sstevel@tonic-gate
2830Sstevel@tonic-gate (void) get_wchar(®ex_wchar, regexp + 1);
2840Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp);
2850Sstevel@tonic-gate while ((string_wchar != regex_wchar) && (char_size > 0)) {
2860Sstevel@tonic-gate stringp += char_size;
2870Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp);
2880Sstevel@tonic-gate }
2890Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && (char_size > 0)) {
2900Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp);
2910Sstevel@tonic-gate if (end_of_matchp != (char *)0) {
2920Sstevel@tonic-gate __loc1 = (char *)stringp;
2930Sstevel@tonic-gate } else {
2940Sstevel@tonic-gate stringp += char_size;
2950Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp);
2960Sstevel@tonic-gate while ((string_wchar != regex_wchar) && (char_size > 0)) {
2970Sstevel@tonic-gate stringp += char_size;
2980Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp);
2990Sstevel@tonic-gate }
3000Sstevel@tonic-gate }
3010Sstevel@tonic-gate }
3020Sstevel@tonic-gate
3030Sstevel@tonic-gate } else {
3040Sstevel@tonic-gate
3050Sstevel@tonic-gate /*
3060Sstevel@tonic-gate * test a multibyte string against a multibyte regular expression
3070Sstevel@tonic-gate * that doesn't start with a single multibyte character
3080Sstevel@tonic-gate *
3090Sstevel@tonic-gate * move forward in the string one multibyte character at a time,
3100Sstevel@tonic-gate * testing the remaining string against the regular expression
3110Sstevel@tonic-gate */
3120Sstevel@tonic-gate
3130Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp);
3140Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp);
3150Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && (char_size > 0)) {
3160Sstevel@tonic-gate stringp += char_size;
3170Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp);
3180Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp);
3190Sstevel@tonic-gate }
3200Sstevel@tonic-gate if (end_of_matchp != (char *)0) {
3210Sstevel@tonic-gate __loc1 = (char *)stringp;
3220Sstevel@tonic-gate }
3230Sstevel@tonic-gate }
3240Sstevel@tonic-gate
3250Sstevel@tonic-gate /*
3260Sstevel@tonic-gate * Return substrings that matched subexpressions for which
3270Sstevel@tonic-gate * matching substrings are to be returned.
3280Sstevel@tonic-gate *
3290Sstevel@tonic-gate * NOTE:
3300Sstevel@tonic-gate *
3310Sstevel@tonic-gate * According to manual page regcmp(3G), regex() returns substrings
3320Sstevel@tonic-gate * that match subexpressions even when no substring matches the
3330Sstevel@tonic-gate * entire regular expression.
3340Sstevel@tonic-gate */
3350Sstevel@tonic-gate
3360Sstevel@tonic-gate substringn = 0;
3370Sstevel@tonic-gate while (substringn < NSUBSTRINGS) {
3380Sstevel@tonic-gate substringp = substring_startp[substringn];
3390Sstevel@tonic-gate if ((substringp != (char *)0) &&
3400Sstevel@tonic-gate (return_arg_number[substringn] >= 0)) {
3410Sstevel@tonic-gate returned_substringp =
3420Sstevel@tonic-gate return_argp[return_arg_number[substringn]];
3430Sstevel@tonic-gate if (returned_substringp != (char *)0) {
3440Sstevel@tonic-gate while (substringp < substring_endp[substringn]) {
3450Sstevel@tonic-gate *returned_substringp = (char)*substringp;
3460Sstevel@tonic-gate returned_substringp++;
3470Sstevel@tonic-gate substringp++;
3480Sstevel@tonic-gate }
3490Sstevel@tonic-gate *returned_substringp = '\0';
3500Sstevel@tonic-gate }
3510Sstevel@tonic-gate }
3520Sstevel@tonic-gate substringn++;
3530Sstevel@tonic-gate }
3540Sstevel@tonic-gate lmutex_unlock(®ex_lock);
3550Sstevel@tonic-gate return ((char *)end_of_matchp);
3560Sstevel@tonic-gate } /* regex() */
3570Sstevel@tonic-gate
3580Sstevel@tonic-gate
/* DEFINITIONS OF PRIVATE FUNCTIONS */

/*
 * Decodes the character at the start of stringp into *wcharp.
 *
 * Returns the number of bytes the character occupies:
 *   0   stringp is null or addresses the terminating null byte
 *       (*wcharp is set to 0);
 *   1   the first byte is in the range 0x01..0x7f (*wcharp is that byte);
 *   otherwise whatever mbtowc() returns for up to MB_LEN_MAX bytes.
 *
 * When mbtowc() reports an invalid sequence (negative return), *wcharp is
 * set to 0 so that callers never read an indeterminate value; the negative
 * return value is passed through unchanged.
 */
static int
get_wchar(wchar_t *wcharp,
    const char *stringp)
{
	int char_size;

	if ((stringp == NULL) || (*stringp == '\0')) {
		*wcharp = (wchar_t)0;
		return (0);
	}

	if ((unsigned char)*stringp <= (unsigned char)0x7f) {
		/* single-byte ASCII: no conversion needed */
		*wcharp = (wchar_t)((unsigned int)*stringp);
		return (1);
	}

	char_size = mbtowc(wcharp, stringp, MB_LEN_MAX);
	if (char_size < 0) {
		*wcharp = (wchar_t)0;
	}
	return (char_size);
}

3820Sstevel@tonic-gate static void
get_match_counts(int * nmust_matchp,int * nextra_matches_allowedp,const char * count_stringp)3830Sstevel@tonic-gate get_match_counts(int *nmust_matchp,
3840Sstevel@tonic-gate int *nextra_matches_allowedp,
3850Sstevel@tonic-gate const char *count_stringp)
3860Sstevel@tonic-gate {
3870Sstevel@tonic-gate int minimum_match_count;
3880Sstevel@tonic-gate int maximum_match_count;
3890Sstevel@tonic-gate
3900Sstevel@tonic-gate minimum_match_count =
3910Sstevel@tonic-gate (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
3920Sstevel@tonic-gate *nmust_matchp = minimum_match_count;
3930Sstevel@tonic-gate
3940Sstevel@tonic-gate count_stringp++;
3950Sstevel@tonic-gate maximum_match_count =
3960Sstevel@tonic-gate (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
3970Sstevel@tonic-gate if (maximum_match_count == (int)UNLIMITED) {
3980Sstevel@tonic-gate *nextra_matches_allowedp = (int)UNLIMITED;
3990Sstevel@tonic-gate } else {
4000Sstevel@tonic-gate *nextra_matches_allowedp =
4010Sstevel@tonic-gate maximum_match_count - minimum_match_count;
4020Sstevel@tonic-gate }
4030Sstevel@tonic-gate return;
4040Sstevel@tonic-gate
4050Sstevel@tonic-gate } /* get_match_counts() */
4060Sstevel@tonic-gate
4070Sstevel@tonic-gate static boolean_t
in_wchar_range(wchar_t test_char,wchar_t lower_char,wchar_t upper_char)4080Sstevel@tonic-gate in_wchar_range(wchar_t test_char,
4090Sstevel@tonic-gate wchar_t lower_char,
4100Sstevel@tonic-gate wchar_t upper_char)
4110Sstevel@tonic-gate {
4120Sstevel@tonic-gate return (((lower_char <= 0x7f) && (upper_char <= 0x7f) &&
4130Sstevel@tonic-gate (lower_char <= test_char) && (test_char <= upper_char)) ||
4140Sstevel@tonic-gate (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) &&
4150Sstevel@tonic-gate ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) &&
4160Sstevel@tonic-gate (lower_char <= test_char) && (test_char <= upper_char)));
4170Sstevel@tonic-gate
4180Sstevel@tonic-gate } /* in_wchar_range() */
4190Sstevel@tonic-gate
4200Sstevel@tonic-gate static const char *
pop_stringp(void)4210Sstevel@tonic-gate pop_stringp(void)
4220Sstevel@tonic-gate {
4230Sstevel@tonic-gate const char *stringp;
4240Sstevel@tonic-gate
4250Sstevel@tonic-gate if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) {
4260Sstevel@tonic-gate return ((char *)0);
4270Sstevel@tonic-gate } else {
4280Sstevel@tonic-gate stringp = *stringp_stackp;
4290Sstevel@tonic-gate stringp_stackp++;
4300Sstevel@tonic-gate return (stringp);
4310Sstevel@tonic-gate }
4320Sstevel@tonic-gate }
4330Sstevel@tonic-gate
4340Sstevel@tonic-gate
4350Sstevel@tonic-gate static const char *
previous_charp(const char * current_charp)4360Sstevel@tonic-gate previous_charp(const char *current_charp)
4370Sstevel@tonic-gate {
4380Sstevel@tonic-gate /*
4390Sstevel@tonic-gate * returns the pointer to the previous character in
4400Sstevel@tonic-gate * a string of multibyte characters
4410Sstevel@tonic-gate */
4420Sstevel@tonic-gate
4430Sstevel@tonic-gate const char *prev_cs0 = current_charp - 1;
4440Sstevel@tonic-gate const char *prev_cs1 = current_charp - eucw1;
4450Sstevel@tonic-gate const char *prev_cs2 = current_charp - eucw2 - 1;
4460Sstevel@tonic-gate const char *prev_cs3 = current_charp - eucw3 - 1;
4470Sstevel@tonic-gate const char *prev_charp;
4480Sstevel@tonic-gate
4490Sstevel@tonic-gate if ((unsigned char)*prev_cs0 <= 0x7f) {
4500Sstevel@tonic-gate prev_charp = prev_cs0;
4510Sstevel@tonic-gate } else if ((unsigned char)*prev_cs2 == SS2) {
4520Sstevel@tonic-gate prev_charp = prev_cs2;
4530Sstevel@tonic-gate } else if ((unsigned char)*prev_cs3 == SS3) {
4540Sstevel@tonic-gate prev_charp = prev_cs3;
4550Sstevel@tonic-gate } else {
4560Sstevel@tonic-gate prev_charp = prev_cs1;
4570Sstevel@tonic-gate }
4580Sstevel@tonic-gate return (prev_charp);
4590Sstevel@tonic-gate
4600Sstevel@tonic-gate } /* previous_charp() */
4610Sstevel@tonic-gate
4620Sstevel@tonic-gate static const char *
push_stringp(const char * stringp)4630Sstevel@tonic-gate push_stringp(const char *stringp)
4640Sstevel@tonic-gate {
4650Sstevel@tonic-gate if (stringp_stackp <= &stringp_stack[0]) {
4660Sstevel@tonic-gate return ((char *)0);
4670Sstevel@tonic-gate } else {
4680Sstevel@tonic-gate stringp_stackp--;
4690Sstevel@tonic-gate *stringp_stackp = stringp;
4700Sstevel@tonic-gate return (stringp);
4710Sstevel@tonic-gate }
4720Sstevel@tonic-gate }
4730Sstevel@tonic-gate
4740Sstevel@tonic-gate
4750Sstevel@tonic-gate static char_test_result_t
test_char_against_ascii_class(char test_char,const char * classp,char_test_condition_t test_condition)4760Sstevel@tonic-gate test_char_against_ascii_class(char test_char,
4770Sstevel@tonic-gate const char *classp,
4780Sstevel@tonic-gate char_test_condition_t test_condition)
4790Sstevel@tonic-gate {
4800Sstevel@tonic-gate /*
4810Sstevel@tonic-gate * tests a character for membership in an ASCII character class compiled
4820Sstevel@tonic-gate * by the internationalized version of regcmp();
4830Sstevel@tonic-gate *
4840Sstevel@tonic-gate * NOTE: The internationalized version of regcmp() compiles
4850Sstevel@tonic-gate * the range a-z in an ASCII character class to aTHRUz.
4860Sstevel@tonic-gate */
4870Sstevel@tonic-gate
4880Sstevel@tonic-gate int nbytes_to_check;
4890Sstevel@tonic-gate
4900Sstevel@tonic-gate nbytes_to_check = (int)*classp;
4910Sstevel@tonic-gate classp++;
4920Sstevel@tonic-gate nbytes_to_check--;
4930Sstevel@tonic-gate
4940Sstevel@tonic-gate while (nbytes_to_check > 0) {
4950Sstevel@tonic-gate if (test_char == *classp) {
4960Sstevel@tonic-gate if (test_condition == IN_CLASS)
4970Sstevel@tonic-gate return (CONDITION_TRUE);
4980Sstevel@tonic-gate else
4990Sstevel@tonic-gate return (CONDITION_FALSE);
5000Sstevel@tonic-gate } else if (*classp == THRU) {
5010Sstevel@tonic-gate if ((*(classp - 1) <= test_char) &&
5020Sstevel@tonic-gate (test_char <= *(classp + 1))) {
5030Sstevel@tonic-gate if (test_condition == IN_CLASS)
5040Sstevel@tonic-gate return (CONDITION_TRUE);
5050Sstevel@tonic-gate else
5060Sstevel@tonic-gate return (CONDITION_FALSE);
5070Sstevel@tonic-gate } else {
5080Sstevel@tonic-gate classp += 2;
5090Sstevel@tonic-gate nbytes_to_check -= 2;
5100Sstevel@tonic-gate }
5110Sstevel@tonic-gate } else {
5120Sstevel@tonic-gate classp++;
5130Sstevel@tonic-gate nbytes_to_check--;
5140Sstevel@tonic-gate }
5150Sstevel@tonic-gate }
5160Sstevel@tonic-gate if (test_condition == NOT_IN_CLASS) {
5170Sstevel@tonic-gate return (CONDITION_TRUE);
5180Sstevel@tonic-gate } else {
5190Sstevel@tonic-gate return (CONDITION_FALSE);
5200Sstevel@tonic-gate }
5210Sstevel@tonic-gate } /* test_char_against_ascii_class() */
5220Sstevel@tonic-gate
5230Sstevel@tonic-gate static char_test_result_t
test_char_against_multibyte_class(wchar_t test_char,const char * classp,char_test_condition_t test_condition)5240Sstevel@tonic-gate test_char_against_multibyte_class(wchar_t test_char,
5250Sstevel@tonic-gate const char *classp,
5260Sstevel@tonic-gate char_test_condition_t test_condition)
5270Sstevel@tonic-gate {
5280Sstevel@tonic-gate /*
5290Sstevel@tonic-gate * tests a character for membership in a multibyte character class;
5300Sstevel@tonic-gate *
5310Sstevel@tonic-gate * NOTE: The range a-z in a multibyte character class compiles to
5320Sstevel@tonic-gate * aTHRUz.
5330Sstevel@tonic-gate */
5340Sstevel@tonic-gate
5350Sstevel@tonic-gate int char_size;
5360Sstevel@tonic-gate wchar_t current_char;
5370Sstevel@tonic-gate int nbytes_to_check;
5380Sstevel@tonic-gate wchar_t previous_char;
5390Sstevel@tonic-gate
5400Sstevel@tonic-gate nbytes_to_check = (int)*classp;
5410Sstevel@tonic-gate classp++;
5420Sstevel@tonic-gate nbytes_to_check--;
5430Sstevel@tonic-gate
5440Sstevel@tonic-gate char_size = get_wchar(¤t_char, classp);
5450Sstevel@tonic-gate if (char_size <= 0) {
5460Sstevel@tonic-gate return (CHAR_TEST_ERROR);
5470Sstevel@tonic-gate } else if (test_char == current_char) {
5480Sstevel@tonic-gate if (test_condition == IN_CLASS) {
5490Sstevel@tonic-gate return (CONDITION_TRUE);
5500Sstevel@tonic-gate } else {
5510Sstevel@tonic-gate return (CONDITION_FALSE);
5520Sstevel@tonic-gate }
5530Sstevel@tonic-gate } else {
5540Sstevel@tonic-gate classp += char_size;
5550Sstevel@tonic-gate nbytes_to_check -= char_size;
5560Sstevel@tonic-gate }
5570Sstevel@tonic-gate
5580Sstevel@tonic-gate while (nbytes_to_check > 0) {
5590Sstevel@tonic-gate previous_char = current_char;
5600Sstevel@tonic-gate char_size = get_wchar(¤t_char, classp);
5610Sstevel@tonic-gate if (char_size <= 0) {
5620Sstevel@tonic-gate return (CHAR_TEST_ERROR);
5630Sstevel@tonic-gate } else if (test_char == current_char) {
5640Sstevel@tonic-gate if (test_condition == IN_CLASS) {
5650Sstevel@tonic-gate return (CONDITION_TRUE);
5660Sstevel@tonic-gate } else {
5670Sstevel@tonic-gate return (CONDITION_FALSE);
5680Sstevel@tonic-gate }
5690Sstevel@tonic-gate } else if (current_char == THRU) {
5700Sstevel@tonic-gate classp += char_size;
5710Sstevel@tonic-gate nbytes_to_check -= char_size;
5720Sstevel@tonic-gate char_size = get_wchar(¤t_char, classp);
5730Sstevel@tonic-gate if (char_size <= 0) {
5740Sstevel@tonic-gate return (CHAR_TEST_ERROR);
5750Sstevel@tonic-gate } else if (in_wchar_range(test_char, previous_char,
5760Sstevel@tonic-gate current_char)) {
5770Sstevel@tonic-gate if (test_condition == IN_CLASS) {
5780Sstevel@tonic-gate return (CONDITION_TRUE);
5790Sstevel@tonic-gate } else {
5800Sstevel@tonic-gate return (CONDITION_FALSE);
5810Sstevel@tonic-gate }
5820Sstevel@tonic-gate } else {
5830Sstevel@tonic-gate classp += char_size;
5840Sstevel@tonic-gate nbytes_to_check -= char_size;
5850Sstevel@tonic-gate }
5860Sstevel@tonic-gate } else {
5870Sstevel@tonic-gate classp += char_size;
5880Sstevel@tonic-gate nbytes_to_check -= char_size;
5890Sstevel@tonic-gate }
5900Sstevel@tonic-gate }
5910Sstevel@tonic-gate if (test_condition == NOT_IN_CLASS) {
5920Sstevel@tonic-gate return (CONDITION_TRUE);
5930Sstevel@tonic-gate } else {
5940Sstevel@tonic-gate return (CONDITION_FALSE);
5950Sstevel@tonic-gate }
5960Sstevel@tonic-gate } /* test_char_against_multibyte_class() */
5970Sstevel@tonic-gate
5980Sstevel@tonic-gate
5990Sstevel@tonic-gate /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
6000Sstevel@tonic-gate
6010Sstevel@tonic-gate static char_test_result_t
test_char_against_old_ascii_class(char test_char,const char * classp,char_test_condition_t test_condition)6020Sstevel@tonic-gate test_char_against_old_ascii_class(char test_char,
6030Sstevel@tonic-gate const char *classp,
6040Sstevel@tonic-gate char_test_condition_t test_condition)
6050Sstevel@tonic-gate {
6060Sstevel@tonic-gate /*
6070Sstevel@tonic-gate * tests a character for membership in an ASCII character class compiled
6080Sstevel@tonic-gate * by the ASCII version of regcmp();
6090Sstevel@tonic-gate *
6100Sstevel@tonic-gate * NOTE: ASCII versions of regcmp() compile the range a-z in an
6110Sstevel@tonic-gate * ASCII character class to THRUaz. The internationalized
6120Sstevel@tonic-gate * version compiles the same range to aTHRUz.
6130Sstevel@tonic-gate */
6140Sstevel@tonic-gate
6150Sstevel@tonic-gate int nbytes_to_check;
6160Sstevel@tonic-gate
6170Sstevel@tonic-gate nbytes_to_check = (int)*classp;
6180Sstevel@tonic-gate classp++;
6190Sstevel@tonic-gate nbytes_to_check--;
6200Sstevel@tonic-gate
6210Sstevel@tonic-gate while (nbytes_to_check > 0) {
6220Sstevel@tonic-gate if (test_char == *classp) {
6230Sstevel@tonic-gate if (test_condition == IN_CLASS) {
6240Sstevel@tonic-gate return (CONDITION_TRUE);
6250Sstevel@tonic-gate } else {
6260Sstevel@tonic-gate return (CONDITION_FALSE);
6270Sstevel@tonic-gate }
6280Sstevel@tonic-gate } else if (*classp == THRU) {
6290Sstevel@tonic-gate if ((*(classp + 1) <= test_char) &&
6300Sstevel@tonic-gate (test_char <= *(classp + 2))) {
6310Sstevel@tonic-gate if (test_condition == IN_CLASS) {
6320Sstevel@tonic-gate return (CONDITION_TRUE);
6330Sstevel@tonic-gate } else {
6340Sstevel@tonic-gate return (CONDITION_FALSE);
6350Sstevel@tonic-gate }
6360Sstevel@tonic-gate } else {
6370Sstevel@tonic-gate classp += 3;
6380Sstevel@tonic-gate nbytes_to_check -= 3;
6390Sstevel@tonic-gate }
6400Sstevel@tonic-gate } else {
6410Sstevel@tonic-gate classp++;
6420Sstevel@tonic-gate nbytes_to_check--;
6430Sstevel@tonic-gate }
6440Sstevel@tonic-gate }
6450Sstevel@tonic-gate if (test_condition == NOT_IN_CLASS) {
6460Sstevel@tonic-gate return (CONDITION_TRUE);
6470Sstevel@tonic-gate } else {
6480Sstevel@tonic-gate return (CONDITION_FALSE);
6490Sstevel@tonic-gate }
6500Sstevel@tonic-gate } /* test_char_against_old_ascii_class() */
6510Sstevel@tonic-gate
6520Sstevel@tonic-gate static const char *
test_repeated_ascii_char(const char * repeat_startp,const char * stringp,const char * regexp)6530Sstevel@tonic-gate test_repeated_ascii_char(const char *repeat_startp,
6540Sstevel@tonic-gate const char *stringp,
6550Sstevel@tonic-gate const char *regexp)
6560Sstevel@tonic-gate {
6570Sstevel@tonic-gate const char *end_of_matchp;
6580Sstevel@tonic-gate
6590Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp);
6600Sstevel@tonic-gate while ((end_of_matchp == (char *)0) &&
6610Sstevel@tonic-gate (stringp > repeat_startp)) {
6620Sstevel@tonic-gate stringp--;
6630Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp);
6640Sstevel@tonic-gate }
6650Sstevel@tonic-gate return (end_of_matchp);
6660Sstevel@tonic-gate }
6670Sstevel@tonic-gate
6680Sstevel@tonic-gate static const char *
test_repeated_multibyte_char(const char * repeat_startp,const char * stringp,const char * regexp)6690Sstevel@tonic-gate test_repeated_multibyte_char(const char *repeat_startp,
6700Sstevel@tonic-gate const char *stringp,
6710Sstevel@tonic-gate const char *regexp)
6720Sstevel@tonic-gate {
6730Sstevel@tonic-gate const char *end_of_matchp;
6740Sstevel@tonic-gate
6750Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp);
6760Sstevel@tonic-gate while ((end_of_matchp == (char *)0) &&
6770Sstevel@tonic-gate (stringp > repeat_startp)) {
6780Sstevel@tonic-gate stringp = previous_charp(stringp);
6790Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp);
6800Sstevel@tonic-gate }
6810Sstevel@tonic-gate return (end_of_matchp);
6820Sstevel@tonic-gate }
6830Sstevel@tonic-gate
/*
 * Backtracking helper for repeated groups.
 *
 * Tries to match the rest of the compiled regular expression (regexp)
 * at stringp.  While that fails and stringp is still beyond
 * repeat_startp, the next candidate position is taken from
 * pop_stringp() (presumably a position saved while the group was being
 * matched -- confirm against pop_stringp()) and the match is retried.
 *
 * Returns whatever test_string() returned for the first position that
 * matched.  Returns a null pointer if pop_stringp() yields a null
 * pointer or if no position tried produced a match.
 */
static const char *
test_repeated_group(const char *repeat_startp,
    const char *stringp,
    const char *regexp)
{
	const char	*matchp;

	for (;;) {
		matchp = test_string(stringp, regexp);
		if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
			/* matched, or nothing left to give back */
			return (matchp);
		}
		stringp = pop_stringp();
		if (stringp == (char *)0) {
			/* no more saved positions to retry from */
			return ((char *)0);
		}
	}
}
7020Sstevel@tonic-gate
7030Sstevel@tonic-gate static const char *
test_string(const char * stringp,const char * regexp)7040Sstevel@tonic-gate test_string(const char *stringp,
7050Sstevel@tonic-gate const char *regexp)
7060Sstevel@tonic-gate {
7070Sstevel@tonic-gate /*
7080Sstevel@tonic-gate * returns a pointer to the first character following the first
7090Sstevel@tonic-gate * substring of the string addressed by stringp that matches
7100Sstevel@tonic-gate * the compiled regular expression addressed by regexp
7110Sstevel@tonic-gate */
7120Sstevel@tonic-gate
7130Sstevel@tonic-gate unsigned int group_length;
7140Sstevel@tonic-gate int nextra_matches_allowed;
7150Sstevel@tonic-gate int nmust_match;
7160Sstevel@tonic-gate wchar_t regex_wchar;
7170Sstevel@tonic-gate int regex_char_size;
7180Sstevel@tonic-gate const char *repeat_startp;
7190Sstevel@tonic-gate unsigned int return_argn;
7200Sstevel@tonic-gate wchar_t string_wchar;
7210Sstevel@tonic-gate int string_char_size;
7220Sstevel@tonic-gate unsigned int substringn;
7230Sstevel@tonic-gate char_test_condition_t test_condition;
7240Sstevel@tonic-gate const char *test_stringp;
7250Sstevel@tonic-gate
7260Sstevel@tonic-gate for (;;) {
7270Sstevel@tonic-gate
7280Sstevel@tonic-gate /*
7290Sstevel@tonic-gate * Exit the loop via a return whenever there's a match
7300Sstevel@tonic-gate * or it's clear that there can be no match.
7310Sstevel@tonic-gate */
7320Sstevel@tonic-gate
7330Sstevel@tonic-gate switch ((int)*regexp) {
7340Sstevel@tonic-gate
7350Sstevel@tonic-gate /*
7360Sstevel@tonic-gate * No fall-through.
7370Sstevel@tonic-gate * Each case ends with either a return or with stringp
7380Sstevel@tonic-gate * addressing the next character to be tested and regexp
7390Sstevel@tonic-gate * addressing the next compiled regular expression
7400Sstevel@tonic-gate *
7410Sstevel@tonic-gate * NOTE: The comments for each case give the meaning
7420Sstevel@tonic-gate * of the compiled regular expression decoded by the case
7430Sstevel@tonic-gate * and the character string that the compiled regular
7440Sstevel@tonic-gate * expression uses to encode the case. Each single
7450Sstevel@tonic-gate * character encoded in the compiled regular expression
7460Sstevel@tonic-gate * is shown enclosed in angle brackets (<>). Each
7470Sstevel@tonic-gate * compiled regular expression begins with a marker
7480Sstevel@tonic-gate * character which is shown as a named constant
7490Sstevel@tonic-gate * (e.g. <ASCII_CHAR>). Character constants are shown
7500Sstevel@tonic-gate * enclosed in single quotes (e.g. <'$'>). All other
7510Sstevel@tonic-gate * single characters encoded in the compiled regular
7520Sstevel@tonic-gate * expression are shown as lower case variable names
7530Sstevel@tonic-gate * (e.g. <ascii_char> or <multibyte_char>). Multicharacter
7540Sstevel@tonic-gate * strings encoded in the compiled regular expression
7550Sstevel@tonic-gate * are shown as variable names followed by ellipses
7560Sstevel@tonic-gate * (e.g. <compiled_regex...>).
7570Sstevel@tonic-gate */
7580Sstevel@tonic-gate
7590Sstevel@tonic-gate case ASCII_CHAR: /* single ASCII char */
7600Sstevel@tonic-gate
7610Sstevel@tonic-gate /* encoded as <ASCII_CHAR><ascii_char> */
7620Sstevel@tonic-gate
7630Sstevel@tonic-gate regexp++;
7640Sstevel@tonic-gate if (*regexp == *stringp) {
7650Sstevel@tonic-gate regexp++;
7660Sstevel@tonic-gate stringp++;
7670Sstevel@tonic-gate } else {
7680Sstevel@tonic-gate return ((char *)0);
7690Sstevel@tonic-gate }
7700Sstevel@tonic-gate break; /* end case ASCII_CHAR */
7710Sstevel@tonic-gate
7720Sstevel@tonic-gate case MULTIBYTE_CHAR: /* single multibyte char */
7730Sstevel@tonic-gate
7740Sstevel@tonic-gate /* encoded as <MULTIBYTE_CHAR><multibyte_char> */
7750Sstevel@tonic-gate
7760Sstevel@tonic-gate regexp++;
7770Sstevel@tonic-gate regex_char_size = get_wchar(®ex_wchar, regexp);
7780Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
7790Sstevel@tonic-gate if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
7800Sstevel@tonic-gate return ((char *)0);
7810Sstevel@tonic-gate } else {
7820Sstevel@tonic-gate regexp += regex_char_size;
7830Sstevel@tonic-gate stringp += string_char_size;
7840Sstevel@tonic-gate }
7850Sstevel@tonic-gate break; /* end case MULTIBYTE_CHAR */
7860Sstevel@tonic-gate
7870Sstevel@tonic-gate case ANY_CHAR: /* any single ASCII or multibyte char */
7880Sstevel@tonic-gate
7890Sstevel@tonic-gate /* encoded as <ANY_CHAR> */
7900Sstevel@tonic-gate
7910Sstevel@tonic-gate if (!multibyte) {
7920Sstevel@tonic-gate if (*stringp == '\0') {
7930Sstevel@tonic-gate return ((char *)0);
7940Sstevel@tonic-gate } else {
7950Sstevel@tonic-gate regexp++;
7960Sstevel@tonic-gate stringp++;
7970Sstevel@tonic-gate }
7980Sstevel@tonic-gate } else {
7990Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
8000Sstevel@tonic-gate if (string_char_size <= 0) {
8010Sstevel@tonic-gate return ((char *)0);
8020Sstevel@tonic-gate } else {
8030Sstevel@tonic-gate regexp++;
8040Sstevel@tonic-gate stringp += string_char_size;
8050Sstevel@tonic-gate }
8060Sstevel@tonic-gate }
8070Sstevel@tonic-gate break; /* end case ANY_CHAR */
8080Sstevel@tonic-gate
8090Sstevel@tonic-gate case IN_ASCII_CHAR_CLASS: /* [.....] */
8100Sstevel@tonic-gate case NOT_IN_ASCII_CHAR_CLASS:
8110Sstevel@tonic-gate
8120Sstevel@tonic-gate /*
8130Sstevel@tonic-gate * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...>
8140Sstevel@tonic-gate * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...>
8150Sstevel@tonic-gate *
8160Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte
8170Sstevel@tonic-gate */
8180Sstevel@tonic-gate
8190Sstevel@tonic-gate if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) {
8200Sstevel@tonic-gate test_condition = IN_CLASS;
8210Sstevel@tonic-gate } else {
8220Sstevel@tonic-gate test_condition = NOT_IN_CLASS;
8230Sstevel@tonic-gate }
8240Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */
8250Sstevel@tonic-gate
8260Sstevel@tonic-gate if ((*stringp != '\0') &&
8270Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp,
8280Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) {
8290Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */
8300Sstevel@tonic-gate stringp++;
8310Sstevel@tonic-gate } else {
8320Sstevel@tonic-gate return ((char *)0);
8330Sstevel@tonic-gate }
8340Sstevel@tonic-gate break; /* end case IN_ASCII_CHAR_CLASS */
8350Sstevel@tonic-gate
8360Sstevel@tonic-gate case IN_MULTIBYTE_CHAR_CLASS: /* [....] */
8370Sstevel@tonic-gate case NOT_IN_MULTIBYTE_CHAR_CLASS:
8380Sstevel@tonic-gate
8390Sstevel@tonic-gate /*
8400Sstevel@tonic-gate * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
8410Sstevel@tonic-gate * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
8420Sstevel@tonic-gate *
8430Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte
8440Sstevel@tonic-gate */
8450Sstevel@tonic-gate
8460Sstevel@tonic-gate if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) {
8470Sstevel@tonic-gate test_condition = IN_CLASS;
8480Sstevel@tonic-gate } else {
8490Sstevel@tonic-gate test_condition = NOT_IN_CLASS;
8500Sstevel@tonic-gate }
8510Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */
8520Sstevel@tonic-gate
8530Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
8540Sstevel@tonic-gate if ((string_char_size > 0) &&
8550Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, regexp,
8560Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) {
8570Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */
8580Sstevel@tonic-gate stringp += string_char_size;
8590Sstevel@tonic-gate } else {
8600Sstevel@tonic-gate return ((char *)0);
8610Sstevel@tonic-gate }
8620Sstevel@tonic-gate break; /* end case IN_MULTIBYTE_CHAR_CLASS */
8630Sstevel@tonic-gate
8640Sstevel@tonic-gate case IN_OLD_ASCII_CHAR_CLASS: /* [...] */
8650Sstevel@tonic-gate case NOT_IN_OLD_ASCII_CHAR_CLASS:
8660Sstevel@tonic-gate
8670Sstevel@tonic-gate /*
8680Sstevel@tonic-gate * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
8690Sstevel@tonic-gate * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
8700Sstevel@tonic-gate *
8710Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte
8720Sstevel@tonic-gate */
8730Sstevel@tonic-gate
8740Sstevel@tonic-gate if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) {
8750Sstevel@tonic-gate test_condition = IN_CLASS;
8760Sstevel@tonic-gate } else {
8770Sstevel@tonic-gate test_condition = NOT_IN_CLASS;
8780Sstevel@tonic-gate }
8790Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */
8800Sstevel@tonic-gate
8810Sstevel@tonic-gate if ((*stringp != '\0') &&
8820Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp,
8830Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) {
8840Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */
8850Sstevel@tonic-gate stringp++;
8860Sstevel@tonic-gate } else {
8870Sstevel@tonic-gate return ((char *)0);
8880Sstevel@tonic-gate }
8890Sstevel@tonic-gate break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */
8900Sstevel@tonic-gate
8910Sstevel@tonic-gate case SIMPLE_GROUP: /* (.....) */
8920Sstevel@tonic-gate
8930Sstevel@tonic-gate /* encoded as <SIMPLE_GROUP><group_length> */
8940Sstevel@tonic-gate
8950Sstevel@tonic-gate regexp += 2;
8960Sstevel@tonic-gate break; /* end case SIMPLE_GROUP */
8970Sstevel@tonic-gate
8980Sstevel@tonic-gate case END_GROUP: /* (.....) */
8990Sstevel@tonic-gate
9000Sstevel@tonic-gate /* encoded as <END_GROUP><groupn> */
9010Sstevel@tonic-gate
9020Sstevel@tonic-gate regexp += 2;
9030Sstevel@tonic-gate break; /* end case END_GROUP */
9040Sstevel@tonic-gate
9050Sstevel@tonic-gate case SAVED_GROUP: /* (.....)$0-9 */
9060Sstevel@tonic-gate
9070Sstevel@tonic-gate /* encoded as <SAVED_GROUP><substringn> */
9080Sstevel@tonic-gate
9090Sstevel@tonic-gate regexp++;
9100Sstevel@tonic-gate substringn = (unsigned int)*regexp;
9110Sstevel@tonic-gate if (substringn >= NSUBSTRINGS)
9120Sstevel@tonic-gate return ((char *)0);
9130Sstevel@tonic-gate substring_startp[substringn] = stringp;
9140Sstevel@tonic-gate regexp++;
9150Sstevel@tonic-gate break; /* end case SAVED_GROUP */
9160Sstevel@tonic-gate
9170Sstevel@tonic-gate case END_SAVED_GROUP: /* (.....)$0-9 */
9180Sstevel@tonic-gate
9190Sstevel@tonic-gate /*
9200Sstevel@tonic-gate * encoded as <END_SAVED_GROUP><substringn>\
9210Sstevel@tonic-gate * <return_arg_number[substringn]>
9220Sstevel@tonic-gate */
9230Sstevel@tonic-gate
9240Sstevel@tonic-gate regexp++;
9250Sstevel@tonic-gate substringn = (unsigned int)*regexp;
9260Sstevel@tonic-gate if (substringn >= NSUBSTRINGS)
9270Sstevel@tonic-gate return ((char *)0);
9280Sstevel@tonic-gate substring_endp[substringn] = stringp;
9290Sstevel@tonic-gate regexp++;
9300Sstevel@tonic-gate return_argn = (unsigned int)*regexp;
9310Sstevel@tonic-gate if (return_argn >= NSUBSTRINGS)
9320Sstevel@tonic-gate return ((char *)0);
9330Sstevel@tonic-gate return_arg_number[substringn] = return_argn;
9340Sstevel@tonic-gate regexp++;
9350Sstevel@tonic-gate break; /* end case END_SAVED_GROUP */
9360Sstevel@tonic-gate
9370Sstevel@tonic-gate case ASCII_CHAR|ZERO_OR_MORE: /* char* */
9380Sstevel@tonic-gate
9390Sstevel@tonic-gate /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */
9400Sstevel@tonic-gate
9410Sstevel@tonic-gate regexp++;
9420Sstevel@tonic-gate repeat_startp = stringp;
9430Sstevel@tonic-gate while (*stringp == *regexp) {
9440Sstevel@tonic-gate stringp++;
9450Sstevel@tonic-gate }
9460Sstevel@tonic-gate regexp++;
9470Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp,
9480Sstevel@tonic-gate stringp, regexp));
9490Sstevel@tonic-gate
9500Sstevel@tonic-gate /* end case ASCII_CHAR|ZERO_OR_MORE */
9510Sstevel@tonic-gate
9520Sstevel@tonic-gate case ASCII_CHAR|ONE_OR_MORE: /* char+ */
9530Sstevel@tonic-gate
9540Sstevel@tonic-gate /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */
9550Sstevel@tonic-gate
9560Sstevel@tonic-gate regexp++;
9570Sstevel@tonic-gate if (*stringp != *regexp) {
9580Sstevel@tonic-gate return ((char *)0);
9590Sstevel@tonic-gate } else {
9600Sstevel@tonic-gate stringp++;
9610Sstevel@tonic-gate repeat_startp = stringp;
9620Sstevel@tonic-gate while (*stringp == *regexp) {
9630Sstevel@tonic-gate stringp++;
9640Sstevel@tonic-gate }
9650Sstevel@tonic-gate regexp++;
9660Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
9670Sstevel@tonic-gate regexp));
9680Sstevel@tonic-gate }
9690Sstevel@tonic-gate /* end case ASCII_CHAR|ONE_OR_MORE */
9700Sstevel@tonic-gate
9710Sstevel@tonic-gate case ASCII_CHAR|COUNT: /* char{min_count,max_count} */
9720Sstevel@tonic-gate
9730Sstevel@tonic-gate /*
9740Sstevel@tonic-gate * encoded as <ASCII_CHAR|COUNT><ascii_char>\
9750Sstevel@tonic-gate * <minimum_match_count><maximum_match_count>
9760Sstevel@tonic-gate */
9770Sstevel@tonic-gate
9780Sstevel@tonic-gate regexp++;
9790Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed,
9800Sstevel@tonic-gate regexp + 1);
9810Sstevel@tonic-gate while ((*stringp == *regexp) && (nmust_match > 0)) {
9820Sstevel@tonic-gate nmust_match--;
9830Sstevel@tonic-gate stringp++;
9840Sstevel@tonic-gate }
9850Sstevel@tonic-gate if (nmust_match > 0) {
9860Sstevel@tonic-gate return ((char *)0);
9870Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) {
9880Sstevel@tonic-gate repeat_startp = stringp;
9890Sstevel@tonic-gate while (*stringp == *regexp) {
9900Sstevel@tonic-gate stringp++;
9910Sstevel@tonic-gate }
9920Sstevel@tonic-gate regexp += 3;
9930Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
9940Sstevel@tonic-gate regexp));
9950Sstevel@tonic-gate } else {
9960Sstevel@tonic-gate repeat_startp = stringp;
9970Sstevel@tonic-gate while ((*stringp == *regexp) &&
9980Sstevel@tonic-gate (nextra_matches_allowed > 0)) {
9990Sstevel@tonic-gate nextra_matches_allowed--;
10000Sstevel@tonic-gate stringp++;
10010Sstevel@tonic-gate }
10020Sstevel@tonic-gate regexp += 3;
10030Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
10040Sstevel@tonic-gate regexp));
10050Sstevel@tonic-gate }
10060Sstevel@tonic-gate /* end case ASCII_CHAR|COUNT */
10070Sstevel@tonic-gate
10080Sstevel@tonic-gate case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */
10090Sstevel@tonic-gate
10100Sstevel@tonic-gate /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */
10110Sstevel@tonic-gate
10120Sstevel@tonic-gate regexp++;
10130Sstevel@tonic-gate regex_char_size = get_wchar(®ex_wchar, regexp);
10140Sstevel@tonic-gate repeat_startp = stringp;
10150Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
10160Sstevel@tonic-gate while ((string_char_size > 0) &&
10170Sstevel@tonic-gate (string_wchar == regex_wchar)) {
10180Sstevel@tonic-gate stringp += string_char_size;
10190Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
10200Sstevel@tonic-gate }
10210Sstevel@tonic-gate regexp += regex_char_size;
10220Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp,
10230Sstevel@tonic-gate regexp));
10240Sstevel@tonic-gate
10250Sstevel@tonic-gate /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */
10260Sstevel@tonic-gate
10270Sstevel@tonic-gate case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */
10280Sstevel@tonic-gate
10290Sstevel@tonic-gate /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */
10300Sstevel@tonic-gate
10310Sstevel@tonic-gate regexp++;
10320Sstevel@tonic-gate regex_char_size = get_wchar(®ex_wchar, regexp);
10330Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
10340Sstevel@tonic-gate if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
10350Sstevel@tonic-gate return ((char *)0);
10360Sstevel@tonic-gate } else {
10370Sstevel@tonic-gate stringp += string_char_size;
10380Sstevel@tonic-gate repeat_startp = stringp;
10390Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
10400Sstevel@tonic-gate while ((string_char_size > 0) &&
10410Sstevel@tonic-gate (string_wchar == regex_wchar)) {
10420Sstevel@tonic-gate stringp += string_char_size;
10430Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
10440Sstevel@tonic-gate }
10450Sstevel@tonic-gate regexp += regex_char_size;
10460Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp,
10470Sstevel@tonic-gate regexp));
10480Sstevel@tonic-gate }
10490Sstevel@tonic-gate /* end case MULTIBYTE_CHAR|ONE_OR_MORE */
10500Sstevel@tonic-gate
10510Sstevel@tonic-gate case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */
10520Sstevel@tonic-gate
10530Sstevel@tonic-gate /*
10540Sstevel@tonic-gate * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\
10550Sstevel@tonic-gate * <minimum_match_count><maximum_match_count>
10560Sstevel@tonic-gate */
10570Sstevel@tonic-gate
10580Sstevel@tonic-gate regexp++;
10590Sstevel@tonic-gate regex_char_size = get_wchar(®ex_wchar, regexp);
10600Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed,
10610Sstevel@tonic-gate regexp + regex_char_size);
10620Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
10630Sstevel@tonic-gate while ((string_char_size > 0) &&
10640Sstevel@tonic-gate (string_wchar == regex_wchar) &&
10650Sstevel@tonic-gate (nmust_match > 0)) {
10660Sstevel@tonic-gate
10670Sstevel@tonic-gate nmust_match--;
10680Sstevel@tonic-gate stringp += string_char_size;
10690Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
10700Sstevel@tonic-gate }
10710Sstevel@tonic-gate if (nmust_match > 0) {
10720Sstevel@tonic-gate return ((char *)0);
10730Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) {
10740Sstevel@tonic-gate repeat_startp = stringp;
10750Sstevel@tonic-gate while ((string_char_size > 0) &&
10760Sstevel@tonic-gate (string_wchar == regex_wchar)) {
10770Sstevel@tonic-gate stringp += string_char_size;
10780Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
10790Sstevel@tonic-gate }
10800Sstevel@tonic-gate regexp += regex_char_size + 2;
10810Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp,
10820Sstevel@tonic-gate regexp));
10830Sstevel@tonic-gate } else {
10840Sstevel@tonic-gate repeat_startp = stringp;
10850Sstevel@tonic-gate while ((string_char_size > 0) &&
10860Sstevel@tonic-gate (string_wchar == regex_wchar) &&
10870Sstevel@tonic-gate (nextra_matches_allowed > 0)) {
10880Sstevel@tonic-gate nextra_matches_allowed--;
10890Sstevel@tonic-gate stringp += string_char_size;
10900Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
10910Sstevel@tonic-gate }
10920Sstevel@tonic-gate regexp += regex_char_size + 2;
10930Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp,
10940Sstevel@tonic-gate regexp));
10950Sstevel@tonic-gate }
10960Sstevel@tonic-gate /* end case MULTIBYTE_CHAR|COUNT */
10970Sstevel@tonic-gate
10980Sstevel@tonic-gate case ANY_CHAR|ZERO_OR_MORE: /* .* */
10990Sstevel@tonic-gate
11000Sstevel@tonic-gate /* encoded as <ANY_CHAR|ZERO_OR_MORE> */
11010Sstevel@tonic-gate
11020Sstevel@tonic-gate repeat_startp = stringp;
11030Sstevel@tonic-gate if (!multibyte) {
11040Sstevel@tonic-gate while (*stringp != '\0') {
11050Sstevel@tonic-gate stringp++;
11060Sstevel@tonic-gate }
11070Sstevel@tonic-gate regexp++;
11080Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
11090Sstevel@tonic-gate regexp));
11100Sstevel@tonic-gate } else {
11110Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
11120Sstevel@tonic-gate while (string_char_size > 0) {
11130Sstevel@tonic-gate stringp += string_char_size;
11140Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
11150Sstevel@tonic-gate }
11160Sstevel@tonic-gate regexp++;
11170Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp,
11180Sstevel@tonic-gate regexp));
11190Sstevel@tonic-gate }
11200Sstevel@tonic-gate /* end case <ANY_CHAR|ZERO_OR_MORE> */
11210Sstevel@tonic-gate
11220Sstevel@tonic-gate case ANY_CHAR|ONE_OR_MORE: /* .+ */
11230Sstevel@tonic-gate
11240Sstevel@tonic-gate /* encoded as <ANY_CHAR|ONE_OR_MORE> */
11250Sstevel@tonic-gate
11260Sstevel@tonic-gate if (!multibyte) {
11270Sstevel@tonic-gate if (*stringp == '\0') {
11280Sstevel@tonic-gate return ((char *)0);
11290Sstevel@tonic-gate } else {
11300Sstevel@tonic-gate stringp++;
11310Sstevel@tonic-gate repeat_startp = stringp;
11320Sstevel@tonic-gate while (*stringp != '\0') {
11330Sstevel@tonic-gate stringp++;
11340Sstevel@tonic-gate }
11350Sstevel@tonic-gate regexp++;
11360Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
11370Sstevel@tonic-gate regexp));
11380Sstevel@tonic-gate }
11390Sstevel@tonic-gate } else {
11400Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
11410Sstevel@tonic-gate if (string_char_size <= 0) {
11420Sstevel@tonic-gate return ((char *)0);
11430Sstevel@tonic-gate } else {
11440Sstevel@tonic-gate stringp += string_char_size;
11450Sstevel@tonic-gate repeat_startp = stringp;
11460Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
11470Sstevel@tonic-gate while (string_char_size > 0) {
11480Sstevel@tonic-gate stringp += string_char_size;
11490Sstevel@tonic-gate string_char_size =
11500Sstevel@tonic-gate get_wchar(&string_wchar, stringp);
11510Sstevel@tonic-gate }
11520Sstevel@tonic-gate regexp++;
11530Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp,
11540Sstevel@tonic-gate stringp, regexp));
11550Sstevel@tonic-gate }
11560Sstevel@tonic-gate }
11570Sstevel@tonic-gate /* end case <ANY_CHAR|ONE_OR_MORE> */
11580Sstevel@tonic-gate
11590Sstevel@tonic-gate case ANY_CHAR|COUNT: /* .{min_count,max_count} */
11600Sstevel@tonic-gate
11610Sstevel@tonic-gate /*
11620Sstevel@tonic-gate * encoded as <ANY_CHAR|COUNT>\
11630Sstevel@tonic-gate * <minimum_match_count><maximum_match_count>
11640Sstevel@tonic-gate */
11650Sstevel@tonic-gate
11660Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed,
11670Sstevel@tonic-gate regexp + 1);
11680Sstevel@tonic-gate if (!multibyte) {
11690Sstevel@tonic-gate while ((*stringp != '\0') && (nmust_match > 0)) {
11700Sstevel@tonic-gate nmust_match--;
11710Sstevel@tonic-gate stringp++;
11720Sstevel@tonic-gate }
11730Sstevel@tonic-gate if (nmust_match > 0) {
11740Sstevel@tonic-gate return ((char *)0);
11750Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) {
11760Sstevel@tonic-gate repeat_startp = stringp;
11770Sstevel@tonic-gate while (*stringp != '\0') {
11780Sstevel@tonic-gate stringp++;
11790Sstevel@tonic-gate }
11800Sstevel@tonic-gate regexp += 3;
11810Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
11820Sstevel@tonic-gate regexp));
11830Sstevel@tonic-gate } else {
11840Sstevel@tonic-gate repeat_startp = stringp;
11850Sstevel@tonic-gate while ((*stringp != '\0') &&
11860Sstevel@tonic-gate (nextra_matches_allowed > 0)) {
11870Sstevel@tonic-gate nextra_matches_allowed--;
11880Sstevel@tonic-gate stringp++;
11890Sstevel@tonic-gate }
11900Sstevel@tonic-gate regexp += 3;
11910Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
11920Sstevel@tonic-gate regexp));
11930Sstevel@tonic-gate }
11940Sstevel@tonic-gate } else { /* multibyte character */
11950Sstevel@tonic-gate
11960Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
11970Sstevel@tonic-gate while ((string_char_size > 0) && (nmust_match > 0)) {
11980Sstevel@tonic-gate nmust_match--;
11990Sstevel@tonic-gate stringp += string_char_size;
12000Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
12010Sstevel@tonic-gate }
12020Sstevel@tonic-gate if (nmust_match > 0) {
12030Sstevel@tonic-gate return ((char *)0);
12040Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) {
12050Sstevel@tonic-gate repeat_startp = stringp;
12060Sstevel@tonic-gate while (string_char_size > 0) {
12070Sstevel@tonic-gate stringp += string_char_size;
12080Sstevel@tonic-gate string_char_size =
12090Sstevel@tonic-gate get_wchar(&string_wchar, stringp);
12100Sstevel@tonic-gate }
12110Sstevel@tonic-gate regexp += 3;
12120Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp,
12130Sstevel@tonic-gate stringp, regexp));
12140Sstevel@tonic-gate } else {
12150Sstevel@tonic-gate repeat_startp = stringp;
12160Sstevel@tonic-gate while ((string_char_size > 0) &&
12170Sstevel@tonic-gate (nextra_matches_allowed > 0)) {
12180Sstevel@tonic-gate nextra_matches_allowed--;
12190Sstevel@tonic-gate stringp += string_char_size;
12200Sstevel@tonic-gate string_char_size =
12210Sstevel@tonic-gate get_wchar(&string_wchar, stringp);
12220Sstevel@tonic-gate }
12230Sstevel@tonic-gate regexp += 3;
12240Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp,
12250Sstevel@tonic-gate stringp, regexp));
12260Sstevel@tonic-gate }
12270Sstevel@tonic-gate } /* end case ANY_CHAR|COUNT */
12280Sstevel@tonic-gate
12290Sstevel@tonic-gate case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
12300Sstevel@tonic-gate case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:
12310Sstevel@tonic-gate
12320Sstevel@tonic-gate /*
12330Sstevel@tonic-gate * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
12340Sstevel@tonic-gate * <class_length><class ...>
12350Sstevel@tonic-gate * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
12360Sstevel@tonic-gate * <class_length><class ...>
12370Sstevel@tonic-gate *
12380Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte
12390Sstevel@tonic-gate */
12400Sstevel@tonic-gate
12410Sstevel@tonic-gate if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
12420Sstevel@tonic-gate test_condition = IN_CLASS;
12430Sstevel@tonic-gate } else {
12440Sstevel@tonic-gate test_condition = NOT_IN_CLASS;
12450Sstevel@tonic-gate }
12460Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */
12470Sstevel@tonic-gate
12480Sstevel@tonic-gate repeat_startp = stringp;
12490Sstevel@tonic-gate while ((*stringp != '\0') &&
12500Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp,
12510Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) {
12520Sstevel@tonic-gate stringp++;
12530Sstevel@tonic-gate }
12540Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */
12550Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
12560Sstevel@tonic-gate regexp));
12570Sstevel@tonic-gate
12580Sstevel@tonic-gate /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */
12590Sstevel@tonic-gate
12600Sstevel@tonic-gate case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
12610Sstevel@tonic-gate case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE:
12620Sstevel@tonic-gate
12630Sstevel@tonic-gate /*
12640Sstevel@tonic-gate * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
12650Sstevel@tonic-gate * <class_length><class ...>
12660Sstevel@tonic-gate * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
12670Sstevel@tonic-gate * <class_length><class ...>
12680Sstevel@tonic-gate *
12690Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte
12700Sstevel@tonic-gate */
12710Sstevel@tonic-gate
12720Sstevel@tonic-gate if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
12730Sstevel@tonic-gate test_condition = IN_CLASS;
12740Sstevel@tonic-gate } else {
12750Sstevel@tonic-gate test_condition = NOT_IN_CLASS;
12760Sstevel@tonic-gate }
12770Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */
12780Sstevel@tonic-gate
12790Sstevel@tonic-gate if ((*stringp == '\0') ||
12800Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp,
12810Sstevel@tonic-gate test_condition) != CONDITION_TRUE)) {
12820Sstevel@tonic-gate return ((char *)0);
12830Sstevel@tonic-gate } else {
12840Sstevel@tonic-gate stringp++;
12850Sstevel@tonic-gate repeat_startp = stringp;
12860Sstevel@tonic-gate while ((*stringp != '\0') &&
12870Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp,
12880Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) {
12890Sstevel@tonic-gate stringp++;
12900Sstevel@tonic-gate }
12910Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */
12920Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
12930Sstevel@tonic-gate regexp));
12940Sstevel@tonic-gate }
12950Sstevel@tonic-gate /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */
12960Sstevel@tonic-gate
12970Sstevel@tonic-gate case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */
12980Sstevel@tonic-gate case NOT_IN_ASCII_CHAR_CLASS | COUNT:
12990Sstevel@tonic-gate
13000Sstevel@tonic-gate /*
13010Sstevel@tonic-gate * encoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\
13020Sstevel@tonic-gate * <class ...><minimum_match_count>\
13030Sstevel@tonic-gate * <maximum_match_count>
13040Sstevel@tonic-gate * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\
13050Sstevel@tonic-gate * <class ...><minimum_match_count>\
13060Sstevel@tonic-gate * <maximum_match_count>
13070Sstevel@tonic-gate *
13080Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte,
13090Sstevel@tonic-gate * but not the <minimum_match_count> or
13100Sstevel@tonic-gate * <maximum_match_count> bytes
13110Sstevel@tonic-gate */
13120Sstevel@tonic-gate
13130Sstevel@tonic-gate if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) {
13140Sstevel@tonic-gate test_condition = IN_CLASS;
13150Sstevel@tonic-gate } else {
13160Sstevel@tonic-gate test_condition = NOT_IN_CLASS;
13170Sstevel@tonic-gate }
13180Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */
13190Sstevel@tonic-gate
13200Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed,
13210Sstevel@tonic-gate regexp + (int)*regexp);
13220Sstevel@tonic-gate while ((*stringp != '\0') &&
13230Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp,
13240Sstevel@tonic-gate test_condition) == CONDITION_TRUE) &&
13250Sstevel@tonic-gate (nmust_match > 0)) {
13260Sstevel@tonic-gate nmust_match--;
13270Sstevel@tonic-gate stringp++;
13280Sstevel@tonic-gate }
13290Sstevel@tonic-gate if (nmust_match > 0) {
13300Sstevel@tonic-gate return ((char *)0);
13310Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) {
13320Sstevel@tonic-gate repeat_startp = stringp;
13330Sstevel@tonic-gate while ((*stringp != '\0') &&
13340Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp,
13350Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) {
13360Sstevel@tonic-gate stringp++;
13370Sstevel@tonic-gate }
13380Sstevel@tonic-gate regexp += (int)*regexp + 2;
13390Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
13400Sstevel@tonic-gate regexp));
13410Sstevel@tonic-gate } else {
13420Sstevel@tonic-gate repeat_startp = stringp;
13430Sstevel@tonic-gate while ((*stringp != '\0') &&
13440Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp,
13450Sstevel@tonic-gate test_condition) == CONDITION_TRUE) &&
13460Sstevel@tonic-gate (nextra_matches_allowed > 0)) {
13470Sstevel@tonic-gate nextra_matches_allowed--;
13480Sstevel@tonic-gate stringp++;
13490Sstevel@tonic-gate }
13500Sstevel@tonic-gate regexp += (int)*regexp + 2;
13510Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
13520Sstevel@tonic-gate regexp));
13530Sstevel@tonic-gate }
13540Sstevel@tonic-gate /* end case IN_ASCII_CHAR_CLASS|COUNT */
13550Sstevel@tonic-gate
13560Sstevel@tonic-gate case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
13570Sstevel@tonic-gate case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:
13580Sstevel@tonic-gate
13590Sstevel@tonic-gate /*
13600Sstevel@tonic-gate * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
13610Sstevel@tonic-gate * <class_length><class ...>
13620Sstevel@tonic-gate * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
13630Sstevel@tonic-gate * <class_length><class ...>
13640Sstevel@tonic-gate *
13650Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte
13660Sstevel@tonic-gate */
13670Sstevel@tonic-gate
13680Sstevel@tonic-gate if ((int)*regexp ==
13690Sstevel@tonic-gate (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) {
13700Sstevel@tonic-gate test_condition = IN_CLASS;
13710Sstevel@tonic-gate } else {
13720Sstevel@tonic-gate test_condition = NOT_IN_CLASS;
13730Sstevel@tonic-gate }
13740Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */
13750Sstevel@tonic-gate
13760Sstevel@tonic-gate repeat_startp = stringp;
13770Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
13780Sstevel@tonic-gate while ((string_char_size > 0) &&
13790Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, regexp,
13800Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) {
13810Sstevel@tonic-gate stringp += string_char_size;
13820Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
13830Sstevel@tonic-gate }
13840Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */
13850Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp,
13860Sstevel@tonic-gate regexp));
13870Sstevel@tonic-gate
13880Sstevel@tonic-gate /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */
13890Sstevel@tonic-gate
13900Sstevel@tonic-gate case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
13910Sstevel@tonic-gate case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:
13920Sstevel@tonic-gate
13930Sstevel@tonic-gate /*
13940Sstevel@tonic-gate * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
13950Sstevel@tonic-gate * <class_length><class ...>
13960Sstevel@tonic-gate * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
13970Sstevel@tonic-gate * <class_length><class ...>
13980Sstevel@tonic-gate *
13990Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte
14000Sstevel@tonic-gate */
14010Sstevel@tonic-gate
14020Sstevel@tonic-gate if ((int)*regexp ==
14030Sstevel@tonic-gate (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) {
14040Sstevel@tonic-gate test_condition = IN_CLASS;
14050Sstevel@tonic-gate } else {
14060Sstevel@tonic-gate test_condition = NOT_IN_CLASS;
14070Sstevel@tonic-gate }
14080Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */
14090Sstevel@tonic-gate
14100Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
14110Sstevel@tonic-gate if ((string_char_size <= 0) ||
14120Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, regexp,
14130Sstevel@tonic-gate test_condition) != CONDITION_TRUE)) {
14140Sstevel@tonic-gate return ((char *)0);
14150Sstevel@tonic-gate } else {
14160Sstevel@tonic-gate stringp += string_char_size;
14170Sstevel@tonic-gate repeat_startp = stringp;
14180Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
14190Sstevel@tonic-gate while ((string_char_size > 0) &&
14200Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar,
14210Sstevel@tonic-gate regexp, test_condition) == CONDITION_TRUE)) {
14220Sstevel@tonic-gate stringp += string_char_size;
14230Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
14240Sstevel@tonic-gate }
14250Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */
14260Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp,
14270Sstevel@tonic-gate regexp));
14280Sstevel@tonic-gate }
14290Sstevel@tonic-gate /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */
14300Sstevel@tonic-gate
14310Sstevel@tonic-gate case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
14320Sstevel@tonic-gate case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT:
14330Sstevel@tonic-gate
14340Sstevel@tonic-gate /*
14350Sstevel@tonic-gate * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\
14360Sstevel@tonic-gate * <class_length><class ...><min_count><max_count>
14370Sstevel@tonic-gate * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\
14380Sstevel@tonic-gate * <class_length><class ...><min_count><max_count>
14390Sstevel@tonic-gate *
14400Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte
14410Sstevel@tonic-gate * but not the <minimum_match_count> or
14420Sstevel@tonic-gate * <maximum_match_count> bytes
14430Sstevel@tonic-gate */
14440Sstevel@tonic-gate
14450Sstevel@tonic-gate if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) {
14460Sstevel@tonic-gate test_condition = IN_CLASS;
14470Sstevel@tonic-gate } else {
14480Sstevel@tonic-gate test_condition = NOT_IN_CLASS;
14490Sstevel@tonic-gate }
14500Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */
14510Sstevel@tonic-gate
14520Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed,
14530Sstevel@tonic-gate regexp + (int)*regexp);
14540Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
14550Sstevel@tonic-gate while ((string_char_size > 0) &&
14560Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, regexp,
14570Sstevel@tonic-gate test_condition) == CONDITION_TRUE) &&
14580Sstevel@tonic-gate (nmust_match > 0)) {
14590Sstevel@tonic-gate nmust_match--;
14600Sstevel@tonic-gate stringp += string_char_size;
14610Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
14620Sstevel@tonic-gate }
14630Sstevel@tonic-gate if (nmust_match > 0) {
14640Sstevel@tonic-gate return ((char *)0);
14650Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) {
14660Sstevel@tonic-gate repeat_startp = stringp;
14670Sstevel@tonic-gate while ((string_char_size > 0) &&
14680Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar,
14690Sstevel@tonic-gate regexp, test_condition) == CONDITION_TRUE)) {
14700Sstevel@tonic-gate stringp += string_char_size;
14710Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
14720Sstevel@tonic-gate }
14730Sstevel@tonic-gate regexp += (int)*regexp + 2;
14740Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp,
14750Sstevel@tonic-gate regexp));
14760Sstevel@tonic-gate } else {
14770Sstevel@tonic-gate repeat_startp = stringp;
14780Sstevel@tonic-gate while ((string_char_size > 0) &&
14790Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar,
14800Sstevel@tonic-gate regexp, test_condition) == CONDITION_TRUE) &&
14810Sstevel@tonic-gate (nextra_matches_allowed > 0)) {
14820Sstevel@tonic-gate nextra_matches_allowed--;
14830Sstevel@tonic-gate stringp += string_char_size;
14840Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp);
14850Sstevel@tonic-gate }
14860Sstevel@tonic-gate regexp += (int)*regexp + 2;
14870Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp,
14880Sstevel@tonic-gate regexp));
14890Sstevel@tonic-gate }
14900Sstevel@tonic-gate /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */
14910Sstevel@tonic-gate
14920Sstevel@tonic-gate case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
14930Sstevel@tonic-gate case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:
14940Sstevel@tonic-gate
14950Sstevel@tonic-gate /*
14960Sstevel@tonic-gate * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
14970Sstevel@tonic-gate * <class_length><class ...>
14980Sstevel@tonic-gate * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
14990Sstevel@tonic-gate * <class_length><class ...>
15000Sstevel@tonic-gate *
15010Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte
15020Sstevel@tonic-gate */
15030Sstevel@tonic-gate
15040Sstevel@tonic-gate if ((int)*regexp ==
15050Sstevel@tonic-gate (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
15060Sstevel@tonic-gate test_condition = IN_CLASS;
15070Sstevel@tonic-gate } else {
15080Sstevel@tonic-gate test_condition = NOT_IN_CLASS;
15090Sstevel@tonic-gate }
15100Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */
15110Sstevel@tonic-gate
15120Sstevel@tonic-gate repeat_startp = stringp;
15130Sstevel@tonic-gate while ((*stringp != '\0') &&
15140Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp,
15150Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) {
15160Sstevel@tonic-gate stringp++;
15170Sstevel@tonic-gate }
15180Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */
15190Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
15200Sstevel@tonic-gate regexp));
15210Sstevel@tonic-gate
15220Sstevel@tonic-gate /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */
15230Sstevel@tonic-gate
15240Sstevel@tonic-gate case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
15250Sstevel@tonic-gate case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:
15260Sstevel@tonic-gate
15270Sstevel@tonic-gate /*
15280Sstevel@tonic-gate * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
15290Sstevel@tonic-gate * <class_length><class ...>
15300Sstevel@tonic-gate * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
15310Sstevel@tonic-gate * <class_length><class ...>
15320Sstevel@tonic-gate *
15330Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte
15340Sstevel@tonic-gate */
15350Sstevel@tonic-gate
15360Sstevel@tonic-gate if ((int)*regexp ==
15370Sstevel@tonic-gate (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
15380Sstevel@tonic-gate test_condition = IN_CLASS;
15390Sstevel@tonic-gate } else {
15400Sstevel@tonic-gate test_condition = NOT_IN_CLASS;
15410Sstevel@tonic-gate }
15420Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */
15430Sstevel@tonic-gate
15440Sstevel@tonic-gate if ((*stringp == '\0') ||
15450Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp,
15460Sstevel@tonic-gate test_condition) != CONDITION_TRUE)) {
15470Sstevel@tonic-gate return ((char *)0);
15480Sstevel@tonic-gate } else {
15490Sstevel@tonic-gate stringp++;
15500Sstevel@tonic-gate repeat_startp = stringp;
15510Sstevel@tonic-gate while ((*stringp != '\0') &&
15520Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp,
15530Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) {
15540Sstevel@tonic-gate stringp++;
15550Sstevel@tonic-gate }
15560Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */
15570Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
15580Sstevel@tonic-gate regexp));
15590Sstevel@tonic-gate }
15600Sstevel@tonic-gate /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */
15610Sstevel@tonic-gate
15620Sstevel@tonic-gate case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
15630Sstevel@tonic-gate case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT:
15640Sstevel@tonic-gate
15650Sstevel@tonic-gate /*
15660Sstevel@tonic-gate * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\
15670Sstevel@tonic-gate * <class ...><minimum_match_count>\
15680Sstevel@tonic-gate * <maximum_match_count>
15690Sstevel@tonic-gate * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\
15700Sstevel@tonic-gate * <class_length><class ...><minimum_match_count>\
15710Sstevel@tonic-gate * <maximum_match_count>
15720Sstevel@tonic-gate *
15730Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte
15740Sstevel@tonic-gate * but not the <minimum_match_count> or
15750Sstevel@tonic-gate * <maximum_match_count> bytes
15760Sstevel@tonic-gate */
15770Sstevel@tonic-gate
15780Sstevel@tonic-gate if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) {
15790Sstevel@tonic-gate test_condition = IN_CLASS;
15800Sstevel@tonic-gate } else {
15810Sstevel@tonic-gate test_condition = NOT_IN_CLASS;
15820Sstevel@tonic-gate }
15830Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */
15840Sstevel@tonic-gate
15850Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed,
15860Sstevel@tonic-gate regexp + (int)*regexp);
15870Sstevel@tonic-gate while ((*stringp != '\0') &&
15880Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp,
15890Sstevel@tonic-gate test_condition) == CONDITION_TRUE) &&
15900Sstevel@tonic-gate (nmust_match > 0)) {
15910Sstevel@tonic-gate nmust_match--;
15920Sstevel@tonic-gate stringp++;
15930Sstevel@tonic-gate }
15940Sstevel@tonic-gate if (nmust_match > 0) {
15950Sstevel@tonic-gate return ((char *)0);
15960Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) {
15970Sstevel@tonic-gate repeat_startp = stringp;
15980Sstevel@tonic-gate while ((*stringp != '\0') &&
15990Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp,
16000Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) {
16010Sstevel@tonic-gate stringp++;
16020Sstevel@tonic-gate }
16030Sstevel@tonic-gate regexp += (int)*regexp + 2;
16040Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
16050Sstevel@tonic-gate regexp));
16060Sstevel@tonic-gate } else {
16070Sstevel@tonic-gate repeat_startp = stringp;
16080Sstevel@tonic-gate while ((*stringp != '\0') &&
16090Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp,
16100Sstevel@tonic-gate test_condition) == CONDITION_TRUE) &&
16110Sstevel@tonic-gate (nextra_matches_allowed > 0)) {
16120Sstevel@tonic-gate nextra_matches_allowed--;
16130Sstevel@tonic-gate stringp++;
16140Sstevel@tonic-gate }
16150Sstevel@tonic-gate regexp += (int)*regexp + 2;
16160Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp,
16170Sstevel@tonic-gate regexp));
16180Sstevel@tonic-gate }
16190Sstevel@tonic-gate /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */
16200Sstevel@tonic-gate
16210Sstevel@tonic-gate case ZERO_OR_MORE_GROUP: /* (.....)* */
16220Sstevel@tonic-gate case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
16230Sstevel@tonic-gate case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
16240Sstevel@tonic-gate case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
16250Sstevel@tonic-gate
16260Sstevel@tonic-gate /*
16270Sstevel@tonic-gate * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
16280Sstevel@tonic-gate * <group_length><compiled_regex...>\
16290Sstevel@tonic-gate * <END_GROUP|ZERO_OR_MORE><groupn>
16300Sstevel@tonic-gate *
16310Sstevel@tonic-gate * NOTE:
16320Sstevel@tonic-gate *
16330Sstevel@tonic-gate * group_length + (256 * ADDED_LENGTH_BITS) ==
16340Sstevel@tonic-gate * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\
16350Sstevel@tonic-gate * <groupn>)
16360Sstevel@tonic-gate *
16370Sstevel@tonic-gate */
16380Sstevel@tonic-gate
16390Sstevel@tonic-gate group_length =
16400Sstevel@tonic-gate (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
16410Sstevel@tonic-gate TIMES_256_SHIFT);
16420Sstevel@tonic-gate regexp++;
16430Sstevel@tonic-gate group_length += (unsigned int)*regexp;
16440Sstevel@tonic-gate regexp++;
16450Sstevel@tonic-gate repeat_startp = stringp;
16460Sstevel@tonic-gate test_stringp = test_string(stringp, regexp);
16470Sstevel@tonic-gate while (test_stringp != (char *)0) {
16480Sstevel@tonic-gate if (push_stringp(stringp) == (char *)0)
16490Sstevel@tonic-gate return ((char *)0);
16500Sstevel@tonic-gate stringp = test_stringp;
16510Sstevel@tonic-gate test_stringp = test_string(stringp, regexp);
16520Sstevel@tonic-gate }
16530Sstevel@tonic-gate regexp += group_length;
16540Sstevel@tonic-gate return (test_repeated_group(repeat_startp, stringp, regexp));
16550Sstevel@tonic-gate
16560Sstevel@tonic-gate /* end case ZERO_OR_MORE_GROUP */
16570Sstevel@tonic-gate
16580Sstevel@tonic-gate case END_GROUP|ZERO_OR_MORE: /* (.....)* */
16590Sstevel@tonic-gate
16600Sstevel@tonic-gate /* encoded as <END_GROUP|ZERO_OR_MORE> */
16610Sstevel@tonic-gate
16620Sstevel@tonic-gate /* return from recursive call to test_string() */
16630Sstevel@tonic-gate
16640Sstevel@tonic-gate return ((char *)stringp);
16650Sstevel@tonic-gate
16660Sstevel@tonic-gate /* end case END_GROUP|ZERO_OR_MORE */
16670Sstevel@tonic-gate
16680Sstevel@tonic-gate case ONE_OR_MORE_GROUP: /* (.....)+ */
16690Sstevel@tonic-gate case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
16700Sstevel@tonic-gate case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
16710Sstevel@tonic-gate case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
16720Sstevel@tonic-gate
16730Sstevel@tonic-gate /*
16740Sstevel@tonic-gate * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
16750Sstevel@tonic-gate * <group_length><compiled_regex...>\
16760Sstevel@tonic-gate * <END_GROUP|ONE_OR_MORE><groupn>
16770Sstevel@tonic-gate *
16780Sstevel@tonic-gate * NOTE:
16790Sstevel@tonic-gate *
16800Sstevel@tonic-gate * group_length + (256 * ADDED_LENGTH_BITS) ==
16810Sstevel@tonic-gate * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\
16820Sstevel@tonic-gate * <groupn>)
16830Sstevel@tonic-gate */
16840Sstevel@tonic-gate
16850Sstevel@tonic-gate group_length =
16860Sstevel@tonic-gate (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
16870Sstevel@tonic-gate TIMES_256_SHIFT);
16880Sstevel@tonic-gate regexp++;
16890Sstevel@tonic-gate group_length += (unsigned int)*regexp;
16900Sstevel@tonic-gate regexp++;
16910Sstevel@tonic-gate stringp = test_string(stringp, regexp);
16920Sstevel@tonic-gate if (stringp == (char *)0)
16930Sstevel@tonic-gate return ((char *)0);
16940Sstevel@tonic-gate repeat_startp = stringp;
16950Sstevel@tonic-gate test_stringp = test_string(stringp, regexp);
16960Sstevel@tonic-gate while (test_stringp != (char *)0) {
16970Sstevel@tonic-gate if (push_stringp(stringp) == (char *)0)
16980Sstevel@tonic-gate return ((char *)0);
16990Sstevel@tonic-gate stringp = test_stringp;
17000Sstevel@tonic-gate test_stringp = test_string(stringp, regexp);
17010Sstevel@tonic-gate }
17020Sstevel@tonic-gate regexp += group_length;
17030Sstevel@tonic-gate return (test_repeated_group(repeat_startp, stringp, regexp));
17040Sstevel@tonic-gate
17050Sstevel@tonic-gate /* end case ONE_OR_MORE_GROUP */
17060Sstevel@tonic-gate
17070Sstevel@tonic-gate case END_GROUP|ONE_OR_MORE: /* (.....)+ */
17080Sstevel@tonic-gate
17090Sstevel@tonic-gate /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */
17100Sstevel@tonic-gate
17110Sstevel@tonic-gate /* return from recursive call to test_string() */
17120Sstevel@tonic-gate
17130Sstevel@tonic-gate return ((char *)stringp);
17140Sstevel@tonic-gate
17150Sstevel@tonic-gate /* end case END_GROUP|ONE_OR_MORE */
17160Sstevel@tonic-gate
17170Sstevel@tonic-gate case COUNTED_GROUP: /* (.....){min_count,max_count} */
17180Sstevel@tonic-gate case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH:
17190Sstevel@tonic-gate case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH:
17200Sstevel@tonic-gate case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH:
17210Sstevel@tonic-gate
17220Sstevel@tonic-gate /*
17230Sstevel@tonic-gate * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
17240Sstevel@tonic-gate * <compiled_regex...>\<END_GROUP|COUNT><groupn>\
17250Sstevel@tonic-gate * <minimum_match_count><maximum_match_count>
17260Sstevel@tonic-gate *
17270Sstevel@tonic-gate * NOTE:
17280Sstevel@tonic-gate *
17290Sstevel@tonic-gate * group_length + (256 * ADDED_LENGTH_BITS) ==
17300Sstevel@tonic-gate * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>)
17310Sstevel@tonic-gate *
17320Sstevel@tonic-gate * but does not include the <minimum_match_count> or
17330Sstevel@tonic-gate * <maximum_match_count> bytes
17340Sstevel@tonic-gate */
17350Sstevel@tonic-gate
17360Sstevel@tonic-gate group_length =
17370Sstevel@tonic-gate (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
17380Sstevel@tonic-gate TIMES_256_SHIFT);
17390Sstevel@tonic-gate regexp++;
17400Sstevel@tonic-gate group_length += (unsigned int)*regexp;
17410Sstevel@tonic-gate regexp++;
17420Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed,
17430Sstevel@tonic-gate regexp + group_length);
17440Sstevel@tonic-gate test_stringp = test_string(stringp, regexp);
17450Sstevel@tonic-gate while ((test_stringp != (char *)0) && (nmust_match > 0)) {
17460Sstevel@tonic-gate stringp = test_stringp;
17470Sstevel@tonic-gate nmust_match--;
17480Sstevel@tonic-gate test_stringp = test_string(stringp, regexp);
17490Sstevel@tonic-gate }
17500Sstevel@tonic-gate if (nmust_match > 0) {
17510Sstevel@tonic-gate return ((char *)0);
17520Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) {
17530Sstevel@tonic-gate repeat_startp = stringp;
17540Sstevel@tonic-gate while (test_stringp != (char *)0) {
17550Sstevel@tonic-gate if (push_stringp(stringp) == (char *)0)
17560Sstevel@tonic-gate return ((char *)0);
17570Sstevel@tonic-gate stringp = test_stringp;
17580Sstevel@tonic-gate test_stringp = test_string(stringp, regexp);
17590Sstevel@tonic-gate }
17600Sstevel@tonic-gate regexp += group_length + 2;
17610Sstevel@tonic-gate return (test_repeated_group(repeat_startp, stringp,
17620Sstevel@tonic-gate regexp));
17630Sstevel@tonic-gate } else {
17640Sstevel@tonic-gate repeat_startp = stringp;
17650Sstevel@tonic-gate while ((test_stringp != (char *)0) &&
17660Sstevel@tonic-gate (nextra_matches_allowed > 0)) {
17670Sstevel@tonic-gate nextra_matches_allowed--;
17680Sstevel@tonic-gate if (push_stringp(stringp) == (char *)0)
17690Sstevel@tonic-gate return ((char *)0);
17700Sstevel@tonic-gate stringp = test_stringp;
17710Sstevel@tonic-gate test_stringp = test_string(stringp, regexp);
17720Sstevel@tonic-gate }
17730Sstevel@tonic-gate regexp += group_length + 2;
17740Sstevel@tonic-gate return (test_repeated_group(repeat_startp, stringp,
17750Sstevel@tonic-gate regexp));
17760Sstevel@tonic-gate }
17770Sstevel@tonic-gate /* end case COUNTED_GROUP */
17780Sstevel@tonic-gate
17790Sstevel@tonic-gate case END_GROUP|COUNT: /* (.....){min_count,max_count} */
17800Sstevel@tonic-gate
17810Sstevel@tonic-gate /* encoded as <END_GROUP|COUNT> */
17820Sstevel@tonic-gate
17830Sstevel@tonic-gate /* return from recursive call to test_string() */
17840Sstevel@tonic-gate
17850Sstevel@tonic-gate return (stringp);
17860Sstevel@tonic-gate
17870Sstevel@tonic-gate /* end case END_GROUP|COUNT */
17880Sstevel@tonic-gate
17890Sstevel@tonic-gate case END_OF_STRING_MARK:
17900Sstevel@tonic-gate
17910Sstevel@tonic-gate /* encoded as <END_OF_STRING_MARK><END_REGEX> */
17920Sstevel@tonic-gate
17930Sstevel@tonic-gate if (*stringp == '\0') {
17940Sstevel@tonic-gate regexp++;
17950Sstevel@tonic-gate } else {
17960Sstevel@tonic-gate return ((char *)0);
17970Sstevel@tonic-gate }
17980Sstevel@tonic-gate break; /* end case END_OF_STRING_MARK */
17990Sstevel@tonic-gate
18000Sstevel@tonic-gate case END_REGEX: /* end of the compiled regular expression */
18010Sstevel@tonic-gate
18020Sstevel@tonic-gate /* encoded as <END_REGEX> */
18030Sstevel@tonic-gate
18040Sstevel@tonic-gate return (stringp);
18050Sstevel@tonic-gate
18060Sstevel@tonic-gate /* end case END_REGEX */
18070Sstevel@tonic-gate
18080Sstevel@tonic-gate default:
18090Sstevel@tonic-gate
18100Sstevel@tonic-gate return ((char *)0);
18110Sstevel@tonic-gate
18120Sstevel@tonic-gate } /* end switch (*regexp) */
18130Sstevel@tonic-gate
18140Sstevel@tonic-gate } /* end for (;;) */
18150Sstevel@tonic-gate
18160Sstevel@tonic-gate } /* test_string() */
1817