xref: /csrg-svn/contrib/ed/re.c (revision 59483)
157698Sbostic /*-
257698Sbostic  * Copyright (c) 1992 The Regents of the University of California.
357698Sbostic  * All rights reserved.
457698Sbostic  *
557698Sbostic  * This code is derived from software contributed to Berkeley by
657698Sbostic  * Rodney Ruddock of the University of Guelph.
757698Sbostic  *
857698Sbostic  * %sccs.include.redist.c%
957698Sbostic  */
1057698Sbostic 
#ifndef lint
/* SCCS identification string for this file; omitted when `lint' is defined. */
static char sccsid[] = "@(#)re.c	5.5 (Berkeley) 04/28/93";
#endif /* not lint */
1457698Sbostic 
1557710Sbostic #include <sys/types.h>
1657710Sbostic 
1757710Sbostic #include <regex.h>
1857710Sbostic #include <setjmp.h>
1957710Sbostic #include <stdio.h>
2057710Sbostic #include <stdlib.h>
2157710Sbostic #include <string.h>
2257710Sbostic 
2358315Sbostic #ifdef DBI
2458315Sbostic #include <db.h>
2558315Sbostic #endif
2658315Sbostic 
2757698Sbostic #include "ed.h"
2857710Sbostic #include "extern.h"
2957698Sbostic 
3057698Sbostic /*
3157698Sbostic  * This finds the n-th occurrence of an RE in a line. If '^' was at the start
3257710Sbostic  * of the RE then look once (in case n=1). There is no standard RE interface
3357710Sbostic  * to do this.  Returns 0 for success.  NOTE: the #ifdef REG_STARTEND is if
3457710Sbostic  * the regex package has the BSD extensions to it.
3557698Sbostic  */
3657698Sbostic int
3757698Sbostic #ifdef REG_STARTEND
3857698Sbostic regexec_n(reprecomp, strg, num_subexp, reprematch, flags, n, len, pass)
3957698Sbostic #else
4057698Sbostic regexec_n(reprecomp, strg, num_subexp, reprematch, flags, n, offset, pass)
4157698Sbostic #endif
4257710Sbostic 	regex_t *reprecomp;
4357710Sbostic 	char *strg;
4457710Sbostic 	size_t num_subexp;
4557710Sbostic 	regmatch_t reprematch[];
4657710Sbostic 	int flags, n;
4757698Sbostic #ifdef REG_STARTEND
4857710Sbostic 	size_t len;
4957698Sbostic #else
5057710Sbostic 	size_t *offset;
5157698Sbostic #endif
5257710Sbostic 	int pass; /* if pass == 0 .rm_so user set, else set default */
5357698Sbostic {
5459475Sbostic 	int l_cnt, l_flag=0;
5557698Sbostic #ifndef REG_STARTEND
5657710Sbostic 	char *l_offset = strg;
5757698Sbostic #endif
5857698Sbostic 
5957710Sbostic 	if (n <= 0)
6057710Sbostic 		return (REG_NOMATCH);
6157698Sbostic #ifdef REG_STARTEND
6257710Sbostic 	flags = (flags | REG_STARTEND);
6357710Sbostic 	if (pass)
6457710Sbostic 		reprematch[0].rm_so = 0;
6557710Sbostic 	reprematch[0].rm_eo = len;
6659475Sbostic 	if (!reprematch[0].rm_so)
6759475Sbostic 		l_flag = 1;
6857698Sbostic #else
6957710Sbostic 	strg = &strg[offset];
7059475Sbostic 	if (!offset)
7159475Sbostic 		l_flag = 1;
7257698Sbostic #endif
7357710Sbostic 	for (l_cnt = 0;;) {
7457710Sbostic 		if (regexec(reprecomp,
7557710Sbostic 		    strg, num_subexp, reprematch, flags) == 0)
7657710Sbostic 			l_cnt++;
7757710Sbostic 		else
7857710Sbostic 			return (REG_NOMATCH);
7959475Sbostic 		/* to skip over null RE matchings */
8059475Sbostic 		if (l_flag)
8159475Sbostic 			l_flag = 0;
8259475Sbostic 		else
8359475Sbostic 			if (reprematch[0].rm_so == reprematch[0].rm_eo) {
8459475Sbostic 				l_cnt--;
8559475Sbostic 				if ((++reprematch[0].rm_eo) > len)
8659475Sbostic 					return (REG_NOMATCH);
8759475Sbostic 			}
8857710Sbostic 		if (l_cnt >= n)
8957710Sbostic 			break;
9057698Sbostic #ifdef REG_STARTEND
9157710Sbostic 		reprematch[0].rm_so = reprematch[0].rm_eo;
9257710Sbostic 		reprematch[0].rm_eo = len;
9357698Sbostic #else
9457710Sbostic 		strg = &strg[reprematch[0].rm_eo];
9557698Sbostic #endif
9657710Sbostic 		/* if a "^" started the current RE we only loop once */
9757710Sbostic 		if (RE_sol)
9857710Sbostic 			return (REG_NOMATCH);
9957710Sbostic 	}
10057698Sbostic #ifndef REG_STARTEND
10157710Sbostic 	*offset = (size_t) (strg - l_offset);
10257698Sbostic #endif
10357710Sbostic 	return (0);		/* success */
10457710Sbostic }
10557698Sbostic 
10657698Sbostic /*
10757698Sbostic  * Replace in the line specified at the found locations with the
10857698Sbostic  * specified replacement. There is no standard RE interface to do
10957698Sbostic  * this.
11057698Sbostic  */
11157710Sbostic char *
11257698Sbostic #ifdef REG_STARTEND
11357710Sbostic re_replace(line, num_subexp, repmatch, replacer)
11457698Sbostic #else
11557710Sbostic re_replace(line, num_subexp, repmatch, replacer, offset)
11657698Sbostic #endif
11757710Sbostic 	char *line;
11857710Sbostic 	size_t num_subexp;
11957710Sbostic 	regmatch_t repmatch[];
12057710Sbostic 	char *replacer;
12157698Sbostic #ifndef REG_STARTEND
12257710Sbostic 	size_t offset;
12357698Sbostic #endif
12457698Sbostic {
12557710Sbostic 	static char *l_prev_r = NULL;
12657710Sbostic 	static int l_prev_r_flag = 0;
12757710Sbostic 	regoff_t l_len_before, l_len_whole, l_slen[RE_SEC];
12857710Sbostic 	int l_cnt, l_len_new = 0, l_new_rm_eo = 0;
12957710Sbostic 	char *l_string, *l_head;
13057698Sbostic 
13157710Sbostic 	if (l_prev_r_flag == 0) {
13257710Sbostic 		l_prev_r_flag = 1;
13357710Sbostic 		l_prev_r = NULL;
13457710Sbostic 	}
13557710Sbostic 	l_head = replacer;
13657710Sbostic 	/* Length of what stays the same before. */
13757710Sbostic 	l_len_before = (repmatch[0].rm_so);
13857710Sbostic 	l_len_whole = strlen(line);
13957710Sbostic 	if (num_subexp > RE_SEC - 1)
14057710Sbostic 		num_subexp = RE_SEC - 1;
14157710Sbostic 	for (l_cnt = 0; l_cnt <= num_subexp; l_cnt++)
14257710Sbostic 		l_slen[l_cnt] =
14357710Sbostic 		    (repmatch[l_cnt].rm_eo) - (repmatch[l_cnt].rm_so);
14457698Sbostic 
14557710Sbostic 	/*
14657710Sbostic 	 * l_slen[0] == len of what is to be replaced.
14757710Sbostic 	 * l_slen[1-9] == len of each backref.
14857710Sbostic 	 */
14957710Sbostic 	if ((*replacer == '%') && (replacer[1] == 1)) {
15057710Sbostic 		l_string = calloc(l_len_whole - l_slen[0] +
15157710Sbostic 		    (strlen(l_prev_r)) + 2, sizeof(char));
15257710Sbostic 		if (l_string == NULL) {
15357710Sbostic 			/* *errnum = -1; */
15457710Sbostic 			strcpy(help_msg, "out of memory error");
15557710Sbostic 			return (NULL);
15657710Sbostic 		}
15757698Sbostic #ifdef REG_STARTEND
158*59483Sbostic 		memmove(l_string, line, (int) l_len_before);
15957698Sbostic #else
160*59483Sbostic 		memmove(l_string, line, (int) l_len_before + offset);
16157698Sbostic #endif
16257698Sbostic #ifdef REG_STARTEND
16357710Sbostic 		l_string[l_len_before] = '\0';
16457698Sbostic #else
16557710Sbostic 		l_string[l_len_before + offset] = '\0';
16657698Sbostic #endif
16757710Sbostic 		strcat(l_string, l_prev_r);
16857698Sbostic #ifdef REG_STARTEND
16957710Sbostic 		strcat(l_string, &line[repmatch[0].rm_eo]);
17057698Sbostic #else
17157710Sbostic 		strcat(l_string, &line[repmatch[0].rm_eo + offset]);
17257698Sbostic #endif
17357710Sbostic 		return (l_string);
17457710Sbostic 	}
17557698Sbostic 
17657710Sbostic 	/* Figure out length of new line first. */
17757710Sbostic 	while (*replacer != '\0') {
17857710Sbostic 		/* Add in the length of the RE match. */
17957710Sbostic 		if (*replacer == '&')
18057710Sbostic 			l_len_new = l_len_new + l_slen[0];
18157710Sbostic 		/* Add in the length of a backref. */
18257710Sbostic 		else if (*replacer == '\\') {
18357710Sbostic 			replacer++;
18457710Sbostic 			if ((*replacer > '0') &&
18557710Sbostic 			    (*replacer < ('9' + 1)) &&
18657710Sbostic 			    (repmatch[*replacer - '0'].rm_so > -1))
18757710Sbostic 				/* -1 - -1 = 0 */
18857710Sbostic 				l_len_new = l_len_new + l_slen[*replacer - '0'];
18957710Sbostic 			else
19057710Sbostic 				l_len_new++;
19157710Sbostic 		} else
19257710Sbostic 			l_len_new++;
19357710Sbostic 		replacer++;
19457710Sbostic 	}
19557698Sbostic 
19657710Sbostic 	/* Create the line of an appropriate length. */
19757710Sbostic 	l_string =
19857710Sbostic 	    calloc(l_len_whole - l_slen[0] + l_len_new + 2, sizeof(char));
19957710Sbostic 	if (l_string == NULL) {
20057710Sbostic 		strcpy(help_msg, "out of memory error");
20157710Sbostic 		return (NULL);
20257710Sbostic 	}
20357710Sbostic 	if (l_prev_r != NULL)
20457710Sbostic 		free(l_prev_r);
20557710Sbostic 	l_prev_r = calloc(l_len_new + 2, sizeof(char));
20657710Sbostic 	if (l_prev_r == NULL) {
20757710Sbostic 		strcpy(help_msg, "out of memory error");
20857710Sbostic 		return (NULL);
20957710Sbostic 	}
21057710Sbostic 	/* Copy over what doesn't change before the chars to be replaced. */
21157698Sbostic #ifdef REG_STARTEND
212*59483Sbostic 	memmove(l_string, line, (size_t)l_len_before);
21357698Sbostic #else
214*59483Sbostic 	memmove(l_string, line, l_len_before + offset);
21557698Sbostic #endif
21657698Sbostic #ifdef REG_STARTEND
21757710Sbostic 	l_string[l_len_before] = '\0';
21857698Sbostic #else
21957710Sbostic 	l_string[l_len_before + offset] = '\0';
22057698Sbostic #endif
22157710Sbostic 	l_prev_r[0] = '\0';
22257698Sbostic 
22357710Sbostic 	/* Make the replacement. */
22457710Sbostic 	replacer = l_head;
22557710Sbostic 	while (*replacer != '\0') {
22657710Sbostic 		/* Put what matched the RE into the replacement. */
22757710Sbostic 		if (*replacer == '&') {
22857698Sbostic #ifdef REG_STARTEND
22957710Sbostic 			strncat(l_string,
23057710Sbostic 			    &line[repmatch[0].rm_so], (int)l_slen[0]);
23157710Sbostic 			strncat(l_prev_r,
23257710Sbostic 			    &line[repmatch[0].rm_so], (int) l_slen[0]);
23357698Sbostic #else
23457710Sbostic 			strncat(l_string,
23557710Sbostic 			    &line[repmatch[0].rm_so + offset], (int) l_slen[0]);
23657710Sbostic 			strncat(l_prev_r,
23757710Sbostic 			    &line[repmatch[0].rm_so + offset], (int) l_slen[0]);
23857698Sbostic #endif
23957710Sbostic 		} else if (*replacer == '\\') {
24057710Sbostic 			/* Likely a backref to be included. */
24157710Sbostic 			replacer++;
24257710Sbostic 			if ((*replacer > '0') && (*replacer < ('9' + 1)) &&
24357710Sbostic 			    (repmatch[*replacer - '0'].rm_so > -1)) {
24457698Sbostic #ifdef REG_STARTEND
24557710Sbostic 				strncat(l_string,
24657710Sbostic 				    &line[repmatch[*replacer - '0'].rm_so],
24757710Sbostic 				    (int) l_slen[*replacer - '0']);
24857710Sbostic 				strncat(l_prev_r,
24957710Sbostic 				    &line[repmatch[*replacer - '0'].rm_so],
25057710Sbostic 				    (int) l_slen[*replacer - '0']);
25157698Sbostic #else
25257710Sbostic 				strncat(l_string,
25357710Sbostic 				    &line[repmatch[*replacer - '0'].rm_so +
25457710Sbostic 				    offset], (int) l_slen[*replacer - '0']);
25557710Sbostic 				strncat(l_prev_r,
25657710Sbostic 				    &line[repmatch[*replacer - '0'].rm_so +
25757710Sbostic 				    offset], (int) l_slen[*replacer - '0']);
25857698Sbostic #endif
25957710Sbostic 			}
26057710Sbostic 			/* Put the replacement in. */
26157710Sbostic 			else {
26257710Sbostic 				strncat(l_string, replacer, 1);
26357710Sbostic 				strncat(l_prev_r, replacer, 1);
26457710Sbostic 			}
26557710Sbostic 		}
26657710Sbostic 		/* Put the replacement in. */
26757710Sbostic 		else {
26857710Sbostic 			strncat(l_string, replacer, 1);
26957710Sbostic 			strncat(l_prev_r, replacer, 1);
27057710Sbostic 		}
27157710Sbostic 		replacer++;
27257710Sbostic 	}
27357698Sbostic 
27457710Sbostic 	l_new_rm_eo = strlen(l_string);
27557698Sbostic 
27657710Sbostic 	/* Copy over what was after the chars to be replaced to the new line. */
27757698Sbostic #ifdef REG_STARTEND
27857710Sbostic 	strcat(l_string, &line[repmatch[0].rm_eo]);
27957698Sbostic #else
28057710Sbostic 	strcat(l_string, &line[repmatch[0].rm_eo + offset]);
28157698Sbostic #endif
28257698Sbostic 
28357710Sbostic 	repmatch[0].rm_eo = l_new_rm_eo;	/* Update rm_eo. */
28457698Sbostic #ifndef REG_STARTEND
28557710Sbostic 	offset += l_new_rm_eo;			/* Update offset. */
28657698Sbostic #endif
28757710Sbostic 	return (l_string);			/* Return the new line. */
28857710Sbostic }
289