xref: /netbsd-src/external/bsd/nvi/dist/ex/ex_subst.c (revision f8570f8a97f412a33574ec77d412b16dbe332965)
1 /*	$NetBSD: ex_subst.c,v 1.5 2023/08/01 07:04:14 mrg Exp $ */
2 /*-
3  * Copyright (c) 1992, 1993, 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 1992, 1993, 1994, 1995, 1996
6  *	Keith Bostic.  All rights reserved.
7  *
8  * See the LICENSE file for redistribution information.
9  */
10 
11 #include "config.h"
12 
13 #include <sys/cdefs.h>
14 #if 0
15 #ifndef lint
16 static const char sccsid[] = "Id: ex_subst.c,v 10.50 2002/02/09 21:18:23 skimo Exp  (Berkeley) Date: 2002/02/09 21:18:23 ";
17 #endif /* not lint */
18 #else
19 __RCSID("$NetBSD: ex_subst.c,v 1.5 2023/08/01 07:04:14 mrg Exp $");
20 #endif
21 
22 #include <sys/types.h>
23 #include <sys/queue.h>
24 #include <sys/time.h>
25 
26 #include <bitstring.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <limits.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <unistd.h>
34 
35 #include "../common/common.h"
36 #include "../vi/vi.h"
37 
/*
 * Flags passed by the ex_s*() entry points to s() in its "flags" argument.
 */
38 #define	SUB_FIRST	0x01		/* A new RE was given, s() rejects the 'r' flag. */
39 #define	SUB_MUSTSETR	0x02		/* s() fails with a usage error unless 'r' is given. */
40 
/* File-local helpers; the definitions appear later in this file. */
41 static int re_conv __P((SCR *, CHAR_T **, size_t *, int *));
42 static int re_cscope_conv __P((SCR *, CHAR_T **, size_t *, int *));
43 static int re_sub __P((SCR *,
44 		CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]));
45 static int re_tag_conv __P((SCR *, CHAR_T **, size_t *, int *));
46 static int s __P((SCR *, EXCMD *, CHAR_T *, regex_t *, u_int));
47 
48 /*
49  * ex_s --
50  *	[line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
51  *
52  *	Substitute on lines matching a pattern.
53  *
54  * PUBLIC: int ex_s __P((SCR *, EXCMD *));
55  */
56 int
ex_s(SCR * sp,EXCMD * cmdp)57 ex_s(SCR *sp, EXCMD *cmdp)
58 {
59 	regex_t *re;
60 	size_t blen, len;
61 	u_int flags;
62 	ARG_CHAR_T delim;
63 	CHAR_T *bp, *p, *ptrn, *rep, *t;
64 
65 	/*
66 	 * Skip leading white space.
67 	 *
68 	 * !!!
69 	 * Historic vi allowed any non-alphanumeric to serve as the
70 	 * substitution command delimiter.
71 	 *
72 	 * !!!
73 	 * If the arguments are empty, it's the same as &, i.e. we
74 	 * repeat the last substitution.
75 	 */
76 	if (cmdp->argc == 0)
77 		goto subagain;
78 	for (p = cmdp->argv[0]->bp,
79 	    len = cmdp->argv[0]->len; len > 0; --len, ++p) {
80 		if (!ISBLANK((UCHAR_T)*p))
81 			break;
82 	}
83 	if (len == 0)
84 subagain:	return (ex_subagain(sp, cmdp));
85 
86 	delim = (UCHAR_T)*p++;
87 	if (ISALNUM(delim) || delim == '\\')
88 		return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
89 
90 	/*
91 	 * !!!
92 	 * The full-blown substitute command reset the remembered
93 	 * state of the 'c' and 'g' suffices.
94 	 */
95 	sp->c_suffix = sp->g_suffix = 0;
96 
97 	/*
98 	 * Get the pattern string, toss escaping characters.
99 	 *
100 	 * !!!
101 	 * Historic vi accepted any of the following forms:
102 	 *
103 	 *	:s/abc/def/		change "abc" to "def"
104 	 *	:s/abc/def		change "abc" to "def"
105 	 *	:s/abc/			delete "abc"
106 	 *	:s/abc			delete "abc"
107 	 *
108 	 * QUOTING NOTE:
109 	 *
110 	 * Only toss an escaping character if it escapes a delimiter.
111 	 * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
112 	 * would be nice to be more regular, i.e. for each layer of
113 	 * escaping a single escaping character is removed, but that's
114 	 * not how the historic vi worked.
115 	 */
116 	for (ptrn = t = p;;) {
117 		if (p[0] == '\0' || p[0] == delim) {
118 			if (p[0] == delim)
119 				++p;
120 			/*
121 			 * !!!
122 			 * Nul terminate the pattern string -- it's passed
123 			 * to regcomp which doesn't understand anything else.
124 			 */
125 			*t = '\0';
126 			break;
127 		}
128 		if (p[0] == '\\') {
129 			if (p[1] == delim)
130 				++p;
131 			else if (p[1] == '\\')
132 				*t++ = *p++;
133 		}
134 		*t++ = *p++;
135 	}
136 
137 	/*
138 	 * If the pattern string is empty, use the last RE (not just the
139 	 * last substitution RE).
140 	 */
141 	if (*ptrn == '\0') {
142 		if (sp->re == NULL) {
143 			ex_emsg(sp, NULL, EXM_NOPREVRE);
144 			return (1);
145 		}
146 
147 		/* Re-compile the RE if necessary. */
148 		if (!F_ISSET(sp, SC_RE_SEARCH) &&
149 		    re_compile(sp, sp->re, sp->re_len,
150 		    NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
151 			return (1);
152 		flags = 0;
153 	} else {
154 		/*
155 		 * !!!
156 		 * Compile the RE.  Historic practice is that substitutes set
157 		 * the search direction as well as both substitute and search
158 		 * RE's.  We compile the RE twice, as we don't want to bother
159 		 * ref counting the pattern string and (opaque) structure.
160 		 */
161 		if (re_compile(sp, ptrn, t - ptrn, &sp->re,
162 		    &sp->re_len, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
163 			return (1);
164 		if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
165 		    &sp->subre_len, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
166 			return (1);
167 
168 		flags = SUB_FIRST;
169 		sp->searchdir = FORWARD;
170 	}
171 	re = &sp->re_c;
172 
173 	/*
174 	 * Get the replacement string.
175 	 *
176 	 * The special character & (\& if O_MAGIC not set) matches the
177 	 * entire RE.  No handling of & is required here, it's done by
178 	 * re_sub().
179 	 *
180 	 * The special character ~ (\~ if O_MAGIC not set) inserts the
181 	 * previous replacement string into this replacement string.
182 	 * Count ~'s to figure out how much space we need.  We could
183 	 * special case nonexistent last patterns or whether or not
184 	 * O_MAGIC is set, but it's probably not worth the effort.
185 	 *
186 	 * QUOTING NOTE:
187 	 *
188 	 * Only toss an escaping character if it escapes a delimiter or
189 	 * if O_MAGIC is set and it escapes a tilde.
190 	 *
191 	 * !!!
192 	 * If the entire replacement pattern is "%", then use the last
193 	 * replacement pattern.  This semantic was added to vi in System
194 	 * V and then percolated elsewhere, presumably around the time
195 	 * that it was added to their version of ed(1).
196 	 */
197 	if (p[0] == L('\0') || p[0] == delim) {
198 		if (p[0] == delim)
199 			++p;
200 		if (sp->repl != NULL)
201 			free(sp->repl);
202 		sp->repl = NULL;
203 		sp->repl_len = 0;
204 	} else if (p[0] == L('%') && (p[1] == L('\0') || p[1] == delim))
205 		p += p[1] == delim ? 2 : 1;
206 	else {
207 		for (rep = p, len = 0;
208 		    p[0] != L('\0') && p[0] != delim; ++p, ++len)
209 			if (p[0] == L('~'))
210 				len += sp->repl_len;
211 		GET_SPACE_RETW(sp, bp, blen, len);
212 		for (t = bp, len = 0, p = rep;;) {
213 			if (p[0] == L('\0') || p[0] == delim) {
214 				if (p[0] == delim)
215 					++p;
216 				break;
217 			}
218 			if (p[0] == L('\\')) {
219 				if (p[1] == delim)
220 					++p;
221 				else if (p[1] == L('\\')) {
222 					*t++ = *p++;
223 					++len;
224 				} else if (p[1] == L('~')) {
225 					++p;
226 					if (!O_ISSET(sp, O_MAGIC))
227 						goto tilde;
228 				}
229 			} else if (p[0] == L('~') && O_ISSET(sp, O_MAGIC)) {
230 tilde:				++p;
231 				MEMCPYW(t, sp->repl, sp->repl_len);
232 				t += sp->repl_len;
233 				len += sp->repl_len;
234 				continue;
235 			}
236 			*t++ = *p++;
237 			++len;
238 		}
239 		if ((sp->repl_len = len) != 0) {
240 			if (sp->repl != NULL)
241 				free(sp->repl);
242 			if ((sp->repl = malloc(len * sizeof(CHAR_T))) == NULL) {
243 				msgq(sp, M_SYSERR, NULL);
244 				FREE_SPACEW(sp, bp, blen);
245 				return (1);
246 			}
247 			MEMCPYW(sp->repl, bp, len);
248 		}
249 		FREE_SPACEW(sp, bp, blen);
250 	}
251 	return (s(sp, cmdp, p, re, flags));
252 }
253 
254 /*
255  * ex_subagain --
256  *	[line [,line]] & [cgr] [count] [#lp]]
257  *
258  *	Substitute using the last substitute RE and replacement pattern.
259  *
260  * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
261  */
262 int
ex_subagain(SCR * sp,EXCMD * cmdp)263 ex_subagain(SCR *sp, EXCMD *cmdp)
264 {
265 	if (sp->subre == NULL) {
266 		ex_emsg(sp, NULL, EXM_NOPREVRE);
267 		return (1);
268 	}
269 	if (!F_ISSET(sp, SC_RE_SUBST) &&
270 	    re_compile(sp, sp->subre, sp->subre_len,
271 	    NULL, NULL, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
272 		return (1);
273 	return (s(sp,
274 	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
275 }
276 
277 /*
278  * ex_subtilde --
279  *	[line [,line]] ~ [cgr] [count] [#lp]]
280  *
281  *	Substitute using the last RE and last substitute replacement pattern.
282  *
283  * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
284  */
285 int
ex_subtilde(SCR * sp,EXCMD * cmdp)286 ex_subtilde(SCR *sp, EXCMD *cmdp)
287 {
288 	if (sp->re == NULL) {
289 		ex_emsg(sp, NULL, EXM_NOPREVRE);
290 		return (1);
291 	}
292 	if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
293 	    sp->re_len, NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
294 		return (1);
295 	return (s(sp,
296 	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
297 }
298 
299 /*
300  * s --
301  * Do the substitution.  This stuff is *really* tricky.  There are lots of
302  * special cases, and general nastiness.  Don't mess with it unless you're
303  * pretty confident.
304  *
305  * The nasty part of the substitution is what happens when the replacement
306  * string contains newlines.  It's a bit tricky -- consider the information
307  * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
308  * to build a set of newline offsets which we use to break the line up later,
309  * when the replacement is done.  Don't change it unless you're *damned*
310  * confident.
311  */
312 #define	NEEDNEWLINE(sp) {						\
313 	if (sp->newl_len == sp->newl_cnt) {				\
314 		sp->newl_len += 25;					\
315 		REALLOC(sp, sp->newl, size_t *,				\
316 		    sp->newl_len * sizeof(size_t));			\
317 		if (sp->newl == NULL) {					\
318 			sp->newl_len = 0;				\
319 			return (1);					\
320 		}							\
321 	}								\
322 }
323 
324 #define	BUILD(sp, l, len) {						\
325 	if (lbclen + (len) > lblen) {					\
326 		lblen += MAX(lbclen + (len), 256);			\
327 		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
328 		if (lb == NULL) {					\
329 			lbclen = 0;					\
330 			return (1);					\
331 		}							\
332 	}								\
333 	MEMCPYW(lb + lbclen, l, len);					\
334 	lbclen += len;							\
335 }
336 
337 #define	NEEDSP(sp, len, pnt) {						\
338 	if (lbclen + (len) > lblen) {					\
339 		lblen += MAX(lbclen + (len), 256);			\
340 		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
341 		if (lb == NULL) {					\
342 			lbclen = 0;					\
343 			return (1);					\
344 		}							\
345 		pnt = lb + lbclen;					\
346 	}								\
347 }
348 
349 static int
s(SCR * sp,EXCMD * cmdp,CHAR_T * st,regex_t * re,u_int flags)350 s(SCR *sp, EXCMD *cmdp, CHAR_T *st, regex_t *re, u_int flags)
351 {
352 	EVENT ev;
353 	MARK from, to;
354 	TEXTH tiq;
355 	db_recno_t elno, lno, slno;
356 	u_long ul;
357 	regmatch_t match[10];
358 	size_t blen, cnt, last, lbclen, lblen, len, llen;
359 	size_t offset, saved_offset, scno;
360 	int lflag, nflag, pflag, rflag;
361 	int didsub, do_eol_match, eflags, empty_ok, eval;
362 	int linechanged, matched, quit, rval;
363 	CHAR_T *lb, *bp;
364 	enum nresult nret;
365 
366 	NEEDFILE(sp, cmdp);
367 
368 	slno = sp->lno;
369 	scno = sp->cno;
370 
371 	/*
372 	 * !!!
373 	 * Historically, the 'g' and 'c' suffices were always toggled as flags,
374 	 * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
375 	 * not set, they were initialized to 0 for all substitute commands.  If
376 	 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
377 	 * specified substitute/replacement patterns (see ex_s()).
378 	 */
379 	if (!O_ISSET(sp, O_EDCOMPATIBLE))
380 		sp->c_suffix = sp->g_suffix = 0;
381 
382 	/*
383 	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
384 	 * it only displayed the last change.  I'd disallow them, but they are
385 	 * useful in combination with the [v]global commands.  In the current
386 	 * model the problem is combining them with the 'c' flag -- the screen
387 	 * would have to flip back and forth between the confirm screen and the
388 	 * ex print screen, which would be pretty awful.  We do display all
389 	 * changes, though, for what that's worth.
390 	 *
391 	 * !!!
392 	 * Historic vi was fairly strict about the order of "options", the
393 	 * count, and "flags".  I'm somewhat fuzzy on the difference between
394 	 * options and flags, anyway, so this is a simpler approach, and we
395 	 * just take it them in whatever order the user gives them.  (The ex
396 	 * usage statement doesn't reflect this.)
397 	 */
398 	lflag = nflag = pflag = rflag = 0;
399 	if (st == NULL)
400 		goto noargs;
401 	for (lno = OOBLNO; *st != '\0'; ++st)
402 		switch (*st) {
403 		case ' ':
404 		case '\t':
405 			continue;
406 		case '+':
407 			++cmdp->flagoff;
408 			break;
409 		case '-':
410 			--cmdp->flagoff;
411 			break;
412 		case '0': case '1': case '2': case '3': case '4':
413 		case '5': case '6': case '7': case '8': case '9':
414 			if (lno != OOBLNO)
415 				goto usage;
416 			errno = 0;
417 			nret = nget_uslong(sp, &ul, st, &st, 10);
418 			lno = ul;
419 			if (*st == '\0')		/* Loop increment correction. */
420 				--st;
421 			if (nret != NUM_OK) {
422 				if (nret == NUM_OVER)
423 					msgq(sp, M_ERR, "153|Count overflow");
424 				else if (nret == NUM_UNDER)
425 					msgq(sp, M_ERR, "154|Count underflow");
426 				else
427 					msgq(sp, M_SYSERR, NULL);
428 				return (1);
429 			}
430 			/*
431 			 * In historic vi, the count was inclusive from the
432 			 * second address.
433 			 */
434 			cmdp->addr1.lno = cmdp->addr2.lno;
435 			cmdp->addr2.lno += lno - 1;
436 			if (!db_exist(sp, cmdp->addr2.lno) &&
437 			    db_last(sp, &cmdp->addr2.lno))
438 				return (1);
439 			break;
440 		case '#':
441 			nflag = 1;
442 			break;
443 		case 'c':
444 			sp->c_suffix = !sp->c_suffix;
445 
446 			/* Ex text structure initialization. */
447 			if (F_ISSET(sp, SC_EX)) {
448 				memset(&tiq, 0, sizeof(TEXTH));
449 				TAILQ_INIT(&tiq);
450 			}
451 			break;
452 		case 'g':
453 			sp->g_suffix = !sp->g_suffix;
454 			break;
455 		case 'l':
456 			lflag = 1;
457 			break;
458 		case 'p':
459 			pflag = 1;
460 			break;
461 		case 'r':
462 			if (LF_ISSET(SUB_FIRST)) {
463 				msgq(sp, M_ERR,
464 		    "155|Regular expression specified; r flag meaningless");
465 				return (1);
466 			}
467 			if (!F_ISSET(sp, SC_RE_SEARCH)) {
468 				ex_emsg(sp, NULL, EXM_NOPREVRE);
469 				return (1);
470 			}
471 			rflag = 1;
472 			re = &sp->re_c;
473 			break;
474 		default:
475 			goto usage;
476 		}
477 
478 	if (*st != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
479 usage:		ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
480 		return (1);
481 	}
482 
483 noargs:	if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
484 		msgq(sp, M_ERR,
485 "156|The #, l and p flags may not be combined with the c flag in vi mode");
486 		return (1);
487 	}
488 
489 	/*
490 	 * bp:		if interactive, line cache
491 	 * blen:	if interactive, line cache length
492 	 * lb:		build buffer pointer.
493 	 * lbclen:	current length of built buffer.
494 	 * lblen;	length of build buffer.
495 	 */
496 	bp = lb = NULL;
497 	blen = lbclen = lblen = 0;
498 
499 	/* For each line... */
500 	lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
501 	for (matched = quit = 0,
502 	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
503 
504 		/* Someone's unhappy, time to stop. */
505 		if (INTERRUPTED(sp))
506 			break;
507 
508 		/* Get the line. */
509 		if (db_get(sp, lno, DBG_FATAL, &st, &llen))
510 			goto err;
511 
512 		/*
513 		 * Make a local copy if doing confirmation -- when calling
514 		 * the confirm routine we're likely to lose the cached copy.
515 		 */
516 		if (sp->c_suffix) {
517 			if (bp == NULL) {
518 				GET_SPACE_RETW(sp, bp, blen, llen);
519 			} else
520 				ADD_SPACE_RETW(sp, bp, blen, llen);
521 			MEMCPYW(bp, st, llen);
522 			st = bp;
523 		}
524 
525 		/* Start searching from the beginning. */
526 		offset = 0;
527 		len = llen;
528 
529 		/* Reset the build buffer offset. */
530 		lbclen = 0;
531 
532 		/* Reset empty match flag. */
533 		empty_ok = 1;
534 
535 		/*
536 		 * We don't want to have to do a setline if the line didn't
537 		 * change -- keep track of whether or not this line changed.
538 		 * If doing confirmations, don't want to keep setting the
539 		 * line if change is refused -- keep track of substitutions.
540 		 */
541 		didsub = linechanged = 0;
542 
543 		/* New line, do an EOL match. */
544 		do_eol_match = 1;
545 
546 		/* It's not nul terminated, but we pretend it is. */
547 		eflags = REG_STARTEND;
548 
549 		/*
550 		 * The search area is from st + offset to the EOL.
551 		 *
552 		 * Generally, match[0].rm_so is the offset of the start
553 		 * of the match from the start of the search, and offset
554 		 * is the offset of the start of the last search.
555 		 */
556 nextmatch:	match[0].rm_so = 0;
557 		match[0].rm_eo = len;
558 
559 		/* Get the next match. */
560 		eval = regexec(re, st + offset, 10, match, eflags);
561 
562 		/*
563 		 * There wasn't a match or if there was an error, deal with
564 		 * it.  If there was a previous match in this line, resolve
565 		 * the changes into the database.  Otherwise, just move on.
566 		 */
567 		if (eval == REG_NOMATCH)
568 			goto endmatch;
569 		if (eval != 0) {
570 			re_error(sp, eval, re);
571 			goto err;
572 		}
573 		matched = 1;
574 
575 		/* Only the first search can match an anchored expression. */
576 		eflags |= REG_NOTBOL;
577 
578 		/*
579 		 * !!!
580 		 * It's possible to match 0-length strings -- for example, the
581 		 * command s;a*;X;, when matched against the string "aabb" will
582 		 * result in "XbXbX", i.e. the matches are "aa", the space
583 		 * between the b's and the space between the b's and the end of
584 		 * the string.  There is a similar space between the beginning
585 		 * of the string and the a's.  The rule that we use (because vi
586 		 * historically used it) is that any 0-length match, occurring
587 		 * immediately after a match, is ignored.  Otherwise, the above
588 		 * example would have resulted in "XXbXbX".  Another example is
589 		 * incorrectly using " *" to replace groups of spaces with one
590 		 * space.
591 		 *
592 		 * The way we do this is that if we just had a successful match,
593 		 * the starting offset does not skip characters, and the match
594 		 * is empty, ignore the match and move forward.  If there's no
595 		 * more characters in the string, we were attempting to match
596 		 * after the last character, so quit.
597 		 */
598 		if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
599 			empty_ok = 1;
600 			if (len == 0)
601 				goto endmatch;
602 			BUILD(sp, st + offset, 1)
603 			++offset;
604 			--len;
605 			goto nextmatch;
606 		}
607 
608 		/* Confirm change. */
609 		if (sp->c_suffix) {
610 			/*
611 			 * Set the cursor position for confirmation.  Note,
612 			 * if we matched on a '$', the cursor may be past
613 			 * the end of line.
614 			 */
615 			from.lno = to.lno = lno;
616 			from.cno = match[0].rm_so + offset;
617 			to.cno = match[0].rm_eo + offset;
618 			/*
619 			 * Both ex and vi have to correct for a change before
620 			 * the first character in the line.
621 			 */
622 			if (llen == 0)
623 				from.cno = to.cno = 0;
624 			if (F_ISSET(sp, SC_VI)) {
625 				/*
626 				 * Only vi has to correct for a change after
627 				 * the last character in the line.
628 				 *
629 				 * XXX
630 				 * It would be nice to change the vi code so
631 				 * that we could display a cursor past EOL.
632 				 */
633 				if (to.cno >= llen)
634 					to.cno = llen - 1;
635 				if (from.cno >= llen)
636 					from.cno = llen - 1;
637 
638 				sp->lno = from.lno;
639 				sp->cno = from.cno;
640 				if (vs_refresh(sp, 1))
641 					goto err;
642 
643 				vs_update(sp, msg_cat(sp,
644 				    "169|Confirm change? [n]", NULL), NULL);
645 
646 				if (v_event_get(sp, &ev, 0, 0))
647 					goto err;
648 				switch (ev.e_event) {
649 				case E_CHARACTER:
650 					break;
651 				case E_EOF:
652 				case E_ERR:
653 				case E_INTERRUPT:
654 					goto lquit;
655 				default:
656 					v_event_err(sp, &ev);
657 					goto lquit;
658 				}
659 			} else {
660 				if (ex_print(sp, cmdp, &from, &to, 0) ||
661 				    ex_scprint(sp, &from, &to))
662 					goto lquit;
663 				if (ex_txt(sp, &tiq, 0, TXT_CR))
664 					goto err;
665 				ev.e_c = TAILQ_FIRST(&tiq)->lb[0];
666 			}
667 
668 			switch (ev.e_c) {
669 			case CH_YES:
670 				break;
671 			default:
672 			case CH_NO:
673 				didsub = 0;
674 				BUILD(sp, st + offset, match[0].rm_eo);
675 				goto skip;
676 			case CH_QUIT:
677 				/* Set the quit/interrupted flags. */
678 lquit:				quit = 1;
679 				F_SET(sp->gp, G_INTERRUPTED);
680 
681 				/*
682 				 * Resolve any changes, then return to (and
683 				 * exit from) the main loop.
684 				 */
685 				goto endmatch;
686 			}
687 		}
688 
689 		/*
690 		 * Set the cursor to the last position changed, converting
691 		 * from 1-based to 0-based.
692 		 */
693 		sp->lno = lno;
694 		sp->cno = match[0].rm_so;
695 
696 		/* Copy the bytes before the match into the build buffer. */
697 		BUILD(sp, st + offset, match[0].rm_so);
698 
699 		/* Substitute the matching bytes. */
700 		didsub = 1;
701 		if (re_sub(sp, st + offset, &lb, &lbclen, &lblen, match))
702 			goto err;
703 
704 		/* Set the change flag so we know this line was modified. */
705 		linechanged = 1;
706 
707 		/* Move past the matched bytes. */
708 skip:		offset += match[0].rm_eo;
709 		len -= match[0].rm_eo;
710 
711 		/* A match cannot be followed by an empty pattern. */
712 		empty_ok = 0;
713 
714 		/*
715 		 * If doing a global change with confirmation, we have to
716 		 * update the screen.  The basic idea is to store the line
717 		 * so the screen update routines can find it, and restart.
718 		 */
719 		if (didsub && sp->c_suffix && sp->g_suffix) {
720 			/*
721 			 * The new search offset will be the end of the
722 			 * modified line.
723 			 */
724 			saved_offset = lbclen;
725 
726 			/* Copy the rest of the line. */
727 			if (len)
728 				BUILD(sp, st + offset, len)
729 
730 			/* Set the new offset. */
731 			offset = saved_offset;
732 
733 			/* Store inserted lines, adjusting the build buffer. */
734 			last = 0;
735 			if (sp->newl_cnt) {
736 				for (cnt = 0;
737 				    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
738 					if (db_insert(sp, lno,
739 					    lb + last, sp->newl[cnt] - last))
740 						goto err;
741 					last = sp->newl[cnt] + 1;
742 					++sp->rptlines[L_ADDED];
743 				}
744 				lbclen -= last;
745 				offset -= last;
746 				sp->newl_cnt = 0;
747 			}
748 
749 			/* Store and retrieve the line. */
750 			if (db_set(sp, lno, lb + last, lbclen))
751 				goto err;
752 			if (db_get(sp, lno, DBG_FATAL, &st, &llen))
753 				goto err;
754 			ADD_SPACE_RETW(sp, bp, blen, llen)
755 			MEMCPYW(bp, st, llen);
756 			st = bp;
757 			len = llen - offset;
758 
759 			/* Restart the build. */
760 			lbclen = 0;
761 			BUILD(sp, st, offset);
762 
763 			/*
764 			 * If we haven't already done the after-the-string
765 			 * match, do one.  Set REG_NOTEOL so the '$' pattern
766 			 * only matches once.
767 			 */
768 			if (!do_eol_match)
769 				goto endmatch;
770 			if (offset == len) {
771 				do_eol_match = 0;
772 				eflags |= REG_NOTEOL;
773 			}
774 			goto nextmatch;
775 		}
776 
777 		/*
778 		 * If it's a global:
779 		 *
780 		 * If at the end of the string, do a test for the after
781 		 * the string match.  Set REG_NOTEOL so the '$' pattern
782 		 * only matches once.
783 		 */
784 		if (sp->g_suffix && do_eol_match) {
785 			if (len == 0) {
786 				do_eol_match = 0;
787 				eflags |= REG_NOTEOL;
788 			}
789 			goto nextmatch;
790 		}
791 
792 endmatch:	if (!linechanged)
793 			continue;
794 
795 		/* Copy any remaining bytes into the build buffer. */
796 		if (len)
797 			BUILD(sp, st + offset, len)
798 
799 		/* Store inserted lines, adjusting the build buffer. */
800 		last = 0;
801 		if (sp->newl_cnt) {
802 			for (cnt = 0;
803 			    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
804 				if (db_insert(sp,
805 				    lno, lb + last, sp->newl[cnt] - last))
806 					goto err;
807 				last = sp->newl[cnt] + 1;
808 				++sp->rptlines[L_ADDED];
809 			}
810 			lbclen -= last;
811 			sp->newl_cnt = 0;
812 		}
813 
814 		/* Store the changed line. */
815 		if (db_set(sp, lno, lb + last, lbclen))
816 			goto err;
817 
818 		/* Update changed line counter. */
819 		if (sp->rptlchange != lno) {
820 			sp->rptlchange = lno;
821 			++sp->rptlines[L_CHANGED];
822 		}
823 
824 		/*
825 		 * !!!
826 		 * Display as necessary.  Historic practice is to only
827 		 * display the last line of a line split into multiple
828 		 * lines.
829 		 */
830 		if (lflag || nflag || pflag) {
831 			from.lno = to.lno = lno;
832 			from.cno = to.cno = 0;
833 			if (lflag)
834 				(void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
835 			if (nflag)
836 				(void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
837 			if (pflag)
838 				(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
839 		}
840 	}
841 
842 	/*
843 	 * !!!
844 	 * Historically, vi attempted to leave the cursor at the same place if
845 	 * the substitution was done at the current cursor position.  Otherwise
846 	 * it moved it to the first non-blank of the last line changed.  There
847 	 * were some problems: for example, :s/$/foo/ with the cursor on the
848 	 * last character of the line left the cursor on the last character, or
849 	 * the & command with multiple occurrences of the matching string in the
850 	 * line usually left the cursor in a fairly random position.
851 	 *
852 	 * We try to do the same thing, with the exception that if the user is
853 	 * doing substitution with confirmation, we move to the last line about
854 	 * which the user was consulted, as opposed to the last line that they
855 	 * actually changed.  This prevents a screen flash if the user doesn't
856 	 * change many of the possible lines.
857 	 */
858 	if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
859 		sp->cno = 0;
860 		(void)nonblank(sp, sp->lno, &sp->cno);
861 	}
862 
863 	/*
864 	 * If not in a global command, and nothing matched, say so.
865 	 * Else, if none of the lines displayed, put something up.
866 	 */
867 	rval = 0;
868 	if (!matched) {
869 		if (!F_ISSET(sp, SC_EX_GLOBAL)) {
870 			msgq(sp, M_ERR, "157|No match found");
871 			goto err;
872 		}
873 	} else if (!lflag && !nflag && !pflag)
874 		F_SET(cmdp, E_AUTOPRINT);
875 
876 	if (0) {
877 err:		rval = 1;
878 	}
879 
880 	if (bp != NULL)
881 		FREE_SPACEW(sp, bp, blen);
882 	if (lb != NULL)
883 		free(lb);
884 	return (rval);
885 }
886 
887 /*
888  * re_compile --
889  *	Compile the RE.
890  *
891  * PUBLIC: int re_compile __P((SCR *,
892  * PUBLIC:     CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int));
893  */
894 int
re_compile(SCR * sp,CHAR_T * ptrn,size_t plen,CHAR_T ** ptrnp,size_t * lenp,regex_t * rep,u_int flags)895 re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
896 {
897 	size_t len;
898 	int reflags, replaced, rval;
899 	CHAR_T *p;
900 
901 	/* Set RE flags. */
902 	reflags = 0;
903 	if (LF_ISSET(SEARCH_EXTEND))
904 		reflags |= REG_EXTENDED;
905 	if (LF_ISSET(SEARCH_IC))
906 		reflags |= REG_ICASE;
907 	if (LF_ISSET(SEARCH_LITERAL))
908 		reflags |= REG_NOSPEC;
909 	if (!LF_ISSET(SEARCH_NOOPT | SEARCH_CSCOPE | SEARCH_TAG)) {
910 		if (O_ISSET(sp, O_EXTENDED))
911 			reflags |= REG_EXTENDED;
912 		if (O_ISSET(sp, O_IGNORECASE))
913 			reflags |= REG_ICASE;
914 		if (O_ISSET(sp, O_ICLOWER))
915 			goto iclower;
916 	}
917 	if (LF_ISSET(SEARCH_ICL)) {
918 iclower:	for (p = ptrn, len = plen; len > 0; ++p, --len)
919 			if (ISUPPER((UCHAR_T)*p))
920 				break;
921 		if (len == 0)
922 			reflags |= REG_ICASE;
923 	}
924 
925 	/* If we're replacing a saved value, clear the old one. */
926 	if (LF_ISSET(SEARCH_CSEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
927 		regfree(&sp->re_c);
928 		F_CLR(sp, SC_RE_SEARCH);
929 	}
930 	if (LF_ISSET(SEARCH_CSUBST) && F_ISSET(sp, SC_RE_SUBST)) {
931 		regfree(&sp->subre_c);
932 		F_CLR(sp, SC_RE_SUBST);
933 	}
934 
935 	/*
936 	 * If we're saving the string, it's a pattern we haven't seen before,
937 	 * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
938 	 * later recompilation.   Free any previously saved value.
939 	 */
940 	if (ptrnp != NULL) {
941 		replaced = 0;
942 		if (LF_ISSET(SEARCH_CSCOPE)) {
943 			if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
944 				return (1);
945 			/*
946 			 * XXX
947 			 * Currently, the match-any-<blank> expression used in
948 			 * re_cscope_conv() requires extended RE's.  This may
949 			 * not be right or safe.
950 			 */
951 			reflags |= REG_EXTENDED;
952 		} else if (LF_ISSET(SEARCH_TAG)) {
953 			if (re_tag_conv(sp, &ptrn, &plen, &replaced))
954 				return (1);
955 		} else if (!LF_ISSET(SEARCH_LITERAL))
956 			if (re_conv(sp, &ptrn, &plen, &replaced))
957 				return (1);
958 
959 		/* Discard previous pattern. */
960 		if (*ptrnp != NULL) {
961 			free(*ptrnp);
962 			*ptrnp = NULL;
963 		}
964 		if (lenp != NULL)
965 			*lenp = plen;
966 
967 		/*
968 		 * Copy the string into allocated memory.
969 		 *
970 		 * XXX
971 		 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
972 		 * for now.  There's just no other solution.
973 		 */
974 		MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T));
975 		if (*ptrnp != NULL) {
976 			MEMCPYW(*ptrnp, ptrn, plen);
977 			(*ptrnp)[plen] = '\0';
978 		}
979 
980 		/* Free up conversion-routine-allocated memory. */
981 		if (replaced)
982 			FREE_SPACEW(sp, ptrn, 0);
983 
984 		if (*ptrnp == NULL)
985 			return (1);
986 
987 		ptrn = *ptrnp;
988 	}
989 
990 	/*
991 	 * XXX
992 	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
993 	 * contained a nul.  Bummer!
994 	 */
995 	if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
996 		if (LF_ISSET(SEARCH_MSG))
997 			re_error(sp, rval, rep);
998 		return (1);
999 	}
1000 
1001 	if (LF_ISSET(SEARCH_CSEARCH))
1002 		F_SET(sp, SC_RE_SEARCH);
1003 	if (LF_ISSET(SEARCH_CSUBST))
1004 		F_SET(sp, SC_RE_SUBST);
1005 
1006 	return (0);
1007 }
1008 
1009 /*
1010  * re_conv --
1011  *	Convert vi's regular expressions into something that
1012  *	the POSIX 1003.2 RE functions can handle.
1013  *
1014  * There are three conversions we make to make vi's RE's (specifically
1015  * the global, search, and substitute patterns) work with POSIX RE's.
1016  *
1017  * 1: If O_MAGIC is not set, strip backslashes from the magic character
1018  *    set (.[*~) that have them, and add them to the ones that don't.
1019  * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1020  *    from the last substitute command's replacement string.  If O_MAGIC
1021  *    is set, it's the string "~".
1022  * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1023  *    new RE escapes.
1024  *
1025  * !!!/XXX
1026  * This doesn't exactly match the historic behavior of vi because we do
1027  * the ~ substitution before calling the RE engine, so magic characters
1028  * in the replacement string will be expanded by the RE engine, and they
1029  * weren't historically.  It's a bug.
1030  */
1031 static int
re_conv(SCR * sp,CHAR_T ** ptrnp,size_t * plenp,int * replacedp)1032 re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1033 {
1034 	size_t blen, len, needlen;
1035 	int magic;
1036 	CHAR_T *bp, *p, *t;
1037 
1038 	/*
1039 	 * First pass through, we figure out how much space we'll need.
1040 	 * We do it in two passes, on the grounds that most of the time
1041 	 * the user is doing a search and won't have magic characters.
1042 	 * That way we can skip most of the memory allocation and copies.
1043 	 */
1044 	magic = 0;
1045 	for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1046 		switch (*p) {
1047 		case '\\':
1048 			if (len > 1) {
1049 				--len;
1050 				switch (*++p) {
1051 				case '<':
1052 					magic = 1;
1053 					needlen += RE_WSTART_LEN + 1;
1054 					break;
1055 				case '>':
1056 					magic = 1;
1057 					needlen += RE_WSTOP_LEN + 1;
1058 					break;
1059 				case '~':
1060 					if (!O_ISSET(sp, O_MAGIC)) {
1061 						magic = 1;
1062 						needlen += sp->repl_len;
1063 					}
1064 					break;
1065 				case '.':
1066 				case '[':
1067 				case '*':
1068 					if (!O_ISSET(sp, O_MAGIC)) {
1069 						magic = 1;
1070 						needlen += 1;
1071 					}
1072 					break;
1073 				default:
1074 					needlen += 2;
1075 				}
1076 			} else
1077 				needlen += 1;
1078 			break;
1079 		case '~':
1080 			if (O_ISSET(sp, O_MAGIC)) {
1081 				magic = 1;
1082 				needlen += sp->repl_len;
1083 			}
1084 			break;
1085 		case '.':
1086 		case '[':
1087 		case '*':
1088 			if (!O_ISSET(sp, O_MAGIC)) {
1089 				magic = 1;
1090 				needlen += 2;
1091 			}
1092 			break;
1093 		default:
1094 			needlen += 1;
1095 			break;
1096 		}
1097 
1098 	if (!magic) {
1099 		*replacedp = 0;
1100 		return (0);
1101 	}
1102 
1103 	/* Get enough memory to hold the final pattern. */
1104 	*replacedp = 1;
1105 	GET_SPACE_RETW(sp, bp, blen, needlen);
1106 
1107 	for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1108 		switch (*p) {
1109 		case '\\':
1110 			if (len > 1) {
1111 				--len;
1112 				switch (*++p) {
1113 				case '<':
1114 					MEMCPY(t,
1115 					    RE_WSTART, RE_WSTART_LEN);
1116 					t += RE_WSTART_LEN;
1117 					break;
1118 				case '>':
1119 					MEMCPY(t,
1120 					    RE_WSTOP, RE_WSTOP_LEN);
1121 					t += RE_WSTOP_LEN;
1122 					break;
1123 				case '~':
1124 					if (O_ISSET(sp, O_MAGIC))
1125 						*t++ = '~';
1126 					else {
1127 						MEMCPYW(t,
1128 						    sp->repl, sp->repl_len);
1129 						t += sp->repl_len;
1130 					}
1131 					break;
1132 				case '.':
1133 				case '[':
1134 				case '*':
1135 					if (O_ISSET(sp, O_MAGIC))
1136 						*t++ = '\\';
1137 					*t++ = *p;
1138 					break;
1139 				default:
1140 					*t++ = '\\';
1141 					*t++ = *p;
1142 				}
1143 			} else
1144 				*t++ = '\\';
1145 			break;
1146 		case '~':
1147 			if (O_ISSET(sp, O_MAGIC)) {
1148 				MEMCPYW(t, sp->repl, sp->repl_len);
1149 				t += sp->repl_len;
1150 			} else
1151 				*t++ = '~';
1152 			break;
1153 		case '.':
1154 		case '[':
1155 		case '*':
1156 			if (!O_ISSET(sp, O_MAGIC))
1157 				*t++ = '\\';
1158 			*t++ = *p;
1159 			break;
1160 		default:
1161 			*t++ = *p;
1162 			break;
1163 		}
1164 
1165 	*ptrnp = bp;
1166 	*plenp = t - bp;
1167 	return (0);
1168 }
1169 
1170 /*
1171  * re_tag_conv --
1172  *	Convert a tags search path into something that the POSIX
1173  *	1003.2 RE functions can handle.
1174  */
1175 static int
re_tag_conv(SCR * sp,CHAR_T ** ptrnp,size_t * plenp,int * replacedp)1176 re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1177 {
1178 	size_t blen, len;
1179 	int lastdollar;
1180 	CHAR_T *bp, *p, *t;
1181 
1182 	len = *plenp;
1183 
1184 	/* Max memory usage is 2 times the length of the string. */
1185 	*replacedp = 1;
1186 	GET_SPACE_RETW(sp, bp, blen, len * 2);
1187 
1188 	p = *ptrnp;
1189 	t = bp;
1190 
1191 	/* If the last character is a '/' or '?', we just strip it. */
1192 	if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1193 		--len;
1194 
1195 	/* If the next-to-last or last character is a '$', it's magic. */
1196 	if (len > 0 && p[len - 1] == '$') {
1197 		--len;
1198 		lastdollar = 1;
1199 	} else
1200 		lastdollar = 0;
1201 
1202 	/* If the first character is a '/' or '?', we just strip it. */
1203 	if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1204 		++p;
1205 		--len;
1206 	}
1207 
1208 	/* If the first or second character is a '^', it's magic. */
1209 	if (p[0] == '^') {
1210 		*t++ = *p++;
1211 		--len;
1212 	}
1213 
1214 	/*
1215 	 * Escape every other magic character we can find, meanwhile stripping
1216 	 * the backslashes ctags inserts when escaping the search delimiter
1217 	 * characters.
1218 	 */
1219 	for (; len > 0; --len) {
1220 		if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1221 			++p;
1222 			--len;
1223 		} else if (strchr("^.[]$*", p[0]))
1224 			*t++ = '\\';
1225 		*t++ = *p++;
1226 	}
1227 	if (lastdollar)
1228 		*t++ = '$';
1229 
1230 	*ptrnp = bp;
1231 	*plenp = t - bp;
1232 	return (0);
1233 }
1234 
1235 /*
1236  * re_cscope_conv --
1237  *	 Convert a cscope search path into something that the POSIX
1238  *      1003.2 RE functions can handle.
1239  */
1240 static int
re_cscope_conv(SCR * sp,CHAR_T ** ptrnp,size_t * plenp,int * replacedp)1241 re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1242 {
1243 	size_t blen, len, nspaces;
1244 	CHAR_T *bp, *t;
1245 	CHAR_T *p;
1246 	const CHAR_T *wp;
1247 	size_t wlen;
1248 
1249 	/*
1250 	 * Each space in the source line printed by cscope represents an
1251 	 * arbitrary sequence of spaces, tabs, and comments.
1252 	 */
1253 #define	CSCOPE_RE_SPACE		"([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1254 #define CSCOPE_LEN	sizeof(CSCOPE_RE_SPACE) - 1
1255 	CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1256 	for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1257 		if (*p == ' ')
1258 			++nspaces;
1259 
1260 	/*
1261 	 * Allocate plenty of space:
1262 	 *	the string, plus potential escaping characters;
1263 	 *	nspaces + 2 copies of CSCOPE_RE_SPACE;
1264 	 *	^, $, nul terminator characters.
1265 	 */
1266 	*replacedp = 1;
1267 	len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1268 	GET_SPACE_RETW(sp, bp, blen, len);
1269 
1270 	p = *ptrnp;
1271 	t = bp;
1272 
1273 	*t++ = '^';
1274 	MEMCPYW(t, wp, wlen);
1275 	t += wlen;
1276 
1277 	for (len = *plenp; len > 0; ++p, --len)
1278 		if (*p == ' ') {
1279 			MEMCPYW(t, wp, wlen);
1280 			t += wlen;
1281 		} else {
1282 			if (strchr("\\^.[]$*+?()|{}", *p))
1283 				*t++ = '\\';
1284 			*t++ = *p;
1285 		}
1286 
1287 	MEMCPYW(t, wp, wlen);
1288 	t += wlen;
1289 	*t++ = '$';
1290 
1291 	*ptrnp = bp;
1292 	*plenp = t - bp;
1293 	return (0);
1294 }
1295 
1296 /*
1297  * re_error --
1298  *	Report a regular expression error.
1299  *
1300  * PUBLIC: void re_error __P((SCR *, int, regex_t *));
1301  */
1302 void
re_error(SCR * sp,int errcode,regex_t * preg)1303 re_error(SCR *sp, int errcode, regex_t *preg)
1304 {
1305 	size_t sz;
1306 	char *oe;
1307 
1308 	sz = regerror(errcode, preg, NULL, 0);
1309 	if ((oe = malloc(sz)) == NULL)
1310 		msgq(sp, M_SYSERR, NULL);
1311 	else {
1312 		(void)regerror(errcode, preg, oe, sz);
1313 		msgq(sp, M_ERR, "RE error: %s", oe);
1314 		free(oe);
1315 	}
1316 }
1317 
/*
 * re_sub --
 * 	Do the substitution for a regular expression.
 *
 *	Expands the current replacement string (sp->repl, sp->repl_len)
 *	and appends the result to the output buffer, starting at offset
 *	*lbclenp.
 *
 *	ip:	 Input line; the match[] offsets are relative to it.
 *	lbp:	 Output buffer pointer, written back on return.
 *	lbclenp: Count of characters in the output buffer, written back
 *		 on return.
 *	lblenp:	 Written back on return.  NOTE(review): presumably the
 *		 allocated size of the output buffer, maintained by the
 *		 NEEDSP macro -- confirm against its definition.
 *	match:	 Subexpression offsets; entry 0 is the whole match.
 *
 *	The visible code always returns 0.  NOTE(review): NEEDSP and
 *	NEEDNEWLINE are defined elsewhere and may return on failure --
 *	confirm.
 */
static int
re_sub(SCR *sp, CHAR_T *ip, CHAR_T **lbp, size_t *lbclenp, size_t *lblenp, regmatch_t match[10])
{
	enum { C_NOT_SET, C_LOWER, C_ONE_LOWER, C_ONE_UPPER, C_UPPER } conv;
	size_t lbclen, lblen;		/* Local copies. */
	size_t mlen;			/* Match length. */
	size_t rpl;			/* Remaining replacement length. */
	CHAR_T *rp;			/* Replacement pointer. */
	int ch;
	int no;				/* Match replacement offset. */
	CHAR_T *p, *t;			/* Buffer pointers. */
	CHAR_T *lb;			/* Local copies. */

	lb = *lbp;			/* Get local copies. */
	lbclen = *lbclenp;
	lblen = *lblenp;

	/*
	 * QUOTING NOTE:
	 *
	 * There are some special sequences that vi provides in the
	 * replacement patterns.
	 *	 & string the RE matched (\& if nomagic set)
	 *	\# n-th regular subexpression
	 *	\E end \U, \L conversion
	 *	\e end \U, \L conversion
	 *	\l convert the next character to lower-case
	 *	\L convert to lower-case, until \E, \e, or end of replacement
	 *	\u convert the next character to upper-case
	 *	\U convert to upper-case, until \E, \e, or end of replacement
	 *
	 * Otherwise, since this is the lowest level of replacement, discard
	 * all escaping characters.  This (hopefully) matches historic practice.
	 */
	/*
	 * OUTCH --
	 *	Append one character to the output.  If nltrans is set and
	 *	the character's key value is K_CR or K_NL, the current output
	 *	offset is recorded in sp->newl; otherwise any pending case
	 *	conversion is applied (the one-shot conversions reset conv).
	 *	In either case the character is then stored at p and lbclen
	 *	is incremented.
	 */
#define	OUTCH(ch, nltrans) {						\
	ARG_CHAR_T __ch = (ch);						\
	e_key_t __value = KEY_VAL(sp, __ch);				\
	if (nltrans && (__value == K_CR || __value == K_NL)) {		\
		NEEDNEWLINE(sp);					\
		sp->newl[sp->newl_cnt++] = lbclen;			\
	} else if (conv != C_NOT_SET) {					\
		switch (conv) {						\
		case C_ONE_LOWER:					\
			conv = C_NOT_SET;				\
			/* FALLTHROUGH */				\
		case C_LOWER:						\
			if (ISUPPER(__ch))				\
				__ch = TOLOWER(__ch);			\
			break;						\
		case C_ONE_UPPER:					\
			conv = C_NOT_SET;				\
			/* FALLTHROUGH */				\
		case C_UPPER:						\
			if (ISLOWER(__ch))				\
				__ch = TOUPPER(__ch);			\
			break;						\
		default:						\
			abort();					\
		}							\
	}								\
	NEEDSP(sp, 1, p);						\
	*p++ = __ch;							\
	++lbclen;							\
}
	conv = C_NOT_SET;
	/* Walk the replacement string; output starts at lb + lbclen. */
	for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
		switch (ch = *rp++) {
		case '&':
			/* With magic set, a bare & is the whole match. */
			if (O_ISSET(sp, O_MAGIC)) {
				no = 0;
				goto subzero;
			}
			break;
		case '\\':
			/* A trailing backslash is output as itself. */
			if (rpl == 0)
				break;
			--rpl;
			switch (ch = *rp) {
			case '&':
				++rp;
				/*
				 * With magic unset, \& is the whole match;
				 * otherwise the '&' is output literally.
				 */
				if (!O_ISSET(sp, O_MAGIC)) {
					no = 0;
					goto subzero;
				}
				break;
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				no = *rp++ - '0';
				/*
				 * If the subexpression has no recorded match,
				 * break out and let the OUTCH below emit ch.
				 */
subzero:			if (match[no].rm_so == -1 ||
			    	    match[no].rm_eo == -1)
					break;
				mlen = match[no].rm_eo - match[no].rm_so;
				/*
				 * Copy the matched text; nltrans is 0, so no
				 * line breaks are recorded for it.
				 */
				for (t = ip + match[no].rm_so; mlen--; ++t)
					OUTCH((UCHAR_T)*t, 0);
				continue;
			case 'e':
			case 'E':
				++rp;
				conv = C_NOT_SET;
				continue;
			case 'l':
				++rp;
				conv = C_ONE_LOWER;
				continue;
			case 'L':
				++rp;
				conv = C_LOWER;
				continue;
			case 'u':
				++rp;
				conv = C_ONE_UPPER;
				continue;
			case 'U':
				++rp;
				conv = C_UPPER;
				continue;
			default:
				/* Drop the backslash, output the character. */
				++rp;
				break;
			}
		}
		OUTCH(ch, 1);
	}

	*lbp = lb;			/* Update caller's information. */
	*lbclenp = lbclen;
	*lblenp = lblen;
	return (0);
}
1456