1 /* $OpenBSD: ex_subst.c,v 1.29 2016/09/02 15:38:42 martijn Exp $ */ 2 3 /*- 4 * Copyright (c) 1992, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * Copyright (c) 1992, 1993, 1994, 1995, 1996 7 * Keith Bostic. All rights reserved. 8 * 9 * See the LICENSE file for redistribution information. 10 */ 11 12 #include "config.h" 13 14 #include <sys/queue.h> 15 #include <sys/time.h> 16 17 #include <bitstring.h> 18 #include <ctype.h> 19 #include <errno.h> 20 #include <limits.h> 21 #include <stdio.h> 22 #include <stdlib.h> 23 #include <string.h> 24 #include <unistd.h> 25 26 #include "../common/common.h" 27 #include "../vi/vi.h" 28 29 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) 30 31 #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */ 32 #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */ 33 34 static int re_conv(SCR *, char **, size_t *, int *); 35 static int re_sub(SCR *, char *, char **, size_t *, size_t *, regmatch_t [10]); 36 static int re_tag_conv(SCR *, char **, size_t *, int *); 37 static int s(SCR *, EXCMD *, char *, regex_t *, u_int); 38 39 /* 40 * ex_s -- 41 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]] 42 * 43 * Substitute on lines matching a pattern. 44 * 45 * PUBLIC: int ex_s(SCR *, EXCMD *); 46 */ 47 int 48 ex_s(SCR *sp, EXCMD *cmdp) 49 { 50 regex_t *re; 51 size_t blen, len; 52 u_int flags; 53 int delim; 54 char *bp, *ptrn, *rep, *p, *t; 55 56 /* 57 * Skip leading white space. 58 * 59 * !!! 60 * Historic vi allowed any non-alphanumeric to serve as the 61 * substitution command delimiter. 62 * 63 * !!! 64 * If the arguments are empty, it's the same as &, i.e. we 65 * repeat the last substitution. 66 */ 67 if (cmdp->argc == 0) 68 goto subagain; 69 for (p = cmdp->argv[0]->bp, 70 len = cmdp->argv[0]->len; len > 0; --len, ++p) { 71 if (!isblank(*p)) 72 break; 73 } 74 if (len == 0) 75 subagain: return (ex_subagain(sp, cmdp)); 76 77 delim = *p++; 78 if (isalnum(delim) || delim == '\\') 79 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR)); 80 81 /* 82 * !!! 83 * The full-blown substitute command reset the remembered 84 * state of the 'c' and 'g' suffices. 85 */ 86 sp->c_suffix = sp->g_suffix = 0; 87 88 /* 89 * Get the pattern string, toss escaping characters. 90 * 91 * !!! 92 * Historic vi accepted any of the following forms: 93 * 94 * :s/abc/def/ change "abc" to "def" 95 * :s/abc/def change "abc" to "def" 96 * :s/abc/ delete "abc" 97 * :s/abc delete "abc" 98 * 99 * QUOTING NOTE: 100 * 101 * Only toss an escaping character if it escapes a delimiter. 102 * This means that "s/A/\\\\f" replaces "A" with "\\f". It 103 * would be nice to be more regular, i.e. for each layer of 104 * escaping a single escaping character is removed, but that's 105 * not how the historic vi worked. 106 */ 107 for (ptrn = t = p;;) { 108 if (p[0] == '\0' || p[0] == delim) { 109 if (p[0] == delim) 110 ++p; 111 /* 112 * !!! 113 * Nul terminate the pattern string -- it's passed 114 * to regcomp which doesn't understand anything else. 115 */ 116 *t = '\0'; 117 break; 118 } 119 if (p[0] == '\\') { 120 if (p[1] == delim) 121 ++p; 122 else if (p[1] == '\\') 123 *t++ = *p++; 124 } 125 *t++ = *p++; 126 } 127 128 /* 129 * If the pattern string is empty, use the last RE (not just the 130 * last substitution RE). 131 */ 132 if (*ptrn == '\0') { 133 if (sp->re == NULL) { 134 ex_emsg(sp, NULL, EXM_NOPREVRE); 135 return (1); 136 } 137 138 /* Re-compile the RE if necessary. */ 139 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, 140 sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) 141 return (1); 142 flags = 0; 143 } else { 144 /* 145 * !!! 146 * Compile the RE. Historic practice is that substitutes set 147 * the search direction as well as both substitute and search 148 * RE's. We compile the RE twice, as we don't want to bother 149 * ref counting the pattern string and (opaque) structure. 150 */ 151 if (re_compile(sp, ptrn, t - ptrn, 152 &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH)) 153 return (1); 154 if (re_compile(sp, ptrn, t - ptrn, 155 &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST)) 156 return (1); 157 158 flags = SUB_FIRST; 159 sp->searchdir = FORWARD; 160 } 161 re = &sp->re_c; 162 163 /* 164 * Get the replacement string. 165 * 166 * The special character & (\& if O_MAGIC not set) matches the 167 * entire RE. No handling of & is required here, it's done by 168 * re_sub(). 169 * 170 * The special character ~ (\~ if O_MAGIC not set) inserts the 171 * previous replacement string into this replacement string. 172 * Count ~'s to figure out how much space we need. We could 173 * special case nonexistent last patterns or whether or not 174 * O_MAGIC is set, but it's probably not worth the effort. 175 * 176 * QUOTING NOTE: 177 * 178 * Only toss an escaping character if it escapes a delimiter or 179 * if O_MAGIC is set and it escapes a tilde. 180 * 181 * !!! 182 * If the entire replacement pattern is "%", then use the last 183 * replacement pattern. This semantic was added to vi in System 184 * V and then percolated elsewhere, presumably around the time 185 * that it was added to their version of ed(1). 186 */ 187 if (p[0] == '\0' || p[0] == delim) { 188 if (p[0] == delim) 189 ++p; 190 if (sp->repl != NULL) 191 free(sp->repl); 192 sp->repl = NULL; 193 sp->repl_len = 0; 194 } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim)) 195 p += p[1] == delim ? 2 : 1; 196 else { 197 for (rep = p, len = 0; 198 p[0] != '\0' && p[0] != delim; ++p, ++len) 199 if (p[0] == '~') 200 len += sp->repl_len; 201 GET_SPACE_RET(sp, bp, blen, len); 202 for (t = bp, len = 0, p = rep;;) { 203 if (p[0] == '\0' || p[0] == delim) { 204 if (p[0] == delim) 205 ++p; 206 break; 207 } 208 if (p[0] == '\\') { 209 if (p[1] == delim) 210 ++p; 211 else if (p[1] == '\\') { 212 *t++ = *p++; 213 ++len; 214 } else if (p[1] == '~') { 215 ++p; 216 if (!O_ISSET(sp, O_MAGIC)) 217 goto tilde; 218 } 219 } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) { 220 tilde: ++p; 221 memcpy(t, sp->repl, sp->repl_len); 222 t += sp->repl_len; 223 len += sp->repl_len; 224 continue; 225 } 226 *t++ = *p++; 227 ++len; 228 } 229 if ((sp->repl_len = len) != 0) { 230 if (sp->repl != NULL) 231 free(sp->repl); 232 if ((sp->repl = malloc(len)) == NULL) { 233 msgq(sp, M_SYSERR, NULL); 234 FREE_SPACE(sp, bp, blen); 235 return (1); 236 } 237 memcpy(sp->repl, bp, len); 238 } 239 FREE_SPACE(sp, bp, blen); 240 } 241 return (s(sp, cmdp, p, re, flags)); 242 } 243 244 /* 245 * ex_subagain -- 246 * [line [,line]] & [cgr] [count] [#lp]] 247 * 248 * Substitute using the last substitute RE and replacement pattern. 249 * 250 * PUBLIC: int ex_subagain(SCR *, EXCMD *); 251 */ 252 int 253 ex_subagain(SCR *sp, EXCMD *cmdp) 254 { 255 if (sp->subre == NULL) { 256 ex_emsg(sp, NULL, EXM_NOPREVRE); 257 return (1); 258 } 259 if (!F_ISSET(sp, SC_RE_SUBST) && re_compile(sp, 260 sp->subre, sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST)) 261 return (1); 262 return (s(sp, 263 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0)); 264 } 265 266 /* 267 * ex_subtilde -- 268 * [line [,line]] ~ [cgr] [count] [#lp]] 269 * 270 * Substitute using the last RE and last substitute replacement pattern. 271 * 272 * PUBLIC: int ex_subtilde(SCR *, EXCMD *); 273 */ 274 int 275 ex_subtilde(SCR *sp, EXCMD *cmdp) 276 { 277 if (sp->re == NULL) { 278 ex_emsg(sp, NULL, EXM_NOPREVRE); 279 return (1); 280 } 281 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, 282 sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) 283 return (1); 284 return (s(sp, 285 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0)); 286 } 287 288 /* 289 * s -- 290 * Do the substitution. This stuff is *really* tricky. There are lots of 291 * special cases, and general nastiness. Don't mess with it unless you're 292 * pretty confident. 293 * 294 * The nasty part of the substitution is what happens when the replacement 295 * string contains newlines. It's a bit tricky -- consider the information 296 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is 297 * to build a set of newline offsets which we use to break the line up later, 298 * when the replacement is done. Don't change it unless you're *damned* 299 * confident. 300 */ 301 #define NEEDNEWLINE(sp) { \ 302 if ((sp)->newl_len == (sp)->newl_cnt) { \ 303 (sp)->newl_len += 25; \ 304 REALLOCARRAY((sp), (sp)->newl, \ 305 (sp)->newl_len, sizeof(size_t)); \ 306 if ((sp)->newl == NULL) { \ 307 (sp)->newl_len = 0; \ 308 return (1); \ 309 } \ 310 } \ 311 } 312 313 #define BUILD(sp, l, len) { \ 314 if (lbclen + (len) > lblen) { \ 315 lblen += MAXIMUM(lbclen + (len), 256); \ 316 REALLOC((sp), lb, lblen); \ 317 if (lb == NULL) { \ 318 lbclen = 0; \ 319 return (1); \ 320 } \ 321 } \ 322 memcpy(lb + lbclen, (l), (len)); \ 323 lbclen += (len); \ 324 } 325 326 #define NEEDSP(sp, len, pnt) { \ 327 if (lbclen + (len) > lblen) { \ 328 lblen += MAXIMUM(lbclen + (len), 256); \ 329 REALLOC((sp), lb, lblen); \ 330 if (lb == NULL) { \ 331 lbclen = 0; \ 332 return (1); \ 333 } \ 334 (pnt) = lb + lbclen; \ 335 } \ 336 } 337 338 static int 339 s(SCR *sp, EXCMD *cmdp, char *s, regex_t *re, u_int flags) 340 { 341 EVENT ev; 342 MARK from, to; 343 TEXTH tiq; 344 recno_t elno, lno, slno; 345 regmatch_t match[10]; 346 size_t blen, cnt, last, lbclen, lblen, len, llen; 347 size_t offset, saved_offset, scno; 348 int lflag, nflag, pflag, rflag; 349 int didsub, do_eol_match, eflags, nempty, eval; 350 int linechanged, matched, quit, rval; 351 unsigned long ul; 352 char *bp, *lb; 353 354 NEEDFILE(sp, cmdp); 355 356 slno = sp->lno; 357 scno = sp->cno; 358 359 /* 360 * !!! 361 * Historically, the 'g' and 'c' suffices were always toggled as flags, 362 * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was 363 * not set, they were initialized to 0 for all substitute commands. If 364 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user 365 * specified substitute/replacement patterns (see ex_s()). 366 */ 367 if (!O_ISSET(sp, O_EDCOMPATIBLE)) 368 sp->c_suffix = sp->g_suffix = 0; 369 370 /* 371 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but 372 * it only displayed the last change. I'd disallow them, but they are 373 * useful in combination with the [v]global commands. In the current 374 * model the problem is combining them with the 'c' flag -- the screen 375 * would have to flip back and forth between the confirm screen and the 376 * ex print screen, which would be pretty awful. We do display all 377 * changes, though, for what that's worth. 378 * 379 * !!! 380 * Historic vi was fairly strict about the order of "options", the 381 * count, and "flags". I'm somewhat fuzzy on the difference between 382 * options and flags, anyway, so this is a simpler approach, and we 383 * just take it them in whatever order the user gives them. (The ex 384 * usage statement doesn't reflect this.) 385 */ 386 lflag = nflag = pflag = rflag = 0; 387 if (s == NULL) 388 goto noargs; 389 for (lno = OOBLNO; *s != '\0'; ++s) 390 switch (*s) { 391 case ' ': 392 case '\t': 393 continue; 394 case '+': 395 ++cmdp->flagoff; 396 break; 397 case '-': 398 --cmdp->flagoff; 399 break; 400 case '0': case '1': case '2': case '3': case '4': 401 case '5': case '6': case '7': case '8': case '9': 402 if (lno != OOBLNO) 403 goto usage; 404 errno = 0; 405 if ((ul = strtoul(s, &s, 10)) >= UINT_MAX) 406 errno = ERANGE; 407 if (*s == '\0') /* Loop increment correction. */ 408 --s; 409 if (errno == ERANGE) { 410 if (ul >= UINT_MAX) 411 msgq(sp, M_ERR, "Count overflow"); 412 else 413 msgq(sp, M_SYSERR, NULL); 414 return (1); 415 } 416 lno = (recno_t)ul; 417 /* 418 * In historic vi, the count was inclusive from the 419 * second address. 420 */ 421 cmdp->addr1.lno = cmdp->addr2.lno; 422 cmdp->addr2.lno += lno - 1; 423 if (!db_exist(sp, cmdp->addr2.lno) && 424 db_last(sp, &cmdp->addr2.lno)) 425 return (1); 426 break; 427 case '#': 428 nflag = 1; 429 break; 430 case 'c': 431 sp->c_suffix = !sp->c_suffix; 432 433 /* Ex text structure initialization. */ 434 if (F_ISSET(sp, SC_EX)) { 435 memset(&tiq, 0, sizeof(TEXTH)); 436 TAILQ_INIT(&tiq); 437 } 438 break; 439 case 'g': 440 sp->g_suffix = !sp->g_suffix; 441 break; 442 case 'l': 443 lflag = 1; 444 break; 445 case 'p': 446 pflag = 1; 447 break; 448 case 'r': 449 if (LF_ISSET(SUB_FIRST)) { 450 msgq(sp, M_ERR, 451 "Regular expression specified; r flag meaningless"); 452 return (1); 453 } 454 if (!F_ISSET(sp, SC_RE_SEARCH)) { 455 ex_emsg(sp, NULL, EXM_NOPREVRE); 456 return (1); 457 } 458 rflag = 1; 459 re = &sp->re_c; 460 break; 461 default: 462 goto usage; 463 } 464 465 if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) { 466 usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE); 467 return (1); 468 } 469 470 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) { 471 msgq(sp, M_ERR, 472 "The #, l and p flags may not be combined with the c flag in vi mode"); 473 return (1); 474 } 475 476 /* 477 * bp: if interactive, line cache 478 * blen: if interactive, line cache length 479 * lb: build buffer pointer. 480 * lbclen: current length of built buffer. 481 * lblen; length of build buffer. 482 */ 483 bp = lb = NULL; 484 blen = lbclen = lblen = 0; 485 486 /* For each line... */ 487 for (matched = quit = 0, lno = cmdp->addr1.lno, 488 elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) { 489 490 /* Someone's unhappy, time to stop. */ 491 if (INTERRUPTED(sp)) 492 break; 493 494 /* Get the line. */ 495 if (db_get(sp, lno, DBG_FATAL, &s, &llen)) 496 goto err; 497 498 /* 499 * Make a local copy if doing confirmation -- when calling 500 * the confirm routine we're likely to lose the cached copy. 501 */ 502 if (sp->c_suffix) { 503 if (bp == NULL) { 504 GET_SPACE_RET(sp, bp, blen, llen); 505 } else 506 ADD_SPACE_RET(sp, bp, blen, llen); 507 memcpy(bp, s, llen); 508 s = bp; 509 } 510 511 /* Start searching from the beginning. */ 512 offset = 0; 513 len = llen; 514 515 /* Reset the build buffer offset. */ 516 lbclen = 0; 517 518 /* Reset empty match test variable. */ 519 nempty = -1; 520 521 /* 522 * We don't want to have to do a setline if the line didn't 523 * change -- keep track of whether or not this line changed. 524 * If doing confirmations, don't want to keep setting the 525 * line if change is refused -- keep track of substitutions. 526 */ 527 didsub = linechanged = 0; 528 529 /* New line, do an EOL match. */ 530 do_eol_match = 1; 531 532 /* It's not nul terminated, but we pretend it is. */ 533 eflags = REG_STARTEND; 534 535 /* The search area is from s + offset to the EOL. */ 536 nextmatch: match[0].rm_so = offset; 537 match[0].rm_eo = llen; 538 539 /* Get the next match. */ 540 eval = regexec(re, (char *)s, 10, match, eflags); 541 542 /* 543 * There wasn't a match or if there was an error, deal with 544 * it. If there was a previous match in this line, resolve 545 * the changes into the database. Otherwise, just move on. 546 */ 547 if (eval == REG_NOMATCH) 548 goto endmatch; 549 if (eval != 0) { 550 re_error(sp, eval, re); 551 goto err; 552 } 553 matched = 1; 554 555 /* Only the first search can match an anchored expression. */ 556 eflags |= REG_NOTBOL; 557 558 /* 559 * !!! 560 * It's possible to match 0-length strings -- for example, the 561 * command s;a*;X;, when matched against the string "aabb" will 562 * result in "XbXbX", i.e. the matches are "aa", the space 563 * between the b's and the space between the b's and the end of 564 * the string. There is a similar space between the beginning 565 * of the string and the a's. The rule that we use (because vi 566 * historically used it) is that any 0-length match, occurring 567 * immediately after a match, is ignored. Otherwise, the above 568 * example would have resulted in "XXbXbX". Another example is 569 * incorrectly using " *" to replace groups of spaces with one 570 * space. 571 * 572 * If the match is empty and at the same place as the end of the 573 * previous match, ignore the match and move forward. If 574 * there's no more characters in the string, we were 575 * attempting to match after the last character, so quit. 576 */ 577 if (match[0].rm_so == nempty && match[0].rm_eo == nempty) { 578 nempty = -1; 579 if (len == 0) 580 goto endmatch; 581 BUILD(sp, s + offset, 1) 582 ++offset; 583 --len; 584 goto nextmatch; 585 } 586 587 /* Confirm change. */ 588 if (sp->c_suffix) { 589 /* 590 * Set the cursor position for confirmation. Note, 591 * if we matched on a '$', the cursor may be past 592 * the end of line. 593 */ 594 from.lno = to.lno = lno; 595 from.cno = match[0].rm_so; 596 to.cno = match[0].rm_eo; 597 /* 598 * Both ex and vi have to correct for a change before 599 * the first character in the line. 600 */ 601 if (llen == 0) 602 from.cno = to.cno = 0; 603 if (F_ISSET(sp, SC_VI)) { 604 /* 605 * Only vi has to correct for a change after 606 * the last character in the line. 607 * 608 * XXX 609 * It would be nice to change the vi code so 610 * that we could display a cursor past EOL. 611 */ 612 if (to.cno >= llen) 613 to.cno = llen - 1; 614 if (from.cno >= llen) 615 from.cno = llen - 1; 616 617 sp->lno = from.lno; 618 sp->cno = from.cno; 619 if (vs_refresh(sp, 1)) 620 goto err; 621 622 vs_update(sp, "Confirm change? [n]", NULL); 623 624 if (v_event_get(sp, &ev, 0, 0)) 625 goto err; 626 switch (ev.e_event) { 627 case E_CHARACTER: 628 break; 629 case E_EOF: 630 case E_ERR: 631 case E_INTERRUPT: 632 goto lquit; 633 default: 634 v_event_err(sp, &ev); 635 goto lquit; 636 } 637 } else { 638 if (ex_print(sp, cmdp, &from, &to, 0) || 639 ex_scprint(sp, &from, &to)) 640 goto lquit; 641 if (ex_txt(sp, &tiq, 0, TXT_CR)) 642 goto err; 643 ev.e_c = TAILQ_FIRST(&tiq)->lb[0]; 644 } 645 646 switch (ev.e_c) { 647 case CH_YES: 648 break; 649 default: 650 case CH_NO: 651 didsub = 0; 652 BUILD(sp, s + offset, match[0].rm_eo - offset); 653 goto skip; 654 case CH_QUIT: 655 /* Set the quit/interrupted flags. */ 656 lquit: quit = 1; 657 F_SET(sp->gp, G_INTERRUPTED); 658 659 /* 660 * Resolve any changes, then return to (and 661 * exit from) the main loop. 662 */ 663 goto endmatch; 664 } 665 } 666 667 /* 668 * Set the cursor to the last position changed, converting 669 * from 1-based to 0-based. 670 */ 671 sp->lno = lno; 672 sp->cno = match[0].rm_so; 673 674 /* Copy the bytes before the match into the build buffer. */ 675 BUILD(sp, s + offset, match[0].rm_so - offset); 676 677 /* Substitute the matching bytes. */ 678 didsub = 1; 679 if (re_sub(sp, s, &lb, &lbclen, &lblen, match)) 680 goto err; 681 682 /* Set the change flag so we know this line was modified. */ 683 linechanged = 1; 684 685 /* Move past the matched bytes. */ 686 skip: offset = match[0].rm_eo; 687 len = llen - match[0].rm_eo; 688 689 /* A match cannot be followed by an empty pattern. */ 690 nempty = match[0].rm_eo; 691 692 /* 693 * If doing a global change with confirmation, we have to 694 * update the screen. The basic idea is to store the line 695 * so the screen update routines can find it, and restart. 696 */ 697 if (didsub && sp->c_suffix && sp->g_suffix) { 698 /* 699 * The new search offset will be the end of the 700 * modified line. 701 */ 702 saved_offset = lbclen; 703 704 /* Copy the rest of the line. */ 705 if (len) 706 BUILD(sp, s + offset, len) 707 708 /* Set the new offset. */ 709 offset = saved_offset; 710 711 /* Store inserted lines, adjusting the build buffer. */ 712 last = 0; 713 if (sp->newl_cnt) { 714 for (cnt = 0; 715 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { 716 if (db_insert(sp, lno, 717 lb + last, sp->newl[cnt] - last)) 718 goto err; 719 last = sp->newl[cnt] + 1; 720 ++sp->rptlines[L_ADDED]; 721 } 722 lbclen -= last; 723 offset -= last; 724 sp->newl_cnt = 0; 725 } 726 727 /* Store and retrieve the line. */ 728 if (db_set(sp, lno, lb + last, lbclen)) 729 goto err; 730 if (db_get(sp, lno, DBG_FATAL, &s, &llen)) 731 goto err; 732 ADD_SPACE_RET(sp, bp, blen, llen) 733 memcpy(bp, s, llen); 734 s = bp; 735 len = llen - offset; 736 737 /* Restart the build. */ 738 lbclen = 0; 739 BUILD(sp, s, offset); 740 741 /* 742 * If we haven't already done the after-the-string 743 * match, do one. Set REG_NOTEOL so the '$' pattern 744 * only matches once. 745 */ 746 if (!do_eol_match) 747 goto endmatch; 748 if (offset == len) { 749 do_eol_match = 0; 750 eflags |= REG_NOTEOL; 751 } 752 goto nextmatch; 753 } 754 755 /* 756 * If it's a global: 757 * 758 * If at the end of the string, do a test for the after 759 * the string match. Set REG_NOTEOL so the '$' pattern 760 * only matches once. 761 */ 762 if (sp->g_suffix && do_eol_match) { 763 if (len == 0) { 764 do_eol_match = 0; 765 eflags |= REG_NOTEOL; 766 } 767 goto nextmatch; 768 } 769 770 endmatch: if (!linechanged) 771 continue; 772 773 /* Copy any remaining bytes into the build buffer. */ 774 if (len) 775 BUILD(sp, s + offset, len) 776 777 /* Store inserted lines, adjusting the build buffer. */ 778 last = 0; 779 if (sp->newl_cnt) { 780 for (cnt = 0; 781 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { 782 if (db_insert(sp, 783 lno, lb + last, sp->newl[cnt] - last)) 784 goto err; 785 last = sp->newl[cnt] + 1; 786 ++sp->rptlines[L_ADDED]; 787 } 788 lbclen -= last; 789 sp->newl_cnt = 0; 790 } 791 792 /* Store the changed line. */ 793 if (db_set(sp, lno, lb + last, lbclen)) 794 goto err; 795 796 /* Update changed line counter. */ 797 if (sp->rptlchange != lno) { 798 sp->rptlchange = lno; 799 ++sp->rptlines[L_CHANGED]; 800 } 801 802 /* 803 * !!! 804 * Display as necessary. Historic practice is to only 805 * display the last line of a line split into multiple 806 * lines. 807 */ 808 if (lflag || nflag || pflag) { 809 from.lno = to.lno = lno; 810 from.cno = to.cno = 0; 811 if (lflag) 812 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST); 813 if (nflag) 814 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH); 815 if (pflag) 816 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT); 817 } 818 } 819 820 /* 821 * !!! 822 * Historically, vi attempted to leave the cursor at the same place if 823 * the substitution was done at the current cursor position. Otherwise 824 * it moved it to the first non-blank of the last line changed. There 825 * were some problems: for example, :s/$/foo/ with the cursor on the 826 * last character of the line left the cursor on the last character, or 827 * the & command with multiple occurrences of the matching string in the 828 * line usually left the cursor in a fairly random position. 829 * 830 * We try to do the same thing, with the exception that if the user is 831 * doing substitution with confirmation, we move to the last line about 832 * which the user was consulted, as opposed to the last line that they 833 * actually changed. This prevents a screen flash if the user doesn't 834 * change many of the possible lines. 835 */ 836 if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) { 837 sp->cno = 0; 838 (void)nonblank(sp, sp->lno, &sp->cno); 839 } 840 841 /* 842 * If not in a global command, and nothing matched, say so. 843 * Else, if none of the lines displayed, put something up. 844 */ 845 rval = 0; 846 if (!matched) { 847 if (!F_ISSET(sp, SC_EX_GLOBAL)) { 848 msgq(sp, M_ERR, "No match found"); 849 goto err; 850 } 851 } else if (!lflag && !nflag && !pflag) 852 F_SET(cmdp, E_AUTOPRINT); 853 854 if (0) { 855 err: rval = 1; 856 } 857 858 if (bp != NULL) 859 FREE_SPACE(sp, bp, blen); 860 if (lb != NULL) 861 free(lb); 862 return (rval); 863 } 864 865 /* 866 * re_compile -- 867 * Compile the RE. 868 * 869 * PUBLIC: int re_compile(SCR *, 870 * PUBLIC: char *, size_t, char **, size_t *, regex_t *, u_int); 871 */ 872 int 873 re_compile(SCR *sp, char *ptrn, size_t plen, char **ptrnp, size_t *lenp, 874 regex_t *rep, u_int flags) 875 { 876 size_t len; 877 int reflags, replaced, rval; 878 char *p; 879 880 /* Set RE flags. */ 881 reflags = 0; 882 if (!LF_ISSET(RE_C_TAG)) { 883 if (O_ISSET(sp, O_EXTENDED)) 884 reflags |= REG_EXTENDED; 885 if (O_ISSET(sp, O_IGNORECASE)) 886 reflags |= REG_ICASE; 887 if (O_ISSET(sp, O_ICLOWER)) { 888 for (p = ptrn, len = plen; len > 0; ++p, --len) 889 if (isupper(*p)) 890 break; 891 if (len == 0) 892 reflags |= REG_ICASE; 893 } 894 } 895 896 /* If we're replacing a saved value, clear the old one. */ 897 if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) { 898 regfree(&sp->re_c); 899 F_CLR(sp, SC_RE_SEARCH); 900 } 901 if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) { 902 regfree(&sp->subre_c); 903 F_CLR(sp, SC_RE_SUBST); 904 } 905 906 /* 907 * If we're saving the string, it's a pattern we haven't seen before, 908 * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for 909 * later recompilation. Free any previously saved value. 910 */ 911 if (ptrnp != NULL) { 912 if (LF_ISSET(RE_C_TAG)) { 913 if (re_tag_conv(sp, &ptrn, &plen, &replaced)) 914 return (1); 915 } else 916 if (re_conv(sp, &ptrn, &plen, &replaced)) 917 return (1); 918 919 /* Discard previous pattern. */ 920 if (*ptrnp != NULL) { 921 free(*ptrnp); 922 *ptrnp = NULL; 923 } 924 if (lenp != NULL) 925 *lenp = plen; 926 927 /* 928 * Copy the string into allocated memory. 929 * 930 * XXX 931 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated 932 * for now. There's just no other solution. 933 */ 934 MALLOC(sp, *ptrnp, plen + 1); 935 if (*ptrnp != NULL) { 936 memcpy(*ptrnp, ptrn, plen); 937 (*ptrnp)[plen] = '\0'; 938 } 939 940 /* Free up conversion-routine-allocated memory. */ 941 if (replaced) 942 FREE_SPACE(sp, ptrn, 0); 943 944 if (*ptrnp == NULL) 945 return (1); 946 947 ptrn = *ptrnp; 948 } 949 950 /* 951 * XXX 952 * Regcomp isn't 8-bit clean, so we just lost if the pattern 953 * contained a nul. Bummer! 954 */ 955 if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) { 956 if (!LF_ISSET(RE_C_SILENT)) 957 re_error(sp, rval, rep); 958 return (1); 959 } 960 961 if (LF_ISSET(RE_C_SEARCH)) 962 F_SET(sp, SC_RE_SEARCH); 963 if (LF_ISSET(RE_C_SUBST)) 964 F_SET(sp, SC_RE_SUBST); 965 966 return (0); 967 } 968 969 /* 970 * re_conv -- 971 * Convert vi's regular expressions into something that the 972 * the POSIX 1003.2 RE functions can handle. 973 * 974 * There are two conversions we make to make vi's RE's (specifically 975 * the global, search, and substitute patterns) work with POSIX RE's. 976 * We assume that \<ptrn\> does "word" searches, which is non-standard 977 * but supported by most regexp libraries.. 978 * 979 * 1: If O_MAGIC is not set, strip backslashes from the magic character 980 * set (.[*~) that have them, and add them to the ones that don't. 981 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text 982 * from the last substitute command's replacement string. If O_MAGIC 983 * is set, it's the string "~". 984 * 985 * !!!/XXX 986 * This doesn't exactly match the historic behavior of vi because we do 987 * the ~ substitution before calling the RE engine, so magic characters 988 * in the replacement string will be expanded by the RE engine, and they 989 * weren't historically. It's a bug. 990 */ 991 static int 992 re_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp) 993 { 994 size_t blen, len, needlen; 995 int magic; 996 char *bp, *p, *t; 997 998 /* 999 * First pass through, we figure out how much space we'll need. 1000 * We do it in two passes, on the grounds that most of the time 1001 * the user is doing a search and won't have magic characters. 1002 * That way we can skip most of the memory allocation and copies. 1003 */ 1004 magic = 0; 1005 for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len) 1006 switch (*p) { 1007 case '\\': 1008 if (len > 1) { 1009 --len; 1010 switch (*++p) { 1011 case '~': 1012 if (!O_ISSET(sp, O_MAGIC)) { 1013 magic = 1; 1014 needlen += sp->repl_len; 1015 } 1016 break; 1017 case '.': 1018 case '[': 1019 case '*': 1020 if (!O_ISSET(sp, O_MAGIC)) { 1021 magic = 1; 1022 needlen += 1; 1023 } 1024 break; 1025 default: 1026 needlen += 2; 1027 } 1028 } else 1029 needlen += 1; 1030 break; 1031 case '~': 1032 if (O_ISSET(sp, O_MAGIC)) { 1033 magic = 1; 1034 needlen += sp->repl_len; 1035 } 1036 break; 1037 case '.': 1038 case '[': 1039 case '*': 1040 if (!O_ISSET(sp, O_MAGIC)) { 1041 magic = 1; 1042 needlen += 2; 1043 } 1044 break; 1045 default: 1046 needlen += 1; 1047 break; 1048 } 1049 1050 if (!magic) { 1051 *replacedp = 0; 1052 return (0); 1053 } 1054 1055 /* Get enough memory to hold the final pattern. */ 1056 *replacedp = 1; 1057 GET_SPACE_RET(sp, bp, blen, needlen); 1058 1059 for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len) 1060 switch (*p) { 1061 case '\\': 1062 if (len > 1) { 1063 --len; 1064 switch (*++p) { 1065 case '~': 1066 if (O_ISSET(sp, O_MAGIC)) 1067 *t++ = '~'; 1068 else { 1069 memcpy(t, 1070 sp->repl, sp->repl_len); 1071 t += sp->repl_len; 1072 } 1073 break; 1074 case '.': 1075 case '[': 1076 case '*': 1077 if (O_ISSET(sp, O_MAGIC)) 1078 *t++ = '\\'; 1079 *t++ = *p; 1080 break; 1081 default: 1082 *t++ = '\\'; 1083 *t++ = *p; 1084 } 1085 } else 1086 *t++ = '\\'; 1087 break; 1088 case '~': 1089 if (O_ISSET(sp, O_MAGIC)) { 1090 memcpy(t, sp->repl, sp->repl_len); 1091 t += sp->repl_len; 1092 } else 1093 *t++ = '~'; 1094 break; 1095 case '.': 1096 case '[': 1097 case '*': 1098 if (!O_ISSET(sp, O_MAGIC)) 1099 *t++ = '\\'; 1100 *t++ = *p; 1101 break; 1102 default: 1103 *t++ = *p; 1104 break; 1105 } 1106 1107 *ptrnp = bp; 1108 *plenp = t - bp; 1109 return (0); 1110 } 1111 1112 /* 1113 * re_tag_conv -- 1114 * Convert a tags search path into something that the POSIX 1115 * 1003.2 RE functions can handle. 1116 */ 1117 static int 1118 re_tag_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp) 1119 { 1120 size_t blen, len; 1121 int lastdollar; 1122 char *bp, *p, *t; 1123 1124 len = *plenp; 1125 1126 /* Max memory usage is 2 times the length of the string. */ 1127 *replacedp = 1; 1128 GET_SPACE_RET(sp, bp, blen, len * 2); 1129 1130 p = *ptrnp; 1131 t = bp; 1132 1133 /* If the last character is a '/' or '?', we just strip it. */ 1134 if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?')) 1135 --len; 1136 1137 /* If the next-to-last or last character is a '$', it's magic. */ 1138 if (len > 0 && p[len - 1] == '$') { 1139 --len; 1140 lastdollar = 1; 1141 } else 1142 lastdollar = 0; 1143 1144 /* If the first character is a '/' or '?', we just strip it. */ 1145 if (len > 0 && (p[0] == '/' || p[0] == '?')) { 1146 ++p; 1147 --len; 1148 } 1149 1150 /* If the first or second character is a '^', it's magic. */ 1151 if (p[0] == '^') { 1152 *t++ = *p++; 1153 --len; 1154 } 1155 1156 /* 1157 * Escape every other magic character we can find, meanwhile stripping 1158 * the backslashes ctags inserts when escaping the search delimiter 1159 * characters. 1160 */ 1161 for (; len > 0; --len) { 1162 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) { 1163 ++p; 1164 --len; 1165 } else if (strchr("^.[]$*", p[0])) 1166 *t++ = '\\'; 1167 *t++ = *p++; 1168 if (len == 0) 1169 break; 1170 } 1171 if (lastdollar) 1172 *t++ = '$'; 1173 1174 *ptrnp = bp; 1175 *plenp = t - bp; 1176 return (0); 1177 } 1178 1179 /* 1180 * re_error -- 1181 * Report a regular expression error. 1182 * 1183 * PUBLIC: void re_error(SCR *, int, regex_t *); 1184 */ 1185 void 1186 re_error(SCR *sp, int errcode, regex_t *preg) 1187 { 1188 size_t s; 1189 char *oe; 1190 1191 s = regerror(errcode, preg, "", 0); 1192 if ((oe = malloc(s)) == NULL) 1193 msgq(sp, M_SYSERR, NULL); 1194 else { 1195 (void)regerror(errcode, preg, oe, s); 1196 msgq(sp, M_ERR, "RE error: %s", oe); 1197 free(oe); 1198 } 1199 } 1200 1201 /* 1202 * re_sub -- 1203 * Do the substitution for a regular expression. 1204 */ 1205 static int 1206 re_sub(SCR *sp, char *ip, char **lbp, size_t *lbclenp, size_t *lblenp, 1207 regmatch_t match[10]) 1208 { 1209 enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv; 1210 size_t lbclen, lblen; /* Local copies. */ 1211 size_t mlen; /* Match length. */ 1212 size_t rpl; /* Remaining replacement length. */ 1213 char *rp; /* Replacement pointer. */ 1214 int ch; 1215 int no; /* Match replacement offset. */ 1216 char *p, *t; /* Buffer pointers. */ 1217 char *lb; /* Local copies. */ 1218 1219 lb = *lbp; /* Get local copies. */ 1220 lbclen = *lbclenp; 1221 lblen = *lblenp; 1222 1223 /* 1224 * QUOTING NOTE: 1225 * 1226 * There are some special sequences that vi provides in the 1227 * replacement patterns. 1228 * & string the RE matched (\& if nomagic set) 1229 * \# n-th regular subexpression 1230 * \E end \U, \L conversion 1231 * \e end \U, \L conversion 1232 * \l convert the next character to lower-case 1233 * \L convert to lower-case, until \E, \e, or end of replacement 1234 * \u convert the next character to upper-case 1235 * \U convert to upper-case, until \E, \e, or end of replacement 1236 * 1237 * Otherwise, since this is the lowest level of replacement, discard 1238 * all escaping characters. This (hopefully) matches historic practice. 1239 */ 1240 #define OUTCH(ch, nltrans) { \ 1241 CHAR_T __ch = (ch); \ 1242 u_int __value = KEY_VAL(sp, __ch); \ 1243 if ((nltrans) && (__value == K_CR || __value == K_NL)) { \ 1244 NEEDNEWLINE(sp); \ 1245 sp->newl[sp->newl_cnt++] = lbclen; \ 1246 } else if (conv != C_NOTSET) { \ 1247 switch (conv) { \ 1248 case C_ONELOWER: \ 1249 conv = C_NOTSET; \ 1250 /* FALLTHROUGH */ \ 1251 case C_LOWER: \ 1252 if (isupper(__ch)) \ 1253 __ch = tolower(__ch); \ 1254 break; \ 1255 case C_ONEUPPER: \ 1256 conv = C_NOTSET; \ 1257 /* FALLTHROUGH */ \ 1258 case C_UPPER: \ 1259 if (islower(__ch)) \ 1260 __ch = toupper(__ch); \ 1261 break; \ 1262 default: \ 1263 abort(); \ 1264 } \ 1265 } \ 1266 NEEDSP(sp, 1, p); \ 1267 *p++ = __ch; \ 1268 ++lbclen; \ 1269 } 1270 conv = C_NOTSET; 1271 for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) { 1272 switch (ch = *rp++) { 1273 case '&': 1274 if (O_ISSET(sp, O_MAGIC)) { 1275 no = 0; 1276 goto subzero; 1277 } 1278 break; 1279 case '\\': 1280 if (rpl == 0) 1281 break; 1282 --rpl; 1283 switch (ch = *rp) { 1284 case '&': 1285 ++rp; 1286 if (!O_ISSET(sp, O_MAGIC)) { 1287 no = 0; 1288 goto subzero; 1289 } 1290 break; 1291 case '0': case '1': case '2': case '3': case '4': 1292 case '5': case '6': case '7': case '8': case '9': 1293 no = *rp++ - '0'; 1294 subzero: if (match[no].rm_so == -1 || 1295 match[no].rm_eo == -1) 1296 break; 1297 mlen = match[no].rm_eo - match[no].rm_so; 1298 for (t = ip + match[no].rm_so; mlen--; ++t) 1299 OUTCH(*t, 0); 1300 continue; 1301 case 'e': 1302 case 'E': 1303 ++rp; 1304 conv = C_NOTSET; 1305 continue; 1306 case 'l': 1307 ++rp; 1308 conv = C_ONELOWER; 1309 continue; 1310 case 'L': 1311 ++rp; 1312 conv = C_LOWER; 1313 continue; 1314 case 'u': 1315 ++rp; 1316 conv = C_ONEUPPER; 1317 continue; 1318 case 'U': 1319 ++rp; 1320 conv = C_UPPER; 1321 continue; 1322 default: 1323 ++rp; 1324 break; 1325 } 1326 } 1327 OUTCH(ch, 1); 1328 } 1329 1330 *lbp = lb; /* Update caller's information. */ 1331 *lbclenp = lbclen; 1332 *lblenp = lblen; 1333 return (0); 1334 } 1335