1 /* $NetBSD: search.c,v 1.5 2023/02/13 23:08:43 gutteridge Exp $ */ 2 /*- 3 * Copyright (c) 1992, 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * Copyright (c) 1992, 1993, 1994, 1995, 1996 6 * Keith Bostic. All rights reserved. 7 * 8 * See the LICENSE file for redistribution information. 9 */ 10 11 #include "config.h" 12 13 #include <sys/cdefs.h> 14 #if 0 15 #ifndef lint 16 static const char sccsid[] = "Id: search.c,v 10.31 2001/06/25 15:19:12 skimo Exp (Berkeley) Date: 2001/06/25 15:19:12 "; 17 #endif /* not lint */ 18 #else 19 __RCSID("$NetBSD: search.c,v 1.5 2023/02/13 23:08:43 gutteridge Exp $"); 20 #endif 21 22 #include <sys/types.h> 23 #include <sys/queue.h> 24 25 #include <bitstring.h> 26 #include <ctype.h> 27 #include <errno.h> 28 #include <limits.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <unistd.h> 33 34 #include "common.h" 35 36 typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t; 37 38 static void search_msg __P((SCR *, smsg_t)); 39 static int search_init __P((SCR *, dir_t, CHAR_T *, size_t, CHAR_T **, u_int)); 40 41 /* 42 * search_init -- 43 * Set up a search. 44 */ 45 static int 46 search_init(SCR *sp, dir_t dir, CHAR_T *ptrn, size_t plen, CHAR_T **epp, u_int flags) 47 { 48 db_recno_t lno; 49 int delim; 50 CHAR_T *p, *t; 51 52 /* If the file is empty, it's a fast search. */ 53 if (sp->lno <= 1) { 54 if (db_last(sp, &lno)) 55 return (1); 56 if (lno == 0) { 57 if (LF_ISSET(SEARCH_MSG)) 58 search_msg(sp, S_EMPTY); 59 return (1); 60 } 61 } 62 63 if (LF_ISSET(SEARCH_PARSE)) { /* Parse the string. */ 64 /* 65 * Use the saved pattern if no pattern specified, or if only 66 * one or two delimiter characters specified. 67 * 68 * !!! 69 * Historically, only the pattern itself was saved, vi didn't 70 * preserve addressing or delta information. 71 */ 72 if (ptrn == NULL) 73 goto prev; 74 if (plen == 1) { 75 if (epp != NULL) 76 *epp = ptrn + 1; 77 goto prev; 78 } 79 if (ptrn[0] == ptrn[1]) { 80 if (epp != NULL) 81 *epp = ptrn + 2; 82 83 /* Complain if we don't have a previous pattern. */ 84 prev: if (sp->re == NULL) { 85 search_msg(sp, S_NOPREV); 86 return (1); 87 } 88 /* Re-compile the search pattern if necessary. */ 89 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, 90 sp->re, sp->re_len, NULL, NULL, &sp->re_c, 91 SEARCH_CSEARCH | SEARCH_MSG)) 92 return (1); 93 94 /* Set the search direction. */ 95 if (LF_ISSET(SEARCH_SET)) 96 sp->searchdir = dir; 97 return (0); 98 } 99 100 /* 101 * Set the delimiter, and move forward to the terminating 102 * delimiter, handling escaped delimiters. 103 * 104 * QUOTING NOTE: 105 * Only discard an escape character if it escapes a delimiter. 106 */ 107 for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) { 108 if (--plen == 0 || p[0] == delim) { 109 if (plen != 0) 110 ++p; 111 break; 112 } 113 if (plen > 1 && p[0] == '\\') { 114 if(p[1] == delim) { 115 ++p; 116 --plen; 117 } else if(p[1] == '\\') { 118 *t++ = *p++; 119 --plen; 120 } 121 } 122 } 123 if (epp != NULL) 124 *epp = p; 125 126 plen = t - ptrn; 127 } 128 129 /* Compile the RE. */ 130 if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c, 131 SEARCH_CSEARCH | LF_ISSET(SEARCH_CSCOPE | SEARCH_EXTEND | 132 SEARCH_IC | SEARCH_LITERAL | SEARCH_MSG | SEARCH_TAG))) 133 return (1); 134 135 /* Set the search direction. */ 136 if (LF_ISSET(SEARCH_SET)) 137 sp->searchdir = dir; 138 139 return (0); 140 } 141 142 /* 143 * f_search -- 144 * Do a forward search. 145 * 146 * PUBLIC: int f_search __P((SCR *, 147 * PUBLIC: MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int)); 148 */ 149 int 150 f_search(SCR *sp, MARK *fm, MARK *rm, CHAR_T *ptrn, size_t plen, CHAR_T **eptrn, u_int flags) 151 { 152 busy_t btype; 153 db_recno_t lno; 154 regmatch_t match[1]; 155 size_t coff, len; 156 int cnt, eval, rval, wrapped = 0; 157 CHAR_T *l; 158 159 if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags)) 160 return (1); 161 162 /* Figure out if we're going to wrap. */ 163 if (!LF_ISSET(SEARCH_NOOPT) && O_ISSET(sp, O_WRAPSCAN)) 164 LF_SET(SEARCH_WRAP); 165 166 if (LF_ISSET(SEARCH_FIRST)) { 167 lno = 1; 168 coff = 0; 169 } else { 170 if (db_get(sp, fm->lno, DBG_FATAL, &l, &len)) 171 return (1); 172 lno = fm->lno; 173 174 /* 175 * If doing incremental search, start searching at the previous 176 * column, so that we search a minimal distance and still match 177 * special patterns, e.g., \< for beginning of a word. 178 * 179 * Otherwise, start searching immediately after the cursor. If 180 * at the end of the line, start searching on the next line. 181 * This is incompatible (read bug fix) with the historic vi -- 182 * searches for the '$' pattern never moved forward, and the 183 * "-t foo" didn't work if the 'f' was the first character in 184 * the file. 185 */ 186 if (LF_ISSET(SEARCH_INCR)) { 187 if ((coff = fm->cno) != 0) 188 --coff; 189 } else if (fm->cno + 1 >= len) { 190 coff = 0; 191 lno = fm->lno + 1; 192 if (db_get(sp, lno, 0, &l, &len)) { 193 if (!LF_ISSET(SEARCH_WRAP)) { 194 if (LF_ISSET(SEARCH_MSG)) 195 search_msg(sp, S_EOF); 196 return (1); 197 } 198 lno = 1; 199 wrapped = 1; 200 } 201 } else 202 coff = fm->cno + 1; 203 } 204 205 btype = BUSY_ON; 206 for (cnt = INTERRUPT_CHECK, rval = 1;; ++lno, coff = 0) { 207 if (cnt-- == 0) { 208 if (INTERRUPTED(sp)) 209 break; 210 if (LF_ISSET(SEARCH_MSG)) { 211 search_busy(sp, btype); 212 btype = BUSY_UPDATE; 213 } 214 cnt = INTERRUPT_CHECK; 215 } 216 if ((wrapped && lno > fm->lno) || 217 db_get(sp, lno, 0, &l, &len)) { 218 if (wrapped) { 219 if (LF_ISSET(SEARCH_MSG)) 220 search_msg(sp, S_NOTFOUND); 221 break; 222 } 223 if (!LF_ISSET(SEARCH_WRAP)) { 224 if (LF_ISSET(SEARCH_MSG)) 225 search_msg(sp, S_EOF); 226 break; 227 } 228 lno = 0; 229 wrapped = 1; 230 continue; 231 } 232 233 /* If already at EOL, just keep going. */ 234 if (len != 0 && coff == len) 235 continue; 236 237 /* Set the termination. */ 238 match[0].rm_so = coff; 239 match[0].rm_eo = len; 240 241 #if defined(DEBUG) && 0 242 vtrace(sp, "F search: %lu from %u to %u\n", 243 lno, coff, len != 0 ? len - 1 : len); 244 #endif 245 /* Search the line. */ 246 eval = regexec(&sp->re_c, l, 1, match, 247 (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND); 248 if (eval == REG_NOMATCH) 249 continue; 250 if (eval != 0) { 251 if (LF_ISSET(SEARCH_MSG)) 252 re_error(sp, eval, &sp->re_c); 253 else 254 (void)sp->gp->scr_bell(sp); 255 break; 256 } 257 258 /* Warn if the search wrapped. */ 259 if (wrapped && LF_ISSET(SEARCH_WMSG)) 260 search_msg(sp, S_WRAP); 261 262 #if defined(DEBUG) && 0 263 vtrace(sp, "F search: %qu to %qu\n", 264 match[0].rm_so, match[0].rm_eo); 265 #endif 266 rm->lno = lno; 267 rm->cno = match[0].rm_so; 268 269 /* 270 * If a change command, it's possible to move beyond the end 271 * of a line. Historic vi generally got this wrong (e.g. try 272 * "c?$<cr>"). Not all that sure this gets it right, there 273 * are lots of strange cases. 274 */ 275 if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len) 276 rm->cno = len != 0 ? len - 1 : 0; 277 278 rval = 0; 279 break; 280 } 281 282 if (LF_ISSET(SEARCH_MSG)) 283 search_busy(sp, BUSY_OFF); 284 return (rval); 285 } 286 287 /* 288 * b_search -- 289 * Do a backward search. 290 * 291 * PUBLIC: int b_search __P((SCR *, 292 * PUBLIC: MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int)); 293 */ 294 int 295 b_search(SCR *sp, MARK *fm, MARK *rm, CHAR_T *ptrn, size_t plen, CHAR_T **eptrn, u_int flags) 296 { 297 busy_t btype; 298 db_recno_t lno; 299 regmatch_t match[1]; 300 size_t coff, last, len; 301 int cnt, eval, rval, wrapped; 302 CHAR_T *l; 303 304 if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags)) 305 return (1); 306 307 /* Figure out if we're going to wrap. */ 308 if (!LF_ISSET(SEARCH_NOOPT) && O_ISSET(sp, O_WRAPSCAN)) 309 LF_SET(SEARCH_WRAP); 310 311 /* 312 * If doing incremental search, set the "starting" position past the 313 * current column, so that we search a minimal distance and still 314 * match special patterns, e.g., \> for the end of a word. This is 315 * safe when the cursor is at the end of a line because we only use 316 * it for comparison with the location of the match. 317 * 318 * Otherwise, start searching immediately before the cursor. If in 319 * the first column, start search on the previous line. 320 */ 321 if (LF_ISSET(SEARCH_INCR)) { 322 lno = fm->lno; 323 coff = fm->cno + 1; 324 } else { 325 if (fm->cno == 0) { 326 if (fm->lno == 1 && !LF_ISSET(SEARCH_WRAP)) { 327 if (LF_ISSET(SEARCH_MSG)) 328 search_msg(sp, S_SOF); 329 return (1); 330 } 331 lno = fm->lno - 1; 332 } else 333 lno = fm->lno; 334 coff = fm->cno; 335 } 336 337 btype = BUSY_ON; 338 for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) { 339 if (cnt-- == 0) { 340 if (INTERRUPTED(sp)) 341 break; 342 if (LF_ISSET(SEARCH_MSG)) { 343 search_busy(sp, btype); 344 btype = BUSY_UPDATE; 345 } 346 cnt = INTERRUPT_CHECK; 347 } 348 if ((wrapped && lno < fm->lno) || lno == 0) { 349 if (wrapped) { 350 if (LF_ISSET(SEARCH_MSG)) 351 search_msg(sp, S_NOTFOUND); 352 break; 353 } 354 if (!LF_ISSET(SEARCH_WRAP)) { 355 if (LF_ISSET(SEARCH_MSG)) 356 search_msg(sp, S_SOF); 357 break; 358 } 359 if (db_last(sp, &lno)) 360 break; 361 if (lno == 0) { 362 if (LF_ISSET(SEARCH_MSG)) 363 search_msg(sp, S_EMPTY); 364 break; 365 } 366 ++lno; 367 wrapped = 1; 368 continue; 369 } 370 371 if (db_get(sp, lno, 0, &l, &len)) 372 break; 373 374 /* Set the termination. */ 375 match[0].rm_so = 0; 376 match[0].rm_eo = len; 377 378 #if defined(DEBUG) && 0 379 vtrace(sp, 380 "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo); 381 #endif 382 /* Search the line. */ 383 eval = regexec(&sp->re_c, l, 1, match, 384 ((size_t)match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND); 385 if (eval == REG_NOMATCH) 386 continue; 387 if (eval != 0) { 388 if (LF_ISSET(SEARCH_MSG)) 389 re_error(sp, eval, &sp->re_c); 390 else 391 (void)sp->gp->scr_bell(sp); 392 break; 393 } 394 395 /* Check for a match starting past the cursor. */ 396 if (coff != 0 && (size_t)match[0].rm_so >= coff) 397 continue; 398 399 /* Warn if the search wrapped. */ 400 if (wrapped && LF_ISSET(SEARCH_WMSG)) 401 search_msg(sp, S_WRAP); 402 403 #if defined(DEBUG) && 0 404 vtrace(sp, "B found: %qu to %qu\n", 405 match[0].rm_so, match[0].rm_eo); 406 #endif 407 /* 408 * We now have the first match on the line. Step through the 409 * line character by character until find the last acceptable 410 * match. This is painful, we need a better interface to regex 411 * to make this work. 412 */ 413 for (;;) { 414 last = match[0].rm_so++; 415 if ((size_t)match[0].rm_so >= len) 416 break; 417 match[0].rm_eo = len; 418 eval = regexec(&sp->re_c, l, 1, match, 419 (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | 420 REG_STARTEND); 421 if (eval == REG_NOMATCH) 422 break; 423 if (eval != 0) { 424 if (LF_ISSET(SEARCH_MSG)) 425 re_error(sp, eval, &sp->re_c); 426 else 427 (void)sp->gp->scr_bell(sp); 428 goto err; 429 } 430 if (coff && (size_t)match[0].rm_so >= coff) 431 break; 432 } 433 rm->lno = lno; 434 435 /* See comment in f_search(). */ 436 if (!LF_ISSET(SEARCH_EOL) && last >= len) 437 rm->cno = len != 0 ? len - 1 : 0; 438 else 439 rm->cno = last; 440 rval = 0; 441 break; 442 } 443 444 err: if (LF_ISSET(SEARCH_MSG)) 445 search_busy(sp, BUSY_OFF); 446 return (rval); 447 } 448 449 /* 450 * search_msg -- 451 * Display one of the search messages. 452 */ 453 static void 454 search_msg(SCR *sp, smsg_t msg) 455 { 456 switch (msg) { 457 case S_EMPTY: 458 msgq(sp, M_ERR, "072|File empty; nothing to search"); 459 break; 460 case S_EOF: 461 msgq(sp, M_ERR, 462 "073|Reached end-of-file without finding the pattern"); 463 break; 464 case S_NOPREV: 465 msgq(sp, M_ERR, "074|No previous search pattern"); 466 break; 467 case S_NOTFOUND: 468 msgq(sp, M_ERR, "075|Pattern not found"); 469 break; 470 case S_SOF: 471 msgq(sp, M_ERR, 472 "076|Reached top-of-file without finding the pattern"); 473 break; 474 case S_WRAP: 475 msgq(sp, M_ERR, "077|Search wrapped"); 476 break; 477 default: 478 abort(); 479 } 480 } 481 482 /* 483 * search_busy -- 484 * Put up the busy searching message. 485 * 486 * PUBLIC: void search_busy __P((SCR *, busy_t)); 487 */ 488 void 489 search_busy(SCR *sp, busy_t btype) 490 { 491 sp->gp->scr_busy(sp, "078|Searching...", btype); 492 } 493