1 /* $OpenBSD: search.c,v 1.15 2022/12/10 16:06:18 millert Exp $ */
2
3 /*-
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7 * Keith Bostic. All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12 #include "config.h"
13
14 #include <sys/types.h>
15 #include <sys/queue.h>
16
17 #include <bitstring.h>
18 #include <ctype.h>
19 #include <errno.h>
20 #include <limits.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25
26 #include "common.h"
27
28 typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
29
30 static void search_msg(SCR *, smsg_t);
31 static int search_init(SCR *, dir_t, char *, size_t, char **, u_int);
32
33 /*
34 * search_init --
35 * Set up a search.
36 */
37 static int
search_init(SCR * sp,dir_t dir,char * ptrn,size_t plen,char ** epp,u_int flags)38 search_init(SCR *sp, dir_t dir, char *ptrn, size_t plen, char **epp,
39 u_int flags)
40 {
41 recno_t lno;
42 int delim;
43 char *p, *t;
44
45 /* If the file is empty, it's a fast search. */
46 if (sp->lno <= 1) {
47 if (db_last(sp, &lno))
48 return (1);
49 if (lno == 0) {
50 if (LF_ISSET(SEARCH_MSG))
51 search_msg(sp, S_EMPTY);
52 return (1);
53 }
54 }
55
56 if (LF_ISSET(SEARCH_PARSE)) { /* Parse the string. */
57 /*
58 * Use the saved pattern if no pattern specified, or if only
59 * one or two delimiter characters specified.
60 *
61 * !!!
62 * Historically, only the pattern itself was saved, vi didn't
63 * preserve addressing or delta information.
64 */
65 if (ptrn == NULL)
66 goto prev;
67 if (plen == 1) {
68 if (epp != NULL)
69 *epp = ptrn + 1;
70 goto prev;
71 }
72 if (ptrn[0] == ptrn[1]) {
73 if (epp != NULL)
74 *epp = ptrn + 2;
75
76 /* Complain if we don't have a previous pattern. */
77 prev: if (sp->re == NULL) {
78 search_msg(sp, S_NOPREV);
79 return (1);
80 }
81 /* Re-compile the search pattern if necessary. */
82 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
83 sp->re, sp->re_len, NULL, NULL, &sp->re_c,
84 RE_C_SEARCH |
85 (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
86 return (1);
87
88 /* Set the search direction. */
89 if (LF_ISSET(SEARCH_SET))
90 sp->searchdir = dir;
91 return (0);
92 }
93
94 /*
95 * Set the delimiter, and move forward to the terminating
96 * delimiter, handling escaped delimiters.
97 *
98 * QUOTING NOTE:
99 * Only discard an escape character if it escapes a delimiter.
100 */
101 for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
102 if (--plen == 0 || p[0] == delim) {
103 if (plen != 0)
104 ++p;
105 break;
106 }
107 if (plen > 1 && p[0] == '\\') {
108 if (p[1] == delim) {
109 ++p;
110 --plen;
111 } else if (p[1] == '\\') {
112 *t++ = *p++;
113 --plen;
114 }
115 }
116 }
117 if (epp != NULL)
118 *epp = p;
119
120 plen = t - ptrn;
121 }
122
123 /* Compile the RE. */
124 if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
125 RE_C_SEARCH |
126 (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
127 (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0)))
128 return (1);
129
130 /* Set the search direction. */
131 if (LF_ISSET(SEARCH_SET))
132 sp->searchdir = dir;
133
134 return (0);
135 }
136
137 /*
138 * f_search --
139 * Do a forward search.
140 *
141 * PUBLIC: int f_search(SCR *, MARK *, MARK *, char *, size_t, char **, u_int);
142 */
143 int
f_search(SCR * sp,MARK * fm,MARK * rm,char * ptrn,size_t plen,char ** eptrn,u_int flags)144 f_search(SCR *sp, MARK *fm, MARK *rm, char *ptrn, size_t plen, char **eptrn,
145 u_int flags)
146 {
147 busy_t btype;
148 recno_t lno;
149 regmatch_t match[1];
150 size_t coff, len;
151 int cnt, eval, rval, wrapped = 0;
152 char *l;
153
154 if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
155 return (1);
156
157 if (LF_ISSET(SEARCH_FILE)) {
158 lno = 1;
159 coff = 0;
160 } else {
161 if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
162 return (1);
163 lno = fm->lno;
164
165 /*
166 * If doing incremental search, start searching at the previous
167 * column, so that we search a minimal distance and still match
168 * special patterns, e.g., \< for beginning of a word.
169 *
170 * Otherwise, start searching immediately after the cursor. If
171 * at the end of the line, start searching on the next line.
172 * This is incompatible (read bug fix) with the historic vi --
173 * searches for the '$' pattern never moved forward, and the
174 * "-t foo" didn't work if the 'f' was the first character in
175 * the file.
176 */
177 if (LF_ISSET(SEARCH_INCR)) {
178 if ((coff = fm->cno) != 0)
179 --coff;
180 } else if (fm->cno + 1 >= len) {
181 coff = 0;
182 lno = fm->lno + 1;
183 if (db_get(sp, lno, 0, &l, &len)) {
184 if (!O_ISSET(sp, O_WRAPSCAN)) {
185 if (LF_ISSET(SEARCH_MSG))
186 search_msg(sp, S_EOF);
187 return (1);
188 }
189 lno = 1;
190 wrapped = 1;
191 }
192 } else
193 coff = fm->cno + 1;
194 }
195
196 btype = BUSY_ON;
197 for (cnt = INTERRUPT_CHECK, rval = 1;; ++lno, coff = 0) {
198 if (cnt-- == 0) {
199 if (INTERRUPTED(sp))
200 break;
201 if (LF_ISSET(SEARCH_MSG)) {
202 search_busy(sp, btype);
203 btype = BUSY_UPDATE;
204 }
205 cnt = INTERRUPT_CHECK;
206 }
207 if ((wrapped && lno > fm->lno) || db_get(sp, lno, 0, &l, &len)) {
208 if (wrapped) {
209 if (LF_ISSET(SEARCH_MSG))
210 search_msg(sp, S_NOTFOUND);
211 break;
212 }
213 if (!O_ISSET(sp, O_WRAPSCAN)) {
214 if (LF_ISSET(SEARCH_MSG))
215 search_msg(sp, S_EOF);
216 break;
217 }
218 lno = 0;
219 wrapped = 1;
220 continue;
221 }
222
223 /* If already at EOL, just keep going. */
224 if (len != 0 && coff == len)
225 continue;
226
227 /* Set the termination. */
228 match[0].rm_so = coff;
229 match[0].rm_eo = len;
230
231 /* Search the line. */
232 eval = regexec(&sp->re_c, l, 1, match,
233 (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
234 if (eval == REG_NOMATCH)
235 continue;
236 if (eval != 0) {
237 if (LF_ISSET(SEARCH_MSG))
238 re_error(sp, eval, &sp->re_c);
239 else
240 (void)sp->gp->scr_bell(sp);
241 break;
242 }
243
244 /* Warn if the search wrapped. */
245 if (wrapped && LF_ISSET(SEARCH_WMSG))
246 search_msg(sp, S_WRAP);
247
248 rm->lno = lno;
249 rm->cno = match[0].rm_so;
250
251 /*
252 * If a change command, it's possible to move beyond the end
253 * of a line. Historic vi generally got this wrong (e.g. try
254 * "c?$<cr>"). Not all that sure this gets it right, there
255 * are lots of strange cases.
256 */
257 if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
258 rm->cno = len != 0 ? len - 1 : 0;
259
260 rval = 0;
261 break;
262 }
263
264 if (LF_ISSET(SEARCH_MSG))
265 search_busy(sp, BUSY_OFF);
266 return (rval);
267 }
268
269 /*
270 * b_search --
271 * Do a backward search.
272 *
273 * PUBLIC: int b_search(SCR *, MARK *, MARK *, char *, size_t, char **, u_int);
274 */
275 int
b_search(SCR * sp,MARK * fm,MARK * rm,char * ptrn,size_t plen,char ** eptrn,u_int flags)276 b_search(SCR *sp, MARK *fm, MARK *rm, char *ptrn, size_t plen, char **eptrn,
277 u_int flags)
278 {
279 busy_t btype;
280 recno_t lno;
281 regmatch_t match[1];
282 size_t coff, last, len;
283 int cnt, eval, rval, wrapped;
284 char *l;
285
286 if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
287 return (1);
288
289 /*
290 * If doing incremental search, set the "starting" position past the
291 * current column, so that we search a minimal distance and still
292 * match special patterns, e.g., \> for the end of a word. This is
293 * safe when the cursor is at the end of a line because we only use
294 * it for comparison with the location of the match.
295 *
296 * Otherwise, start searching immediately before the cursor. If in
297 * the first column, start search on the previous line.
298 */
299 if (LF_ISSET(SEARCH_INCR)) {
300 lno = fm->lno;
301 coff = fm->cno + 1;
302 } else {
303 if (fm->cno == 0) {
304 if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
305 if (LF_ISSET(SEARCH_MSG))
306 search_msg(sp, S_SOF);
307 return (1);
308 }
309 lno = fm->lno - 1;
310 } else
311 lno = fm->lno;
312 coff = fm->cno;
313 }
314
315 btype = BUSY_ON;
316 for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
317 if (cnt-- == 0) {
318 if (INTERRUPTED(sp))
319 break;
320 if (LF_ISSET(SEARCH_MSG)) {
321 search_busy(sp, btype);
322 btype = BUSY_UPDATE;
323 }
324 cnt = INTERRUPT_CHECK;
325 }
326 if ((wrapped && lno < fm->lno) || lno == 0) {
327 if (wrapped) {
328 if (LF_ISSET(SEARCH_MSG))
329 search_msg(sp, S_NOTFOUND);
330 break;
331 }
332 if (!O_ISSET(sp, O_WRAPSCAN)) {
333 if (LF_ISSET(SEARCH_MSG))
334 search_msg(sp, S_SOF);
335 break;
336 }
337 if (db_last(sp, &lno))
338 break;
339 if (lno == 0) {
340 if (LF_ISSET(SEARCH_MSG))
341 search_msg(sp, S_EMPTY);
342 break;
343 }
344 ++lno;
345 wrapped = 1;
346 continue;
347 }
348
349 if (db_get(sp, lno, 0, &l, &len))
350 break;
351
352 /* Set the termination. */
353 match[0].rm_so = 0;
354 match[0].rm_eo = len;
355
356 /* Search the line. */
357 eval = regexec(&sp->re_c, l, 1, match,
358 (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
359 if (eval == REG_NOMATCH)
360 continue;
361 if (eval != 0) {
362 if (LF_ISSET(SEARCH_MSG))
363 re_error(sp, eval, &sp->re_c);
364 else
365 (void)sp->gp->scr_bell(sp);
366 break;
367 }
368
369 /* Check for a match starting past the cursor. */
370 if (coff != 0 && match[0].rm_so >= coff)
371 continue;
372
373 /* Warn if the search wrapped. */
374 if (wrapped && LF_ISSET(SEARCH_WMSG))
375 search_msg(sp, S_WRAP);
376
377 /*
378 * We now have the first match on the line. Step through the
379 * line character by character until find the last acceptable
380 * match. This is painful, we need a better interface to regex
381 * to make this work.
382 */
383 for (;;) {
384 last = match[0].rm_so++;
385 if (match[0].rm_so >= len)
386 break;
387 match[0].rm_eo = len;
388 eval = regexec(&sp->re_c, l, 1, match,
389 (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
390 REG_STARTEND);
391 if (eval == REG_NOMATCH)
392 break;
393 if (eval != 0) {
394 if (LF_ISSET(SEARCH_MSG))
395 re_error(sp, eval, &sp->re_c);
396 else
397 (void)sp->gp->scr_bell(sp);
398 goto err;
399 }
400 if (coff && match[0].rm_so >= coff)
401 break;
402 }
403 rm->lno = lno;
404
405 /* See comment in f_search(). */
406 if (!LF_ISSET(SEARCH_EOL) && last >= len)
407 rm->cno = len != 0 ? len - 1 : 0;
408 else
409 rm->cno = last;
410 rval = 0;
411 break;
412 }
413
414 err: if (LF_ISSET(SEARCH_MSG))
415 search_busy(sp, BUSY_OFF);
416 return (rval);
417 }
418
419 /*
420 * search_msg --
421 * Display one of the search messages.
422 */
423 static void
search_msg(SCR * sp,smsg_t msg)424 search_msg(SCR *sp, smsg_t msg)
425 {
426 switch (msg) {
427 case S_EMPTY:
428 msgq(sp, M_ERR, "File empty; nothing to search");
429 break;
430 case S_EOF:
431 msgq(sp, M_ERR,
432 "Reached end-of-file without finding the pattern");
433 break;
434 case S_NOPREV:
435 msgq(sp, M_ERR, "No previous search pattern");
436 break;
437 case S_NOTFOUND:
438 msgq(sp, M_ERR, "Pattern not found");
439 break;
440 case S_SOF:
441 msgq(sp, M_ERR,
442 "Reached top-of-file without finding the pattern");
443 break;
444 case S_WRAP:
445 msgq(sp, M_ERR, "Search wrapped");
446 break;
447 default:
448 abort();
449 }
450 }
451
452 /*
453 * search_busy --
454 * Put up the busy searching message.
455 *
456 * PUBLIC: void search_busy(SCR *, busy_t);
457 */
458 void
search_busy(SCR * sp,busy_t btype)459 search_busy(SCR *sp, busy_t btype)
460 {
461 sp->gp->scr_busy(sp, "Searching...", btype);
462 }
463