xref: /openbsd-src/usr.bin/vi/common/search.c (revision a64626f4d97a9338d0f4af8649e97dc3870beb0a)
1 /*	$OpenBSD: search.c,v 1.15 2022/12/10 16:06:18 millert Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  * Copyright (c) 1992, 1993, 1994, 1995, 1996
7  *	Keith Bostic.  All rights reserved.
8  *
9  * See the LICENSE file for redistribution information.
10  */
11 
12 #include "config.h"
13 
14 #include <sys/types.h>
15 #include <sys/queue.h>
16 
17 #include <bitstring.h>
18 #include <ctype.h>
19 #include <errno.h>
20 #include <limits.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25 
26 #include "common.h"
27 
28 typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
29 
30 static void	search_msg(SCR *, smsg_t);
31 static int	search_init(SCR *, dir_t, char *, size_t, char **, u_int);
32 
33 /*
34  * search_init --
35  *	Set up a search.
36  */
37 static int
search_init(SCR * sp,dir_t dir,char * ptrn,size_t plen,char ** epp,u_int flags)38 search_init(SCR *sp, dir_t dir, char *ptrn, size_t plen, char **epp,
39     u_int flags)
40 {
41 	recno_t lno;
42 	int delim;
43 	char *p, *t;
44 
45 	/* If the file is empty, it's a fast search. */
46 	if (sp->lno <= 1) {
47 		if (db_last(sp, &lno))
48 			return (1);
49 		if (lno == 0) {
50 			if (LF_ISSET(SEARCH_MSG))
51 				search_msg(sp, S_EMPTY);
52 			return (1);
53 		}
54 	}
55 
56 	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
57 		/*
58 		 * Use the saved pattern if no pattern specified, or if only
59 		 * one or two delimiter characters specified.
60 		 *
61 		 * !!!
62 		 * Historically, only the pattern itself was saved, vi didn't
63 		 * preserve addressing or delta information.
64 		 */
65 		if (ptrn == NULL)
66 			goto prev;
67 		if (plen == 1) {
68 			if (epp != NULL)
69 				*epp = ptrn + 1;
70 			goto prev;
71 		}
72 		if (ptrn[0] == ptrn[1]) {
73 			if (epp != NULL)
74 				*epp = ptrn + 2;
75 
76 			/* Complain if we don't have a previous pattern. */
77 prev:			if (sp->re == NULL) {
78 				search_msg(sp, S_NOPREV);
79 				return (1);
80 			}
81 			/* Re-compile the search pattern if necessary. */
82 			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
83 			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
84 			    RE_C_SEARCH |
85 			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
86 				return (1);
87 
88 			/* Set the search direction. */
89 			if (LF_ISSET(SEARCH_SET))
90 				sp->searchdir = dir;
91 			return (0);
92 		}
93 
94 		/*
95 		 * Set the delimiter, and move forward to the terminating
96 		 * delimiter, handling escaped delimiters.
97 		 *
98 		 * QUOTING NOTE:
99 		 * Only discard an escape character if it escapes a delimiter.
100 		 */
101 		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
102 			if (--plen == 0 || p[0] == delim) {
103 				if (plen != 0)
104 					++p;
105 				break;
106 			}
107 			if (plen > 1 && p[0] == '\\') {
108 				if (p[1] == delim) {
109 					++p;
110 					--plen;
111 				} else if (p[1] == '\\') {
112 					*t++ = *p++;
113 					--plen;
114 				}
115 			}
116 		}
117 		if (epp != NULL)
118 			*epp = p;
119 
120 		plen = t - ptrn;
121 	}
122 
123 	/* Compile the RE. */
124 	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
125 	    RE_C_SEARCH |
126 	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
127 	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0)))
128 		return (1);
129 
130 	/* Set the search direction. */
131 	if (LF_ISSET(SEARCH_SET))
132 		sp->searchdir = dir;
133 
134 	return (0);
135 }
136 
137 /*
138  * f_search --
139  *	Do a forward search.
140  *
141  * PUBLIC: int f_search(SCR *, MARK *, MARK *, char *, size_t, char **, u_int);
142  */
143 int
f_search(SCR * sp,MARK * fm,MARK * rm,char * ptrn,size_t plen,char ** eptrn,u_int flags)144 f_search(SCR *sp, MARK *fm, MARK *rm, char *ptrn, size_t plen, char **eptrn,
145     u_int flags)
146 {
147 	busy_t btype;
148 	recno_t lno;
149 	regmatch_t match[1];
150 	size_t coff, len;
151 	int cnt, eval, rval, wrapped = 0;
152 	char *l;
153 
154 	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
155 		return (1);
156 
157 	if (LF_ISSET(SEARCH_FILE)) {
158 		lno = 1;
159 		coff = 0;
160 	} else {
161 		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
162 			return (1);
163 		lno = fm->lno;
164 
165 		/*
166 		 * If doing incremental search, start searching at the previous
167 		 * column, so that we search a minimal distance and still match
168 		 * special patterns, e.g., \< for beginning of a word.
169 		 *
170 		 * Otherwise, start searching immediately after the cursor.  If
171 		 * at the end of the line, start searching on the next line.
172 		 * This is incompatible (read bug fix) with the historic vi --
173 		 * searches for the '$' pattern never moved forward, and the
174 		 * "-t foo" didn't work if the 'f' was the first character in
175 		 * the file.
176 		 */
177 		if (LF_ISSET(SEARCH_INCR)) {
178 			if ((coff = fm->cno) != 0)
179 				--coff;
180 		} else if (fm->cno + 1 >= len) {
181 			coff = 0;
182 			lno = fm->lno + 1;
183 			if (db_get(sp, lno, 0, &l, &len)) {
184 				if (!O_ISSET(sp, O_WRAPSCAN)) {
185 					if (LF_ISSET(SEARCH_MSG))
186 						search_msg(sp, S_EOF);
187 					return (1);
188 				}
189 				lno = 1;
190 				wrapped = 1;
191 			}
192 		} else
193 			coff = fm->cno + 1;
194 	}
195 
196 	btype = BUSY_ON;
197 	for (cnt = INTERRUPT_CHECK, rval = 1;; ++lno, coff = 0) {
198 		if (cnt-- == 0) {
199 			if (INTERRUPTED(sp))
200 				break;
201 			if (LF_ISSET(SEARCH_MSG)) {
202 				search_busy(sp, btype);
203 				btype = BUSY_UPDATE;
204 			}
205 			cnt = INTERRUPT_CHECK;
206 		}
207 		if ((wrapped && lno > fm->lno) || db_get(sp, lno, 0, &l, &len)) {
208 			if (wrapped) {
209 				if (LF_ISSET(SEARCH_MSG))
210 					search_msg(sp, S_NOTFOUND);
211 				break;
212 			}
213 			if (!O_ISSET(sp, O_WRAPSCAN)) {
214 				if (LF_ISSET(SEARCH_MSG))
215 					search_msg(sp, S_EOF);
216 				break;
217 			}
218 			lno = 0;
219 			wrapped = 1;
220 			continue;
221 		}
222 
223 		/* If already at EOL, just keep going. */
224 		if (len != 0 && coff == len)
225 			continue;
226 
227 		/* Set the termination. */
228 		match[0].rm_so = coff;
229 		match[0].rm_eo = len;
230 
231 		/* Search the line. */
232 		eval = regexec(&sp->re_c, l, 1, match,
233 		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
234 		if (eval == REG_NOMATCH)
235 			continue;
236 		if (eval != 0) {
237 			if (LF_ISSET(SEARCH_MSG))
238 				re_error(sp, eval, &sp->re_c);
239 			else
240 				(void)sp->gp->scr_bell(sp);
241 			break;
242 		}
243 
244 		/* Warn if the search wrapped. */
245 		if (wrapped && LF_ISSET(SEARCH_WMSG))
246 			search_msg(sp, S_WRAP);
247 
248 		rm->lno = lno;
249 		rm->cno = match[0].rm_so;
250 
251 		/*
252 		 * If a change command, it's possible to move beyond the end
253 		 * of a line.  Historic vi generally got this wrong (e.g. try
254 		 * "c?$<cr>").  Not all that sure this gets it right, there
255 		 * are lots of strange cases.
256 		 */
257 		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
258 			rm->cno = len != 0 ? len - 1 : 0;
259 
260 		rval = 0;
261 		break;
262 	}
263 
264 	if (LF_ISSET(SEARCH_MSG))
265 		search_busy(sp, BUSY_OFF);
266 	return (rval);
267 }
268 
269 /*
270  * b_search --
271  *	Do a backward search.
272  *
273  * PUBLIC: int b_search(SCR *, MARK *, MARK *, char *, size_t, char **, u_int);
274  */
275 int
b_search(SCR * sp,MARK * fm,MARK * rm,char * ptrn,size_t plen,char ** eptrn,u_int flags)276 b_search(SCR *sp, MARK *fm, MARK *rm, char *ptrn, size_t plen, char **eptrn,
277     u_int flags)
278 {
279 	busy_t btype;
280 	recno_t lno;
281 	regmatch_t match[1];
282 	size_t coff, last, len;
283 	int cnt, eval, rval, wrapped;
284 	char *l;
285 
286 	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
287 		return (1);
288 
289 	/*
290 	 * If doing incremental search, set the "starting" position past the
291 	 * current column, so that we search a minimal distance and still
292 	 * match special patterns, e.g., \> for the end of a word.  This is
293 	 * safe when the cursor is at the end of a line because we only use
294 	 * it for comparison with the location of the match.
295 	 *
296 	 * Otherwise, start searching immediately before the cursor.  If in
297 	 * the first column, start search on the previous line.
298 	 */
299 	if (LF_ISSET(SEARCH_INCR)) {
300 		lno = fm->lno;
301 		coff = fm->cno + 1;
302 	} else {
303 		if (fm->cno == 0) {
304 			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
305 				if (LF_ISSET(SEARCH_MSG))
306 					search_msg(sp, S_SOF);
307 				return (1);
308 			}
309 			lno = fm->lno - 1;
310 		} else
311 			lno = fm->lno;
312 		coff = fm->cno;
313 	}
314 
315 	btype = BUSY_ON;
316 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
317 		if (cnt-- == 0) {
318 			if (INTERRUPTED(sp))
319 				break;
320 			if (LF_ISSET(SEARCH_MSG)) {
321 				search_busy(sp, btype);
322 				btype = BUSY_UPDATE;
323 			}
324 			cnt = INTERRUPT_CHECK;
325 		}
326 		if ((wrapped && lno < fm->lno) || lno == 0) {
327 			if (wrapped) {
328 				if (LF_ISSET(SEARCH_MSG))
329 					search_msg(sp, S_NOTFOUND);
330 				break;
331 			}
332 			if (!O_ISSET(sp, O_WRAPSCAN)) {
333 				if (LF_ISSET(SEARCH_MSG))
334 					search_msg(sp, S_SOF);
335 				break;
336 			}
337 			if (db_last(sp, &lno))
338 				break;
339 			if (lno == 0) {
340 				if (LF_ISSET(SEARCH_MSG))
341 					search_msg(sp, S_EMPTY);
342 				break;
343 			}
344 			++lno;
345 			wrapped = 1;
346 			continue;
347 		}
348 
349 		if (db_get(sp, lno, 0, &l, &len))
350 			break;
351 
352 		/* Set the termination. */
353 		match[0].rm_so = 0;
354 		match[0].rm_eo = len;
355 
356 		/* Search the line. */
357 		eval = regexec(&sp->re_c, l, 1, match,
358 		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
359 		if (eval == REG_NOMATCH)
360 			continue;
361 		if (eval != 0) {
362 			if (LF_ISSET(SEARCH_MSG))
363 				re_error(sp, eval, &sp->re_c);
364 			else
365 				(void)sp->gp->scr_bell(sp);
366 			break;
367 		}
368 
369 		/* Check for a match starting past the cursor. */
370 		if (coff != 0 && match[0].rm_so >= coff)
371 			continue;
372 
373 		/* Warn if the search wrapped. */
374 		if (wrapped && LF_ISSET(SEARCH_WMSG))
375 			search_msg(sp, S_WRAP);
376 
377 		/*
378 		 * We now have the first match on the line.  Step through the
379 		 * line character by character until find the last acceptable
380 		 * match.  This is painful, we need a better interface to regex
381 		 * to make this work.
382 		 */
383 		for (;;) {
384 			last = match[0].rm_so++;
385 			if (match[0].rm_so >= len)
386 				break;
387 			match[0].rm_eo = len;
388 			eval = regexec(&sp->re_c, l, 1, match,
389 			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
390 			    REG_STARTEND);
391 			if (eval == REG_NOMATCH)
392 				break;
393 			if (eval != 0) {
394 				if (LF_ISSET(SEARCH_MSG))
395 					re_error(sp, eval, &sp->re_c);
396 				else
397 					(void)sp->gp->scr_bell(sp);
398 				goto err;
399 			}
400 			if (coff && match[0].rm_so >= coff)
401 				break;
402 		}
403 		rm->lno = lno;
404 
405 		/* See comment in f_search(). */
406 		if (!LF_ISSET(SEARCH_EOL) && last >= len)
407 			rm->cno = len != 0 ? len - 1 : 0;
408 		else
409 			rm->cno = last;
410 		rval = 0;
411 		break;
412 	}
413 
414 err:	if (LF_ISSET(SEARCH_MSG))
415 		search_busy(sp, BUSY_OFF);
416 	return (rval);
417 }
418 
419 /*
420  * search_msg --
421  *	Display one of the search messages.
422  */
423 static void
search_msg(SCR * sp,smsg_t msg)424 search_msg(SCR *sp, smsg_t msg)
425 {
426 	switch (msg) {
427 	case S_EMPTY:
428 		msgq(sp, M_ERR, "File empty; nothing to search");
429 		break;
430 	case S_EOF:
431 		msgq(sp, M_ERR,
432 		    "Reached end-of-file without finding the pattern");
433 		break;
434 	case S_NOPREV:
435 		msgq(sp, M_ERR, "No previous search pattern");
436 		break;
437 	case S_NOTFOUND:
438 		msgq(sp, M_ERR, "Pattern not found");
439 		break;
440 	case S_SOF:
441 		msgq(sp, M_ERR,
442 		    "Reached top-of-file without finding the pattern");
443 		break;
444 	case S_WRAP:
445 		msgq(sp, M_ERR, "Search wrapped");
446 		break;
447 	default:
448 		abort();
449 	}
450 }
451 
452 /*
453  * search_busy --
454  *	Put up the busy searching message.
455  *
456  * PUBLIC: void search_busy(SCR *, busy_t);
457  */
458 void
search_busy(SCR * sp,busy_t btype)459 search_busy(SCR *sp, busy_t btype)
460 {
461 	sp->gp->scr_busy(sp, "Searching...", btype);
462 }
463