xref: /openbsd-src/usr.bin/vi/common/search.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: search.c,v 1.5 2001/01/29 01:58:32 niklas Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  * Copyright (c) 1992, 1993, 1994, 1995, 1996
7  *	Keith Bostic.  All rights reserved.
8  *
9  * See the LICENSE file for redistribution information.
10  */
11 
12 #include "config.h"
13 
14 #ifndef lint
15 static const char sccsid[] = "@(#)search.c	10.25 (Berkeley) 6/30/96";
16 #endif /* not lint */
17 
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 
21 #include <bitstring.h>
22 #include <ctype.h>
23 #include <errno.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 
30 #include "common.h"
31 
32 typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
33 
34 static void	search_msg __P((SCR *, smsg_t));
35 static int	search_init __P((SCR *, dir_t, char *, size_t, char **, u_int));
36 
37 /*
38  * search_init --
39  *	Set up a search.
40  */
41 static int
42 search_init(sp, dir, ptrn, plen, epp, flags)
43 	SCR *sp;
44 	dir_t dir;
45 	char *ptrn, **epp;
46 	size_t plen;
47 	u_int flags;
48 {
49 	recno_t lno;
50 	int delim;
51 	char *p, *t;
52 
53 	/* If the file is empty, it's a fast search. */
54 	if (sp->lno <= 1) {
55 		if (db_last(sp, &lno))
56 			return (1);
57 		if (lno == 0) {
58 			if (LF_ISSET(SEARCH_MSG))
59 				search_msg(sp, S_EMPTY);
60 			return (1);
61 		}
62 	}
63 
64 	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
65 		/*
66 		 * Use the saved pattern if no pattern specified, or if only
67 		 * one or two delimiter characters specified.
68 		 *
69 		 * !!!
70 		 * Historically, only the pattern itself was saved, vi didn't
71 		 * preserve addressing or delta information.
72 		 */
73 		if (ptrn == NULL)
74 			goto prev;
75 		if (plen == 1) {
76 			if (epp != NULL)
77 				*epp = ptrn + 1;
78 			goto prev;
79 		}
80 		if (ptrn[0] == ptrn[1]) {
81 			if (epp != NULL)
82 				*epp = ptrn + 2;
83 
84 			/* Complain if we don't have a previous pattern. */
85 prev:			if (sp->re == NULL) {
86 				search_msg(sp, S_NOPREV);
87 				return (1);
88 			}
89 			/* Re-compile the search pattern if necessary. */
90 			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
91 			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
92 			    RE_C_SEARCH |
93 			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
94 				return (1);
95 
96 			/* Set the search direction. */
97 			if (LF_ISSET(SEARCH_SET))
98 				sp->searchdir = dir;
99 			return (0);
100 		}
101 
102 		/*
103 		 * Set the delimiter, and move forward to the terminating
104 		 * delimiter, handling escaped delimiters.
105 		 *
106 		 * QUOTING NOTE:
107 		 * Only discard an escape character if it escapes a delimiter.
108 		 */
109 		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
110 			if (--plen == 0 || p[0] == delim) {
111 				if (plen != 0)
112 					++p;
113 				break;
114 			}
115 			if (plen > 1 && p[0] == '\\' && p[1] == delim) {
116 				++p;
117 				--plen;
118 			}
119 		}
120 		if (epp != NULL)
121 			*epp = p;
122 
123 		plen = t - ptrn;
124 	}
125 
126 	/* Compile the RE. */
127 	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
128 	    RE_C_SEARCH |
129 	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
130 	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0) |
131 	    (LF_ISSET(SEARCH_CSCOPE) ? RE_C_CSCOPE : 0)))
132 		return (1);
133 
134 	/* Set the search direction. */
135 	if (LF_ISSET(SEARCH_SET))
136 		sp->searchdir = dir;
137 
138 	return (0);
139 }
140 
141 /*
142  * f_search --
143  *	Do a forward search.
144  *
145  * PUBLIC: int f_search __P((SCR *,
146  * PUBLIC:    MARK *, MARK *, char *, size_t, char **, u_int));
147  */
148 int
149 f_search(sp, fm, rm, ptrn, plen, eptrn, flags)
150 	SCR *sp;
151 	MARK *fm, *rm;
152 	char *ptrn, **eptrn;
153 	size_t plen;
154 	u_int flags;
155 {
156 	busy_t btype;
157 	recno_t lno;
158 	regmatch_t match[1];
159 	size_t coff, len;
160 	int cnt, eval, rval, wrapped;
161 	char *l;
162 
163 	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
164 		return (1);
165 
166 	if (LF_ISSET(SEARCH_FILE)) {
167 		lno = 1;
168 		coff = 0;
169 	} else {
170 		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
171 			return (1);
172 		lno = fm->lno;
173 
174 		/*
175 		 * If doing incremental search, start searching at the previous
176 		 * column, so that we search a minimal distance and still match
177 		 * special patterns, e.g., \< for beginning of a word.
178 		 *
179 		 * Otherwise, start searching immediately after the cursor.  If
180 		 * at the end of the line, start searching on the next line.
181 		 * This is incompatible (read bug fix) with the historic vi --
182 		 * searches for the '$' pattern never moved forward, and the
183 		 * "-t foo" didn't work if the 'f' was the first character in
184 		 * the file.
185 		 */
186 		if (LF_ISSET(SEARCH_INCR)) {
187 			if ((coff = fm->cno) != 0)
188 				--coff;
189 		} else if (fm->cno + 1 >= len) {
190 			coff = 0;
191 			lno = fm->lno + 1;
192 			if (db_get(sp, lno, 0, &l, &len)) {
193 				if (!O_ISSET(sp, O_WRAPSCAN)) {
194 					if (LF_ISSET(SEARCH_MSG))
195 						search_msg(sp, S_EOF);
196 					return (1);
197 				}
198 				lno = 1;
199 			}
200 		} else
201 			coff = fm->cno + 1;
202 	}
203 
204 	btype = BUSY_ON;
205 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; ++lno, coff = 0) {
206 		if (cnt-- == 0) {
207 			if (INTERRUPTED(sp))
208 				break;
209 			if (LF_ISSET(SEARCH_MSG)) {
210 				search_busy(sp, btype);
211 				btype = BUSY_UPDATE;
212 			}
213 			cnt = INTERRUPT_CHECK;
214 		}
215 		if (wrapped && lno > fm->lno || db_get(sp, lno, 0, &l, &len)) {
216 			if (wrapped) {
217 				if (LF_ISSET(SEARCH_MSG))
218 					search_msg(sp, S_NOTFOUND);
219 				break;
220 			}
221 			if (!O_ISSET(sp, O_WRAPSCAN)) {
222 				if (LF_ISSET(SEARCH_MSG))
223 					search_msg(sp, S_EOF);
224 				break;
225 			}
226 			lno = 0;
227 			wrapped = 1;
228 			continue;
229 		}
230 
231 		/* If already at EOL, just keep going. */
232 		if (len != 0 && coff == len)
233 			continue;
234 
235 		/* Set the termination. */
236 		match[0].rm_so = coff;
237 		match[0].rm_eo = len;
238 
239 #if defined(DEBUG) && 0
240 		TRACE(sp, "F search: %lu from %u to %u\n",
241 		    lno, coff, len != 0 ? len - 1 : len);
242 #endif
243 		/* Search the line. */
244 		eval = regexec(&sp->re_c, l, 1, match,
245 		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
246 		if (eval == REG_NOMATCH)
247 			continue;
248 		if (eval != 0) {
249 			if (LF_ISSET(SEARCH_MSG))
250 				re_error(sp, eval, &sp->re_c);
251 			else
252 				(void)sp->gp->scr_bell(sp);
253 			break;
254 		}
255 
256 		/* Warn if the search wrapped. */
257 		if (wrapped && LF_ISSET(SEARCH_WMSG))
258 			search_msg(sp, S_WRAP);
259 
260 #if defined(DEBUG) && 0
261 		TRACE(sp, "F search: %qu to %qu\n",
262 		    match[0].rm_so, match[0].rm_eo);
263 #endif
264 		rm->lno = lno;
265 		rm->cno = match[0].rm_so;
266 
267 		/*
268 		 * If a change command, it's possible to move beyond the end
269 		 * of a line.  Historic vi generally got this wrong (e.g. try
270 		 * "c?$<cr>").  Not all that sure this gets it right, there
271 		 * are lots of strange cases.
272 		 */
273 		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
274 			rm->cno = len != 0 ? len - 1 : 0;
275 
276 		rval = 0;
277 		break;
278 	}
279 
280 	if (LF_ISSET(SEARCH_MSG))
281 		search_busy(sp, BUSY_OFF);
282 	return (rval);
283 }
284 
285 /*
286  * b_search --
287  *	Do a backward search.
288  *
289  * PUBLIC: int b_search __P((SCR *,
290  * PUBLIC:    MARK *, MARK *, char *, size_t, char **, u_int));
291  */
292 int
293 b_search(sp, fm, rm, ptrn, plen, eptrn, flags)
294 	SCR *sp;
295 	MARK *fm, *rm;
296 	char *ptrn, **eptrn;
297 	size_t plen;
298 	u_int flags;
299 {
300 	busy_t btype;
301 	recno_t lno;
302 	regmatch_t match[1];
303 	size_t coff, last, len;
304 	int cnt, eval, rval, wrapped;
305 	char *l;
306 
307 	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
308 		return (1);
309 
310 	/*
311 	 * If doing incremental search, set the "starting" position past the
312 	 * current column, so that we search a minimal distance and still
313 	 * match special patterns, e.g., \> for the end of a word.  This is
314 	 * safe when the cursor is at the end of a line because we only use
315 	 * it for comparison with the location of the match.
316 	 *
317 	 * Otherwise, start searching immediately before the cursor.  If in
318 	 * the first column, start search on the previous line.
319 	 */
320 	if (LF_ISSET(SEARCH_INCR)) {
321 		lno = fm->lno;
322 		coff = fm->cno + 1;
323 	} else {
324 		if (fm->cno == 0) {
325 			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
326 				if (LF_ISSET(SEARCH_MSG))
327 					search_msg(sp, S_SOF);
328 				return (1);
329 			}
330 			lno = fm->lno - 1;
331 		} else
332 			lno = fm->lno;
333 		coff = fm->cno;
334 	}
335 
336 	btype = BUSY_ON;
337 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
338 		if (cnt-- == 0) {
339 			if (INTERRUPTED(sp))
340 				break;
341 			if (LF_ISSET(SEARCH_MSG)) {
342 				search_busy(sp, btype);
343 				btype = BUSY_UPDATE;
344 			}
345 			cnt = INTERRUPT_CHECK;
346 		}
347 		if (wrapped && lno < fm->lno || lno == 0) {
348 			if (wrapped) {
349 				if (LF_ISSET(SEARCH_MSG))
350 					search_msg(sp, S_NOTFOUND);
351 				break;
352 			}
353 			if (!O_ISSET(sp, O_WRAPSCAN)) {
354 				if (LF_ISSET(SEARCH_MSG))
355 					search_msg(sp, S_SOF);
356 				break;
357 			}
358 			if (db_last(sp, &lno))
359 				break;
360 			if (lno == 0) {
361 				if (LF_ISSET(SEARCH_MSG))
362 					search_msg(sp, S_EMPTY);
363 				break;
364 			}
365 			++lno;
366 			wrapped = 1;
367 			continue;
368 		}
369 
370 		if (db_get(sp, lno, 0, &l, &len))
371 			break;
372 
373 		/* Set the termination. */
374 		match[0].rm_so = 0;
375 		match[0].rm_eo = len;
376 
377 #if defined(DEBUG) && 0
378 		TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
379 #endif
380 		/* Search the line. */
381 		eval = regexec(&sp->re_c, l, 1, match,
382 		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
383 		if (eval == REG_NOMATCH)
384 			continue;
385 		if (eval != 0) {
386 			if (LF_ISSET(SEARCH_MSG))
387 				re_error(sp, eval, &sp->re_c);
388 			else
389 				(void)sp->gp->scr_bell(sp);
390 			break;
391 		}
392 
393 		/* Check for a match starting past the cursor. */
394 		if (coff != 0 && match[0].rm_so >= coff)
395 			continue;
396 
397 		/* Warn if the search wrapped. */
398 		if (wrapped && LF_ISSET(SEARCH_WMSG))
399 			search_msg(sp, S_WRAP);
400 
401 #if defined(DEBUG) && 0
402 		TRACE(sp, "B found: %qu to %qu\n",
403 		    match[0].rm_so, match[0].rm_eo);
404 #endif
405 		/*
406 		 * We now have the first match on the line.  Step through the
407 		 * line character by character until find the last acceptable
408 		 * match.  This is painful, we need a better interface to regex
409 		 * to make this work.
410 		 */
411 		for (;;) {
412 			last = match[0].rm_so++;
413 			if (match[0].rm_so >= len)
414 				break;
415 			match[0].rm_eo = len;
416 			eval = regexec(&sp->re_c, l, 1, match,
417 			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
418 			    REG_STARTEND);
419 			if (eval == REG_NOMATCH)
420 				break;
421 			if (eval != 0) {
422 				if (LF_ISSET(SEARCH_MSG))
423 					re_error(sp, eval, &sp->re_c);
424 				else
425 					(void)sp->gp->scr_bell(sp);
426 				goto err;
427 			}
428 			if (coff && match[0].rm_so >= coff)
429 				break;
430 		}
431 		rm->lno = lno;
432 
433 		/* See comment in f_search(). */
434 		if (!LF_ISSET(SEARCH_EOL) && last >= len)
435 			rm->cno = len != 0 ? len - 1 : 0;
436 		else
437 			rm->cno = last;
438 		rval = 0;
439 		break;
440 	}
441 
442 err:	if (LF_ISSET(SEARCH_MSG))
443 		search_busy(sp, BUSY_OFF);
444 	return (rval);
445 }
446 
447 /*
448  * search_msg --
449  *	Display one of the search messages.
450  */
451 static void
452 search_msg(sp, msg)
453 	SCR *sp;
454 	smsg_t msg;
455 {
456 	switch (msg) {
457 	case S_EMPTY:
458 		msgq(sp, M_ERR, "072|File empty; nothing to search");
459 		break;
460 	case S_EOF:
461 		msgq(sp, M_ERR,
462 		    "073|Reached end-of-file without finding the pattern");
463 		break;
464 	case S_NOPREV:
465 		msgq(sp, M_ERR, "074|No previous search pattern");
466 		break;
467 	case S_NOTFOUND:
468 		msgq(sp, M_ERR, "075|Pattern not found");
469 		break;
470 	case S_SOF:
471 		msgq(sp, M_ERR,
472 		    "076|Reached top-of-file without finding the pattern");
473 		break;
474 	case S_WRAP:
475 		msgq(sp, M_ERR, "077|Search wrapped");
476 		break;
477 	default:
478 		abort();
479 	}
480 }
481 
482 /*
483  * search_busy --
484  *	Put up the busy searching message.
485  *
486  * PUBLIC: void search_busy __P((SCR *, busy_t));
487  */
488 void
489 search_busy(sp, btype)
490 	SCR *sp;
491 	busy_t btype;
492 {
493 	sp->gp->scr_busy(sp, "078|Searching...", btype);
494 }
495