xref: /openbsd-src/usr.bin/vi/common/search.c (revision 91f110e064cd7c194e59e019b83bb7496c1c84d4)
1 /*	$OpenBSD: search.c,v 1.9 2009/10/27 23:59:47 deraadt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  * Copyright (c) 1992, 1993, 1994, 1995, 1996
7  *	Keith Bostic.  All rights reserved.
8  *
9  * See the LICENSE file for redistribution information.
10  */
11 
12 #include "config.h"
13 
14 #include <sys/types.h>
15 #include <sys/queue.h>
16 
17 #include <bitstring.h>
18 #include <ctype.h>
19 #include <errno.h>
20 #include <limits.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25 
26 #include "common.h"
27 
28 typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
29 
30 static void	search_msg(SCR *, smsg_t);
31 static int	search_init(SCR *, dir_t, char *, size_t, char **, u_int);
32 
33 /*
34  * search_init --
35  *	Set up a search.
36  */
37 static int
38 search_init(sp, dir, ptrn, plen, epp, flags)
39 	SCR *sp;
40 	dir_t dir;
41 	char *ptrn, **epp;
42 	size_t plen;
43 	u_int flags;
44 {
45 	recno_t lno;
46 	int delim;
47 	char *p, *t;
48 
49 	/* If the file is empty, it's a fast search. */
50 	if (sp->lno <= 1) {
51 		if (db_last(sp, &lno))
52 			return (1);
53 		if (lno == 0) {
54 			if (LF_ISSET(SEARCH_MSG))
55 				search_msg(sp, S_EMPTY);
56 			return (1);
57 		}
58 	}
59 
60 	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
61 		/*
62 		 * Use the saved pattern if no pattern specified, or if only
63 		 * one or two delimiter characters specified.
64 		 *
65 		 * !!!
66 		 * Historically, only the pattern itself was saved, vi didn't
67 		 * preserve addressing or delta information.
68 		 */
69 		if (ptrn == NULL)
70 			goto prev;
71 		if (plen == 1) {
72 			if (epp != NULL)
73 				*epp = ptrn + 1;
74 			goto prev;
75 		}
76 		if (ptrn[0] == ptrn[1]) {
77 			if (epp != NULL)
78 				*epp = ptrn + 2;
79 
80 			/* Complain if we don't have a previous pattern. */
81 prev:			if (sp->re == NULL) {
82 				search_msg(sp, S_NOPREV);
83 				return (1);
84 			}
85 			/* Re-compile the search pattern if necessary. */
86 			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
87 			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
88 			    RE_C_SEARCH |
89 			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
90 				return (1);
91 
92 			/* Set the search direction. */
93 			if (LF_ISSET(SEARCH_SET))
94 				sp->searchdir = dir;
95 			return (0);
96 		}
97 
98 		/*
99 		 * Set the delimiter, and move forward to the terminating
100 		 * delimiter, handling escaped delimiters.
101 		 *
102 		 * QUOTING NOTE:
103 		 * Only discard an escape character if it escapes a delimiter.
104 		 */
105 		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
106 			if (--plen == 0 || p[0] == delim) {
107 				if (plen != 0)
108 					++p;
109 				break;
110 			}
111 			if (plen > 1 && p[0] == '\\' && p[1] == delim) {
112 				++p;
113 				--plen;
114 			}
115 		}
116 		if (epp != NULL)
117 			*epp = p;
118 
119 		plen = t - ptrn;
120 	}
121 
122 	/* Compile the RE. */
123 	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
124 	    RE_C_SEARCH |
125 	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
126 	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0) |
127 	    (LF_ISSET(SEARCH_CSCOPE) ? RE_C_CSCOPE : 0)))
128 		return (1);
129 
130 	/* Set the search direction. */
131 	if (LF_ISSET(SEARCH_SET))
132 		sp->searchdir = dir;
133 
134 	return (0);
135 }
136 
137 /*
138  * f_search --
139  *	Do a forward search.
140  *
141  * PUBLIC: int f_search(SCR *, MARK *, MARK *, char *, size_t, char **, u_int);
142  */
143 int
144 f_search(sp, fm, rm, ptrn, plen, eptrn, flags)
145 	SCR *sp;
146 	MARK *fm, *rm;
147 	char *ptrn, **eptrn;
148 	size_t plen;
149 	u_int flags;
150 {
151 	busy_t btype;
152 	recno_t lno;
153 	regmatch_t match[1];
154 	size_t coff, len;
155 	int cnt, eval, rval, wrapped;
156 	char *l;
157 
158 	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
159 		return (1);
160 
161 	if (LF_ISSET(SEARCH_FILE)) {
162 		lno = 1;
163 		coff = 0;
164 	} else {
165 		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
166 			return (1);
167 		lno = fm->lno;
168 
169 		/*
170 		 * If doing incremental search, start searching at the previous
171 		 * column, so that we search a minimal distance and still match
172 		 * special patterns, e.g., \< for beginning of a word.
173 		 *
174 		 * Otherwise, start searching immediately after the cursor.  If
175 		 * at the end of the line, start searching on the next line.
176 		 * This is incompatible (read bug fix) with the historic vi --
177 		 * searches for the '$' pattern never moved forward, and the
178 		 * "-t foo" didn't work if the 'f' was the first character in
179 		 * the file.
180 		 */
181 		if (LF_ISSET(SEARCH_INCR)) {
182 			if ((coff = fm->cno) != 0)
183 				--coff;
184 		} else if (fm->cno + 1 >= len) {
185 			coff = 0;
186 			lno = fm->lno + 1;
187 			if (db_get(sp, lno, 0, &l, &len)) {
188 				if (!O_ISSET(sp, O_WRAPSCAN)) {
189 					if (LF_ISSET(SEARCH_MSG))
190 						search_msg(sp, S_EOF);
191 					return (1);
192 				}
193 				lno = 1;
194 			}
195 		} else
196 			coff = fm->cno + 1;
197 	}
198 
199 	btype = BUSY_ON;
200 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; ++lno, coff = 0) {
201 		if (cnt-- == 0) {
202 			if (INTERRUPTED(sp))
203 				break;
204 			if (LF_ISSET(SEARCH_MSG)) {
205 				search_busy(sp, btype);
206 				btype = BUSY_UPDATE;
207 			}
208 			cnt = INTERRUPT_CHECK;
209 		}
210 		if ((wrapped && lno > fm->lno) || db_get(sp, lno, 0, &l, &len)) {
211 			if (wrapped) {
212 				if (LF_ISSET(SEARCH_MSG))
213 					search_msg(sp, S_NOTFOUND);
214 				break;
215 			}
216 			if (!O_ISSET(sp, O_WRAPSCAN)) {
217 				if (LF_ISSET(SEARCH_MSG))
218 					search_msg(sp, S_EOF);
219 				break;
220 			}
221 			lno = 0;
222 			wrapped = 1;
223 			continue;
224 		}
225 
226 		/* If already at EOL, just keep going. */
227 		if (len != 0 && coff == len)
228 			continue;
229 
230 		/* Set the termination. */
231 		match[0].rm_so = coff;
232 		match[0].rm_eo = len;
233 
234 #if defined(DEBUG) && 0
235 		TRACE(sp, "F search: %lu from %u to %u\n",
236 		    lno, coff, len != 0 ? len - 1 : len);
237 #endif
238 		/* Search the line. */
239 		eval = regexec(&sp->re_c, l, 1, match,
240 		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
241 		if (eval == REG_NOMATCH)
242 			continue;
243 		if (eval != 0) {
244 			if (LF_ISSET(SEARCH_MSG))
245 				re_error(sp, eval, &sp->re_c);
246 			else
247 				(void)sp->gp->scr_bell(sp);
248 			break;
249 		}
250 
251 		/* Warn if the search wrapped. */
252 		if (wrapped && LF_ISSET(SEARCH_WMSG))
253 			search_msg(sp, S_WRAP);
254 
255 #if defined(DEBUG) && 0
256 		TRACE(sp, "F search: %qu to %qu\n",
257 		    match[0].rm_so, match[0].rm_eo);
258 #endif
259 		rm->lno = lno;
260 		rm->cno = match[0].rm_so;
261 
262 		/*
263 		 * If a change command, it's possible to move beyond the end
264 		 * of a line.  Historic vi generally got this wrong (e.g. try
265 		 * "c?$<cr>").  Not all that sure this gets it right, there
266 		 * are lots of strange cases.
267 		 */
268 		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
269 			rm->cno = len != 0 ? len - 1 : 0;
270 
271 		rval = 0;
272 		break;
273 	}
274 
275 	if (LF_ISSET(SEARCH_MSG))
276 		search_busy(sp, BUSY_OFF);
277 	return (rval);
278 }
279 
280 /*
281  * b_search --
282  *	Do a backward search.
283  *
284  * PUBLIC: int b_search(SCR *, MARK *, MARK *, char *, size_t, char **, u_int);
285  */
286 int
287 b_search(sp, fm, rm, ptrn, plen, eptrn, flags)
288 	SCR *sp;
289 	MARK *fm, *rm;
290 	char *ptrn, **eptrn;
291 	size_t plen;
292 	u_int flags;
293 {
294 	busy_t btype;
295 	recno_t lno;
296 	regmatch_t match[1];
297 	size_t coff, last, len;
298 	int cnt, eval, rval, wrapped;
299 	char *l;
300 
301 	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
302 		return (1);
303 
304 	/*
305 	 * If doing incremental search, set the "starting" position past the
306 	 * current column, so that we search a minimal distance and still
307 	 * match special patterns, e.g., \> for the end of a word.  This is
308 	 * safe when the cursor is at the end of a line because we only use
309 	 * it for comparison with the location of the match.
310 	 *
311 	 * Otherwise, start searching immediately before the cursor.  If in
312 	 * the first column, start search on the previous line.
313 	 */
314 	if (LF_ISSET(SEARCH_INCR)) {
315 		lno = fm->lno;
316 		coff = fm->cno + 1;
317 	} else {
318 		if (fm->cno == 0) {
319 			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
320 				if (LF_ISSET(SEARCH_MSG))
321 					search_msg(sp, S_SOF);
322 				return (1);
323 			}
324 			lno = fm->lno - 1;
325 		} else
326 			lno = fm->lno;
327 		coff = fm->cno;
328 	}
329 
330 	btype = BUSY_ON;
331 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
332 		if (cnt-- == 0) {
333 			if (INTERRUPTED(sp))
334 				break;
335 			if (LF_ISSET(SEARCH_MSG)) {
336 				search_busy(sp, btype);
337 				btype = BUSY_UPDATE;
338 			}
339 			cnt = INTERRUPT_CHECK;
340 		}
341 		if ((wrapped && lno < fm->lno) || lno == 0) {
342 			if (wrapped) {
343 				if (LF_ISSET(SEARCH_MSG))
344 					search_msg(sp, S_NOTFOUND);
345 				break;
346 			}
347 			if (!O_ISSET(sp, O_WRAPSCAN)) {
348 				if (LF_ISSET(SEARCH_MSG))
349 					search_msg(sp, S_SOF);
350 				break;
351 			}
352 			if (db_last(sp, &lno))
353 				break;
354 			if (lno == 0) {
355 				if (LF_ISSET(SEARCH_MSG))
356 					search_msg(sp, S_EMPTY);
357 				break;
358 			}
359 			++lno;
360 			wrapped = 1;
361 			continue;
362 		}
363 
364 		if (db_get(sp, lno, 0, &l, &len))
365 			break;
366 
367 		/* Set the termination. */
368 		match[0].rm_so = 0;
369 		match[0].rm_eo = len;
370 
371 #if defined(DEBUG) && 0
372 		TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
373 #endif
374 		/* Search the line. */
375 		eval = regexec(&sp->re_c, l, 1, match,
376 		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
377 		if (eval == REG_NOMATCH)
378 			continue;
379 		if (eval != 0) {
380 			if (LF_ISSET(SEARCH_MSG))
381 				re_error(sp, eval, &sp->re_c);
382 			else
383 				(void)sp->gp->scr_bell(sp);
384 			break;
385 		}
386 
387 		/* Check for a match starting past the cursor. */
388 		if (coff != 0 && match[0].rm_so >= coff)
389 			continue;
390 
391 		/* Warn if the search wrapped. */
392 		if (wrapped && LF_ISSET(SEARCH_WMSG))
393 			search_msg(sp, S_WRAP);
394 
395 #if defined(DEBUG) && 0
396 		TRACE(sp, "B found: %qu to %qu\n",
397 		    match[0].rm_so, match[0].rm_eo);
398 #endif
399 		/*
400 		 * We now have the first match on the line.  Step through the
401 		 * line character by character until find the last acceptable
402 		 * match.  This is painful, we need a better interface to regex
403 		 * to make this work.
404 		 */
405 		for (;;) {
406 			last = match[0].rm_so++;
407 			if (match[0].rm_so >= len)
408 				break;
409 			match[0].rm_eo = len;
410 			eval = regexec(&sp->re_c, l, 1, match,
411 			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
412 			    REG_STARTEND);
413 			if (eval == REG_NOMATCH)
414 				break;
415 			if (eval != 0) {
416 				if (LF_ISSET(SEARCH_MSG))
417 					re_error(sp, eval, &sp->re_c);
418 				else
419 					(void)sp->gp->scr_bell(sp);
420 				goto err;
421 			}
422 			if (coff && match[0].rm_so >= coff)
423 				break;
424 		}
425 		rm->lno = lno;
426 
427 		/* See comment in f_search(). */
428 		if (!LF_ISSET(SEARCH_EOL) && last >= len)
429 			rm->cno = len != 0 ? len - 1 : 0;
430 		else
431 			rm->cno = last;
432 		rval = 0;
433 		break;
434 	}
435 
436 err:	if (LF_ISSET(SEARCH_MSG))
437 		search_busy(sp, BUSY_OFF);
438 	return (rval);
439 }
440 
441 /*
442  * search_msg --
443  *	Display one of the search messages.
444  */
445 static void
446 search_msg(sp, msg)
447 	SCR *sp;
448 	smsg_t msg;
449 {
450 	switch (msg) {
451 	case S_EMPTY:
452 		msgq(sp, M_ERR, "072|File empty; nothing to search");
453 		break;
454 	case S_EOF:
455 		msgq(sp, M_ERR,
456 		    "073|Reached end-of-file without finding the pattern");
457 		break;
458 	case S_NOPREV:
459 		msgq(sp, M_ERR, "074|No previous search pattern");
460 		break;
461 	case S_NOTFOUND:
462 		msgq(sp, M_ERR, "075|Pattern not found");
463 		break;
464 	case S_SOF:
465 		msgq(sp, M_ERR,
466 		    "076|Reached top-of-file without finding the pattern");
467 		break;
468 	case S_WRAP:
469 		msgq(sp, M_ERR, "077|Search wrapped");
470 		break;
471 	default:
472 		abort();
473 	}
474 }
475 
476 /*
477  * search_busy --
478  *	Put up the busy searching message.
479  *
480  * PUBLIC: void search_busy(SCR *, busy_t);
481  */
482 void
483 search_busy(sp, btype)
484 	SCR *sp;
485 	busy_t btype;
486 {
487 	sp->gp->scr_busy(sp, "078|Searching...", btype);
488 }
489