xref: /openbsd-src/usr.bin/vi/vi/v_word.c (revision 486aa1f00aa9b3143d2a7d3b16c0c965eda76d7e)
1 /*	$OpenBSD: v_word.c,v 1.7 2014/11/12 04:28:41 bentley Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  * Copyright (c) 1992, 1993, 1994, 1995, 1996
7  *	Keith Bostic.  All rights reserved.
8  *
9  * See the LICENSE file for redistribution information.
10  */
11 
12 #include "config.h"
13 
14 #include <sys/types.h>
15 #include <sys/queue.h>
16 #include <sys/time.h>
17 
18 #include <bitstring.h>
19 #include <ctype.h>
20 #include <limits.h>
21 #include <stdio.h>
22 
23 #include "../common/common.h"
24 #include "vi.h"
25 
26 /*
27  * There are two types of "words".  Bigwords are easy -- groups of anything
28  * delimited by whitespace.  Normal words are trickier.  They are either a
29  * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace.  When moving by a word, if you're in whitespace,
 * it's easy, just skip the whitespace and go to the beginning or end of the
 * word.  Otherwise, figure out if the next character is in a different group.
 * If it is, go to the beginning or end of that group, otherwise, go to the
34  * beginning or end of the current group.  The historic version of vi didn't
35  * get this right, so, for example, there were cases where "4e" was not the
36  * same as "eeee" -- in particular, single character words, and commands that
37  * began in whitespace were almost always handled incorrectly.  To get it right
38  * you have to resolve the cursor after each search so that the look-ahead to
39  * figure out what type of "word" the cursor is in will be correct.
40  *
41  * Empty lines, and lines that consist of only white-space characters count
42  * as a single word, and the beginning and end of the file counts as an
43  * infinite number of words.
44  *
45  * Movements associated with commands are different than movement commands.
46  * For example, in "abc  def", with the cursor on the 'a', "cw" is from
47  * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
48  * space is discarded from the change movement.  Another example is that,
49  * in the same string, a "cw" on any white space character replaces that
50  * single character, and nothing else.  Ain't nothin' in here that's easy.
51  *
52  * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
53  * would treat groups of empty lines as individual words, i.e. the command
54  * would move the cursor to each new empty line.  The 'e' and 'E' commands
55  * would treat groups of empty lines as a single word, i.e. the first use
56  * would move past the group of lines.  The 'b' command would just beep at
57  * you, or, if you did it from the start of the line as part of a motion
58  * command, go absolutely nuts.  If the lines contained only white-space
59  * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
60  * 'b', 'E' and 'e' commands would treat the group as a single word, and
61  * the 'B' and 'b' commands will treat the lines as individual words.  This
62  * implementation treats all of these cases as a single white-space word.
63  */
64 
/* Selects the word definition a movement uses: bigwords or normal words. */
enum which {BIGWORD, LITTLEWORD};

/*
 * Shared engines behind the public entry points: bword() serves 'B'/'b',
 * eword() serves 'E'/'e' and fword() serves 'W'/'w'.
 */
static int bword(SCR *, VICMD *, enum which);
static int eword(SCR *, VICMD *, enum which);
static int fword(SCR *, VICMD *, enum which);
70 
71 /*
72  * v_wordW -- [count]W
73  *	Move forward a bigword at a time.
74  *
75  * PUBLIC: int v_wordW(SCR *, VICMD *);
76  */
77 int
v_wordW(SCR * sp,VICMD * vp)78 v_wordW(SCR *sp, VICMD *vp)
79 {
80 	return (fword(sp, vp, BIGWORD));
81 }
82 
83 /*
84  * v_wordw -- [count]w
85  *	Move forward a word at a time.
86  *
87  * PUBLIC: int v_wordw(SCR *, VICMD *);
88  */
89 int
v_wordw(SCR * sp,VICMD * vp)90 v_wordw(SCR *sp, VICMD *vp)
91 {
92 	return (fword(sp, vp, LITTLEWORD));
93 }
94 
95 /*
96  * fword --
97  *	Move forward by words.
98  */
99 static int
fword(SCR * sp,VICMD * vp,enum which type)100 fword(SCR *sp, VICMD *vp, enum which type)
101 {
102 	enum { INWORD, NOTWORD } state;
103 	VCS cs;
104 	u_long cnt;
105 
106 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
107 	cs.cs_lno = vp->m_start.lno;
108 	cs.cs_cno = vp->m_start.cno;
109 	if (cs_init(sp, &cs))
110 		return (1);
111 
112 	/*
113 	 * If in white-space:
114 	 *	If the count is 1, and it's a change command, we're done.
115 	 *	Else, move to the first non-white-space character, which
116 	 *	counts as a single word move.  If it's a motion command,
117 	 *	don't move off the end of the line.
118 	 */
119 	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) {
120 		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
121 			if (ISCMD(vp->rkp, 'c'))
122 				return (0);
123 			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
124 				if (cs_fspace(sp, &cs))
125 					return (1);
126 				goto ret;
127 			}
128 		}
129 		if (cs_fblank(sp, &cs))
130 			return (1);
131 		--cnt;
132 	}
133 
134 	/*
135 	 * Cyclically move to the next word -- this involves skipping
136 	 * over word characters and then any trailing non-word characters.
137 	 * Note, for the 'w' command, the definition of a word keeps
138 	 * switching.
139 	 */
140 	if (type == BIGWORD)
141 		while (cnt--) {
142 			for (;;) {
143 				if (cs_next(sp, &cs))
144 					return (1);
145 				if (cs.cs_flags == CS_EOF)
146 					goto ret;
147 				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
148 					break;
149 			}
150 			/*
151 			 * If a motion command and we're at the end of the
152 			 * last word, we're done.  Delete and yank eat any
153 			 * trailing blanks, but we don't move off the end
154 			 * of the line regardless.
155 			 */
156 			if (cnt == 0 && ISMOTION(vp)) {
157 				if ((ISCMD(vp->rkp, 'd') ||
158 				    ISCMD(vp->rkp, 'y')) &&
159 				    cs_fspace(sp, &cs))
160 					return (1);
161 				break;
162 			}
163 
164 			/* Eat whitespace characters. */
165 			if (cs_fblank(sp, &cs))
166 				return (1);
167 			if (cs.cs_flags == CS_EOF)
168 				goto ret;
169 		}
170 	else
171 		while (cnt--) {
172 			state = cs.cs_flags == 0 &&
173 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
174 			for (;;) {
175 				if (cs_next(sp, &cs))
176 					return (1);
177 				if (cs.cs_flags == CS_EOF)
178 					goto ret;
179 				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
180 					break;
181 				if (state == INWORD) {
182 					if (!inword(cs.cs_ch))
183 						break;
184 				} else
185 					if (inword(cs.cs_ch))
186 						break;
187 			}
188 			/* See comment above. */
189 			if (cnt == 0 && ISMOTION(vp)) {
190 				if ((ISCMD(vp->rkp, 'd') ||
191 				    ISCMD(vp->rkp, 'y')) &&
192 				    cs_fspace(sp, &cs))
193 					return (1);
194 				break;
195 			}
196 
197 			/* Eat whitespace characters. */
198 			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
199 				if (cs_fblank(sp, &cs))
200 					return (1);
201 			if (cs.cs_flags == CS_EOF)
202 				goto ret;
203 		}
204 
205 	/*
206 	 * If we didn't move, we must be at EOF.
207 	 *
208 	 * !!!
209 	 * That's okay for motion commands, however.
210 	 */
211 ret:	if (!ISMOTION(vp) &&
212 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
213 		v_eof(sp, &vp->m_start);
214 		return (1);
215 	}
216 
217 	/* Adjust the end of the range for motion commands. */
218 	vp->m_stop.lno = cs.cs_lno;
219 	vp->m_stop.cno = cs.cs_cno;
220 	if (ISMOTION(vp) && cs.cs_flags == 0)
221 		--vp->m_stop.cno;
222 
223 	/*
224 	 * Non-motion commands move to the end of the range.  Delete
225 	 * and yank stay at the start, ignore others.
226 	 */
227 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
228 	return (0);
229 }
230 
231 /*
232  * v_wordE -- [count]E
233  *	Move forward to the end of the bigword.
234  *
235  * PUBLIC: int v_wordE(SCR *, VICMD *);
236  */
237 int
v_wordE(SCR * sp,VICMD * vp)238 v_wordE(SCR *sp, VICMD *vp)
239 {
240 	return (eword(sp, vp, BIGWORD));
241 }
242 
243 /*
244  * v_worde -- [count]e
245  *	Move forward to the end of the word.
246  *
247  * PUBLIC: int v_worde(SCR *, VICMD *);
248  */
249 int
v_worde(SCR * sp,VICMD * vp)250 v_worde(SCR *sp, VICMD *vp)
251 {
252 	return (eword(sp, vp, LITTLEWORD));
253 }
254 
255 /*
256  * eword --
257  *	Move forward to the end of the word.
258  */
259 static int
eword(SCR * sp,VICMD * vp,enum which type)260 eword(SCR *sp, VICMD *vp, enum which type)
261 {
262 	enum { INWORD, NOTWORD } state;
263 	VCS cs;
264 	u_long cnt;
265 
266 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
267 	cs.cs_lno = vp->m_start.lno;
268 	cs.cs_cno = vp->m_start.cno;
269 	if (cs_init(sp, &cs))
270 		return (1);
271 
272 	/*
273 	 * !!!
274 	 * If in whitespace, or the next character is whitespace, move past
275 	 * it.  (This doesn't count as a word move.)  Stay at the character
276 	 * past the current one, it sets word "state" for the 'e' command.
277 	 */
278 	if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
279 		if (cs_next(sp, &cs))
280 			return (1);
281 		if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
282 			goto start;
283 	}
284 	if (cs_fblank(sp, &cs))
285 		return (1);
286 
287 	/*
288 	 * Cyclically move to the next word -- this involves skipping
289 	 * over word characters and then any trailing non-word characters.
290 	 * Note, for the 'e' command, the definition of a word keeps
291 	 * switching.
292 	 */
293 start:	if (type == BIGWORD)
294 		while (cnt--) {
295 			for (;;) {
296 				if (cs_next(sp, &cs))
297 					return (1);
298 				if (cs.cs_flags == CS_EOF)
299 					goto ret;
300 				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
301 					break;
302 			}
303 			/*
304 			 * When we reach the start of the word after the last
305 			 * word, we're done.  If we changed state, back up one
306 			 * to the end of the previous word.
307 			 */
308 			if (cnt == 0) {
309 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
310 					return (1);
311 				break;
312 			}
313 
314 			/* Eat whitespace characters. */
315 			if (cs_fblank(sp, &cs))
316 				return (1);
317 			if (cs.cs_flags == CS_EOF)
318 				goto ret;
319 		}
320 	else
321 		while (cnt--) {
322 			state = cs.cs_flags == 0 &&
323 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
324 			for (;;) {
325 				if (cs_next(sp, &cs))
326 					return (1);
327 				if (cs.cs_flags == CS_EOF)
328 					goto ret;
329 				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
330 					break;
331 				if (state == INWORD) {
332 					if (!inword(cs.cs_ch))
333 						break;
334 				} else
335 					if (inword(cs.cs_ch))
336 						break;
337 			}
338 			/* See comment above. */
339 			if (cnt == 0) {
340 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
341 					return (1);
342 				break;
343 			}
344 
345 			/* Eat whitespace characters. */
346 			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
347 				if (cs_fblank(sp, &cs))
348 					return (1);
349 			if (cs.cs_flags == CS_EOF)
350 				goto ret;
351 		}
352 
353 	/*
354 	 * If we didn't move, we must be at EOF.
355 	 *
356 	 * !!!
357 	 * That's okay for motion commands, however.
358 	 */
359 ret:	if (!ISMOTION(vp) &&
360 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
361 		v_eof(sp, &vp->m_start);
362 		return (1);
363 	}
364 
365 	/* Set the end of the range for motion commands. */
366 	vp->m_stop.lno = cs.cs_lno;
367 	vp->m_stop.cno = cs.cs_cno;
368 
369 	/*
370 	 * Non-motion commands move to the end of the range.
371 	 * Delete and yank stay at the start, ignore others.
372 	 */
373 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
374 	return (0);
375 }
376 
377 /*
378  * v_WordB -- [count]B
379  *	Move backward a bigword at a time.
380  *
381  * PUBLIC: int v_wordB(SCR *, VICMD *);
382  */
383 int
v_wordB(SCR * sp,VICMD * vp)384 v_wordB(SCR *sp, VICMD *vp)
385 {
386 	return (bword(sp, vp, BIGWORD));
387 }
388 
389 /*
390  * v_wordb -- [count]b
391  *	Move backward a word at a time.
392  *
393  * PUBLIC: int v_wordb(SCR *, VICMD *);
394  */
395 int
v_wordb(SCR * sp,VICMD * vp)396 v_wordb(SCR *sp, VICMD *vp)
397 {
398 	return (bword(sp, vp, LITTLEWORD));
399 }
400 
401 /*
402  * bword --
403  *	Move backward by words.
404  */
405 static int
bword(SCR * sp,VICMD * vp,enum which type)406 bword(SCR *sp, VICMD *vp, enum which type)
407 {
408 	enum { INWORD, NOTWORD } state;
409 	VCS cs;
410 	u_long cnt;
411 
412 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
413 	cs.cs_lno = vp->m_start.lno;
414 	cs.cs_cno = vp->m_start.cno;
415 	if (cs_init(sp, &cs))
416 		return (1);
417 
418 	/*
419 	 * !!!
420 	 * If in whitespace, or the previous character is whitespace, move
421 	 * past it.  (This doesn't count as a word move.)  Stay at the
422 	 * character before the current one, it sets word "state" for the
423 	 * 'b' command.
424 	 */
425 	if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
426 		if (cs_prev(sp, &cs))
427 			return (1);
428 		if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
429 			goto start;
430 	}
431 	if (cs_bblank(sp, &cs))
432 		return (1);
433 
434 	/*
435 	 * Cyclically move to the beginning of the previous word -- this
436 	 * involves skipping over word characters and then any trailing
437 	 * non-word characters.  Note, for the 'b' command, the definition
438 	 * of a word keeps switching.
439 	 */
440 start:	if (type == BIGWORD)
441 		while (cnt--) {
442 			for (;;) {
443 				if (cs_prev(sp, &cs))
444 					return (1);
445 				if (cs.cs_flags == CS_SOF)
446 					goto ret;
447 				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
448 					break;
449 			}
450 			/*
451 			 * When we reach the end of the word before the last
452 			 * word, we're done.  If we changed state, move forward
453 			 * one to the end of the next word.
454 			 */
455 			if (cnt == 0) {
456 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
457 					return (1);
458 				break;
459 			}
460 
461 			/* Eat whitespace characters. */
462 			if (cs_bblank(sp, &cs))
463 				return (1);
464 			if (cs.cs_flags == CS_SOF)
465 				goto ret;
466 		}
467 	else
468 		while (cnt--) {
469 			state = cs.cs_flags == 0 &&
470 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
471 			for (;;) {
472 				if (cs_prev(sp, &cs))
473 					return (1);
474 				if (cs.cs_flags == CS_SOF)
475 					goto ret;
476 				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
477 					break;
478 				if (state == INWORD) {
479 					if (!inword(cs.cs_ch))
480 						break;
481 				} else
482 					if (inword(cs.cs_ch))
483 						break;
484 			}
485 			/* See comment above. */
486 			if (cnt == 0) {
487 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
488 					return (1);
489 				break;
490 			}
491 
492 			/* Eat whitespace characters. */
493 			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
494 				if (cs_bblank(sp, &cs))
495 					return (1);
496 			if (cs.cs_flags == CS_SOF)
497 				goto ret;
498 		}
499 
500 	/* If we didn't move, we must be at SOF. */
501 ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
502 		v_sof(sp, &vp->m_start);
503 		return (1);
504 	}
505 
506 	/* Set the end of the range for motion commands. */
507 	vp->m_stop.lno = cs.cs_lno;
508 	vp->m_stop.cno = cs.cs_cno;
509 
510 	/*
511 	 * All commands move to the end of the range.  Motion commands
512 	 * adjust the starting point to the character before the current
513 	 * one.
514 	 *
515 	 * !!!
516 	 * The historic vi didn't get this right -- the `yb' command yanked
517 	 * the right stuff and even updated the cursor value, but the cursor
518 	 * was not actually updated on the screen.
519 	 */
520 	vp->m_final = vp->m_stop;
521 	if (ISMOTION(vp))
522 		--vp->m_start.cno;
523 	return (0);
524 }
525