xref: /netbsd-src/external/bsd/nvi/dist/vi/v_word.c (revision a8ee716c2543b32b1b9ee65a0b5c59bf908c308d)
1 /*	$NetBSD: v_word.c,v 1.4 2017/11/21 07:43:47 rin Exp $ */
2 /*-
3  * Copyright (c) 1992, 1993, 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 1992, 1993, 1994, 1995, 1996
6  *	Keith Bostic.  All rights reserved.
7  *
8  * See the LICENSE file for redistribution information.
9  */
10 
11 #include "config.h"
12 
13 #include <sys/cdefs.h>
14 #if 0
15 #ifndef lint
16 static const char sccsid[] = "Id: v_word.c,v 10.6 2001/06/25 15:19:36 skimo Exp  (Berkeley) Date: 2001/06/25 15:19:36 ";
17 #endif /* not lint */
18 #else
19 __RCSID("$NetBSD: v_word.c,v 1.4 2017/11/21 07:43:47 rin Exp $");
20 #endif
21 
22 #include <sys/types.h>
23 #include <sys/queue.h>
24 #include <sys/time.h>
25 
26 #include <bitstring.h>
27 #include <ctype.h>
28 #include <limits.h>
29 #include <stdio.h>
30 
31 #include "../common/common.h"
32 #include "vi.h"
33 
34 /*
35  * There are two types of "words".  Bigwords are easy -- groups of anything
36  * delimited by whitespace.  Normal words are trickier.  They are either a
37  * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace.  When searching for a word, if you're in
 * whitespace, it's easy: skip the whitespace and go to the start or end of the
40  * word.  Otherwise, figure out if the next character is in a different group.
41  * If it is, go to the beginning or end of that group, otherwise, go to the
42  * beginning or end of the current group.  The historic version of vi didn't
43  * get this right, so, for example, there were cases where "4e" was not the
44  * same as "eeee" -- in particular, single character words, and commands that
45  * began in whitespace were almost always handled incorrectly.  To get it right
46  * you have to resolve the cursor after each search so that the look-ahead to
47  * figure out what type of "word" the cursor is in will be correct.
48  *
49  * Empty lines, and lines that consist of only white-space characters count
50  * as a single word, and the beginning and end of the file counts as an
51  * infinite number of words.
52  *
53  * Movements associated with commands are different than movement commands.
54  * For example, in "abc  def", with the cursor on the 'a', "cw" is from
55  * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
56  * space is discarded from the change movement.  Another example is that,
57  * in the same string, a "cw" on any white space character replaces that
58  * single character, and nothing else.  Ain't nothin' in here that's easy.
59  *
60  * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
61  * would treat groups of empty lines as individual words, i.e. the command
62  * would move the cursor to each new empty line.  The 'e' and 'E' commands
63  * would treat groups of empty lines as a single word, i.e. the first use
64  * would move past the group of lines.  The 'b' command would just beep at
65  * you, or, if you did it from the start of the line as part of a motion
66  * command, go absolutely nuts.  If the lines contained only white-space
67  * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
68  * 'b', 'E' and 'e' commands would treat the group as a single word, and
69  * the 'B' and 'b' commands will treat the lines as individual words.  This
70  * implementation treats all of these cases as a single white-space word.
71  *
72  * We regard a boundary between single- and multi-width characters as
73  * a word boundary.
74  */
75 
76 enum which {BIGWORD, LITTLEWORD};
77 
78 static int bword __P((SCR *, VICMD *, enum which));
79 static int eword __P((SCR *, VICMD *, enum which));
80 static int fword __P((SCR *, VICMD *, enum which));
81 
82 /*
83  * v_wordW -- [count]W
84  *	Move forward a bigword at a time.
85  *
86  * PUBLIC: int v_wordW __P((SCR *, VICMD *));
87  */
88 int
v_wordW(SCR * sp,VICMD * vp)89 v_wordW(SCR *sp, VICMD *vp)
90 {
91 	return (fword(sp, vp, BIGWORD));
92 }
93 
94 /*
95  * v_wordw -- [count]w
96  *	Move forward a word at a time.
97  *
98  * PUBLIC: int v_wordw __P((SCR *, VICMD *));
99  */
100 int
v_wordw(SCR * sp,VICMD * vp)101 v_wordw(SCR *sp, VICMD *vp)
102 {
103 	return (fword(sp, vp, LITTLEWORD));
104 }
105 
106 /*
107  * fword --
108  *	Move forward by words.
109  */
110 static int
fword(SCR * sp,VICMD * vp,enum which type)111 fword(SCR *sp, VICMD *vp, enum which type)
112 {
113 	enum { INWORD, NOTWORD } state;
114 	VCS cs;
115 	u_long cnt;
116 	int nmw, omw;
117 
118 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
119 	cs.cs_lno = vp->m_start.lno;
120 	cs.cs_cno = vp->m_start.cno;
121 	if (cs_init(sp, &cs))
122 		return (1);
123 
124 	/*
125 	 * If in white-space:
126 	 *	If the count is 1, and it's a change command, we're done.
127 	 *	Else, move to the first non-white-space character, which
128 	 *	counts as a single word move.  If it's a motion command,
129 	 *	don't move off the end of the line.
130 	 */
131 	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))) {
132 		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
133 			if (ISCMD(vp->rkp, 'c'))
134 				return (0);
135 			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
136 				if (cs_fspace(sp, &cs))
137 					return (1);
138 				goto ret;
139 			}
140 		}
141 		if (cs_fblank(sp, &cs))
142 			return (1);
143 		--cnt;
144 	}
145 
146 	/*
147 	 * Cyclically move to the next word -- this involves skipping
148 	 * over word characters and then any trailing non-word characters.
149 	 * Note, for the 'w' command, the definition of a word keeps
150 	 * switching.
151 	 */
152 	if (type == BIGWORD)
153 		while (cnt--) {
154 			nmw = ISMULTIWIDTH(sp, cs.cs_ch);
155 			for (;;) {
156 				omw = nmw;
157 				if (cs_next(sp, &cs))
158 					return (1);
159 				if (cs.cs_flags == CS_EOF)
160 					goto ret;
161 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) ||
162 				    (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw)
163 					break;
164 			}
165 			/*
166 			 * If a motion command and we're at the end of the
167 			 * last word, we're done.  Delete and yank eat any
168 			 * trailing blanks, but we don't move off the end
169 			 * of the line regardless.
170 			 */
171 			if (cnt == 0 && ISMOTION(vp)) {
172 				if ((ISCMD(vp->rkp, 'd') ||
173 				    ISCMD(vp->rkp, 'y')) &&
174 				    cs_fspace(sp, &cs))
175 					return (1);
176 				break;
177 			}
178 
179 			/* Eat whitespace characters. */
180 			if (nmw == omw && cs_fblank(sp, &cs))
181 				return (1);
182 			if (cs.cs_flags == CS_EOF)
183 				goto ret;
184 		}
185 	else
186 		while (cnt--) {
187 			state = cs.cs_flags == 0 &&
188 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
189 			nmw = ISMULTIWIDTH(sp, cs.cs_ch);
190 			for (;;) {
191 				omw = nmw;
192 				if (cs_next(sp, &cs))
193 					return (1);
194 				if (cs.cs_flags == CS_EOF)
195 					goto ret;
196 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) ||
197 				    (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw)
198 					break;
199 				if (state == INWORD) {
200 					if (!inword(cs.cs_ch))
201 						break;
202 				} else
203 					if (inword(cs.cs_ch))
204 						break;
205 			}
206 			/* See comment above. */
207 			if (cnt == 0 && ISMOTION(vp)) {
208 				if ((ISCMD(vp->rkp, 'd') ||
209 				    ISCMD(vp->rkp, 'y')) &&
210 				    cs_fspace(sp, &cs))
211 					return (1);
212 				break;
213 			}
214 
215 			/* Eat whitespace characters. */
216 			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
217 				if (cs_fblank(sp, &cs))
218 					return (1);
219 			if (cs.cs_flags == CS_EOF)
220 				goto ret;
221 		}
222 
223 	/*
224 	 * If we didn't move, we must be at EOF.
225 	 *
226 	 * !!!
227 	 * That's okay for motion commands, however.
228 	 */
229 ret:	if (!ISMOTION(vp) &&
230 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
231 		v_eof(sp, &vp->m_start);
232 		return (1);
233 	}
234 
235 	/* Adjust the end of the range for motion commands. */
236 	vp->m_stop.lno = cs.cs_lno;
237 	vp->m_stop.cno = cs.cs_cno;
238 	if (ISMOTION(vp) && cs.cs_flags == 0)
239 		--vp->m_stop.cno;
240 
241 	/*
242 	 * Non-motion commands move to the end of the range.  Delete
243 	 * and yank stay at the start, ignore others.
244 	 */
245 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
246 	return (0);
247 }
248 
249 /*
250  * v_wordE -- [count]E
251  *	Move forward to the end of the bigword.
252  *
253  * PUBLIC: int v_wordE __P((SCR *, VICMD *));
254  */
255 int
v_wordE(SCR * sp,VICMD * vp)256 v_wordE(SCR *sp, VICMD *vp)
257 {
258 	return (eword(sp, vp, BIGWORD));
259 }
260 
261 /*
262  * v_worde -- [count]e
263  *	Move forward to the end of the word.
264  *
265  * PUBLIC: int v_worde __P((SCR *, VICMD *));
266  */
267 int
v_worde(SCR * sp,VICMD * vp)268 v_worde(SCR *sp, VICMD *vp)
269 {
270 	return (eword(sp, vp, LITTLEWORD));
271 }
272 
273 /*
274  * eword --
275  *	Move forward to the end of the word.
276  */
277 static int
eword(SCR * sp,VICMD * vp,enum which type)278 eword(SCR *sp, VICMD *vp, enum which type)
279 {
280 	enum { INWORD, NOTWORD } state;
281 	VCS cs;
282 	u_long cnt;
283 	int nmw, omw;
284 
285 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
286 	cs.cs_lno = vp->m_start.lno;
287 	cs.cs_cno = vp->m_start.cno;
288 	if (cs_init(sp, &cs))
289 		return (1);
290 
291 	/*
292 	 * !!!
293 	 * If in whitespace, or the next character is whitespace, move past
294 	 * it.  (This doesn't count as a word move.)  Stay at the character
295 	 * past the current one, it sets word "state" for the 'e' command.
296 	 */
297 	if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) {
298 		if (cs_next(sp, &cs))
299 			return (1);
300 		if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch))
301 			goto start;
302 	}
303 	if (cs_fblank(sp, &cs))
304 		return (1);
305 
306 	/*
307 	 * Cyclically move to the next word -- this involves skipping
308 	 * over word characters and then any trailing non-word characters.
309 	 * Note, for the 'e' command, the definition of a word keeps
310 	 * switching.
311 	 */
312 start:	if (type == BIGWORD)
313 		while (cnt--) {
314 			nmw = ISMULTIWIDTH(sp, cs.cs_ch);
315 			for (;;) {
316 				omw = nmw;
317 				if (cs_next(sp, &cs))
318 					return (1);
319 				if (cs.cs_flags == CS_EOF)
320 					goto ret;
321 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) ||
322 				    (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw)
323 					break;
324 			}
325 			/*
326 			 * When we reach the start of the word after the last
327 			 * word, we're done.  If we changed state, back up one
328 			 * to the end of the previous word.
329 			 */
330 			if (cnt == 0) {
331 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
332 					return (1);
333 				break;
334 			}
335 
336 			/* Eat whitespace characters. */
337 			if (nmw == omw && cs_fblank(sp, &cs))
338 				return (1);
339 			if (cs.cs_flags == CS_EOF)
340 				goto ret;
341 		}
342 	else
343 		while (cnt--) {
344 			state = cs.cs_flags == 0 &&
345 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
346 			nmw = ISMULTIWIDTH(sp, cs.cs_ch);
347 			for (;;) {
348 				omw = nmw;
349 				if (cs_next(sp, &cs))
350 					return (1);
351 				if (cs.cs_flags == CS_EOF)
352 					goto ret;
353 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) ||
354 				    (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw)
355 					break;
356 				if (state == INWORD) {
357 					if (!inword(cs.cs_ch))
358 						break;
359 				} else
360 					if (inword(cs.cs_ch))
361 						break;
362 			}
363 			/* See comment above. */
364 			if (cnt == 0) {
365 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
366 					return (1);
367 				break;
368 			}
369 
370 			/* Eat whitespace characters. */
371 			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
372 				if (cs_fblank(sp, &cs))
373 					return (1);
374 			if (cs.cs_flags == CS_EOF)
375 				goto ret;
376 		}
377 
378 	/*
379 	 * If we didn't move, we must be at EOF.
380 	 *
381 	 * !!!
382 	 * That's okay for motion commands, however.
383 	 */
384 ret:	if (!ISMOTION(vp) &&
385 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
386 		v_eof(sp, &vp->m_start);
387 		return (1);
388 	}
389 
390 	/* Set the end of the range for motion commands. */
391 	vp->m_stop.lno = cs.cs_lno;
392 	vp->m_stop.cno = cs.cs_cno;
393 
394 	/*
395 	 * Non-motion commands move to the end of the range.
396 	 * Delete and yank stay at the start, ignore others.
397 	 */
398 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
399 	return (0);
400 }
401 
402 /*
403  * v_WordB -- [count]B
404  *	Move backward a bigword at a time.
405  *
406  * PUBLIC: int v_wordB __P((SCR *, VICMD *));
407  */
408 int
v_wordB(SCR * sp,VICMD * vp)409 v_wordB(SCR *sp, VICMD *vp)
410 {
411 	return (bword(sp, vp, BIGWORD));
412 }
413 
414 /*
415  * v_wordb -- [count]b
416  *	Move backward a word at a time.
417  *
418  * PUBLIC: int v_wordb __P((SCR *, VICMD *));
419  */
420 int
v_wordb(SCR * sp,VICMD * vp)421 v_wordb(SCR *sp, VICMD *vp)
422 {
423 	return (bword(sp, vp, LITTLEWORD));
424 }
425 
426 /*
427  * bword --
428  *	Move backward by words.
429  */
430 static int
bword(SCR * sp,VICMD * vp,enum which type)431 bword(SCR *sp, VICMD *vp, enum which type)
432 {
433 	enum { INWORD, NOTWORD } state;
434 	VCS cs;
435 	u_long cnt;
436 	int nmw, omw;
437 
438 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
439 	cs.cs_lno = vp->m_start.lno;
440 	cs.cs_cno = vp->m_start.cno;
441 	if (cs_init(sp, &cs))
442 		return (1);
443 
444 	/*
445 	 * !!!
446 	 * If in whitespace, or the previous character is whitespace, move
447 	 * past it.  (This doesn't count as a word move.)  Stay at the
448 	 * character before the current one, it sets word "state" for the
449 	 * 'b' command.
450 	 */
451 	if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) {
452 		if (cs_prev(sp, &cs))
453 			return (1);
454 		if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch))
455 			goto start;
456 	}
457 	if (cs_bblank(sp, &cs))
458 		return (1);
459 
460 	/*
461 	 * Cyclically move to the beginning of the previous word -- this
462 	 * involves skipping over word characters and then any trailing
463 	 * non-word characters.  Note, for the 'b' command, the definition
464 	 * of a word keeps switching.
465 	 */
466 start:	if (type == BIGWORD)
467 		while (cnt--) {
468 			nmw = ISMULTIWIDTH(sp, cs.cs_ch);
469 			for (;;) {
470 				omw = nmw;
471 				if (cs_prev(sp, &cs))
472 					return (1);
473 				if (cs.cs_flags == CS_SOF)
474 					goto ret;
475 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) ||
476 				    (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw)
477 					break;
478 			}
479 			/*
480 			 * When we reach the end of the word before the last
481 			 * word, we're done.  If we changed state, move forward
482 			 * one to the end of the next word.
483 			 */
484 			if (cnt == 0) {
485 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
486 					return (1);
487 				break;
488 			}
489 
490 			/* Eat whitespace characters. */
491 			if (nmw == omw && cs_bblank(sp, &cs))
492 				return (1);
493 			if (cs.cs_flags == CS_SOF)
494 				goto ret;
495 		}
496 	else
497 		while (cnt--) {
498 			state = cs.cs_flags == 0 &&
499 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
500 			nmw = ISMULTIWIDTH(sp, cs.cs_ch);
501 			for (;;) {
502 				omw = nmw;
503 				if (cs_prev(sp, &cs))
504 					return (1);
505 				if (cs.cs_flags == CS_SOF)
506 					goto ret;
507 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) ||
508 				    (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw)
509 					break;
510 				if (state == INWORD) {
511 					if (!inword(cs.cs_ch))
512 						break;
513 				} else
514 					if (inword(cs.cs_ch))
515 						break;
516 			}
517 			/* See comment above. */
518 			if (cnt == 0) {
519 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
520 					return (1);
521 				break;
522 			}
523 
524 			/* Eat whitespace characters. */
525 			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
526 				if (cs_bblank(sp, &cs))
527 					return (1);
528 			if (cs.cs_flags == CS_SOF)
529 				goto ret;
530 		}
531 
532 	/* If we didn't move, we must be at SOF. */
533 ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
534 		v_sof(sp, &vp->m_start);
535 		return (1);
536 	}
537 
538 	/* Set the end of the range for motion commands. */
539 	vp->m_stop.lno = cs.cs_lno;
540 	vp->m_stop.cno = cs.cs_cno;
541 
542 	/*
543 	 * All commands move to the end of the range.  Motion commands
544 	 * adjust the starting point to the character before the current
545 	 * one.
546 	 *
547 	 * !!!
548 	 * The historic vi didn't get this right -- the `yb' command yanked
549 	 * the right stuff and even updated the cursor value, but the cursor
550 	 * was not actually updated on the screen.
551 	 */
552 	vp->m_final = vp->m_stop;
553 	if (ISMOTION(vp))
554 		--vp->m_start.cno;
555 	return (0);
556 }
557