xref: /netbsd-src/external/bsd/nvi/dist/vi/v_word.c (revision aceb213538ec08a74028e213127af18aa17bf1cf)
1 /*	$NetBSD: v_word.c,v 1.2 2013/11/22 15:52:06 christos Exp $ */
2 /*-
3  * Copyright (c) 1992, 1993, 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 1992, 1993, 1994, 1995, 1996
6  *	Keith Bostic.  All rights reserved.
7  *
8  * See the LICENSE file for redistribution information.
9  */
10 
11 #include "config.h"
12 
13 #ifndef lint
14 static const char sccsid[] = "Id: v_word.c,v 10.6 2001/06/25 15:19:36 skimo Exp  (Berkeley) Date: 2001/06/25 15:19:36 ";
15 #endif /* not lint */
16 
17 #include <sys/types.h>
18 #include <sys/queue.h>
19 #include <sys/time.h>
20 
21 #include <bitstring.h>
22 #include <ctype.h>
23 #include <limits.h>
24 #include <stdio.h>
25 
26 #include "../common/common.h"
27 #include "vi.h"
28 
29 /*
30  * There are two types of "words".  Bigwords are easy -- groups of anything
31  * delimited by whitespace.  Normal words are trickier.  They are either a
32  * group of characters, numbers and underscores, or a group of anything but,
33  * delimited by whitespace.  When moving by word, if you're in whitespace, it's
34  * easy, just remove the whitespace and go to the beginning or end of the
35  * word.  Otherwise, figure out if the next character is in a different group.
36  * If it is, go to the beginning or end of that group, otherwise, go to the
37  * beginning or end of the current group.  The historic version of vi didn't
38  * get this right, so, for example, there were cases where "4e" was not the
39  * same as "eeee" -- in particular, single character words, and commands that
40  * began in whitespace were almost always handled incorrectly.  To get it right
41  * you have to resolve the cursor after each search so that the look-ahead to
42  * figure out what type of "word" the cursor is in will be correct.
43  *
44  * Empty lines, and lines that consist of only white-space characters count
45  * as a single word, and the beginning and end of the file counts as an
46  * infinite number of words.
47  *
48  * Movements associated with commands are different than movement commands.
49  * For example, in "abc  def", with the cursor on the 'a', "cw" is from
50  * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
51  * space is discarded from the change movement.  Another example is that,
52  * in the same string, a "cw" on any white space character replaces that
53  * single character, and nothing else.  Ain't nothin' in here that's easy.
54  *
55  * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
56  * would treat groups of empty lines as individual words, i.e. the command
57  * would move the cursor to each new empty line.  The 'e' and 'E' commands
58  * would treat groups of empty lines as a single word, i.e. the first use
59  * would move past the group of lines.  The 'b' command would just beep at
60  * you, or, if you did it from the start of the line as part of a motion
61  * command, go absolutely nuts.  If the lines contained only white-space
62  * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
63  * 'b', 'E' and 'e' commands would treat the group as a single word, and
64  * the 'B' and 'b' commands will treat the lines as individual words.  This
65  * implementation treats all of these cases as a single white-space word.
66  */
67 
68 enum which {BIGWORD, LITTLEWORD};
69 
70 static int bword __P((SCR *, VICMD *, enum which));
71 static int eword __P((SCR *, VICMD *, enum which));
72 static int fword __P((SCR *, VICMD *, enum which));
73 
74 /*
75  * v_wordW -- [count]W
76  *	Move forward a bigword at a time.
77  *
78  * PUBLIC: int v_wordW __P((SCR *, VICMD *));
79  */
80 int
81 v_wordW(SCR *sp, VICMD *vp)
82 {
83 	return (fword(sp, vp, BIGWORD));
84 }
85 
86 /*
87  * v_wordw -- [count]w
88  *	Move forward a word at a time.
89  *
90  * PUBLIC: int v_wordw __P((SCR *, VICMD *));
91  */
92 int
93 v_wordw(SCR *sp, VICMD *vp)
94 {
95 	return (fword(sp, vp, LITTLEWORD));
96 }
97 
98 /*
99  * fword --
100  *	Move forward by words.
101  */
102 static int
103 fword(SCR *sp, VICMD *vp, enum which type)
104 {
105 	enum { INWORD, NOTWORD } state;
106 	VCS cs;
107 	u_long cnt;
108 
109 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
110 	cs.cs_lno = vp->m_start.lno;
111 	cs.cs_cno = vp->m_start.cno;
112 	if (cs_init(sp, &cs))
113 		return (1);
114 
115 	/*
116 	 * If in white-space:
117 	 *	If the count is 1, and it's a change command, we're done.
118 	 *	Else, move to the first non-white-space character, which
119 	 *	counts as a single word move.  If it's a motion command,
120 	 *	don't move off the end of the line.
121 	 */
122 	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))) {
123 		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
124 			if (ISCMD(vp->rkp, 'c'))
125 				return (0);
126 			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
127 				if (cs_fspace(sp, &cs))
128 					return (1);
129 				goto ret;
130 			}
131 		}
132 		if (cs_fblank(sp, &cs))
133 			return (1);
134 		--cnt;
135 	}
136 
137 	/*
138 	 * Cyclically move to the next word -- this involves skipping
139 	 * over word characters and then any trailing non-word characters.
140 	 * Note, for the 'w' command, the definition of a word keeps
141 	 * switching.
142 	 */
143 	if (type == BIGWORD)
144 		while (cnt--) {
145 			for (;;) {
146 				if (cs_next(sp, &cs))
147 					return (1);
148 				if (cs.cs_flags == CS_EOF)
149 					goto ret;
150 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
151 					break;
152 			}
153 			/*
154 			 * If a motion command and we're at the end of the
155 			 * last word, we're done.  Delete and yank eat any
156 			 * trailing blanks, but we don't move off the end
157 			 * of the line regardless.
158 			 */
159 			if (cnt == 0 && ISMOTION(vp)) {
160 				if ((ISCMD(vp->rkp, 'd') ||
161 				    ISCMD(vp->rkp, 'y')) &&
162 				    cs_fspace(sp, &cs))
163 					return (1);
164 				break;
165 			}
166 
167 			/* Eat whitespace characters. */
168 			if (cs_fblank(sp, &cs))
169 				return (1);
170 			if (cs.cs_flags == CS_EOF)
171 				goto ret;
172 		}
173 	else
174 		while (cnt--) {
175 			state = cs.cs_flags == 0 &&
176 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
177 			for (;;) {
178 				if (cs_next(sp, &cs))
179 					return (1);
180 				if (cs.cs_flags == CS_EOF)
181 					goto ret;
182 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
183 					break;
184 				if (state == INWORD) {
185 					if (!inword(cs.cs_ch))
186 						break;
187 				} else
188 					if (inword(cs.cs_ch))
189 						break;
190 			}
191 			/* See comment above. */
192 			if (cnt == 0 && ISMOTION(vp)) {
193 				if ((ISCMD(vp->rkp, 'd') ||
194 				    ISCMD(vp->rkp, 'y')) &&
195 				    cs_fspace(sp, &cs))
196 					return (1);
197 				break;
198 			}
199 
200 			/* Eat whitespace characters. */
201 			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
202 				if (cs_fblank(sp, &cs))
203 					return (1);
204 			if (cs.cs_flags == CS_EOF)
205 				goto ret;
206 		}
207 
208 	/*
209 	 * If we didn't move, we must be at EOF.
210 	 *
211 	 * !!!
212 	 * That's okay for motion commands, however.
213 	 */
214 ret:	if (!ISMOTION(vp) &&
215 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
216 		v_eof(sp, &vp->m_start);
217 		return (1);
218 	}
219 
220 	/* Adjust the end of the range for motion commands. */
221 	vp->m_stop.lno = cs.cs_lno;
222 	vp->m_stop.cno = cs.cs_cno;
223 	if (ISMOTION(vp) && cs.cs_flags == 0)
224 		--vp->m_stop.cno;
225 
226 	/*
227 	 * Non-motion commands move to the end of the range.  Delete
228 	 * and yank stay at the start, ignore others.
229 	 */
230 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
231 	return (0);
232 }
233 
234 /*
235  * v_wordE -- [count]E
236  *	Move forward to the end of the bigword.
237  *
238  * PUBLIC: int v_wordE __P((SCR *, VICMD *));
239  */
240 int
241 v_wordE(SCR *sp, VICMD *vp)
242 {
243 	return (eword(sp, vp, BIGWORD));
244 }
245 
246 /*
247  * v_worde -- [count]e
248  *	Move forward to the end of the word.
249  *
250  * PUBLIC: int v_worde __P((SCR *, VICMD *));
251  */
252 int
253 v_worde(SCR *sp, VICMD *vp)
254 {
255 	return (eword(sp, vp, LITTLEWORD));
256 }
257 
258 /*
259  * eword --
260  *	Move forward to the end of the word.
261  */
262 static int
263 eword(SCR *sp, VICMD *vp, enum which type)
264 {
265 	enum { INWORD, NOTWORD } state;
266 	VCS cs;
267 	u_long cnt;
268 
269 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
270 	cs.cs_lno = vp->m_start.lno;
271 	cs.cs_cno = vp->m_start.cno;
272 	if (cs_init(sp, &cs))
273 		return (1);
274 
275 	/*
276 	 * !!!
277 	 * If in whitespace, or the next character is whitespace, move past
278 	 * it.  (This doesn't count as a word move.)  Stay at the character
279 	 * past the current one, it sets word "state" for the 'e' command.
280 	 */
281 	if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) {
282 		if (cs_next(sp, &cs))
283 			return (1);
284 		if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch))
285 			goto start;
286 	}
287 	if (cs_fblank(sp, &cs))
288 		return (1);
289 
290 	/*
291 	 * Cyclically move to the next word -- this involves skipping
292 	 * over word characters and then any trailing non-word characters.
293 	 * Note, for the 'e' command, the definition of a word keeps
294 	 * switching.
295 	 */
296 start:	if (type == BIGWORD)
297 		while (cnt--) {
298 			for (;;) {
299 				if (cs_next(sp, &cs))
300 					return (1);
301 				if (cs.cs_flags == CS_EOF)
302 					goto ret;
303 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
304 					break;
305 			}
306 			/*
307 			 * When we reach the start of the word after the last
308 			 * word, we're done.  If we changed state, back up one
309 			 * to the end of the previous word.
310 			 */
311 			if (cnt == 0) {
312 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
313 					return (1);
314 				break;
315 			}
316 
317 			/* Eat whitespace characters. */
318 			if (cs_fblank(sp, &cs))
319 				return (1);
320 			if (cs.cs_flags == CS_EOF)
321 				goto ret;
322 		}
323 	else
324 		while (cnt--) {
325 			state = cs.cs_flags == 0 &&
326 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
327 			for (;;) {
328 				if (cs_next(sp, &cs))
329 					return (1);
330 				if (cs.cs_flags == CS_EOF)
331 					goto ret;
332 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
333 					break;
334 				if (state == INWORD) {
335 					if (!inword(cs.cs_ch))
336 						break;
337 				} else
338 					if (inword(cs.cs_ch))
339 						break;
340 			}
341 			/* See comment above. */
342 			if (cnt == 0) {
343 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
344 					return (1);
345 				break;
346 			}
347 
348 			/* Eat whitespace characters. */
349 			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
350 				if (cs_fblank(sp, &cs))
351 					return (1);
352 			if (cs.cs_flags == CS_EOF)
353 				goto ret;
354 		}
355 
356 	/*
357 	 * If we didn't move, we must be at EOF.
358 	 *
359 	 * !!!
360 	 * That's okay for motion commands, however.
361 	 */
362 ret:	if (!ISMOTION(vp) &&
363 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
364 		v_eof(sp, &vp->m_start);
365 		return (1);
366 	}
367 
368 	/* Set the end of the range for motion commands. */
369 	vp->m_stop.lno = cs.cs_lno;
370 	vp->m_stop.cno = cs.cs_cno;
371 
372 	/*
373 	 * Non-motion commands move to the end of the range.
374 	 * Delete and yank stay at the start, ignore others.
375 	 */
376 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
377 	return (0);
378 }
379 
380 /*
381  * v_WordB -- [count]B
382  *	Move backward a bigword at a time.
383  *
384  * PUBLIC: int v_wordB __P((SCR *, VICMD *));
385  */
386 int
387 v_wordB(SCR *sp, VICMD *vp)
388 {
389 	return (bword(sp, vp, BIGWORD));
390 }
391 
392 /*
393  * v_wordb -- [count]b
394  *	Move backward a word at a time.
395  *
396  * PUBLIC: int v_wordb __P((SCR *, VICMD *));
397  */
398 int
399 v_wordb(SCR *sp, VICMD *vp)
400 {
401 	return (bword(sp, vp, LITTLEWORD));
402 }
403 
404 /*
405  * bword --
406  *	Move backward by words.
407  */
408 static int
409 bword(SCR *sp, VICMD *vp, enum which type)
410 {
411 	enum { INWORD, NOTWORD } state;
412 	VCS cs;
413 	u_long cnt;
414 
415 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
416 	cs.cs_lno = vp->m_start.lno;
417 	cs.cs_cno = vp->m_start.cno;
418 	if (cs_init(sp, &cs))
419 		return (1);
420 
421 	/*
422 	 * !!!
423 	 * If in whitespace, or the previous character is whitespace, move
424 	 * past it.  (This doesn't count as a word move.)  Stay at the
425 	 * character before the current one, it sets word "state" for the
426 	 * 'b' command.
427 	 */
428 	if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) {
429 		if (cs_prev(sp, &cs))
430 			return (1);
431 		if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch))
432 			goto start;
433 	}
434 	if (cs_bblank(sp, &cs))
435 		return (1);
436 
437 	/*
438 	 * Cyclically move to the beginning of the previous word -- this
439 	 * involves skipping over word characters and then any trailing
440 	 * non-word characters.  Note, for the 'b' command, the definition
441 	 * of a word keeps switching.
442 	 */
443 start:	if (type == BIGWORD)
444 		while (cnt--) {
445 			for (;;) {
446 				if (cs_prev(sp, &cs))
447 					return (1);
448 				if (cs.cs_flags == CS_SOF)
449 					goto ret;
450 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
451 					break;
452 			}
453 			/*
454 			 * When we reach the end of the word before the last
455 			 * word, we're done.  If we changed state, move forward
456 			 * one to the end of the next word.
457 			 */
458 			if (cnt == 0) {
459 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
460 					return (1);
461 				break;
462 			}
463 
464 			/* Eat whitespace characters. */
465 			if (cs_bblank(sp, &cs))
466 				return (1);
467 			if (cs.cs_flags == CS_SOF)
468 				goto ret;
469 		}
470 	else
471 		while (cnt--) {
472 			state = cs.cs_flags == 0 &&
473 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
474 			for (;;) {
475 				if (cs_prev(sp, &cs))
476 					return (1);
477 				if (cs.cs_flags == CS_SOF)
478 					goto ret;
479 				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
480 					break;
481 				if (state == INWORD) {
482 					if (!inword(cs.cs_ch))
483 						break;
484 				} else
485 					if (inword(cs.cs_ch))
486 						break;
487 			}
488 			/* See comment above. */
489 			if (cnt == 0) {
490 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
491 					return (1);
492 				break;
493 			}
494 
495 			/* Eat whitespace characters. */
496 			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
497 				if (cs_bblank(sp, &cs))
498 					return (1);
499 			if (cs.cs_flags == CS_SOF)
500 				goto ret;
501 		}
502 
503 	/* If we didn't move, we must be at SOF. */
504 ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
505 		v_sof(sp, &vp->m_start);
506 		return (1);
507 	}
508 
509 	/* Set the end of the range for motion commands. */
510 	vp->m_stop.lno = cs.cs_lno;
511 	vp->m_stop.cno = cs.cs_cno;
512 
513 	/*
514 	 * All commands move to the end of the range.  Motion commands
515 	 * adjust the starting point to the character before the current
516 	 * one.
517 	 *
518 	 * !!!
519 	 * The historic vi didn't get this right -- the `yb' command yanked
520 	 * the right stuff and even updated the cursor value, but the cursor
521 	 * was not actually updated on the screen.
522 	 */
523 	vp->m_final = vp->m_stop;
524 	if (ISMOTION(vp))
525 		--vp->m_start.cno;
526 	return (0);
527 }
528