1 /* $NetBSD: v_word.c,v 1.4 2017/11/21 07:43:47 rin Exp $ */
2 /*-
3 * Copyright (c) 1992, 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 * Copyright (c) 1992, 1993, 1994, 1995, 1996
6 * Keith Bostic. All rights reserved.
7 *
8 * See the LICENSE file for redistribution information.
9 */
10
11 #include "config.h"
12
13 #include <sys/cdefs.h>
14 #if 0
15 #ifndef lint
16 static const char sccsid[] = "Id: v_word.c,v 10.6 2001/06/25 15:19:36 skimo Exp (Berkeley) Date: 2001/06/25 15:19:36 ";
17 #endif /* not lint */
18 #else
19 __RCSID("$NetBSD: v_word.c,v 1.4 2017/11/21 07:43:47 rin Exp $");
20 #endif
21
22 #include <sys/types.h>
23 #include <sys/queue.h>
24 #include <sys/time.h>
25
26 #include <bitstring.h>
27 #include <ctype.h>
28 #include <limits.h>
29 #include <stdio.h>
30
31 #include "../common/common.h"
32 #include "vi.h"
33
34 /*
35 * There are two types of "words". Bigwords are easy -- groups of anything
36 * delimited by whitespace. Normal words are trickier. They are either a
37 * group of characters, numbers and underscores, or a group of anything but,
38 * delimited by whitespace. When searching for a word, if you're in whitespace, it's
39 * easy, just remove the whitespace and go to the beginning or end of the
40 * word. Otherwise, figure out if the next character is in a different group.
41 * If it is, go to the beginning or end of that group, otherwise, go to the
42 * beginning or end of the current group. The historic version of vi didn't
43 * get this right, so, for example, there were cases where "4e" was not the
44 * same as "eeee" -- in particular, single character words, and commands that
45 * began in whitespace were almost always handled incorrectly. To get it right
46 * you have to resolve the cursor after each search so that the look-ahead to
47 * figure out what type of "word" the cursor is in will be correct.
48 *
49 * Empty lines, and lines that consist of only white-space characters count
50 * as a single word, and the beginning and end of the file counts as an
51 * infinite number of words.
52 *
53 * Movements associated with commands are different than movement commands.
54 * For example, in "abc def", with the cursor on the 'a', "cw" is from
55 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white
56 * space is discarded from the change movement. Another example is that,
57 * in the same string, a "cw" on any white space character replaces that
58 * single character, and nothing else. Ain't nothin' in here that's easy.
59 *
60 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
61 * would treat groups of empty lines as individual words, i.e. the command
62 * would move the cursor to each new empty line. The 'e' and 'E' commands
63 * would treat groups of empty lines as a single word, i.e. the first use
64 * would move past the group of lines. The 'b' command would just beep at
65 * you, or, if you did it from the start of the line as part of a motion
66 * command, go absolutely nuts. If the lines contained only white-space
67 * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
68 * 'b', 'E' and 'e' commands would treat the group as a single word, and
69 * the 'B' and 'b' commands will treat the lines as individual words. This
70 * implementation treats all of these cases as a single white-space word.
71 *
72 * We regard a boundary between single- and multi-width characters as
73 * a word boundary.
74 */
75
76 enum which {BIGWORD, LITTLEWORD};
77
78 static int bword __P((SCR *, VICMD *, enum which));
79 static int eword __P((SCR *, VICMD *, enum which));
80 static int fword __P((SCR *, VICMD *, enum which));
81
82 /*
83 * v_wordW -- [count]W
84 * Move forward a bigword at a time.
85 *
86 * PUBLIC: int v_wordW __P((SCR *, VICMD *));
87 */
88 int
v_wordW(SCR * sp,VICMD * vp)89 v_wordW(SCR *sp, VICMD *vp)
90 {
91 return (fword(sp, vp, BIGWORD));
92 }
93
94 /*
95 * v_wordw -- [count]w
96 * Move forward a word at a time.
97 *
98 * PUBLIC: int v_wordw __P((SCR *, VICMD *));
99 */
100 int
v_wordw(SCR * sp,VICMD * vp)101 v_wordw(SCR *sp, VICMD *vp)
102 {
103 return (fword(sp, vp, LITTLEWORD));
104 }
105
106 /*
107 * fword --
108 * Move forward by words.
109 */
110 static int
fword(SCR * sp,VICMD * vp,enum which type)111 fword(SCR *sp, VICMD *vp, enum which type)
112 {
113 enum { INWORD, NOTWORD } state;
114 VCS cs;
115 u_long cnt;
116 int nmw, omw;
117
118 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
119 cs.cs_lno = vp->m_start.lno;
120 cs.cs_cno = vp->m_start.cno;
121 if (cs_init(sp, &cs))
122 return (1);
123
124 /*
125 * If in white-space:
126 * If the count is 1, and it's a change command, we're done.
127 * Else, move to the first non-white-space character, which
128 * counts as a single word move. If it's a motion command,
129 * don't move off the end of the line.
130 */
131 if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))) {
132 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
133 if (ISCMD(vp->rkp, 'c'))
134 return (0);
135 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
136 if (cs_fspace(sp, &cs))
137 return (1);
138 goto ret;
139 }
140 }
141 if (cs_fblank(sp, &cs))
142 return (1);
143 --cnt;
144 }
145
146 /*
147 * Cyclically move to the next word -- this involves skipping
148 * over word characters and then any trailing non-word characters.
149 * Note, for the 'w' command, the definition of a word keeps
150 * switching.
151 */
152 if (type == BIGWORD)
153 while (cnt--) {
154 nmw = ISMULTIWIDTH(sp, cs.cs_ch);
155 for (;;) {
156 omw = nmw;
157 if (cs_next(sp, &cs))
158 return (1);
159 if (cs.cs_flags == CS_EOF)
160 goto ret;
161 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) ||
162 (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw)
163 break;
164 }
165 /*
166 * If a motion command and we're at the end of the
167 * last word, we're done. Delete and yank eat any
168 * trailing blanks, but we don't move off the end
169 * of the line regardless.
170 */
171 if (cnt == 0 && ISMOTION(vp)) {
172 if ((ISCMD(vp->rkp, 'd') ||
173 ISCMD(vp->rkp, 'y')) &&
174 cs_fspace(sp, &cs))
175 return (1);
176 break;
177 }
178
179 /* Eat whitespace characters. */
180 if (nmw == omw && cs_fblank(sp, &cs))
181 return (1);
182 if (cs.cs_flags == CS_EOF)
183 goto ret;
184 }
185 else
186 while (cnt--) {
187 state = cs.cs_flags == 0 &&
188 inword(cs.cs_ch) ? INWORD : NOTWORD;
189 nmw = ISMULTIWIDTH(sp, cs.cs_ch);
190 for (;;) {
191 omw = nmw;
192 if (cs_next(sp, &cs))
193 return (1);
194 if (cs.cs_flags == CS_EOF)
195 goto ret;
196 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) ||
197 (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw)
198 break;
199 if (state == INWORD) {
200 if (!inword(cs.cs_ch))
201 break;
202 } else
203 if (inword(cs.cs_ch))
204 break;
205 }
206 /* See comment above. */
207 if (cnt == 0 && ISMOTION(vp)) {
208 if ((ISCMD(vp->rkp, 'd') ||
209 ISCMD(vp->rkp, 'y')) &&
210 cs_fspace(sp, &cs))
211 return (1);
212 break;
213 }
214
215 /* Eat whitespace characters. */
216 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
217 if (cs_fblank(sp, &cs))
218 return (1);
219 if (cs.cs_flags == CS_EOF)
220 goto ret;
221 }
222
223 /*
224 * If we didn't move, we must be at EOF.
225 *
226 * !!!
227 * That's okay for motion commands, however.
228 */
229 ret: if (!ISMOTION(vp) &&
230 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
231 v_eof(sp, &vp->m_start);
232 return (1);
233 }
234
235 /* Adjust the end of the range for motion commands. */
236 vp->m_stop.lno = cs.cs_lno;
237 vp->m_stop.cno = cs.cs_cno;
238 if (ISMOTION(vp) && cs.cs_flags == 0)
239 --vp->m_stop.cno;
240
241 /*
242 * Non-motion commands move to the end of the range. Delete
243 * and yank stay at the start, ignore others.
244 */
245 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
246 return (0);
247 }
248
249 /*
250 * v_wordE -- [count]E
251 * Move forward to the end of the bigword.
252 *
253 * PUBLIC: int v_wordE __P((SCR *, VICMD *));
254 */
255 int
v_wordE(SCR * sp,VICMD * vp)256 v_wordE(SCR *sp, VICMD *vp)
257 {
258 return (eword(sp, vp, BIGWORD));
259 }
260
261 /*
262 * v_worde -- [count]e
263 * Move forward to the end of the word.
264 *
265 * PUBLIC: int v_worde __P((SCR *, VICMD *));
266 */
267 int
v_worde(SCR * sp,VICMD * vp)268 v_worde(SCR *sp, VICMD *vp)
269 {
270 return (eword(sp, vp, LITTLEWORD));
271 }
272
273 /*
274 * eword --
275 * Move forward to the end of the word.
276 */
277 static int
eword(SCR * sp,VICMD * vp,enum which type)278 eword(SCR *sp, VICMD *vp, enum which type)
279 {
280 enum { INWORD, NOTWORD } state;
281 VCS cs;
282 u_long cnt;
283 int nmw, omw;
284
285 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
286 cs.cs_lno = vp->m_start.lno;
287 cs.cs_cno = vp->m_start.cno;
288 if (cs_init(sp, &cs))
289 return (1);
290
291 /*
292 * !!!
293 * If in whitespace, or the next character is whitespace, move past
294 * it. (This doesn't count as a word move.) Stay at the character
295 * past the current one, it sets word "state" for the 'e' command.
296 */
297 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) {
298 if (cs_next(sp, &cs))
299 return (1);
300 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch))
301 goto start;
302 }
303 if (cs_fblank(sp, &cs))
304 return (1);
305
306 /*
307 * Cyclically move to the next word -- this involves skipping
308 * over word characters and then any trailing non-word characters.
309 * Note, for the 'e' command, the definition of a word keeps
310 * switching.
311 */
312 start: if (type == BIGWORD)
313 while (cnt--) {
314 nmw = ISMULTIWIDTH(sp, cs.cs_ch);
315 for (;;) {
316 omw = nmw;
317 if (cs_next(sp, &cs))
318 return (1);
319 if (cs.cs_flags == CS_EOF)
320 goto ret;
321 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) ||
322 (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw)
323 break;
324 }
325 /*
326 * When we reach the start of the word after the last
327 * word, we're done. If we changed state, back up one
328 * to the end of the previous word.
329 */
330 if (cnt == 0) {
331 if (cs.cs_flags == 0 && cs_prev(sp, &cs))
332 return (1);
333 break;
334 }
335
336 /* Eat whitespace characters. */
337 if (nmw == omw && cs_fblank(sp, &cs))
338 return (1);
339 if (cs.cs_flags == CS_EOF)
340 goto ret;
341 }
342 else
343 while (cnt--) {
344 state = cs.cs_flags == 0 &&
345 inword(cs.cs_ch) ? INWORD : NOTWORD;
346 nmw = ISMULTIWIDTH(sp, cs.cs_ch);
347 for (;;) {
348 omw = nmw;
349 if (cs_next(sp, &cs))
350 return (1);
351 if (cs.cs_flags == CS_EOF)
352 goto ret;
353 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) ||
354 (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw)
355 break;
356 if (state == INWORD) {
357 if (!inword(cs.cs_ch))
358 break;
359 } else
360 if (inword(cs.cs_ch))
361 break;
362 }
363 /* See comment above. */
364 if (cnt == 0) {
365 if (cs.cs_flags == 0 && cs_prev(sp, &cs))
366 return (1);
367 break;
368 }
369
370 /* Eat whitespace characters. */
371 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
372 if (cs_fblank(sp, &cs))
373 return (1);
374 if (cs.cs_flags == CS_EOF)
375 goto ret;
376 }
377
378 /*
379 * If we didn't move, we must be at EOF.
380 *
381 * !!!
382 * That's okay for motion commands, however.
383 */
384 ret: if (!ISMOTION(vp) &&
385 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
386 v_eof(sp, &vp->m_start);
387 return (1);
388 }
389
390 /* Set the end of the range for motion commands. */
391 vp->m_stop.lno = cs.cs_lno;
392 vp->m_stop.cno = cs.cs_cno;
393
394 /*
395 * Non-motion commands move to the end of the range.
396 * Delete and yank stay at the start, ignore others.
397 */
398 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
399 return (0);
400 }
401
402 /*
403 * v_WordB -- [count]B
404 * Move backward a bigword at a time.
405 *
406 * PUBLIC: int v_wordB __P((SCR *, VICMD *));
407 */
408 int
v_wordB(SCR * sp,VICMD * vp)409 v_wordB(SCR *sp, VICMD *vp)
410 {
411 return (bword(sp, vp, BIGWORD));
412 }
413
414 /*
415 * v_wordb -- [count]b
416 * Move backward a word at a time.
417 *
418 * PUBLIC: int v_wordb __P((SCR *, VICMD *));
419 */
420 int
v_wordb(SCR * sp,VICMD * vp)421 v_wordb(SCR *sp, VICMD *vp)
422 {
423 return (bword(sp, vp, LITTLEWORD));
424 }
425
426 /*
427 * bword --
428 * Move backward by words.
429 */
430 static int
bword(SCR * sp,VICMD * vp,enum which type)431 bword(SCR *sp, VICMD *vp, enum which type)
432 {
433 enum { INWORD, NOTWORD } state;
434 VCS cs;
435 u_long cnt;
436 int nmw, omw;
437
438 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
439 cs.cs_lno = vp->m_start.lno;
440 cs.cs_cno = vp->m_start.cno;
441 if (cs_init(sp, &cs))
442 return (1);
443
444 /*
445 * !!!
446 * If in whitespace, or the previous character is whitespace, move
447 * past it. (This doesn't count as a word move.) Stay at the
448 * character before the current one, it sets word "state" for the
449 * 'b' command.
450 */
451 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) {
452 if (cs_prev(sp, &cs))
453 return (1);
454 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch))
455 goto start;
456 }
457 if (cs_bblank(sp, &cs))
458 return (1);
459
460 /*
461 * Cyclically move to the beginning of the previous word -- this
462 * involves skipping over word characters and then any trailing
463 * non-word characters. Note, for the 'b' command, the definition
464 * of a word keeps switching.
465 */
466 start: if (type == BIGWORD)
467 while (cnt--) {
468 nmw = ISMULTIWIDTH(sp, cs.cs_ch);
469 for (;;) {
470 omw = nmw;
471 if (cs_prev(sp, &cs))
472 return (1);
473 if (cs.cs_flags == CS_SOF)
474 goto ret;
475 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) ||
476 (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw)
477 break;
478 }
479 /*
480 * When we reach the end of the word before the last
481 * word, we're done. If we changed state, move forward
482 * one to the end of the next word.
483 */
484 if (cnt == 0) {
485 if (cs.cs_flags == 0 && cs_next(sp, &cs))
486 return (1);
487 break;
488 }
489
490 /* Eat whitespace characters. */
491 if (nmw == omw && cs_bblank(sp, &cs))
492 return (1);
493 if (cs.cs_flags == CS_SOF)
494 goto ret;
495 }
496 else
497 while (cnt--) {
498 state = cs.cs_flags == 0 &&
499 inword(cs.cs_ch) ? INWORD : NOTWORD;
500 nmw = ISMULTIWIDTH(sp, cs.cs_ch);
501 for (;;) {
502 omw = nmw;
503 if (cs_prev(sp, &cs))
504 return (1);
505 if (cs.cs_flags == CS_SOF)
506 goto ret;
507 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) ||
508 (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw)
509 break;
510 if (state == INWORD) {
511 if (!inword(cs.cs_ch))
512 break;
513 } else
514 if (inword(cs.cs_ch))
515 break;
516 }
517 /* See comment above. */
518 if (cnt == 0) {
519 if (cs.cs_flags == 0 && cs_next(sp, &cs))
520 return (1);
521 break;
522 }
523
524 /* Eat whitespace characters. */
525 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
526 if (cs_bblank(sp, &cs))
527 return (1);
528 if (cs.cs_flags == CS_SOF)
529 goto ret;
530 }
531
532 /* If we didn't move, we must be at SOF. */
533 ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
534 v_sof(sp, &vp->m_start);
535 return (1);
536 }
537
538 /* Set the end of the range for motion commands. */
539 vp->m_stop.lno = cs.cs_lno;
540 vp->m_stop.cno = cs.cs_cno;
541
542 /*
543 * All commands move to the end of the range. Motion commands
544 * adjust the starting point to the character before the current
545 * one.
546 *
547 * !!!
548 * The historic vi didn't get this right -- the `yb' command yanked
549 * the right stuff and even updated the cursor value, but the cursor
550 * was not actually updated on the screen.
551 */
552 vp->m_final = vp->m_stop;
553 if (ISMOTION(vp))
554 --vp->m_start.cno;
555 return (0);
556 }
557