1 /* $OpenBSD: v_word.c,v 1.7 2014/11/12 04:28:41 bentley Exp $ */
2
3 /*-
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7 * Keith Bostic. All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12 #include "config.h"
13
14 #include <sys/types.h>
15 #include <sys/queue.h>
16 #include <sys/time.h>
17
18 #include <bitstring.h>
19 #include <ctype.h>
20 #include <limits.h>
21 #include <stdio.h>
22
23 #include "../common/common.h"
24 #include "vi.h"
25
26 /*
27 * There are two types of "words". Bigwords are easy -- groups of anything
28 * delimited by whitespace. Normal words are trickier. They are either a
29 * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace.  When searching for a word, if you're in
 * whitespace, it's easy, just skip past the whitespace and go to the
 * beginning or end of the word.  Otherwise, figure out if the next
 * character is in a different group.
33 * If it is, go to the beginning or end of that group, otherwise, go to the
34 * beginning or end of the current group. The historic version of vi didn't
35 * get this right, so, for example, there were cases where "4e" was not the
36 * same as "eeee" -- in particular, single character words, and commands that
37 * began in whitespace were almost always handled incorrectly. To get it right
38 * you have to resolve the cursor after each search so that the look-ahead to
39 * figure out what type of "word" the cursor is in will be correct.
40 *
41 * Empty lines, and lines that consist of only white-space characters count
42 * as a single word, and the beginning and end of the file counts as an
43 * infinite number of words.
44 *
45 * Movements associated with commands are different than movement commands.
46 * For example, in "abc def", with the cursor on the 'a', "cw" is from
47 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white
48 * space is discarded from the change movement. Another example is that,
49 * in the same string, a "cw" on any white space character replaces that
50 * single character, and nothing else. Ain't nothin' in here that's easy.
51 *
52 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
53 * would treat groups of empty lines as individual words, i.e. the command
54 * would move the cursor to each new empty line. The 'e' and 'E' commands
55 * would treat groups of empty lines as a single word, i.e. the first use
56 * would move past the group of lines. The 'b' command would just beep at
57 * you, or, if you did it from the start of the line as part of a motion
58 * command, go absolutely nuts. If the lines contained only white-space
59 * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
60 * 'b', 'E' and 'e' commands would treat the group as a single word, and
61 * the 'B' and 'b' commands will treat the lines as individual words. This
62 * implementation treats all of these cases as a single white-space word.
63 */
64
65 enum which {BIGWORD, LITTLEWORD};
66
67 static int bword(SCR *, VICMD *, enum which);
68 static int eword(SCR *, VICMD *, enum which);
69 static int fword(SCR *, VICMD *, enum which);
70
71 /*
72 * v_wordW -- [count]W
73 * Move forward a bigword at a time.
74 *
75 * PUBLIC: int v_wordW(SCR *, VICMD *);
76 */
77 int
v_wordW(SCR * sp,VICMD * vp)78 v_wordW(SCR *sp, VICMD *vp)
79 {
80 return (fword(sp, vp, BIGWORD));
81 }
82
83 /*
84 * v_wordw -- [count]w
85 * Move forward a word at a time.
86 *
87 * PUBLIC: int v_wordw(SCR *, VICMD *);
88 */
89 int
v_wordw(SCR * sp,VICMD * vp)90 v_wordw(SCR *sp, VICMD *vp)
91 {
92 return (fword(sp, vp, LITTLEWORD));
93 }
94
95 /*
96 * fword --
97 * Move forward by words.
98 */
99 static int
fword(SCR * sp,VICMD * vp,enum which type)100 fword(SCR *sp, VICMD *vp, enum which type)
101 {
102 enum { INWORD, NOTWORD } state;
103 VCS cs;
104 u_long cnt;
105
106 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
107 cs.cs_lno = vp->m_start.lno;
108 cs.cs_cno = vp->m_start.cno;
109 if (cs_init(sp, &cs))
110 return (1);
111
112 /*
113 * If in white-space:
114 * If the count is 1, and it's a change command, we're done.
115 * Else, move to the first non-white-space character, which
116 * counts as a single word move. If it's a motion command,
117 * don't move off the end of the line.
118 */
119 if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) {
120 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
121 if (ISCMD(vp->rkp, 'c'))
122 return (0);
123 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
124 if (cs_fspace(sp, &cs))
125 return (1);
126 goto ret;
127 }
128 }
129 if (cs_fblank(sp, &cs))
130 return (1);
131 --cnt;
132 }
133
134 /*
135 * Cyclically move to the next word -- this involves skipping
136 * over word characters and then any trailing non-word characters.
137 * Note, for the 'w' command, the definition of a word keeps
138 * switching.
139 */
140 if (type == BIGWORD)
141 while (cnt--) {
142 for (;;) {
143 if (cs_next(sp, &cs))
144 return (1);
145 if (cs.cs_flags == CS_EOF)
146 goto ret;
147 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
148 break;
149 }
150 /*
151 * If a motion command and we're at the end of the
152 * last word, we're done. Delete and yank eat any
153 * trailing blanks, but we don't move off the end
154 * of the line regardless.
155 */
156 if (cnt == 0 && ISMOTION(vp)) {
157 if ((ISCMD(vp->rkp, 'd') ||
158 ISCMD(vp->rkp, 'y')) &&
159 cs_fspace(sp, &cs))
160 return (1);
161 break;
162 }
163
164 /* Eat whitespace characters. */
165 if (cs_fblank(sp, &cs))
166 return (1);
167 if (cs.cs_flags == CS_EOF)
168 goto ret;
169 }
170 else
171 while (cnt--) {
172 state = cs.cs_flags == 0 &&
173 inword(cs.cs_ch) ? INWORD : NOTWORD;
174 for (;;) {
175 if (cs_next(sp, &cs))
176 return (1);
177 if (cs.cs_flags == CS_EOF)
178 goto ret;
179 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
180 break;
181 if (state == INWORD) {
182 if (!inword(cs.cs_ch))
183 break;
184 } else
185 if (inword(cs.cs_ch))
186 break;
187 }
188 /* See comment above. */
189 if (cnt == 0 && ISMOTION(vp)) {
190 if ((ISCMD(vp->rkp, 'd') ||
191 ISCMD(vp->rkp, 'y')) &&
192 cs_fspace(sp, &cs))
193 return (1);
194 break;
195 }
196
197 /* Eat whitespace characters. */
198 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
199 if (cs_fblank(sp, &cs))
200 return (1);
201 if (cs.cs_flags == CS_EOF)
202 goto ret;
203 }
204
205 /*
206 * If we didn't move, we must be at EOF.
207 *
208 * !!!
209 * That's okay for motion commands, however.
210 */
211 ret: if (!ISMOTION(vp) &&
212 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
213 v_eof(sp, &vp->m_start);
214 return (1);
215 }
216
217 /* Adjust the end of the range for motion commands. */
218 vp->m_stop.lno = cs.cs_lno;
219 vp->m_stop.cno = cs.cs_cno;
220 if (ISMOTION(vp) && cs.cs_flags == 0)
221 --vp->m_stop.cno;
222
223 /*
224 * Non-motion commands move to the end of the range. Delete
225 * and yank stay at the start, ignore others.
226 */
227 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
228 return (0);
229 }
230
231 /*
232 * v_wordE -- [count]E
233 * Move forward to the end of the bigword.
234 *
235 * PUBLIC: int v_wordE(SCR *, VICMD *);
236 */
237 int
v_wordE(SCR * sp,VICMD * vp)238 v_wordE(SCR *sp, VICMD *vp)
239 {
240 return (eword(sp, vp, BIGWORD));
241 }
242
243 /*
244 * v_worde -- [count]e
245 * Move forward to the end of the word.
246 *
247 * PUBLIC: int v_worde(SCR *, VICMD *);
248 */
249 int
v_worde(SCR * sp,VICMD * vp)250 v_worde(SCR *sp, VICMD *vp)
251 {
252 return (eword(sp, vp, LITTLEWORD));
253 }
254
255 /*
256 * eword --
257 * Move forward to the end of the word.
258 */
259 static int
eword(SCR * sp,VICMD * vp,enum which type)260 eword(SCR *sp, VICMD *vp, enum which type)
261 {
262 enum { INWORD, NOTWORD } state;
263 VCS cs;
264 u_long cnt;
265
266 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
267 cs.cs_lno = vp->m_start.lno;
268 cs.cs_cno = vp->m_start.cno;
269 if (cs_init(sp, &cs))
270 return (1);
271
272 /*
273 * !!!
274 * If in whitespace, or the next character is whitespace, move past
275 * it. (This doesn't count as a word move.) Stay at the character
276 * past the current one, it sets word "state" for the 'e' command.
277 */
278 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
279 if (cs_next(sp, &cs))
280 return (1);
281 if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
282 goto start;
283 }
284 if (cs_fblank(sp, &cs))
285 return (1);
286
287 /*
288 * Cyclically move to the next word -- this involves skipping
289 * over word characters and then any trailing non-word characters.
290 * Note, for the 'e' command, the definition of a word keeps
291 * switching.
292 */
293 start: if (type == BIGWORD)
294 while (cnt--) {
295 for (;;) {
296 if (cs_next(sp, &cs))
297 return (1);
298 if (cs.cs_flags == CS_EOF)
299 goto ret;
300 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
301 break;
302 }
303 /*
304 * When we reach the start of the word after the last
305 * word, we're done. If we changed state, back up one
306 * to the end of the previous word.
307 */
308 if (cnt == 0) {
309 if (cs.cs_flags == 0 && cs_prev(sp, &cs))
310 return (1);
311 break;
312 }
313
314 /* Eat whitespace characters. */
315 if (cs_fblank(sp, &cs))
316 return (1);
317 if (cs.cs_flags == CS_EOF)
318 goto ret;
319 }
320 else
321 while (cnt--) {
322 state = cs.cs_flags == 0 &&
323 inword(cs.cs_ch) ? INWORD : NOTWORD;
324 for (;;) {
325 if (cs_next(sp, &cs))
326 return (1);
327 if (cs.cs_flags == CS_EOF)
328 goto ret;
329 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
330 break;
331 if (state == INWORD) {
332 if (!inword(cs.cs_ch))
333 break;
334 } else
335 if (inword(cs.cs_ch))
336 break;
337 }
338 /* See comment above. */
339 if (cnt == 0) {
340 if (cs.cs_flags == 0 && cs_prev(sp, &cs))
341 return (1);
342 break;
343 }
344
345 /* Eat whitespace characters. */
346 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
347 if (cs_fblank(sp, &cs))
348 return (1);
349 if (cs.cs_flags == CS_EOF)
350 goto ret;
351 }
352
353 /*
354 * If we didn't move, we must be at EOF.
355 *
356 * !!!
357 * That's okay for motion commands, however.
358 */
359 ret: if (!ISMOTION(vp) &&
360 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
361 v_eof(sp, &vp->m_start);
362 return (1);
363 }
364
365 /* Set the end of the range for motion commands. */
366 vp->m_stop.lno = cs.cs_lno;
367 vp->m_stop.cno = cs.cs_cno;
368
369 /*
370 * Non-motion commands move to the end of the range.
371 * Delete and yank stay at the start, ignore others.
372 */
373 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
374 return (0);
375 }
376
377 /*
378 * v_WordB -- [count]B
379 * Move backward a bigword at a time.
380 *
381 * PUBLIC: int v_wordB(SCR *, VICMD *);
382 */
383 int
v_wordB(SCR * sp,VICMD * vp)384 v_wordB(SCR *sp, VICMD *vp)
385 {
386 return (bword(sp, vp, BIGWORD));
387 }
388
389 /*
390 * v_wordb -- [count]b
391 * Move backward a word at a time.
392 *
393 * PUBLIC: int v_wordb(SCR *, VICMD *);
394 */
395 int
v_wordb(SCR * sp,VICMD * vp)396 v_wordb(SCR *sp, VICMD *vp)
397 {
398 return (bword(sp, vp, LITTLEWORD));
399 }
400
401 /*
402 * bword --
403 * Move backward by words.
404 */
405 static int
bword(SCR * sp,VICMD * vp,enum which type)406 bword(SCR *sp, VICMD *vp, enum which type)
407 {
408 enum { INWORD, NOTWORD } state;
409 VCS cs;
410 u_long cnt;
411
412 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
413 cs.cs_lno = vp->m_start.lno;
414 cs.cs_cno = vp->m_start.cno;
415 if (cs_init(sp, &cs))
416 return (1);
417
418 /*
419 * !!!
420 * If in whitespace, or the previous character is whitespace, move
421 * past it. (This doesn't count as a word move.) Stay at the
422 * character before the current one, it sets word "state" for the
423 * 'b' command.
424 */
425 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
426 if (cs_prev(sp, &cs))
427 return (1);
428 if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
429 goto start;
430 }
431 if (cs_bblank(sp, &cs))
432 return (1);
433
434 /*
435 * Cyclically move to the beginning of the previous word -- this
436 * involves skipping over word characters and then any trailing
437 * non-word characters. Note, for the 'b' command, the definition
438 * of a word keeps switching.
439 */
440 start: if (type == BIGWORD)
441 while (cnt--) {
442 for (;;) {
443 if (cs_prev(sp, &cs))
444 return (1);
445 if (cs.cs_flags == CS_SOF)
446 goto ret;
447 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
448 break;
449 }
450 /*
451 * When we reach the end of the word before the last
452 * word, we're done. If we changed state, move forward
453 * one to the end of the next word.
454 */
455 if (cnt == 0) {
456 if (cs.cs_flags == 0 && cs_next(sp, &cs))
457 return (1);
458 break;
459 }
460
461 /* Eat whitespace characters. */
462 if (cs_bblank(sp, &cs))
463 return (1);
464 if (cs.cs_flags == CS_SOF)
465 goto ret;
466 }
467 else
468 while (cnt--) {
469 state = cs.cs_flags == 0 &&
470 inword(cs.cs_ch) ? INWORD : NOTWORD;
471 for (;;) {
472 if (cs_prev(sp, &cs))
473 return (1);
474 if (cs.cs_flags == CS_SOF)
475 goto ret;
476 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
477 break;
478 if (state == INWORD) {
479 if (!inword(cs.cs_ch))
480 break;
481 } else
482 if (inword(cs.cs_ch))
483 break;
484 }
485 /* See comment above. */
486 if (cnt == 0) {
487 if (cs.cs_flags == 0 && cs_next(sp, &cs))
488 return (1);
489 break;
490 }
491
492 /* Eat whitespace characters. */
493 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
494 if (cs_bblank(sp, &cs))
495 return (1);
496 if (cs.cs_flags == CS_SOF)
497 goto ret;
498 }
499
500 /* If we didn't move, we must be at SOF. */
501 ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
502 v_sof(sp, &vp->m_start);
503 return (1);
504 }
505
506 /* Set the end of the range for motion commands. */
507 vp->m_stop.lno = cs.cs_lno;
508 vp->m_stop.cno = cs.cs_cno;
509
510 /*
511 * All commands move to the end of the range. Motion commands
512 * adjust the starting point to the character before the current
513 * one.
514 *
515 * !!!
516 * The historic vi didn't get this right -- the `yb' command yanked
517 * the right stuff and even updated the cursor value, but the cursor
518 * was not actually updated on the screen.
519 */
520 vp->m_final = vp->m_stop;
521 if (ISMOTION(vp))
522 --vp->m_start.cno;
523 return (0);
524 }
525