1 /* $NetBSD: v_word.c,v 1.4 2017/11/21 07:43:47 rin Exp $ */ 2 /*- 3 * Copyright (c) 1992, 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * Copyright (c) 1992, 1993, 1994, 1995, 1996 6 * Keith Bostic. All rights reserved. 7 * 8 * See the LICENSE file for redistribution information. 9 */ 10 11 #include "config.h" 12 13 #include <sys/cdefs.h> 14 #if 0 15 #ifndef lint 16 static const char sccsid[] = "Id: v_word.c,v 10.6 2001/06/25 15:19:36 skimo Exp (Berkeley) Date: 2001/06/25 15:19:36 "; 17 #endif /* not lint */ 18 #else 19 __RCSID("$NetBSD: v_word.c,v 1.4 2017/11/21 07:43:47 rin Exp $"); 20 #endif 21 22 #include <sys/types.h> 23 #include <sys/queue.h> 24 #include <sys/time.h> 25 26 #include <bitstring.h> 27 #include <ctype.h> 28 #include <limits.h> 29 #include <stdio.h> 30 31 #include "../common/common.h" 32 #include "vi.h" 33 34 /* 35 * There are two types of "words". Bigwords are easy -- groups of anything 36 * delimited by whitespace. Normal words are trickier. They are either a 37 * group of characters, numbers and underscores, or a group of anything but, 38 * delimited by whitespace. When for a word, if you're in whitespace, it's 39 * easy, just remove the whitespace and go to the beginning or end of the 40 * word. Otherwise, figure out if the next character is in a different group. 41 * If it is, go to the beginning or end of that group, otherwise, go to the 42 * beginning or end of the current group. The historic version of vi didn't 43 * get this right, so, for example, there were cases where "4e" was not the 44 * same as "eeee" -- in particular, single character words, and commands that 45 * began in whitespace were almost always handled incorrectly. To get it right 46 * you have to resolve the cursor after each search so that the look-ahead to 47 * figure out what type of "word" the cursor is in will be correct. 48 * 49 * Empty lines, and lines that consist of only white-space characters count 50 * as a single word, and the beginning and end of the file counts as an 51 * infinite number of words. 52 * 53 * Movements associated with commands are different than movement commands. 54 * For example, in "abc def", with the cursor on the 'a', "cw" is from 55 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white 56 * space is discarded from the change movement. Another example is that, 57 * in the same string, a "cw" on any white space character replaces that 58 * single character, and nothing else. Ain't nothin' in here that's easy. 59 * 60 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands 61 * would treat groups of empty lines as individual words, i.e. the command 62 * would move the cursor to each new empty line. The 'e' and 'E' commands 63 * would treat groups of empty lines as a single word, i.e. the first use 64 * would move past the group of lines. The 'b' command would just beep at 65 * you, or, if you did it from the start of the line as part of a motion 66 * command, go absolutely nuts. If the lines contained only white-space 67 * characters, the 'w' and 'W' commands would just beep at you, and the 'B', 68 * 'b', 'E' and 'e' commands would treat the group as a single word, and 69 * the 'B' and 'b' commands will treat the lines as individual words. This 70 * implementation treats all of these cases as a single white-space word. 71 * 72 * We regard a boundary between single- and multi-width characters as 73 * a word boundary. 74 */ 75 76 enum which {BIGWORD, LITTLEWORD}; 77 78 static int bword __P((SCR *, VICMD *, enum which)); 79 static int eword __P((SCR *, VICMD *, enum which)); 80 static int fword __P((SCR *, VICMD *, enum which)); 81 82 /* 83 * v_wordW -- [count]W 84 * Move forward a bigword at a time. 85 * 86 * PUBLIC: int v_wordW __P((SCR *, VICMD *)); 87 */ 88 int 89 v_wordW(SCR *sp, VICMD *vp) 90 { 91 return (fword(sp, vp, BIGWORD)); 92 } 93 94 /* 95 * v_wordw -- [count]w 96 * Move forward a word at a time. 97 * 98 * PUBLIC: int v_wordw __P((SCR *, VICMD *)); 99 */ 100 int 101 v_wordw(SCR *sp, VICMD *vp) 102 { 103 return (fword(sp, vp, LITTLEWORD)); 104 } 105 106 /* 107 * fword -- 108 * Move forward by words. 109 */ 110 static int 111 fword(SCR *sp, VICMD *vp, enum which type) 112 { 113 enum { INWORD, NOTWORD } state; 114 VCS cs; 115 u_long cnt; 116 int nmw, omw; 117 118 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 119 cs.cs_lno = vp->m_start.lno; 120 cs.cs_cno = vp->m_start.cno; 121 if (cs_init(sp, &cs)) 122 return (1); 123 124 /* 125 * If in white-space: 126 * If the count is 1, and it's a change command, we're done. 127 * Else, move to the first non-white-space character, which 128 * counts as a single word move. If it's a motion command, 129 * don't move off the end of the line. 130 */ 131 if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))) { 132 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { 133 if (ISCMD(vp->rkp, 'c')) 134 return (0); 135 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { 136 if (cs_fspace(sp, &cs)) 137 return (1); 138 goto ret; 139 } 140 } 141 if (cs_fblank(sp, &cs)) 142 return (1); 143 --cnt; 144 } 145 146 /* 147 * Cyclically move to the next word -- this involves skipping 148 * over word characters and then any trailing non-word characters. 149 * Note, for the 'w' command, the definition of a word keeps 150 * switching. 151 */ 152 if (type == BIGWORD) 153 while (cnt--) { 154 nmw = ISMULTIWIDTH(sp, cs.cs_ch); 155 for (;;) { 156 omw = nmw; 157 if (cs_next(sp, &cs)) 158 return (1); 159 if (cs.cs_flags == CS_EOF) 160 goto ret; 161 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) || 162 (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw) 163 break; 164 } 165 /* 166 * If a motion command and we're at the end of the 167 * last word, we're done. Delete and yank eat any 168 * trailing blanks, but we don't move off the end 169 * of the line regardless. 170 */ 171 if (cnt == 0 && ISMOTION(vp)) { 172 if ((ISCMD(vp->rkp, 'd') || 173 ISCMD(vp->rkp, 'y')) && 174 cs_fspace(sp, &cs)) 175 return (1); 176 break; 177 } 178 179 /* Eat whitespace characters. */ 180 if (nmw == omw && cs_fblank(sp, &cs)) 181 return (1); 182 if (cs.cs_flags == CS_EOF) 183 goto ret; 184 } 185 else 186 while (cnt--) { 187 state = cs.cs_flags == 0 && 188 inword(cs.cs_ch) ? INWORD : NOTWORD; 189 nmw = ISMULTIWIDTH(sp, cs.cs_ch); 190 for (;;) { 191 omw = nmw; 192 if (cs_next(sp, &cs)) 193 return (1); 194 if (cs.cs_flags == CS_EOF) 195 goto ret; 196 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) || 197 (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw) 198 break; 199 if (state == INWORD) { 200 if (!inword(cs.cs_ch)) 201 break; 202 } else 203 if (inword(cs.cs_ch)) 204 break; 205 } 206 /* See comment above. */ 207 if (cnt == 0 && ISMOTION(vp)) { 208 if ((ISCMD(vp->rkp, 'd') || 209 ISCMD(vp->rkp, 'y')) && 210 cs_fspace(sp, &cs)) 211 return (1); 212 break; 213 } 214 215 /* Eat whitespace characters. */ 216 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 217 if (cs_fblank(sp, &cs)) 218 return (1); 219 if (cs.cs_flags == CS_EOF) 220 goto ret; 221 } 222 223 /* 224 * If we didn't move, we must be at EOF. 225 * 226 * !!! 227 * That's okay for motion commands, however. 228 */ 229 ret: if (!ISMOTION(vp) && 230 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 231 v_eof(sp, &vp->m_start); 232 return (1); 233 } 234 235 /* Adjust the end of the range for motion commands. */ 236 vp->m_stop.lno = cs.cs_lno; 237 vp->m_stop.cno = cs.cs_cno; 238 if (ISMOTION(vp) && cs.cs_flags == 0) 239 --vp->m_stop.cno; 240 241 /* 242 * Non-motion commands move to the end of the range. Delete 243 * and yank stay at the start, ignore others. 244 */ 245 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 246 return (0); 247 } 248 249 /* 250 * v_wordE -- [count]E 251 * Move forward to the end of the bigword. 252 * 253 * PUBLIC: int v_wordE __P((SCR *, VICMD *)); 254 */ 255 int 256 v_wordE(SCR *sp, VICMD *vp) 257 { 258 return (eword(sp, vp, BIGWORD)); 259 } 260 261 /* 262 * v_worde -- [count]e 263 * Move forward to the end of the word. 264 * 265 * PUBLIC: int v_worde __P((SCR *, VICMD *)); 266 */ 267 int 268 v_worde(SCR *sp, VICMD *vp) 269 { 270 return (eword(sp, vp, LITTLEWORD)); 271 } 272 273 /* 274 * eword -- 275 * Move forward to the end of the word. 276 */ 277 static int 278 eword(SCR *sp, VICMD *vp, enum which type) 279 { 280 enum { INWORD, NOTWORD } state; 281 VCS cs; 282 u_long cnt; 283 int nmw, omw; 284 285 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 286 cs.cs_lno = vp->m_start.lno; 287 cs.cs_cno = vp->m_start.cno; 288 if (cs_init(sp, &cs)) 289 return (1); 290 291 /* 292 * !!! 293 * If in whitespace, or the next character is whitespace, move past 294 * it. (This doesn't count as a word move.) Stay at the character 295 * past the current one, it sets word "state" for the 'e' command. 296 */ 297 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) { 298 if (cs_next(sp, &cs)) 299 return (1); 300 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) 301 goto start; 302 } 303 if (cs_fblank(sp, &cs)) 304 return (1); 305 306 /* 307 * Cyclically move to the next word -- this involves skipping 308 * over word characters and then any trailing non-word characters. 309 * Note, for the 'e' command, the definition of a word keeps 310 * switching. 311 */ 312 start: if (type == BIGWORD) 313 while (cnt--) { 314 nmw = ISMULTIWIDTH(sp, cs.cs_ch); 315 for (;;) { 316 omw = nmw; 317 if (cs_next(sp, &cs)) 318 return (1); 319 if (cs.cs_flags == CS_EOF) 320 goto ret; 321 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) || 322 (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw) 323 break; 324 } 325 /* 326 * When we reach the start of the word after the last 327 * word, we're done. If we changed state, back up one 328 * to the end of the previous word. 329 */ 330 if (cnt == 0) { 331 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 332 return (1); 333 break; 334 } 335 336 /* Eat whitespace characters. */ 337 if (nmw == omw && cs_fblank(sp, &cs)) 338 return (1); 339 if (cs.cs_flags == CS_EOF) 340 goto ret; 341 } 342 else 343 while (cnt--) { 344 state = cs.cs_flags == 0 && 345 inword(cs.cs_ch) ? INWORD : NOTWORD; 346 nmw = ISMULTIWIDTH(sp, cs.cs_ch); 347 for (;;) { 348 omw = nmw; 349 if (cs_next(sp, &cs)) 350 return (1); 351 if (cs.cs_flags == CS_EOF) 352 goto ret; 353 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) || 354 (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw) 355 break; 356 if (state == INWORD) { 357 if (!inword(cs.cs_ch)) 358 break; 359 } else 360 if (inword(cs.cs_ch)) 361 break; 362 } 363 /* See comment above. */ 364 if (cnt == 0) { 365 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 366 return (1); 367 break; 368 } 369 370 /* Eat whitespace characters. */ 371 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 372 if (cs_fblank(sp, &cs)) 373 return (1); 374 if (cs.cs_flags == CS_EOF) 375 goto ret; 376 } 377 378 /* 379 * If we didn't move, we must be at EOF. 380 * 381 * !!! 382 * That's okay for motion commands, however. 383 */ 384 ret: if (!ISMOTION(vp) && 385 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 386 v_eof(sp, &vp->m_start); 387 return (1); 388 } 389 390 /* Set the end of the range for motion commands. */ 391 vp->m_stop.lno = cs.cs_lno; 392 vp->m_stop.cno = cs.cs_cno; 393 394 /* 395 * Non-motion commands move to the end of the range. 396 * Delete and yank stay at the start, ignore others. 397 */ 398 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 399 return (0); 400 } 401 402 /* 403 * v_WordB -- [count]B 404 * Move backward a bigword at a time. 405 * 406 * PUBLIC: int v_wordB __P((SCR *, VICMD *)); 407 */ 408 int 409 v_wordB(SCR *sp, VICMD *vp) 410 { 411 return (bword(sp, vp, BIGWORD)); 412 } 413 414 /* 415 * v_wordb -- [count]b 416 * Move backward a word at a time. 417 * 418 * PUBLIC: int v_wordb __P((SCR *, VICMD *)); 419 */ 420 int 421 v_wordb(SCR *sp, VICMD *vp) 422 { 423 return (bword(sp, vp, LITTLEWORD)); 424 } 425 426 /* 427 * bword -- 428 * Move backward by words. 429 */ 430 static int 431 bword(SCR *sp, VICMD *vp, enum which type) 432 { 433 enum { INWORD, NOTWORD } state; 434 VCS cs; 435 u_long cnt; 436 int nmw, omw; 437 438 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 439 cs.cs_lno = vp->m_start.lno; 440 cs.cs_cno = vp->m_start.cno; 441 if (cs_init(sp, &cs)) 442 return (1); 443 444 /* 445 * !!! 446 * If in whitespace, or the previous character is whitespace, move 447 * past it. (This doesn't count as a word move.) Stay at the 448 * character before the current one, it sets word "state" for the 449 * 'b' command. 450 */ 451 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) { 452 if (cs_prev(sp, &cs)) 453 return (1); 454 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) 455 goto start; 456 } 457 if (cs_bblank(sp, &cs)) 458 return (1); 459 460 /* 461 * Cyclically move to the beginning of the previous word -- this 462 * involves skipping over word characters and then any trailing 463 * non-word characters. Note, for the 'b' command, the definition 464 * of a word keeps switching. 465 */ 466 start: if (type == BIGWORD) 467 while (cnt--) { 468 nmw = ISMULTIWIDTH(sp, cs.cs_ch); 469 for (;;) { 470 omw = nmw; 471 if (cs_prev(sp, &cs)) 472 return (1); 473 if (cs.cs_flags == CS_SOF) 474 goto ret; 475 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) || 476 (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw) 477 break; 478 } 479 /* 480 * When we reach the end of the word before the last 481 * word, we're done. If we changed state, move forward 482 * one to the end of the next word. 483 */ 484 if (cnt == 0) { 485 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 486 return (1); 487 break; 488 } 489 490 /* Eat whitespace characters. */ 491 if (nmw == omw && cs_bblank(sp, &cs)) 492 return (1); 493 if (cs.cs_flags == CS_SOF) 494 goto ret; 495 } 496 else 497 while (cnt--) { 498 state = cs.cs_flags == 0 && 499 inword(cs.cs_ch) ? INWORD : NOTWORD; 500 nmw = ISMULTIWIDTH(sp, cs.cs_ch); 501 for (;;) { 502 omw = nmw; 503 if (cs_prev(sp, &cs)) 504 return (1); 505 if (cs.cs_flags == CS_SOF) 506 goto ret; 507 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch) || 508 (nmw = ISMULTIWIDTH(sp, cs.cs_ch)) != omw) 509 break; 510 if (state == INWORD) { 511 if (!inword(cs.cs_ch)) 512 break; 513 } else 514 if (inword(cs.cs_ch)) 515 break; 516 } 517 /* See comment above. */ 518 if (cnt == 0) { 519 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 520 return (1); 521 break; 522 } 523 524 /* Eat whitespace characters. */ 525 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 526 if (cs_bblank(sp, &cs)) 527 return (1); 528 if (cs.cs_flags == CS_SOF) 529 goto ret; 530 } 531 532 /* If we didn't move, we must be at SOF. */ 533 ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 534 v_sof(sp, &vp->m_start); 535 return (1); 536 } 537 538 /* Set the end of the range for motion commands. */ 539 vp->m_stop.lno = cs.cs_lno; 540 vp->m_stop.cno = cs.cs_cno; 541 542 /* 543 * All commands move to the end of the range. Motion commands 544 * adjust the starting point to the character before the current 545 * one. 546 * 547 * !!! 548 * The historic vi didn't get this right -- the `yb' command yanked 549 * the right stuff and even updated the cursor value, but the cursor 550 * was not actually updated on the screen. 551 */ 552 vp->m_final = vp->m_stop; 553 if (ISMOTION(vp)) 554 --vp->m_start.cno; 555 return (0); 556 } 557