1 /* $NetBSD: v_word.c,v 1.2 2013/11/22 15:52:06 christos Exp $ */ 2 /*- 3 * Copyright (c) 1992, 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * Copyright (c) 1992, 1993, 1994, 1995, 1996 6 * Keith Bostic. All rights reserved. 7 * 8 * See the LICENSE file for redistribution information. 9 */ 10 11 #include "config.h" 12 13 #ifndef lint 14 static const char sccsid[] = "Id: v_word.c,v 10.6 2001/06/25 15:19:36 skimo Exp (Berkeley) Date: 2001/06/25 15:19:36 "; 15 #endif /* not lint */ 16 17 #include <sys/types.h> 18 #include <sys/queue.h> 19 #include <sys/time.h> 20 21 #include <bitstring.h> 22 #include <ctype.h> 23 #include <limits.h> 24 #include <stdio.h> 25 26 #include "../common/common.h" 27 #include "vi.h" 28 29 /* 30 * There are two types of "words". Bigwords are easy -- groups of anything 31 * delimited by whitespace. Normal words are trickier. They are either a 32 * group of characters, numbers and underscores, or a group of anything but, 33 * delimited by whitespace. When for a word, if you're in whitespace, it's 34 * easy, just remove the whitespace and go to the beginning or end of the 35 * word. Otherwise, figure out if the next character is in a different group. 36 * If it is, go to the beginning or end of that group, otherwise, go to the 37 * beginning or end of the current group. The historic version of vi didn't 38 * get this right, so, for example, there were cases where "4e" was not the 39 * same as "eeee" -- in particular, single character words, and commands that 40 * began in whitespace were almost always handled incorrectly. To get it right 41 * you have to resolve the cursor after each search so that the look-ahead to 42 * figure out what type of "word" the cursor is in will be correct. 43 * 44 * Empty lines, and lines that consist of only white-space characters count 45 * as a single word, and the beginning and end of the file counts as an 46 * infinite number of words. 47 * 48 * Movements associated with commands are different than movement commands. 49 * For example, in "abc def", with the cursor on the 'a', "cw" is from 50 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white 51 * space is discarded from the change movement. Another example is that, 52 * in the same string, a "cw" on any white space character replaces that 53 * single character, and nothing else. Ain't nothin' in here that's easy. 54 * 55 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands 56 * would treat groups of empty lines as individual words, i.e. the command 57 * would move the cursor to each new empty line. The 'e' and 'E' commands 58 * would treat groups of empty lines as a single word, i.e. the first use 59 * would move past the group of lines. The 'b' command would just beep at 60 * you, or, if you did it from the start of the line as part of a motion 61 * command, go absolutely nuts. If the lines contained only white-space 62 * characters, the 'w' and 'W' commands would just beep at you, and the 'B', 63 * 'b', 'E' and 'e' commands would treat the group as a single word, and 64 * the 'B' and 'b' commands will treat the lines as individual words. This 65 * implementation treats all of these cases as a single white-space word. 66 */ 67 68 enum which {BIGWORD, LITTLEWORD}; 69 70 static int bword __P((SCR *, VICMD *, enum which)); 71 static int eword __P((SCR *, VICMD *, enum which)); 72 static int fword __P((SCR *, VICMD *, enum which)); 73 74 /* 75 * v_wordW -- [count]W 76 * Move forward a bigword at a time. 77 * 78 * PUBLIC: int v_wordW __P((SCR *, VICMD *)); 79 */ 80 int 81 v_wordW(SCR *sp, VICMD *vp) 82 { 83 return (fword(sp, vp, BIGWORD)); 84 } 85 86 /* 87 * v_wordw -- [count]w 88 * Move forward a word at a time. 89 * 90 * PUBLIC: int v_wordw __P((SCR *, VICMD *)); 91 */ 92 int 93 v_wordw(SCR *sp, VICMD *vp) 94 { 95 return (fword(sp, vp, LITTLEWORD)); 96 } 97 98 /* 99 * fword -- 100 * Move forward by words. 101 */ 102 static int 103 fword(SCR *sp, VICMD *vp, enum which type) 104 { 105 enum { INWORD, NOTWORD } state; 106 VCS cs; 107 u_long cnt; 108 109 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 110 cs.cs_lno = vp->m_start.lno; 111 cs.cs_cno = vp->m_start.cno; 112 if (cs_init(sp, &cs)) 113 return (1); 114 115 /* 116 * If in white-space: 117 * If the count is 1, and it's a change command, we're done. 118 * Else, move to the first non-white-space character, which 119 * counts as a single word move. If it's a motion command, 120 * don't move off the end of the line. 121 */ 122 if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))) { 123 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { 124 if (ISCMD(vp->rkp, 'c')) 125 return (0); 126 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { 127 if (cs_fspace(sp, &cs)) 128 return (1); 129 goto ret; 130 } 131 } 132 if (cs_fblank(sp, &cs)) 133 return (1); 134 --cnt; 135 } 136 137 /* 138 * Cyclically move to the next word -- this involves skipping 139 * over word characters and then any trailing non-word characters. 140 * Note, for the 'w' command, the definition of a word keeps 141 * switching. 142 */ 143 if (type == BIGWORD) 144 while (cnt--) { 145 for (;;) { 146 if (cs_next(sp, &cs)) 147 return (1); 148 if (cs.cs_flags == CS_EOF) 149 goto ret; 150 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 151 break; 152 } 153 /* 154 * If a motion command and we're at the end of the 155 * last word, we're done. Delete and yank eat any 156 * trailing blanks, but we don't move off the end 157 * of the line regardless. 158 */ 159 if (cnt == 0 && ISMOTION(vp)) { 160 if ((ISCMD(vp->rkp, 'd') || 161 ISCMD(vp->rkp, 'y')) && 162 cs_fspace(sp, &cs)) 163 return (1); 164 break; 165 } 166 167 /* Eat whitespace characters. */ 168 if (cs_fblank(sp, &cs)) 169 return (1); 170 if (cs.cs_flags == CS_EOF) 171 goto ret; 172 } 173 else 174 while (cnt--) { 175 state = cs.cs_flags == 0 && 176 inword(cs.cs_ch) ? INWORD : NOTWORD; 177 for (;;) { 178 if (cs_next(sp, &cs)) 179 return (1); 180 if (cs.cs_flags == CS_EOF) 181 goto ret; 182 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 183 break; 184 if (state == INWORD) { 185 if (!inword(cs.cs_ch)) 186 break; 187 } else 188 if (inword(cs.cs_ch)) 189 break; 190 } 191 /* See comment above. */ 192 if (cnt == 0 && ISMOTION(vp)) { 193 if ((ISCMD(vp->rkp, 'd') || 194 ISCMD(vp->rkp, 'y')) && 195 cs_fspace(sp, &cs)) 196 return (1); 197 break; 198 } 199 200 /* Eat whitespace characters. */ 201 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 202 if (cs_fblank(sp, &cs)) 203 return (1); 204 if (cs.cs_flags == CS_EOF) 205 goto ret; 206 } 207 208 /* 209 * If we didn't move, we must be at EOF. 210 * 211 * !!! 212 * That's okay for motion commands, however. 213 */ 214 ret: if (!ISMOTION(vp) && 215 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 216 v_eof(sp, &vp->m_start); 217 return (1); 218 } 219 220 /* Adjust the end of the range for motion commands. */ 221 vp->m_stop.lno = cs.cs_lno; 222 vp->m_stop.cno = cs.cs_cno; 223 if (ISMOTION(vp) && cs.cs_flags == 0) 224 --vp->m_stop.cno; 225 226 /* 227 * Non-motion commands move to the end of the range. Delete 228 * and yank stay at the start, ignore others. 229 */ 230 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 231 return (0); 232 } 233 234 /* 235 * v_wordE -- [count]E 236 * Move forward to the end of the bigword. 237 * 238 * PUBLIC: int v_wordE __P((SCR *, VICMD *)); 239 */ 240 int 241 v_wordE(SCR *sp, VICMD *vp) 242 { 243 return (eword(sp, vp, BIGWORD)); 244 } 245 246 /* 247 * v_worde -- [count]e 248 * Move forward to the end of the word. 249 * 250 * PUBLIC: int v_worde __P((SCR *, VICMD *)); 251 */ 252 int 253 v_worde(SCR *sp, VICMD *vp) 254 { 255 return (eword(sp, vp, LITTLEWORD)); 256 } 257 258 /* 259 * eword -- 260 * Move forward to the end of the word. 261 */ 262 static int 263 eword(SCR *sp, VICMD *vp, enum which type) 264 { 265 enum { INWORD, NOTWORD } state; 266 VCS cs; 267 u_long cnt; 268 269 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 270 cs.cs_lno = vp->m_start.lno; 271 cs.cs_cno = vp->m_start.cno; 272 if (cs_init(sp, &cs)) 273 return (1); 274 275 /* 276 * !!! 277 * If in whitespace, or the next character is whitespace, move past 278 * it. (This doesn't count as a word move.) Stay at the character 279 * past the current one, it sets word "state" for the 'e' command. 280 */ 281 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) { 282 if (cs_next(sp, &cs)) 283 return (1); 284 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) 285 goto start; 286 } 287 if (cs_fblank(sp, &cs)) 288 return (1); 289 290 /* 291 * Cyclically move to the next word -- this involves skipping 292 * over word characters and then any trailing non-word characters. 293 * Note, for the 'e' command, the definition of a word keeps 294 * switching. 295 */ 296 start: if (type == BIGWORD) 297 while (cnt--) { 298 for (;;) { 299 if (cs_next(sp, &cs)) 300 return (1); 301 if (cs.cs_flags == CS_EOF) 302 goto ret; 303 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 304 break; 305 } 306 /* 307 * When we reach the start of the word after the last 308 * word, we're done. If we changed state, back up one 309 * to the end of the previous word. 310 */ 311 if (cnt == 0) { 312 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 313 return (1); 314 break; 315 } 316 317 /* Eat whitespace characters. */ 318 if (cs_fblank(sp, &cs)) 319 return (1); 320 if (cs.cs_flags == CS_EOF) 321 goto ret; 322 } 323 else 324 while (cnt--) { 325 state = cs.cs_flags == 0 && 326 inword(cs.cs_ch) ? INWORD : NOTWORD; 327 for (;;) { 328 if (cs_next(sp, &cs)) 329 return (1); 330 if (cs.cs_flags == CS_EOF) 331 goto ret; 332 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 333 break; 334 if (state == INWORD) { 335 if (!inword(cs.cs_ch)) 336 break; 337 } else 338 if (inword(cs.cs_ch)) 339 break; 340 } 341 /* See comment above. */ 342 if (cnt == 0) { 343 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 344 return (1); 345 break; 346 } 347 348 /* Eat whitespace characters. */ 349 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 350 if (cs_fblank(sp, &cs)) 351 return (1); 352 if (cs.cs_flags == CS_EOF) 353 goto ret; 354 } 355 356 /* 357 * If we didn't move, we must be at EOF. 358 * 359 * !!! 360 * That's okay for motion commands, however. 361 */ 362 ret: if (!ISMOTION(vp) && 363 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 364 v_eof(sp, &vp->m_start); 365 return (1); 366 } 367 368 /* Set the end of the range for motion commands. */ 369 vp->m_stop.lno = cs.cs_lno; 370 vp->m_stop.cno = cs.cs_cno; 371 372 /* 373 * Non-motion commands move to the end of the range. 374 * Delete and yank stay at the start, ignore others. 375 */ 376 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 377 return (0); 378 } 379 380 /* 381 * v_WordB -- [count]B 382 * Move backward a bigword at a time. 383 * 384 * PUBLIC: int v_wordB __P((SCR *, VICMD *)); 385 */ 386 int 387 v_wordB(SCR *sp, VICMD *vp) 388 { 389 return (bword(sp, vp, BIGWORD)); 390 } 391 392 /* 393 * v_wordb -- [count]b 394 * Move backward a word at a time. 395 * 396 * PUBLIC: int v_wordb __P((SCR *, VICMD *)); 397 */ 398 int 399 v_wordb(SCR *sp, VICMD *vp) 400 { 401 return (bword(sp, vp, LITTLEWORD)); 402 } 403 404 /* 405 * bword -- 406 * Move backward by words. 407 */ 408 static int 409 bword(SCR *sp, VICMD *vp, enum which type) 410 { 411 enum { INWORD, NOTWORD } state; 412 VCS cs; 413 u_long cnt; 414 415 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 416 cs.cs_lno = vp->m_start.lno; 417 cs.cs_cno = vp->m_start.cno; 418 if (cs_init(sp, &cs)) 419 return (1); 420 421 /* 422 * !!! 423 * If in whitespace, or the previous character is whitespace, move 424 * past it. (This doesn't count as a word move.) Stay at the 425 * character before the current one, it sets word "state" for the 426 * 'b' command. 427 */ 428 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) { 429 if (cs_prev(sp, &cs)) 430 return (1); 431 if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) 432 goto start; 433 } 434 if (cs_bblank(sp, &cs)) 435 return (1); 436 437 /* 438 * Cyclically move to the beginning of the previous word -- this 439 * involves skipping over word characters and then any trailing 440 * non-word characters. Note, for the 'b' command, the definition 441 * of a word keeps switching. 442 */ 443 start: if (type == BIGWORD) 444 while (cnt--) { 445 for (;;) { 446 if (cs_prev(sp, &cs)) 447 return (1); 448 if (cs.cs_flags == CS_SOF) 449 goto ret; 450 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 451 break; 452 } 453 /* 454 * When we reach the end of the word before the last 455 * word, we're done. If we changed state, move forward 456 * one to the end of the next word. 457 */ 458 if (cnt == 0) { 459 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 460 return (1); 461 break; 462 } 463 464 /* Eat whitespace characters. */ 465 if (cs_bblank(sp, &cs)) 466 return (1); 467 if (cs.cs_flags == CS_SOF) 468 goto ret; 469 } 470 else 471 while (cnt--) { 472 state = cs.cs_flags == 0 && 473 inword(cs.cs_ch) ? INWORD : NOTWORD; 474 for (;;) { 475 if (cs_prev(sp, &cs)) 476 return (1); 477 if (cs.cs_flags == CS_SOF) 478 goto ret; 479 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 480 break; 481 if (state == INWORD) { 482 if (!inword(cs.cs_ch)) 483 break; 484 } else 485 if (inword(cs.cs_ch)) 486 break; 487 } 488 /* See comment above. */ 489 if (cnt == 0) { 490 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 491 return (1); 492 break; 493 } 494 495 /* Eat whitespace characters. */ 496 if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch)) 497 if (cs_bblank(sp, &cs)) 498 return (1); 499 if (cs.cs_flags == CS_SOF) 500 goto ret; 501 } 502 503 /* If we didn't move, we must be at SOF. */ 504 ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 505 v_sof(sp, &vp->m_start); 506 return (1); 507 } 508 509 /* Set the end of the range for motion commands. */ 510 vp->m_stop.lno = cs.cs_lno; 511 vp->m_stop.cno = cs.cs_cno; 512 513 /* 514 * All commands move to the end of the range. Motion commands 515 * adjust the starting point to the character before the current 516 * one. 517 * 518 * !!! 519 * The historic vi didn't get this right -- the `yb' command yanked 520 * the right stuff and even updated the cursor value, but the cursor 521 * was not actually updated on the screen. 522 */ 523 vp->m_final = vp->m_stop; 524 if (ISMOTION(vp)) 525 --vp->m_start.cno; 526 return (0); 527 } 528