1 /* $OpenBSD: v_sentence.c,v 1.8 2022/12/26 19:16:04 jmc Exp $ */
2
3 /*-
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7 * Keith Bostic. All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12 #include "config.h"
13
14 #include <sys/types.h>
15 #include <sys/queue.h>
16 #include <sys/time.h>
17
18 #include <bitstring.h>
19 #include <ctype.h>
20 #include <limits.h>
21 #include <stdio.h>
22
23 #include "../common/common.h"
24 #include "vi.h"
25
/*
 * !!!
 * In historic vi, a sentence was delimited by a '.', '?' or '!' character
 * followed by TWO spaces or a newline.  One or more empty lines were also
 * treated as a separate sentence.  The Berkeley documentation for historical
 * vi states that any number of ')', ']', '"' and '\'' characters can be
 * between the delimiter character and the spaces or end of line, however,
 * the historical implementation did not handle additional '"' characters.
 * We follow the documentation here, not the implementation.
 *
 * Once again, historical vi didn't do sentence movements associated with
 * counts consistently, mostly in the presence of lines containing only
 * white-space characters.
 *
 * This implementation also permits a single tab to delimit sentences, and
 * treats lines containing only white-space characters as empty lines.
 * Finally, tabs are eaten (along with spaces) when skipping to the start
 * of the text following a "sentence".
 */
45
46 /*
47 * v_sentencef -- [count])
48 * Move forward count sentences.
49 *
50 * PUBLIC: int v_sentencef(SCR *, VICMD *);
51 */
52 int
v_sentencef(SCR * sp,VICMD * vp)53 v_sentencef(SCR *sp, VICMD *vp)
54 {
55 enum { BLANK, NONE, PERIOD } state;
56 VCS cs;
57 size_t len;
58 u_long cnt;
59
60 cs.cs_lno = vp->m_start.lno;
61 cs.cs_cno = vp->m_start.cno;
62 if (cs_init(sp, &cs))
63 return (1);
64
65 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
66
67 /*
68 * !!!
69 * If in white-space, the next start of sentence counts as one.
70 * This may not handle " . " correctly, but it's real unclear
71 * what correctly means in that case.
72 */
73 if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) {
74 if (cs_fblank(sp, &cs))
75 return (1);
76 if (--cnt == 0) {
77 if (vp->m_start.lno != cs.cs_lno ||
78 vp->m_start.cno != cs.cs_cno)
79 goto okret;
80 return (1);
81 }
82 }
83
84 for (state = NONE;;) {
85 if (cs_next(sp, &cs))
86 return (1);
87 if (cs.cs_flags == CS_EOF)
88 break;
89 if (cs.cs_flags == CS_EOL) {
90 if ((state == PERIOD || state == BLANK) && --cnt == 0) {
91 if (cs_next(sp, &cs))
92 return (1);
93 if (cs.cs_flags == 0 &&
94 isblank(cs.cs_ch) && cs_fblank(sp, &cs))
95 return (1);
96 goto okret;
97 }
98 state = NONE;
99 continue;
100 }
101 if (cs.cs_flags == CS_EMP) { /* An EMP is two sentences. */
102 if (--cnt == 0)
103 goto okret;
104 if (cs_fblank(sp, &cs))
105 return (1);
106 if (--cnt == 0)
107 goto okret;
108 state = NONE;
109 continue;
110 }
111 switch (cs.cs_ch) {
112 case '.':
113 case '?':
114 case '!':
115 state = PERIOD;
116 break;
117 case ')':
118 case ']':
119 case '"':
120 case '\'':
121 if (state != PERIOD)
122 state = NONE;
123 break;
124 case '\t':
125 if (state == PERIOD)
126 state = BLANK;
127 /* FALLTHROUGH */
128 case ' ':
129 if (state == PERIOD) {
130 state = BLANK;
131 break;
132 }
133 if (state == BLANK && --cnt == 0) {
134 if (cs_fblank(sp, &cs))
135 return (1);
136 goto okret;
137 }
138 /* FALLTHROUGH */
139 default:
140 state = NONE;
141 break;
142 }
143 }
144
145 /* EOF is a movement sink, but it's an error not to have moved. */
146 if (vp->m_start.lno == cs.cs_lno && vp->m_start.cno == cs.cs_cno) {
147 v_eof(sp, NULL);
148 return (1);
149 }
150
151 okret: vp->m_stop.lno = cs.cs_lno;
152 vp->m_stop.cno = cs.cs_cno;
153
154 /*
155 * !!!
156 * Historic, uh, features, yeah, that's right, call 'em features.
157 * If the starting and ending cursor positions are at the first
158 * column in their lines, i.e. the movement is cutting entire lines,
159 * the buffer is in line mode, and the ending position is the last
160 * character of the previous line. Note check to make sure that
161 * it's not within a single line.
162 *
163 * Non-motion commands move to the end of the range. Delete and
164 * yank stay at the start. Ignore others. Adjust the end of the
165 * range for motion commands.
166 */
167 if (ISMOTION(vp)) {
168 if (vp->m_start.cno == 0 &&
169 (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
170 if (vp->m_start.lno < vp->m_stop.lno) {
171 if (db_get(sp,
172 --vp->m_stop.lno, DBG_FATAL, NULL, &len))
173 return (1);
174 vp->m_stop.cno = len ? len - 1 : 0;
175 }
176 F_SET(vp, VM_LMODE);
177 } else
178 --vp->m_stop.cno;
179 vp->m_final = vp->m_start;
180 } else
181 vp->m_final = vp->m_stop;
182 return (0);
183 }
184
185 /*
186 * v_sentenceb -- [count](
187 * Move backward count sentences.
188 *
189 * PUBLIC: int v_sentenceb(SCR *, VICMD *);
190 */
191 int
v_sentenceb(SCR * sp,VICMD * vp)192 v_sentenceb(SCR *sp, VICMD *vp)
193 {
194 VCS cs;
195 recno_t slno;
196 size_t len, scno;
197 u_long cnt;
198 int last;
199
200 /*
201 * !!!
202 * Historic vi permitted the user to hit SOF repeatedly.
203 */
204 if (vp->m_start.lno == 1 && vp->m_start.cno == 0)
205 return (0);
206
207 cs.cs_lno = vp->m_start.lno;
208 cs.cs_cno = vp->m_start.cno;
209 if (cs_init(sp, &cs))
210 return (1);
211
212 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
213
214 /*
215 * !!!
216 * In empty lines, skip to the previous non-white-space character.
217 * If in text, skip to the previous white-space character. Believe
218 * it or not, in the paragraph:
219 * ab cd.
220 * AB CD.
221 * if the cursor is on the 'A' or 'B', ( moves to the 'a'. If it
222 * is on the ' ', 'C' or 'D', it moves to the 'A'. Yes, Virginia,
223 * Berkeley was once a major center of drug activity.
224 */
225 if (cs.cs_flags == CS_EMP) {
226 if (cs_bblank(sp, &cs))
227 return (1);
228 for (;;) {
229 if (cs_prev(sp, &cs))
230 return (1);
231 if (cs.cs_flags != CS_EOL)
232 break;
233 }
234 } else if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
235 for (;;) {
236 if (cs_prev(sp, &cs))
237 return (1);
238 if (cs.cs_flags != 0 || isblank(cs.cs_ch))
239 break;
240 }
241
242 for (last = 0;;) {
243 if (cs_prev(sp, &cs))
244 return (1);
245 if (cs.cs_flags == CS_SOF) /* SOF is a movement sink. */
246 break;
247 if (cs.cs_flags == CS_EOL) {
248 last = 1;
249 continue;
250 }
251 if (cs.cs_flags == CS_EMP) {
252 if (--cnt == 0)
253 goto ret;
254 if (cs_bblank(sp, &cs))
255 return (1);
256 last = 0;
257 continue;
258 }
259 switch (cs.cs_ch) {
260 case '.':
261 case '?':
262 case '!':
263 if (!last || --cnt != 0) {
264 last = 0;
265 continue;
266 }
267
268 ret: slno = cs.cs_lno;
269 scno = cs.cs_cno;
270
271 /*
272 * Move to the start of the sentence, skipping blanks
273 * and special characters.
274 */
275 do {
276 if (cs_next(sp, &cs))
277 return (1);
278 } while (!cs.cs_flags &&
279 (cs.cs_ch == ')' || cs.cs_ch == ']' ||
280 cs.cs_ch == '"' || cs.cs_ch == '\''));
281 if ((cs.cs_flags || isblank(cs.cs_ch)) &&
282 cs_fblank(sp, &cs))
283 return (1);
284
285 /*
286 * If it was ". xyz", with the cursor on the 'x', or
287 * "end. ", with the cursor in the spaces, or the
288 * beginning of a sentence preceded by an empty line,
289 * we can end up where we started. Fix it.
290 */
291 if (vp->m_start.lno != cs.cs_lno ||
292 vp->m_start.cno != cs.cs_cno)
293 goto okret;
294
295 /*
296 * Well, if an empty line preceded possible blanks
297 * and the sentence, it could be a real sentence.
298 */
299 for (;;) {
300 if (cs_prev(sp, &cs))
301 return (1);
302 if (cs.cs_flags == CS_EOL)
303 continue;
304 if (cs.cs_flags == 0 && isblank(cs.cs_ch))
305 continue;
306 break;
307 }
308 if (cs.cs_flags == CS_EMP)
309 goto okret;
310
311 /* But it wasn't; try again. */
312 ++cnt;
313 cs.cs_lno = slno;
314 cs.cs_cno = scno;
315 last = 0;
316 break;
317 case '\t':
318 last = 1;
319 break;
320 default:
321 last =
322 cs.cs_flags == CS_EOL || isblank(cs.cs_ch) ||
323 cs.cs_ch == ')' || cs.cs_ch == ']' ||
324 cs.cs_ch == '"' || cs.cs_ch == '\'' ? 1 : 0;
325 }
326 }
327
328 okret: vp->m_stop.lno = cs.cs_lno;
329 vp->m_stop.cno = cs.cs_cno;
330
331 /*
332 * !!!
333 * If the starting and stopping cursor positions are at the first
334 * columns in the line, i.e. the movement is cutting an entire line,
335 * the buffer is in line mode, and the starting position is the last
336 * character of the previous line.
337 *
338 * All commands move to the end of the range. Adjust the start of
339 * the range for motion commands.
340 */
341 if (ISMOTION(vp)) {
342 if (vp->m_start.cno == 0 &&
343 (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
344 if (db_get(sp,
345 --vp->m_start.lno, DBG_FATAL, NULL, &len))
346 return (1);
347 vp->m_start.cno = len ? len - 1 : 0;
348 F_SET(vp, VM_LMODE);
349 } else
350 --vp->m_start.cno;
351 }
352 vp->m_final = vp->m_stop;
353 return (0);
354 }
355