xref: /openbsd-src/usr.bin/vi/vi/v_sentence.c (revision d9a51c353c88dac7b4a389c112b4cfe97b8e3a46)
1 /*	$OpenBSD: v_sentence.c,v 1.8 2022/12/26 19:16:04 jmc Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  * Copyright (c) 1992, 1993, 1994, 1995, 1996
7  *	Keith Bostic.  All rights reserved.
8  *
9  * See the LICENSE file for redistribution information.
10  */
11 
12 #include "config.h"
13 
14 #include <sys/types.h>
15 #include <sys/queue.h>
16 #include <sys/time.h>
17 
18 #include <bitstring.h>
19 #include <ctype.h>
20 #include <limits.h>
21 #include <stdio.h>
22 
23 #include "../common/common.h"
24 #include "vi.h"
25 
26 /*
27  * !!!
28  * In historic vi, a sentence was delimited by a '.', '?' or '!' character
29  * followed by TWO spaces or a newline.  One or more empty lines was also
30  * treated as a separate sentence.  The Berkeley documentation for historical
31  * vi states that any number of ')', ']', '"' and '\'' characters can be
32  * between the delimiter character and the spaces or end of line, however,
33  * the historical implementation did not handle additional '"' characters.
34  * We follow the documentation here, not the implementation.
35  *
36  * Once again, historical vi didn't do sentence movements associated with
37  * counts consistently, mostly in the presence of lines containing only
38  * white-space characters.
39  *
40  * This implementation also permits a single tab to delimit sentences, and
41  * treats lines containing only white-space characters as empty lines.
42  * Finally, tabs are eaten (along with spaces) when skipping to the start
43  * of the text following a "sentence".
44  */
45 
46 /*
47  * v_sentencef -- [count])
48  *	Move forward count sentences.
49  *
50  * PUBLIC: int v_sentencef(SCR *, VICMD *);
51  */
52 int
v_sentencef(SCR * sp,VICMD * vp)53 v_sentencef(SCR *sp, VICMD *vp)
54 {
55 	enum { BLANK, NONE, PERIOD } state;
56 	VCS cs;
57 	size_t len;
58 	u_long cnt;
59 
60 	cs.cs_lno = vp->m_start.lno;
61 	cs.cs_cno = vp->m_start.cno;
62 	if (cs_init(sp, &cs))
63 		return (1);
64 
65 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
66 
67 	/*
68 	 * !!!
69 	 * If in white-space, the next start of sentence counts as one.
70 	 * This may not handle "  .  " correctly, but it's real unclear
71 	 * what correctly means in that case.
72 	 */
73 	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) {
74 		if (cs_fblank(sp, &cs))
75 			return (1);
76 		if (--cnt == 0) {
77 			if (vp->m_start.lno != cs.cs_lno ||
78 			    vp->m_start.cno != cs.cs_cno)
79 				goto okret;
80 			return (1);
81 		}
82 	}
83 
84 	for (state = NONE;;) {
85 		if (cs_next(sp, &cs))
86 			return (1);
87 		if (cs.cs_flags == CS_EOF)
88 			break;
89 		if (cs.cs_flags == CS_EOL) {
90 			if ((state == PERIOD || state == BLANK) && --cnt == 0) {
91 				if (cs_next(sp, &cs))
92 					return (1);
93 				if (cs.cs_flags == 0 &&
94 				    isblank(cs.cs_ch) && cs_fblank(sp, &cs))
95 					return (1);
96 				goto okret;
97 			}
98 			state = NONE;
99 			continue;
100 		}
101 		if (cs.cs_flags == CS_EMP) {	/* An EMP is two sentences. */
102 			if (--cnt == 0)
103 				goto okret;
104 			if (cs_fblank(sp, &cs))
105 				return (1);
106 			if (--cnt == 0)
107 				goto okret;
108 			state = NONE;
109 			continue;
110 		}
111 		switch (cs.cs_ch) {
112 		case '.':
113 		case '?':
114 		case '!':
115 			state = PERIOD;
116 			break;
117 		case ')':
118 		case ']':
119 		case '"':
120 		case '\'':
121 			if (state != PERIOD)
122 				state = NONE;
123 			break;
124 		case '\t':
125 			if (state == PERIOD)
126 				state = BLANK;
127 			/* FALLTHROUGH */
128 		case ' ':
129 			if (state == PERIOD) {
130 				state = BLANK;
131 				break;
132 			}
133 			if (state == BLANK && --cnt == 0) {
134 				if (cs_fblank(sp, &cs))
135 					return (1);
136 				goto okret;
137 			}
138 			/* FALLTHROUGH */
139 		default:
140 			state = NONE;
141 			break;
142 		}
143 	}
144 
145 	/* EOF is a movement sink, but it's an error not to have moved. */
146 	if (vp->m_start.lno == cs.cs_lno && vp->m_start.cno == cs.cs_cno) {
147 		v_eof(sp, NULL);
148 		return (1);
149 	}
150 
151 okret:	vp->m_stop.lno = cs.cs_lno;
152 	vp->m_stop.cno = cs.cs_cno;
153 
154 	/*
155 	 * !!!
156 	 * Historic, uh, features, yeah, that's right, call 'em features.
157 	 * If the starting and ending cursor positions are at the first
158 	 * column in their lines, i.e. the movement is cutting entire lines,
159 	 * the buffer is in line mode, and the ending position is the last
160 	 * character of the previous line.  Note check to make sure that
161 	 * it's not within a single line.
162 	 *
163 	 * Non-motion commands move to the end of the range.  Delete and
164 	 * yank stay at the start.  Ignore others.  Adjust the end of the
165 	 * range for motion commands.
166 	 */
167 	if (ISMOTION(vp)) {
168 		if (vp->m_start.cno == 0 &&
169 		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
170 			if (vp->m_start.lno < vp->m_stop.lno) {
171 				if (db_get(sp,
172 				    --vp->m_stop.lno, DBG_FATAL, NULL, &len))
173 					return (1);
174 				vp->m_stop.cno = len ? len - 1 : 0;
175 			}
176 			F_SET(vp, VM_LMODE);
177 		} else
178 			--vp->m_stop.cno;
179 		vp->m_final = vp->m_start;
180 	} else
181 		vp->m_final = vp->m_stop;
182 	return (0);
183 }
184 
185 /*
186  * v_sentenceb -- [count](
187  *	Move backward count sentences.
188  *
189  * PUBLIC: int v_sentenceb(SCR *, VICMD *);
190  */
191 int
v_sentenceb(SCR * sp,VICMD * vp)192 v_sentenceb(SCR *sp, VICMD *vp)
193 {
194 	VCS cs;
195 	recno_t slno;
196 	size_t len, scno;
197 	u_long cnt;
198 	int last;
199 
200 	/*
201 	 * !!!
202 	 * Historic vi permitted the user to hit SOF repeatedly.
203 	 */
204 	if (vp->m_start.lno == 1 && vp->m_start.cno == 0)
205 		return (0);
206 
207 	cs.cs_lno = vp->m_start.lno;
208 	cs.cs_cno = vp->m_start.cno;
209 	if (cs_init(sp, &cs))
210 		return (1);
211 
212 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
213 
214 	/*
215 	 * !!!
216 	 * In empty lines, skip to the previous non-white-space character.
217 	 * If in text, skip to the previous white-space character.  Believe
218 	 * it or not, in the paragraph:
219 	 *	ab cd.
220 	 *	AB CD.
221 	 * if the cursor is on the 'A' or 'B', ( moves to the 'a'.  If it
222 	 * is on the ' ', 'C' or 'D', it moves to the 'A'.  Yes, Virginia,
223 	 * Berkeley was once a major center of drug activity.
224 	 */
225 	if (cs.cs_flags == CS_EMP) {
226 		if (cs_bblank(sp, &cs))
227 			return (1);
228 		for (;;) {
229 			if (cs_prev(sp, &cs))
230 				return (1);
231 			if (cs.cs_flags != CS_EOL)
232 				break;
233 		}
234 	} else if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
235 		for (;;) {
236 			if (cs_prev(sp, &cs))
237 				return (1);
238 			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
239 				break;
240 		}
241 
242 	for (last = 0;;) {
243 		if (cs_prev(sp, &cs))
244 			return (1);
245 		if (cs.cs_flags == CS_SOF)	/* SOF is a movement sink. */
246 			break;
247 		if (cs.cs_flags == CS_EOL) {
248 			last = 1;
249 			continue;
250 		}
251 		if (cs.cs_flags == CS_EMP) {
252 			if (--cnt == 0)
253 				goto ret;
254 			if (cs_bblank(sp, &cs))
255 				return (1);
256 			last = 0;
257 			continue;
258 		}
259 		switch (cs.cs_ch) {
260 		case '.':
261 		case '?':
262 		case '!':
263 			if (!last || --cnt != 0) {
264 				last = 0;
265 				continue;
266 			}
267 
268 ret:			slno = cs.cs_lno;
269 			scno = cs.cs_cno;
270 
271 			/*
272 			 * Move to the start of the sentence, skipping blanks
273 			 * and special characters.
274 			 */
275 			do {
276 				if (cs_next(sp, &cs))
277 					return (1);
278 			} while (!cs.cs_flags &&
279 			    (cs.cs_ch == ')' || cs.cs_ch == ']' ||
280 			    cs.cs_ch == '"' || cs.cs_ch == '\''));
281 			if ((cs.cs_flags || isblank(cs.cs_ch)) &&
282 			    cs_fblank(sp, &cs))
283 				return (1);
284 
285 			/*
286 			 * If it was ".  xyz", with the cursor on the 'x', or
287 			 * "end.  ", with the cursor in the spaces, or the
288 			 * beginning of a sentence preceded by an empty line,
289 			 * we can end up where we started.  Fix it.
290 			 */
291 			if (vp->m_start.lno != cs.cs_lno ||
292 			    vp->m_start.cno != cs.cs_cno)
293 				goto okret;
294 
295 			/*
296 			 * Well, if an empty line preceded possible blanks
297 			 * and the sentence, it could be a real sentence.
298 			 */
299 			for (;;) {
300 				if (cs_prev(sp, &cs))
301 					return (1);
302 				if (cs.cs_flags == CS_EOL)
303 					continue;
304 				if (cs.cs_flags == 0 && isblank(cs.cs_ch))
305 					continue;
306 				break;
307 			}
308 			if (cs.cs_flags == CS_EMP)
309 				goto okret;
310 
311 			/* But it wasn't; try again. */
312 			++cnt;
313 			cs.cs_lno = slno;
314 			cs.cs_cno = scno;
315 			last = 0;
316 			break;
317 		case '\t':
318 			last = 1;
319 			break;
320 		default:
321 			last =
322 			    cs.cs_flags == CS_EOL || isblank(cs.cs_ch) ||
323 			    cs.cs_ch == ')' || cs.cs_ch == ']' ||
324 			    cs.cs_ch == '"' || cs.cs_ch == '\'' ? 1 : 0;
325 		}
326 	}
327 
328 okret:	vp->m_stop.lno = cs.cs_lno;
329 	vp->m_stop.cno = cs.cs_cno;
330 
331 	/*
332 	 * !!!
333 	 * If the starting and stopping cursor positions are at the first
334 	 * columns in the line, i.e. the movement is cutting an entire line,
335 	 * the buffer is in line mode, and the starting position is the last
336 	 * character of the previous line.
337 	 *
338 	 * All commands move to the end of the range.  Adjust the start of
339 	 * the range for motion commands.
340 	 */
341 	if (ISMOTION(vp)) {
342 		if (vp->m_start.cno == 0 &&
343 		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
344 			if (db_get(sp,
345 			    --vp->m_start.lno, DBG_FATAL, NULL, &len))
346 				return (1);
347 			vp->m_start.cno = len ? len - 1 : 0;
348 			F_SET(vp, VM_LMODE);
349 		} else
350 			--vp->m_start.cno;
351 	}
352 	vp->m_final = vp->m_stop;
353 	return (0);
354 }
355