xref: /netbsd-src/external/bsd/nvi/dist/vi/v_sentence.c (revision a07ca21052e91224e1a8b79aa76256a519355213)
1 /*	$NetBSD: v_sentence.c,v 1.4 2017/11/22 12:47:30 rin Exp $ */
2 /*-
3  * Copyright (c) 1992, 1993, 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 1992, 1993, 1994, 1995, 1996
6  *	Keith Bostic.  All rights reserved.
7  *
8  * See the LICENSE file for redistribution information.
9  */
10 
11 #include "config.h"
12 
13 #include <sys/cdefs.h>
14 #if 0
15 #ifndef lint
16 static const char sccsid[] = "Id: v_sentence.c,v 10.9 2001/06/25 15:19:35 skimo Exp  (Berkeley) Date: 2001/06/25 15:19:35 ";
17 #endif /* not lint */
18 #else
19 __RCSID("$NetBSD: v_sentence.c,v 1.4 2017/11/22 12:47:30 rin Exp $");
20 #endif
21 
22 #include <sys/types.h>
23 #include <sys/queue.h>
24 #include <sys/time.h>
25 
26 #include <bitstring.h>
27 #include <ctype.h>
28 #include <limits.h>
29 #include <stdio.h>
30 
31 #include "../common/common.h"
32 #include "vi.h"
33 
/*
 * !!!
 * In historic vi, a sentence was delimited by a '.', '?' or '!' character
 * followed by TWO spaces or a newline.  One or more empty lines were also
 * treated as a separate sentence.  The Berkeley documentation for historical
 * vi states that any number of ')', ']', '"' and '\'' characters can be
 * between the delimiter character and the spaces or end of line.  However,
 * the historical implementation did not handle additional '"' characters.
 * We follow the documentation here, not the implementation.
 *
 * Once again, historical vi didn't do sentence movements associated with
 * counts consistently, mostly in the presence of lines containing only
 * white-space characters.
 *
 * This implementation also permits a single tab to delimit sentences, and
 * treats lines containing only white-space characters as empty lines.
 * Finally, tabs are eaten (along with spaces) when skipping to the start
 * of the text following a "sentence".
 */
53 
54 /*
55  * v_sentencef -- [count])
56  *	Move forward count sentences.
57  *
58  * PUBLIC: int v_sentencef __P((SCR *, VICMD *));
59  */
60 int
v_sentencef(SCR * sp,VICMD * vp)61 v_sentencef(SCR *sp, VICMD *vp)
62 {
63 	enum { BLANK, NONE, PERIOD } state;
64 	VCS cs;
65 	size_t len;
66 	u_long cnt;
67 
68 	cs.cs_lno = vp->m_start.lno;
69 	cs.cs_cno = vp->m_start.cno;
70 	if (cs_init(sp, &cs))
71 		return (1);
72 
73 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
74 
75 	/*
76 	 * !!!
77 	 * If in white-space, the next start of sentence counts as one.
78 	 * This may not handle "  .  " correctly, but it's real unclear
79 	 * what correctly means in that case.
80 	 */
81 	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))) {
82 		if (cs_fblank(sp, &cs))
83 			return (1);
84 		if (--cnt == 0) {
85 			if (vp->m_start.lno != cs.cs_lno ||
86 			    vp->m_start.cno != cs.cs_cno)
87 				goto okret;
88 			return (1);
89 		}
90 	}
91 
92 	for (state = NONE;;) {
93 		if (cs_next(sp, &cs))
94 			return (1);
95 		if (cs.cs_flags == CS_EOF)
96 			break;
97 		if (cs.cs_flags == CS_EOL) {
98 			if ((state == PERIOD || state == BLANK) && --cnt == 0) {
99 				if (cs_next(sp, &cs))
100 					return (1);
101 				if (cs.cs_flags == 0 &&
102 				    ISBLANK2(cs.cs_ch) && cs_fblank(sp, &cs))
103 					return (1);
104 				goto okret;
105 			}
106 			state = NONE;
107 			continue;
108 		}
109 		if (cs.cs_flags == CS_EMP) {	/* An EMP is two sentences. */
110 			if (--cnt == 0)
111 				goto okret;
112 			if (cs_fblank(sp, &cs))
113 				return (1);
114 			if (--cnt == 0)
115 				goto okret;
116 			state = NONE;
117 			continue;
118 		}
119 		switch (cs.cs_ch) {
120 		case '.':
121 		case '?':
122 		case '!':
123 			state = PERIOD;
124 			break;
125 		case ')':
126 		case ']':
127 		case '"':
128 		case '\'':
129 			if (state != PERIOD)
130 				state = NONE;
131 			break;
132 		case '\t':
133 			if (state == PERIOD)
134 				state = BLANK;
135 			/* FALLTHROUGH */
136 		case ' ':
137 			if (state == PERIOD) {
138 				state = BLANK;
139 				break;
140 			}
141 			if (state == BLANK && --cnt == 0) {
142 				if (cs_fblank(sp, &cs))
143 					return (1);
144 				goto okret;
145 			}
146 			/* FALLTHROUGH */
147 		default:
148 			state = NONE;
149 			break;
150 		}
151 	}
152 
153 	/* EOF is a movement sink, but it's an error not to have moved. */
154 	if (vp->m_start.lno == cs.cs_lno && vp->m_start.cno == cs.cs_cno) {
155 		v_eof(sp, NULL);
156 		return (1);
157 	}
158 
159 okret:	vp->m_stop.lno = cs.cs_lno;
160 	vp->m_stop.cno = cs.cs_cno;
161 
162 	/*
163 	 * !!!
164 	 * Historic, uh, features, yeah, that's right, call 'em features.
165 	 * If the starting and ending cursor positions are at the first
166 	 * column in their lines, i.e. the movement is cutting entire lines,
167 	 * the buffer is in line mode, and the ending position is the last
168 	 * character of the previous line.  Note check to make sure that
169 	 * it's not within a single line.
170 	 *
171 	 * Non-motion commands move to the end of the range.  Delete and
172 	 * yank stay at the start.  Ignore others.  Adjust the end of the
173 	 * range for motion commands.
174 	 */
175 	if (ISMOTION(vp)) {
176 		if (vp->m_start.cno == 0 &&
177 		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
178 			if (vp->m_start.lno < vp->m_stop.lno) {
179 				if (db_get(sp,
180 				    --vp->m_stop.lno, DBG_FATAL, NULL, &len))
181 					return (1);
182 				vp->m_stop.cno = len ? len - 1 : 0;
183 			}
184 			F_SET(vp, VM_LMODE);
185 		} else
186 			--vp->m_stop.cno;
187 		vp->m_final = vp->m_start;
188 	} else
189 		vp->m_final = vp->m_stop;
190 	return (0);
191 }
192 
193 /*
194  * v_sentenceb -- [count](
195  *	Move backward count sentences.
196  *
197  * PUBLIC: int v_sentenceb __P((SCR *, VICMD *));
198  */
199 int
v_sentenceb(SCR * sp,VICMD * vp)200 v_sentenceb(SCR *sp, VICMD *vp)
201 {
202 	VCS cs;
203 	db_recno_t slno;
204 	size_t len, scno;
205 	u_long cnt;
206 	int last;
207 
208 	/*
209 	 * !!!
210 	 * Historic vi permitted the user to hit SOF repeatedly.
211 	 */
212 	if (vp->m_start.lno == 1 && vp->m_start.cno == 0)
213 		return (0);
214 
215 	cs.cs_lno = vp->m_start.lno;
216 	cs.cs_cno = vp->m_start.cno;
217 	if (cs_init(sp, &cs))
218 		return (1);
219 
220 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
221 
222 	/*
223 	 * !!!
224 	 * In empty lines, skip to the previous non-white-space character.
225 	 * If in text, skip to the prevous white-space character.  Believe
226 	 * it or not, in the paragraph:
227 	 *	ab cd.
228 	 *	AB CD.
229 	 * if the cursor is on the 'A' or 'B', ( moves to the 'a'.  If it
230 	 * is on the ' ', 'C' or 'D', it moves to the 'A'.  Yes, Virginia,
231 	 * Berkeley was once a major center of drug activity.
232 	 */
233 	if (cs.cs_flags == CS_EMP) {
234 		if (cs_bblank(sp, &cs))
235 			return (1);
236 		for (;;) {
237 			if (cs_prev(sp, &cs))
238 				return (1);
239 			if (cs.cs_flags != CS_EOL)
240 				break;
241 		}
242 	} else if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch))
243 		for (;;) {
244 			if (cs_prev(sp, &cs))
245 				return (1);
246 			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
247 				break;
248 		}
249 
250 	for (last = 0;;) {
251 		if (cs_prev(sp, &cs))
252 			return (1);
253 		if (cs.cs_flags == CS_SOF)	/* SOF is a movement sink. */
254 			break;
255 		if (cs.cs_flags == CS_EOL) {
256 			last = 1;
257 			continue;
258 		}
259 		if (cs.cs_flags == CS_EMP) {
260 			if (--cnt == 0)
261 				goto ret;
262 			if (cs_bblank(sp, &cs))
263 				return (1);
264 			last = 0;
265 			continue;
266 		}
267 		switch (cs.cs_ch) {
268 		case '.':
269 		case '?':
270 		case '!':
271 			if (!last || --cnt != 0) {
272 				last = 0;
273 				continue;
274 			}
275 
276 ret:			slno = cs.cs_lno;
277 			scno = cs.cs_cno;
278 
279 			/*
280 			 * Move to the start of the sentence, skipping blanks
281 			 * and special characters.
282 			 */
283 			do {
284 				if (cs_next(sp, &cs))
285 					return (1);
286 			} while (!cs.cs_flags &&
287 			    (cs.cs_ch == ')' || cs.cs_ch == ']' ||
288 			    cs.cs_ch == '"' || cs.cs_ch == '\''));
289 			if ((cs.cs_flags || ISBLANK2(cs.cs_ch)) &&
290 			    cs_fblank(sp, &cs))
291 				return (1);
292 
293 			/*
294 			 * If it was ".  xyz", with the cursor on the 'x', or
295 			 * "end.  ", with the cursor in the spaces, or the
296 			 * beginning of a sentence preceded by an empty line,
297 			 * we can end up where we started.  Fix it.
298 			 */
299 			if (vp->m_start.lno != cs.cs_lno ||
300 			    vp->m_start.cno > cs.cs_cno)
301 				goto okret;
302 
303 			/*
304 			 * Well, if an empty line preceded possible blanks
305 			 * and the sentence, it could be a real sentence.
306 			 */
307 			for (;;) {
308 				if (cs_prev(sp, &cs))
309 					return (1);
310 				if (cs.cs_flags == CS_EOL)
311 					continue;
312 				if (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))
313 					continue;
314 				break;
315 			}
316 			if (cs.cs_flags == CS_EMP)
317 				goto okret;
318 
319 			/* But it wasn't; try again. */
320 			++cnt;
321 			cs.cs_lno = slno;
322 			cs.cs_cno = scno;
323 			last = 0;
324 			break;
325 		case '\t':
326 			last = 1;
327 			break;
328 		default:
329 			last =
330 			    cs.cs_flags == CS_EOL || ISBLANK2(cs.cs_ch) ||
331 			    cs.cs_ch == ')' || cs.cs_ch == ']' ||
332 			    cs.cs_ch == '"' || cs.cs_ch == '\'' ? 1 : 0;
333 		}
334 	}
335 
336 okret:	vp->m_stop.lno = cs.cs_lno;
337 	vp->m_stop.cno = cs.cs_cno;
338 
339 	/*
340 	 * !!!
341 	 * If the starting and stopping cursor positions are at the first
342 	 * columns in the line, i.e. the movement is cutting an entire line,
343 	 * the buffer is in line mode, and the starting position is the last
344 	 * character of the previous line.
345 	 *
346 	 * All commands move to the end of the range.  Adjust the start of
347 	 * the range for motion commands.
348 	 */
349 	if (ISMOTION(vp)) {
350 		if (vp->m_start.cno == 0 &&
351 		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
352 			if (db_get(sp,
353 			    --vp->m_start.lno, DBG_FATAL, NULL, &len))
354 				return (1);
355 			vp->m_start.cno = len ? len - 1 : 0;
356 			F_SET(vp, VM_LMODE);
357 		} else
358 			--vp->m_start.cno;
359 	}
360 	vp->m_final = vp->m_stop;
361 	return (0);
362 }
363