/* mbutil.c -- readline multibyte character utility functions */

/* Copyright (C) 2001 Free Software Foundation, Inc.

   This file is part of the GNU Readline Library, a library for
   reading lines of text with interactive input and history editing.

   The GNU Readline Library is free software; you can redistribute it
   and/or modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2, or
   (at your option) any later version.

   The GNU Readline Library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   The GNU General Public License is often shipped with GNU software, and
   is generally kept in a file called COPYING or LICENSE.  If you do not
   have a copy of the license, write to the Free Software Foundation,
   59 Temple Place, Suite 330, Boston, MA 02111 USA. */
22*15b117eaSkettenis #define READLINE_LIBRARY
23*15b117eaSkettenis
24*15b117eaSkettenis #if defined (HAVE_CONFIG_H)
25*15b117eaSkettenis # include <config.h>
26*15b117eaSkettenis #endif
27*15b117eaSkettenis
28*15b117eaSkettenis #include <sys/types.h>
29*15b117eaSkettenis #include <fcntl.h>
30*15b117eaSkettenis #include "posixjmp.h"
31*15b117eaSkettenis
32*15b117eaSkettenis #if defined (HAVE_UNISTD_H)
33*15b117eaSkettenis # include <unistd.h> /* for _POSIX_VERSION */
34*15b117eaSkettenis #endif /* HAVE_UNISTD_H */
35*15b117eaSkettenis
36*15b117eaSkettenis #if defined (HAVE_STDLIB_H)
37*15b117eaSkettenis # include <stdlib.h>
38*15b117eaSkettenis #else
39*15b117eaSkettenis # include "ansi_stdlib.h"
40*15b117eaSkettenis #endif /* HAVE_STDLIB_H */
41*15b117eaSkettenis
42*15b117eaSkettenis #include <stdio.h>
43*15b117eaSkettenis #include <ctype.h>
44*15b117eaSkettenis
45*15b117eaSkettenis /* System-specific feature definitions and include files. */
46*15b117eaSkettenis #include "rldefs.h"
47*15b117eaSkettenis #include "rlmbutil.h"
48*15b117eaSkettenis
49*15b117eaSkettenis #if defined (TIOCSTAT_IN_SYS_IOCTL)
50*15b117eaSkettenis # include <sys/ioctl.h>
51*15b117eaSkettenis #endif /* TIOCSTAT_IN_SYS_IOCTL */
52*15b117eaSkettenis
53*15b117eaSkettenis /* Some standard library routines. */
54*15b117eaSkettenis #include "readline.h"
55*15b117eaSkettenis
56*15b117eaSkettenis #include "rlprivate.h"
57*15b117eaSkettenis #include "xmalloc.h"
58*15b117eaSkettenis
/* Defined in this file so that a single definition can be shared by the
   readline and history libraries.  The initial value is 0 when
   HANDLE_MULTIBYTE is defined and 1 when it is not. */
#if !defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 1;
#else
int rl_byte_oriented = 0;
#endif
66*15b117eaSkettenis
/* **************************************************************** */
/*								    */
/*		Multibyte Character Utility Functions		    */
/*								    */
/* **************************************************************** */
72*15b117eaSkettenis
73*15b117eaSkettenis #if defined(HANDLE_MULTIBYTE)
74*15b117eaSkettenis
/* Return the byte offset in STRING reached by advancing COUNT characters
   from byte offset SEED.

   A negative SEED is treated as 0, and a COUNT <= 0 returns SEED
   unchanged.  If SEED does not fall on the first byte of a character,
   moving to the start of the following character uses up one of COUNT.
   Bytes that mbrtowc() reports as invalid or incomplete are each counted
   as one single-byte character.  When FIND_NON_ZERO is nonzero,
   characters for which wcwidth() returns 0 are stepped over without
   being counted, and any zero-width characters directly after the final
   position are skipped as well.

   The scan never moves past the terminating NUL of STRING.

   NOTE(review): SEED is expected to lie within STRING; _rl_adjust_point
   returns -1 for an out-of-range point and that result is not treated
   specially here. */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp, len;
  mbstate_t ps;
  int point;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  point = seed + _rl_adjust_point (string, seed, &ps);
  /* If SEED pointed into the middle of a character, POINT has already
     been moved forward to the next character start; that counts as one
     of the requested moves. */
  if (seed < point)
    count--;

  while (count > 0)
    {
      /* Stop at the end of the string.  Calling mbrtowc() with a length
	 of 0 yields (size_t)-2, which would otherwise be mistaken for an
	 invalid byte and push POINT past the terminating NUL. */
      len = strlen (string + point);
      if (len == 0)
	break;

      tmp = mbrtowc (&wc, string + point, len, &ps);
      if (tmp == (size_t)-1 || tmp == (size_t)-2)
	{
	  /* Invalid or incomplete sequence: assume this byte is a
	     character by itself and restart from the initial state. */
	  point++;
	  count--;
	  memset (&ps, 0, sizeof (mbstate_t));
	}
      else if (tmp == (size_t)0)
	break;			/* found wide '\0' */
      else
	{
	  /* Valid character.  Zero-width characters are consumed but not
	     counted when FIND_NON_ZERO is set. */
	  point += tmp;
	  if (find_non_zero == 0 || wcwidth (wc) != 0)
	    count--;
	}
    }

  if (find_non_zero)
    {
      /* Skip zero-width characters that follow the final position.  WC
	 is only examined after a successful conversion, and POINT is
	 only advanced by a real byte count. */
      for (;;)
	{
	  tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
	  if (tmp == (size_t)0 || tmp == (size_t)-1 || tmp == (size_t)-2)
	    break;
	  if (wcwidth (wc) != 0)
	    break;
	  point += tmp;
	}
    }

  return point;
}
140*15b117eaSkettenis
/* Return the byte offset of the start of the character that precedes byte
   offset SEED in STRING.

   Returns 0 when SEED is negative and strlen (STRING) when SEED is past
   the end of the string.  The string is scanned from its beginning with
   mbrtowc(); a byte that is invalid or cannot complete a multibyte
   character is treated as a single-byte character.  When FIND_NON_ZERO
   is nonzero, characters for which wcwidth() returns 0 are not
   remembered as candidate results. */
static int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t ps;
  int prev, point, length;
  size_t tmp;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  length = strlen (string);

  if (seed < 0)
    return 0;
  else if (length < seed)
    return length;

  prev = point = 0;
  while (point < seed)
    {
      tmp = mbrtowc (&wc, string + point, length - point, &ps);
      if (tmp == (size_t)-1 || tmp == (size_t)-2)
	{
	  /* Invalid or incomplete sequence: assume the first byte is a
	     single character.  The conversion state is unreliable after
	     an error, so reset it. */
	  tmp = 1;
	  memset (&ps, 0, sizeof (mbstate_t));

	  /* That byte is being treated as a character of its own, so it
	     must be remembered as a possible previous character. */
	  prev = point;
	}
      else if (tmp == (size_t)0)
	break;			/* Found '\0' char.  Can this happen? */
      else if (find_non_zero == 0 || wcwidth (wc) != 0)
	prev = point;

      point += tmp;
    }

  return prev;
}
191*15b117eaSkettenis
/* Measure the multibyte character that starts at SRC using mbrlen() with
   conversion state PS, looking at no more than strlen (SRC) bytes.

   Returns the byte count reported by mbrlen() (0 when it reports a null
   wide character).  Returns -1 if the bytes form an invalid sequence and
   -2 if they do not make up a complete character; in both of those cases
   *PS is cleared when PS is non-null. */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t nbytes;
  int failure;

  nbytes = mbrlen ((const char *)src, (size_t)strlen (src), ps);

  /* Anything other than the two error codes is a plain length. */
  if (nbytes != (size_t)(-1) && nbytes != (size_t)(-2))
    return (int)nbytes;

  failure = (nbytes == (size_t)(-2)) ? -2 : -1;

  /* Put the conversion state back to its initial value. */
  if (ps)
    memset (ps, 0, sizeof(mbstate_t));

  return failure;
}
225*15b117eaSkettenis
/* Compare the character at BUF1[POS1] with the one at BUF2[POS2], using
   PS1 and PS2 as the respective conversion states.  Returns 1 when both
   characters have the same positive byte length and identical bytes,
   and 0 otherwise (including when either length is not positive). */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1;
     int pos1;
     mbstate_t *ps1;
     char *buf2;
     int pos2;
     mbstate_t *ps2;
{
  int len1, len2, k;

  len1 = _rl_get_char_len (buf1 + pos1, ps1);
  if (len1 <= 0)
    return 0;

  /* The second character is only measured once the first is known good. */
  len2 = _rl_get_char_len (buf2 + pos2, ps2);
  if (len2 <= 0)
    return 0;

  if (len1 != len2)
    return 0;

  for (k = 0; k < len1; k++)
    {
      if (buf1[pos1 + k] != buf2[pos2 + k])
	return 0;
    }

  return 1;
}
251*15b117eaSkettenis
/* Walk STRING from its beginning, one character at a time, until the
   position is at or beyond byte offset POINT, updating the conversion
   state PS along the way.  Returns how many bytes past POINT the walk
   ended (0 when POINT is already a character boundary).  Returns -1 if
   POINT is negative or greater than the length of STRING. */
int
_rl_adjust_point (string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  size_t step;
  int total, cursor;

  total = strlen (string);
  if (point < 0 || total < point)
    return -1;

  for (cursor = 0; cursor < point; )
    {
      step = mbrlen (string + cursor, total - cursor, ps);

      if (step == (size_t)-1 || step == (size_t)-2)
	{
	  /* Invalid or incomplete bytes: count the first byte as one
	     character, and reset the state because its contents cannot
	     be relied on after an error. */
	  cursor++;
	  if (ps)
	    memset (ps, 0, sizeof (mbstate_t));
	  continue;
	}

      if (step == 0)
	cursor++;
      else
	cursor += step;
    }

  return (cursor - point);
}
295*15b117eaSkettenis
/* Return 1 if the LENGTH bytes of MBCHAR are identical to the bytes of
   STRING beginning at offset SEED, and 0 if they differ or if fewer than
   LENGTH bytes lie between SEED and END. */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  char *lhs, *rhs;
  int remaining;

  if (length > (end - seed))
    return 0;

  lhs = string + seed;
  rhs = mbchar;
  for (remaining = length; remaining > 0; remaining--)
    {
      if (*lhs++ != *rhs++)
	return 0;
    }

  return 1;
}
313*15b117eaSkettenis #endif /* HANDLE_MULTIBYTE */
314*15b117eaSkettenis
/* Return the byte offset of the character start found COUNT characters
   after byte offset SEED in STRING.  If FLAGS is MB_FIND_NONZERO, only
   non-zero-width multibyte characters are looked for.  When
   HANDLE_MULTIBYTE is not defined the result is simply SEED + COUNT. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string;
     int seed, count, flags;
{
#if !defined (HANDLE_MULTIBYTE)
  return (seed + count);
#else
  return _rl_find_next_mbchar_internal (string, seed, count, flags);
#endif
}
330*15b117eaSkettenis
/* Return the byte offset of the character start that precedes byte offset
   SEED in STRING; the result is never greater than SEED.  If FLAGS is
   MB_FIND_NONZERO, only non-zero-width multibyte characters are looked
   for.  When HANDLE_MULTIBYTE is not defined the result is SEED - 1, or
   SEED itself when SEED is 0. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string;
     int seed, flags;
{
#if !defined (HANDLE_MULTIBYTE)
  if (seed != 0)
    seed--;
  return seed;
#else
  return _rl_find_prev_mbchar_internal (string, seed, flags);
#endif
}
346