/*
 * Copyright (C) 1984-2012  Mark Nudelman
 * Modified for use with illumos by Garrett D'Amore.
 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Less License, as specified in the README file.
 *
 * For more information, see the README file.
 */
11a0ef22f9Sshadchin
/*
 * Routines to convert text in various ways.  Used by search.
 */
15a0ef22f9Sshadchin
16a0ef22f9Sshadchin #include "charset.h"
1727b3ade3Smmcc #include "less.h"
18a0ef22f9Sshadchin
/* Nonzero when text is handled in UTF-8 mode; defined outside this file. */
extern int utf_mode;
20a0ef22f9Sshadchin
21a0ef22f9Sshadchin /*
22a0ef22f9Sshadchin * Get the length of a buffer needed to convert a string.
23a0ef22f9Sshadchin */
24171bb95eSnicm int
cvt_length(int len)25171bb95eSnicm cvt_length(int len)
26a0ef22f9Sshadchin {
27a0ef22f9Sshadchin if (utf_mode)
28a0ef22f9Sshadchin /*
29a0ef22f9Sshadchin * Just copying a string in UTF-8 mode can cause it to grow
30a0ef22f9Sshadchin * in length.
31a0ef22f9Sshadchin * Four output bytes for one input byte is the worst case.
32a0ef22f9Sshadchin */
33a0ef22f9Sshadchin len *= 4;
34a0ef22f9Sshadchin return (len + 1);
35a0ef22f9Sshadchin }
36a0ef22f9Sshadchin
/*
 * Allocate a chpos array for use by cvt_text.
 *
 * Returns an array of len ints obtained from ecalloc(), with every
 * entry preset to -1 so that slots cvt_text never fills in can be
 * told apart from real source offsets (which are never negative).
 * NOTE(review): the caller presumably owns and releases the array;
 * confirm against the callers.
 */
int *
cvt_alloc_chpos(int len)
{
	int i;
	int *chpos = ecalloc(sizeof (int), len);

	/* Initialize all entries to an invalid position. */
	for (i = 0; i < len; i++)
		chpos[i] = -1;
	return (chpos);
}
50a0ef22f9Sshadchin
51a0ef22f9Sshadchin /*
52a0ef22f9Sshadchin * Convert text. Perform the transformations specified by ops.
53a0ef22f9Sshadchin * Returns converted text in odst. The original offset of each
54a0ef22f9Sshadchin * odst character (when it was in osrc) is returned in the chpos array.
55a0ef22f9Sshadchin */
56171bb95eSnicm void
cvt_text(char * odst,char * osrc,int * chpos,int * lenp,int ops)57171bb95eSnicm cvt_text(char *odst, char *osrc, int *chpos, int *lenp, int ops)
58a0ef22f9Sshadchin {
59a0ef22f9Sshadchin char *dst;
6026ad794dSshadchin char *edst = odst;
61a0ef22f9Sshadchin char *src;
62171bb95eSnicm char *src_end;
63358ad9abSschwarze wchar_t ch;
64358ad9abSschwarze int len;
65a0ef22f9Sshadchin
66a0ef22f9Sshadchin if (lenp != NULL)
67a0ef22f9Sshadchin src_end = osrc + *lenp;
68a0ef22f9Sshadchin else
69a0ef22f9Sshadchin src_end = osrc + strlen(osrc);
70a0ef22f9Sshadchin
71171bb95eSnicm for (src = osrc, dst = odst; src < src_end; ) {
72a0ef22f9Sshadchin int src_pos = src - osrc;
73a0ef22f9Sshadchin int dst_pos = dst - odst;
74358ad9abSschwarze if ((len = mbtowc(&ch, src, src_end - src)) < 1)
75358ad9abSschwarze ch = L'\0';
76171bb95eSnicm if ((ops & CVT_BS) && ch == '\b' && dst > odst) {
77358ad9abSschwarze src++;
78a0ef22f9Sshadchin /* Delete backspace and preceding char. */
79a0ef22f9Sshadchin do {
80a0ef22f9Sshadchin dst--;
81*fcc1e361Sschwarze } while (dst > odst && IS_UTF8_TRAIL(*dst));
82b5cf3e7cSschwarze } else if ((ops & CVT_ANSI) && ch == ESC) {
83a0ef22f9Sshadchin /* Skip to end of ANSI escape sequence. */
84a0ef22f9Sshadchin src++; /* skip the CSI start char */
85a0ef22f9Sshadchin while (src < src_end)
86a0ef22f9Sshadchin if (!is_ansi_middle(*src++))
87a0ef22f9Sshadchin break;
88358ad9abSschwarze } else if (len < 1) {
89358ad9abSschwarze *dst++ = *src++;
90358ad9abSschwarze if (chpos != NULL)
91358ad9abSschwarze chpos[dst_pos] = src_pos;
92171bb95eSnicm } else {
93358ad9abSschwarze src += len;
94a0ef22f9Sshadchin /* Just copy the char to the destination buffer. */
9573e28e7cSmmcc if ((ops & CVT_TO_LC) && iswupper(ch))
9673e28e7cSmmcc ch = towlower(ch);
97358ad9abSschwarze dst += wctomb(dst, ch);
9826ad794dSshadchin /* Record the original position of the char. */
9926ad794dSshadchin if (chpos != NULL)
100a0ef22f9Sshadchin chpos[dst_pos] = src_pos;
101a0ef22f9Sshadchin }
10226ad794dSshadchin if (dst > edst)
10326ad794dSshadchin edst = dst;
104a0ef22f9Sshadchin }
10526ad794dSshadchin if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
10626ad794dSshadchin edst--;
10726ad794dSshadchin *edst = '\0';
108a0ef22f9Sshadchin if (lenp != NULL)
10926ad794dSshadchin *lenp = edst - odst;
110a0ef22f9Sshadchin }
111