xref: /openbsd-src/usr.bin/less/cvt.c (revision fcc1e361256302d3f352797a79f4b5028195457b)
1 /*
2  * Copyright (C) 1984-2012  Mark Nudelman
3  * Modified for use with illumos by Garrett D'Amore.
4  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
5  *
6  * You may distribute under the terms of either the GNU General Public
7  * License or the Less License, as specified in the README file.
8  *
9  * For more information, see the README file.
10  */
11 
12 /*
13  * Routines to convert text in various ways.  Used by search.
14  */
15 
16 #include "charset.h"
17 #include "less.h"
18 
19 extern int utf_mode;
20 
21 /*
22  * Get the length of a buffer needed to convert a string.
23  */
24 int
cvt_length(int len)25 cvt_length(int len)
26 {
27 	if (utf_mode)
28 		/*
29 		 * Just copying a string in UTF-8 mode can cause it to grow
30 		 * in length.
31 		 * Four output bytes for one input byte is the worst case.
32 		 */
33 		len *= 4;
34 	return (len + 1);
35 }
36 
/*
 * Allocate a chpos array of len entries for use by cvt_text.
 * Every entry starts out as -1, i.e. "no source position recorded".
 *
 * NOTE(review): the pointer returned by ecalloc is used without a NULL
 * check; this assumes ecalloc does not return on allocation failure --
 * confirm against its definition.
 */
int *
cvt_alloc_chpos(int len)
{
	int *chpos = ecalloc(sizeof (int), len);
	int *slot = chpos;
	int remaining;

	/* Walk the array once, marking each slot as invalid. */
	for (remaining = len; remaining > 0; remaining--)
		*slot++ = -1;
	return (chpos);
}
50 
51 /*
52  * Convert text.  Perform the transformations specified by ops.
53  * Returns converted text in odst.  The original offset of each
54  * odst character (when it was in osrc) is returned in the chpos array.
55  */
56 void
cvt_text(char * odst,char * osrc,int * chpos,int * lenp,int ops)57 cvt_text(char *odst, char *osrc, int *chpos, int *lenp, int ops)
58 {
59 	char *dst;
60 	char *edst = odst;
61 	char *src;
62 	char *src_end;
63 	wchar_t ch;
64 	int len;
65 
66 	if (lenp != NULL)
67 		src_end = osrc + *lenp;
68 	else
69 		src_end = osrc + strlen(osrc);
70 
71 	for (src = osrc, dst = odst; src < src_end; ) {
72 		int src_pos = src - osrc;
73 		int dst_pos = dst - odst;
74 		if ((len = mbtowc(&ch, src, src_end - src)) < 1)
75 			ch = L'\0';
76 		if ((ops & CVT_BS) && ch == '\b' && dst > odst) {
77 			src++;
78 			/* Delete backspace and preceding char. */
79 			do {
80 				dst--;
81 			} while (dst > odst && IS_UTF8_TRAIL(*dst));
82 		} else if ((ops & CVT_ANSI) && ch == ESC) {
83 			/* Skip to end of ANSI escape sequence. */
84 			src++;	/* skip the CSI start char */
85 			while (src < src_end)
86 				if (!is_ansi_middle(*src++))
87 					break;
88 		} else if (len < 1) {
89 			*dst++ = *src++;
90 			if (chpos != NULL)
91 				chpos[dst_pos] = src_pos;
92 		} else {
93 			src += len;
94 			/* Just copy the char to the destination buffer. */
95 			if ((ops & CVT_TO_LC) && iswupper(ch))
96 				ch = towlower(ch);
97 			dst += wctomb(dst, ch);
98 			/* Record the original position of the char. */
99 			if (chpos != NULL)
100 				chpos[dst_pos] = src_pos;
101 		}
102 		if (dst > edst)
103 			edst = dst;
104 	}
105 	if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
106 		edst--;
107 	*edst = '\0';
108 	if (lenp != NULL)
109 		*lenp = edst - odst;
110 }
111