xref: /openbsd-src/usr.bin/less/cvt.c (revision 897fc685943471cf985a0fe38ba076ea6fe74fa5)
1 /*
2  * Copyright (C) 1984-2012  Mark Nudelman
3  * Modified for use with illumos by Garrett D'Amore.
4  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
5  *
6  * You may distribute under the terms of either the GNU General Public
7  * License or the Less License, as specified in the README file.
8  *
9  * For more information, see the README file.
10  */
11 
12 /*
13  * Routines to convert text in various ways.  Used by search.
14  */
15 
16 #include "charset.h"
17 #include "less.h"
18 
19 extern int utf_mode;
20 
21 /*
22  * Get the length of a buffer needed to convert a string.
23  */
24 int
25 cvt_length(int len)
26 {
27 	if (utf_mode)
28 		/*
29 		 * Just copying a string in UTF-8 mode can cause it to grow
30 		 * in length.
31 		 * Four output bytes for one input byte is the worst case.
32 		 */
33 		len *= 4;
34 	return (len + 1);
35 }
36 
/*
 * Create the chpos array that cvt_text fills in.
 * The array holds len ints obtained from ecalloc, and every entry
 * starts out as -1, meaning "no valid position".  The array is
 * returned to the caller.
 */
int *
cvt_alloc_chpos(int len)
{
	int *positions = ecalloc(sizeof (int), len);
	int *slot = positions;
	int remaining;

	/* Walk the array once, marking each slot as invalid. */
	for (remaining = len; remaining > 0; remaining--)
		*slot++ = -1;
	return (positions);
}
50 
51 /*
52  * Convert text.  Perform the transformations specified by ops.
53  * Returns converted text in odst.  The original offset of each
54  * odst character (when it was in osrc) is returned in the chpos array.
55  */
56 void
57 cvt_text(char *odst, char *osrc, int *chpos, int *lenp, int ops)
58 {
59 	char *dst;
60 	char *edst = odst;
61 	char *src;
62 	char *src_end;
63 	LWCHAR ch;
64 
65 	if (lenp != NULL)
66 		src_end = osrc + *lenp;
67 	else
68 		src_end = osrc + strlen(osrc);
69 
70 	for (src = osrc, dst = odst; src < src_end; ) {
71 		int src_pos = src - osrc;
72 		int dst_pos = dst - odst;
73 		ch = step_char(&src, +1, src_end);
74 		if ((ops & CVT_BS) && ch == '\b' && dst > odst) {
75 			/* Delete backspace and preceding char. */
76 			do {
77 				dst--;
78 			} while (dst > odst &&
79 			    !IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
80 		} else if ((ops & CVT_ANSI) && IS_CSI_START(ch)) {
81 			/* Skip to end of ANSI escape sequence. */
82 			src++;	/* skip the CSI start char */
83 			while (src < src_end)
84 				if (!is_ansi_middle(*src++))
85 					break;
86 		} else {
87 			/* Just copy the char to the destination buffer. */
88 			if ((ops & CVT_TO_LC) && iswupper(ch))
89 				ch = towlower(ch);
90 			put_wchar(&dst, ch);
91 			/* Record the original position of the char. */
92 			if (chpos != NULL)
93 				chpos[dst_pos] = src_pos;
94 		}
95 		if (dst > edst)
96 			edst = dst;
97 	}
98 	if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
99 		edst--;
100 	*edst = '\0';
101 	if (lenp != NULL)
102 		*lenp = edst - odst;
103 }
104