xref: /minix3/external/bsd/less/dist/cvt.c (revision 84d9c625bfea59e274550651111ae9edfdc40fbd)
1*84d9c625SLionel Sambuc /*	$NetBSD: cvt.c,v 1.3 2013/09/04 19:44:21 tron Exp $	*/
2f7cf2976SLionel Sambuc 
3f7cf2976SLionel Sambuc /*
4*84d9c625SLionel Sambuc  * Copyright (C) 1984-2012  Mark Nudelman
5f7cf2976SLionel Sambuc  *
6f7cf2976SLionel Sambuc  * You may distribute under the terms of either the GNU General Public
7f7cf2976SLionel Sambuc  * License or the Less License, as specified in the README file.
8f7cf2976SLionel Sambuc  *
9*84d9c625SLionel Sambuc  * For more information, see the README file.
10f7cf2976SLionel Sambuc  */
11f7cf2976SLionel Sambuc 
12f7cf2976SLionel Sambuc /*
13f7cf2976SLionel Sambuc  * Routines to convert text in various ways.  Used by search.
14f7cf2976SLionel Sambuc  */
15f7cf2976SLionel Sambuc 
16f7cf2976SLionel Sambuc #include "less.h"
17f7cf2976SLionel Sambuc #include "charset.h"
18f7cf2976SLionel Sambuc 
19f7cf2976SLionel Sambuc extern int utf_mode;
20f7cf2976SLionel Sambuc 
21f7cf2976SLionel Sambuc /*
22f7cf2976SLionel Sambuc  * Get the length of a buffer needed to convert a string.
23f7cf2976SLionel Sambuc  */
24f7cf2976SLionel Sambuc 	public int
cvt_length(len,ops)25f7cf2976SLionel Sambuc cvt_length(len, ops)
26f7cf2976SLionel Sambuc 	int len;
27f7cf2976SLionel Sambuc 	int ops;
28f7cf2976SLionel Sambuc {
29f7cf2976SLionel Sambuc 	if (utf_mode)
30f7cf2976SLionel Sambuc 		/*
31f7cf2976SLionel Sambuc 		 * Just copying a string in UTF-8 mode can cause it to grow
32f7cf2976SLionel Sambuc 		 * in length.
33f7cf2976SLionel Sambuc 		 * Four output bytes for one input byte is the worst case.
34f7cf2976SLionel Sambuc 		 */
35f7cf2976SLionel Sambuc 		len *= 4;
36f7cf2976SLionel Sambuc 	return (len + 1);
37f7cf2976SLionel Sambuc }
38f7cf2976SLionel Sambuc 
39f7cf2976SLionel Sambuc /*
40f7cf2976SLionel Sambuc  * Allocate a chpos array for use by cvt_text.
41f7cf2976SLionel Sambuc  */
42f7cf2976SLionel Sambuc 	public int *
cvt_alloc_chpos(len)43f7cf2976SLionel Sambuc cvt_alloc_chpos(len)
44f7cf2976SLionel Sambuc 	int len;
45f7cf2976SLionel Sambuc {
46f7cf2976SLionel Sambuc 	int i;
47f7cf2976SLionel Sambuc 	int *chpos = (int *) ecalloc(sizeof(int), len);
48f7cf2976SLionel Sambuc 	/* Initialize all entries to an invalid position. */
49f7cf2976SLionel Sambuc 	for (i = 0;  i < len;  i++)
50f7cf2976SLionel Sambuc 		chpos[i] = -1;
51f7cf2976SLionel Sambuc 	return (chpos);
52f7cf2976SLionel Sambuc }
53f7cf2976SLionel Sambuc 
54f7cf2976SLionel Sambuc /*
55f7cf2976SLionel Sambuc  * Convert text.  Perform the transformations specified by ops.
56f7cf2976SLionel Sambuc  * Returns converted text in odst.  The original offset of each
57f7cf2976SLionel Sambuc  * odst character (when it was in osrc) is returned in the chpos array.
58f7cf2976SLionel Sambuc  */
59f7cf2976SLionel Sambuc 	public void
cvt_text(odst,osrc,chpos,lenp,ops)60f7cf2976SLionel Sambuc cvt_text(odst, osrc, chpos, lenp, ops)
61f7cf2976SLionel Sambuc 	char *odst;
62f7cf2976SLionel Sambuc 	char *osrc;
63f7cf2976SLionel Sambuc 	int *chpos;
64f7cf2976SLionel Sambuc 	int *lenp;
65f7cf2976SLionel Sambuc 	int ops;
66f7cf2976SLionel Sambuc {
67f7cf2976SLionel Sambuc 	char *dst;
68*84d9c625SLionel Sambuc 	char *edst = odst;
69f7cf2976SLionel Sambuc 	char *src;
70f7cf2976SLionel Sambuc 	register char *src_end;
71f7cf2976SLionel Sambuc 	LWCHAR ch;
72f7cf2976SLionel Sambuc 
73f7cf2976SLionel Sambuc 	if (lenp != NULL)
74f7cf2976SLionel Sambuc 		src_end = osrc + *lenp;
75f7cf2976SLionel Sambuc 	else
76f7cf2976SLionel Sambuc 		src_end = osrc + strlen(osrc);
77f7cf2976SLionel Sambuc 
78f7cf2976SLionel Sambuc 	for (src = osrc, dst = odst;  src < src_end;  )
79f7cf2976SLionel Sambuc 	{
80f7cf2976SLionel Sambuc 		int src_pos = src - osrc;
81f7cf2976SLionel Sambuc 		int dst_pos = dst - odst;
82f7cf2976SLionel Sambuc 		ch = step_char(&src, +1, src_end);
83f7cf2976SLionel Sambuc 		if ((ops & CVT_BS) && ch == '\b' && dst > odst)
84f7cf2976SLionel Sambuc 		{
85f7cf2976SLionel Sambuc 			/* Delete backspace and preceding char. */
86f7cf2976SLionel Sambuc 			do {
87f7cf2976SLionel Sambuc 				dst--;
88f7cf2976SLionel Sambuc 			} while (dst > odst &&
89f7cf2976SLionel Sambuc 				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
90f7cf2976SLionel Sambuc 		} else if ((ops & CVT_ANSI) && IS_CSI_START(ch))
91f7cf2976SLionel Sambuc 		{
92f7cf2976SLionel Sambuc 			/* Skip to end of ANSI escape sequence. */
93f7cf2976SLionel Sambuc 			src++;  /* skip the CSI start char */
94f7cf2976SLionel Sambuc 			while (src < src_end)
95f7cf2976SLionel Sambuc 				if (!is_ansi_middle(*src++))
96f7cf2976SLionel Sambuc 					break;
97f7cf2976SLionel Sambuc 		} else
98f7cf2976SLionel Sambuc 		{
99f7cf2976SLionel Sambuc 			/* Just copy the char to the destination buffer. */
100f7cf2976SLionel Sambuc 			if ((ops & CVT_TO_LC) && IS_UPPER(ch))
101f7cf2976SLionel Sambuc 				ch = TO_LOWER(ch);
102f7cf2976SLionel Sambuc 			put_wchar(&dst, ch);
103*84d9c625SLionel Sambuc 			/* Record the original position of the char. */
104*84d9c625SLionel Sambuc 			if (chpos != NULL)
105f7cf2976SLionel Sambuc 				chpos[dst_pos] = src_pos;
106f7cf2976SLionel Sambuc 		}
107*84d9c625SLionel Sambuc 		if (dst > edst)
108*84d9c625SLionel Sambuc 			edst = dst;
109f7cf2976SLionel Sambuc 	}
110*84d9c625SLionel Sambuc 	if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
111*84d9c625SLionel Sambuc 		edst--;
112*84d9c625SLionel Sambuc 	*edst = '\0';
113f7cf2976SLionel Sambuc 	if (lenp != NULL)
114*84d9c625SLionel Sambuc 		*lenp = edst - odst;
115*84d9c625SLionel Sambuc 	/* FIXME: why was this here?  if (chpos != NULL) chpos[dst - odst] = src - osrc; */
116f7cf2976SLionel Sambuc }
117