// xref: /netbsd-src/external/bsd/less/dist/lesstest/wchar.c (revision e4a6e799a67c2028562d75b4e61407b22434aa36)
#include <unistd.h>
#include "lt_types.h"
3*e4a6e799Ssimonb 
4*e4a6e799Ssimonb // Return number of bytes in the UTF-8 sequence which begins with a given byte.
wchar_len(byte b)5*e4a6e799Ssimonb int wchar_len(byte b) {
6*e4a6e799Ssimonb 	if ((b & 0xE0) == 0xC0) return 2;
7*e4a6e799Ssimonb 	if ((b & 0xF0) == 0xE0) return 3;
8*e4a6e799Ssimonb 	if ((b & 0xF8) == 0xF0) return 4;
9*e4a6e799Ssimonb 	return 1;
10*e4a6e799Ssimonb }
11*e4a6e799Ssimonb 
// Encode ch as UTF-8 at *p and advance *p past the bytes written.
// Writes 1 byte when ch < 0x80, 2 when ch < 0x800, 3 when ch < 0x10000,
// and 4 otherwise (only the low 21 bits of ch are encoded in that case).
// No bounds checking is done; the caller must supply enough room at *p.
void store_wchar(byte** p, wchar ch) {
	byte* out = *p;
	if (ch < 0x80) {
		// 7-bit value: stored as-is in a single byte.
		out[0] = (byte) ch;
		out += 1;
	} else if (ch < 0x800) {
		// 110xxxxx 10xxxxxx
		out[0] = (byte) (0xC0 | ((ch >> 6) & 0x1F));
		out[1] = (byte) (0x80 | (ch & 0x3F));
		out += 2;
	} else if (ch < 0x10000) {
		// 1110xxxx 10xxxxxx 10xxxxxx
		out[0] = (byte) (0xE0 | ((ch >> 12) & 0x0F));
		out[1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
		out[2] = (byte) (0x80 | (ch & 0x3F));
		out += 3;
	} else {
		// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		out[0] = (byte) (0xF0 | ((ch >> 18) & 0x07));
		out[1] = (byte) (0x80 | ((ch >> 12) & 0x3F));
		out[2] = (byte) (0x80 | ((ch >> 6) & 0x3F));
		out[3] = (byte) (0x80 | (ch & 0x3F));
		out += 4;
	}
	*p = out;
}
29*e4a6e799Ssimonb 
load_wchar(const byte ** p)30*e4a6e799Ssimonb wchar load_wchar(const byte** p) {
31*e4a6e799Ssimonb 	wchar ch;
32*e4a6e799Ssimonb 	switch (wchar_len(**p)) {
33*e4a6e799Ssimonb 	default:
34*e4a6e799Ssimonb 		ch = *(*p)++ & 0xFF;
35*e4a6e799Ssimonb 		break;
36*e4a6e799Ssimonb 	case 2:
37*e4a6e799Ssimonb 		ch = (*(*p)++ & 0x1F) << 6;
38*e4a6e799Ssimonb 		ch |= *(*p)++ & 0x3F;
39*e4a6e799Ssimonb 		break;
40*e4a6e799Ssimonb 	case 3:
41*e4a6e799Ssimonb 		ch = (*(*p)++ & 0x0F) << 12;
42*e4a6e799Ssimonb 		ch |= (*(*p)++ & 0x3F) << 6;
43*e4a6e799Ssimonb 		ch |= (*(*p)++ & 0x3F);
44*e4a6e799Ssimonb 		break;
45*e4a6e799Ssimonb 	case 4:
46*e4a6e799Ssimonb 		ch = (*(*p)++ & 0x07) << 18;
47*e4a6e799Ssimonb 		ch |= (*(*p)++ & 0x3F) << 12;
48*e4a6e799Ssimonb 		ch |= (*(*p)++ & 0x3F) << 6;
49*e4a6e799Ssimonb 		ch |= (*(*p)++ & 0x3F);
50*e4a6e799Ssimonb 		break;
51*e4a6e799Ssimonb 	}
52*e4a6e799Ssimonb 	return ch;
53*e4a6e799Ssimonb }
54*e4a6e799Ssimonb 
read_wchar(int fd)55*e4a6e799Ssimonb wchar read_wchar(int fd) {
56*e4a6e799Ssimonb 	byte cbuf[UNICODE_MAX_BYTES];
57*e4a6e799Ssimonb 	int n = read(fd, &cbuf[0], 1);
58*e4a6e799Ssimonb 	if (n <= 0)
59*e4a6e799Ssimonb 		return 0;
60*e4a6e799Ssimonb 	int len = wchar_len(cbuf[0]);
61*e4a6e799Ssimonb 	int i;
62*e4a6e799Ssimonb 	for (i = 1; i < len; ++i) {
63*e4a6e799Ssimonb 		int n = read(fd, &cbuf[i], 1);
64*e4a6e799Ssimonb 		if (n != 1) return 0;
65*e4a6e799Ssimonb 	}
66*e4a6e799Ssimonb 	const byte* cp = cbuf;
67*e4a6e799Ssimonb 	wchar ch = load_wchar(&cp);
68*e4a6e799Ssimonb 	// assert(cp-cbuf == len);
69*e4a6e799Ssimonb 	return ch;
70*e4a6e799Ssimonb }
71