xref: /plan9/sys/src/cmd/tcs/conv_big5.c (revision ec46fab06dcae3e636b775c4eaa679036316e1d8)
1 #ifdef	PLAN9
2 #include	<u.h>
3 #include	<libc.h>
4 #include	<bio.h>
5 #else
6 #include	<stdio.h>
7 #include	<unistd.h>
8 #include	"plan9.h"
9 #endif
10 #include	"hdr.h"
11 #include	"conv.h"
12 #include	"big5.h"
13 
14 /*
15 	a state machine for interpreting big5 (hk format).
16 */
17 void
big5proc(int c,Rune ** r,long input_loc)18 big5proc(int c, Rune **r, long input_loc)
19 {
20 	static enum { state0, state1 } state = state0;
21 	static int lastc;
22 	long n, ch, f, cold = c;
23 
24 	switch(state)
25 	{
26 	case state0:	/* idle state */
27 		if(c < 0)
28 			return;
29 		if(c >= 0xA1){
30 			lastc = c;
31 			state = state1;
32 			return;
33 		}
34 		if(c == 26)
35 			c = '\n';
36 		emit(c);
37 		return;
38 
39 	case state1:	/* seen a font spec */
40 		if(c >= 64 && c <= 126)
41 			c -= 64;
42 		else if(c >= 161 && c <= 254)
43 			c = c-161 + 63;
44 		else {
45 			nerrors++;
46 			if(squawk)
47 				EPR "%s: bad big5 glyph (from 0x%x,0x%lx) near byte %ld in %s\n",
48 					argv0, lastc, cold, input_loc, file);
49 			if(!clean)
50 				emit(BADMAP);
51 			state = state0;
52 			return;
53 		}
54 		if(lastc >= 161 && lastc <= 254)
55 			f = lastc - 161;
56 		else {
57 			nerrors++;
58 			if(squawk)
59 				EPR "%s: bad big5 font %d (from 0x%x,0x%lx) near byte %ld in %s\n",
60 					argv0, lastc-161, lastc, cold, input_loc, file);
61 			if(!clean)
62 				emit(BADMAP);
63 			state = state0;
64 			return;
65 		}
66 		n = f*BIG5FONT + c;
67 		if(n < BIG5MAX)
68 			ch = tabbig5[n];
69 		else
70 			ch = -1;
71 		if(ch < 0){
72 			nerrors++;
73 			if(squawk)
74 				EPR "%s: unknown big5 %ld (from 0x%x,0x%lx) near byte %ld in %s\n",
75 					argv0, n, lastc, cold, input_loc, file);
76 			if(!clean)
77 				emit(BADMAP);
78 		} else
79 			emit(ch);
80 		state = state0;
81 	}
82 }
83 
84 void
big5_in(int fd,long * notused,struct convert * out)85 big5_in(int fd, long *notused, struct convert *out)
86 {
87 	Rune ob[N];
88 	Rune *r, *re;
89 	uchar ibuf[N];
90 	int n, i;
91 	long nin;
92 
93 	USED(notused);
94 	r = ob;
95 	re = ob+N-3;
96 	nin = 0;
97 	while((n = read(fd, ibuf, sizeof ibuf)) > 0){
98 		for(i = 0; i < n; i++){
99 			big5proc(ibuf[i], &r, nin++);
100 			if(r >= re){
101 				OUT(out, ob, r-ob);
102 				r = ob;
103 			}
104 		}
105 		if(r > ob){
106 			OUT(out, ob, r-ob);
107 			r = ob;
108 		}
109 	}
110 	big5proc(-1, &r, nin);
111 	if(r > ob)
112 		OUT(out, ob, r-ob);
113 	OUT(out, ob, 0);
114 }
115 
116 void
big5_out(Rune * base,int n,long * notused)117 big5_out(Rune *base, int n, long *notused)
118 {
119 	char *p;
120 	int i;
121 	Rune r;
122 	static int first = 1;
123 
124 	USED(notused);
125 	if(first){
126 		first = 0;
127 		for(i = 0; i < NRUNE; i++)
128 			tab[i] = -1;
129 		for(i = 0; i < BIG5MAX; i++)
130 			if(tabbig5[i] != -1)
131 				tab[tabbig5[i]] = i;
132 	}
133 	nrunes += n;
134 	p = obuf;
135 	for(i = 0; i < n; i++){
136 		r = base[i];
137 		if(r < 128)
138 			*p++ = r;
139 		else {
140 			if(tab[r] != -1){
141 				r = tab[r];
142 				if(r >= BIG5MAX){
143 					*p++ = 0xA1;
144 					*p++ = r-BIG5MAX;
145 					continue;
146 				} else {
147 					*p++ = 0xA1 + (r/BIG5FONT);
148 					r = r%BIG5FONT;
149 					if(r <= 62) r += 64;
150 					else r += 0xA1-63;
151 					*p++ = r;
152 					continue;
153 				}
154 			}
155 			if(squawk)
156 				EPR "%s: rune 0x%x not in output cs\n", argv0, r);
157 			nerrors++;
158 			if(clean)
159 				continue;
160 			*p++ = BYTEBADMAP;
161 		}
162 	}
163 	noutput += p-obuf;
164 	if(p > obuf)
165 		write(1, obuf, p-obuf);
166 }
167