xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/Collate.xs (revision eac174f2741a08d8deb8aae59a7f778ef9b5d770)
191f110e0Safresh1 
291f110e0Safresh1 #define PERL_NO_GET_CONTEXT /* we want efficiency */
391f110e0Safresh1 
/* I guess no private function needs pTHX_ and aTHX_ */
591f110e0Safresh1 
6898184e3Ssthen #include "EXTERN.h"
7898184e3Ssthen #include "perl.h"
8898184e3Ssthen #include "XSUB.h"
9898184e3Ssthen 
10898184e3Ssthen /* This file is prepared by mkheader */
11898184e3Ssthen #include "ucatbl.h"
12898184e3Ssthen 
/* Highest code point handled by the lookup tables; anything above it is
 * simply passed over, without any complaint. */
#define VALID_UTF_MAX    0x10ffff
#define OVER_UTF_MAX(uv) ((uv) > VALID_UTF_MAX)

/* Largest UV that can still take one more hex digit without overflowing */
#define MAX_DIV_16 (UV_MAX / 16)

/* Range of collation levels supported */
#define MinLevel	1
#define MaxLevel	4

/* Weight stored at the 4th level for shifted elements */
#define Shift4Wt	0xFFFF

/* Byte length of one variable collation element (VCE) string */
#define VCE_Length	9
27898184e3Ssthen 
/* Precomposed Hangul syllables and the divisors used to decompose them */
#define Hangul_SBase  0xAC00
#define Hangul_SIni   0xAC00
#define Hangul_SFin   0xD7A3
#define Hangul_NCount 588
#define Hangul_TCount 28

/* L jamo */
#define Hangul_LBase  0x1100
#define Hangul_LIni   0x1100
#define Hangul_LFin   0x1159
#define Hangul_LFill  0x115F
#define Hangul_LEnd   0x115F /* Unicode 5.2 */

/* V jamo */
#define Hangul_VBase  0x1161
#define Hangul_VIni   0x1160 /* from Vowel Filler */
#define Hangul_VFin   0x11A2
#define Hangul_VEnd   0x11A7 /* Unicode 5.2 */

/* T jamo */
#define Hangul_TBase  0x11A7 /* from "no-final" codepoint */
#define Hangul_TIni   0x11A8
#define Hangul_TFin   0x11F9
#define Hangul_TEnd   0x11FF /* Unicode 5.2 */

/* Additional L, V and T ranges, all introduced by Unicode 5.2 */
#define HangulL2Ini   0xA960
#define HangulL2Fin   0xA97C
#define HangulV2Ini   0xD7B0
#define HangulV2Fin   0xD7C6
#define HangulT2Ini   0xD7CB
#define HangulT2Fin   0xD7FB
52898184e3Ssthen 
/* Basic CJK unified ideographs: first code point, then the last assigned
 * code point as of each Unicode version. */
#define CJK_UidIni    0x4E00
#define CJK_UidFin    0x9FA5
#define CJK_UidF41    0x9FBB /* Unicode 4.1 */
#define CJK_UidF51    0x9FC3 /* Unicode 5.1 */
#define CJK_UidF52    0x9FCB /* Unicode 5.2 */
#define CJK_UidF61    0x9FCC /* Unicode 6.1 */
#define CJK_UidF80    0x9FD5 /* Unicode 8.0 */
#define CJK_UidF100   0x9FEA /* Unicode 10.0 */
#define CJK_UidF110   0x9FEF /* Unicode 11.0 */
#define CJK_UidF130   0x9FFC /* Unicode 13.0 */

/* CJK unified ideograph extensions A to G */
#define CJK_ExtAIni   0x3400 /* Unicode 3.0 */
#define CJK_ExtAFin   0x4DB5 /* Unicode 3.0 */
#define CJK_ExtA130   0x4DBF /* Unicode 13.0 */
#define CJK_ExtBIni  0x20000 /* Unicode 3.1 */
#define CJK_ExtBFin  0x2A6D6 /* Unicode 3.1 */
#define CJK_ExtB130  0x2A6DD /* Unicode 13.0 */
#define CJK_ExtCIni  0x2A700 /* Unicode 5.2 */
#define CJK_ExtCFin  0x2B734 /* Unicode 5.2 */
#define CJK_ExtDIni  0x2B740 /* Unicode 6.0 */
#define CJK_ExtDFin  0x2B81D /* Unicode 6.0 */
#define CJK_ExtEIni  0x2B820 /* Unicode 8.0 */
#define CJK_ExtEFin  0x2CEA1 /* Unicode 8.0 */
#define CJK_ExtFIni  0x2CEB0 /* Unicode 10.0 */
#define CJK_ExtFFin  0x2EBE0 /* Unicode 10.0 */
#define CJK_ExtGIni  0x30000 /* Unicode 13.0 */
#define CJK_ExtGFin  0x3134A /* Unicode 13.0 */
80898184e3Ssthen 
81898184e3Ssthen #define CJK_CompIni  (0xFA0E)
82898184e3Ssthen #define CJK_CompFin  (0xFA29)
83b8851fccSafresh1 static const STDCHAR UnifiedCompat[] = {
84898184e3Ssthen       1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1
85898184e3Ssthen }; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */
86898184e3Ssthen 
/* Tangut ideographs, components and supplement */
#define TangIdeoIni  0x17000 /* Unicode 9.0 */
#define TangIdeoFin  0x187EC /* Unicode 9.0 */
#define TangIdeo110  0x187F1 /* Unicode 11.0 */
#define TangIdeo120  0x187F7 /* Unicode 12.0 */
#define TangCompIni  0x18800 /* Unicode 9.0 */
#define TangCompFin  0x18AF2 /* Unicode 9.0 */
#define TangComp130  0x18AFF /* Unicode 13.0 */
#define TangSuppIni  0x18D00 /* Unicode 13.0 */
#define TangSuppFin  0x18D08 /* Unicode 13.0 */

/* Nushu */
#define NushuIni     0x1B170 /* Unicode 10.0 */
#define NushuFin     0x1B2FB /* Unicode 10.0 */

/* Khitan */
#define KhitanIni    0x18B00 /* Unicode 13.0 */
#define KhitanFin    0x18CD5 /* Unicode 13.0 */

/* True when the variable named `code' in the enclosing scope lies in the
 * inclusive range [bcode, ecode]. */
#define codeRange(bcode, ecode)	(code >= (bcode) && code <= (ecode))
102898184e3Ssthen 
103898184e3Ssthen MODULE = Unicode::Collate	PACKAGE = Unicode::Collate
104898184e3Ssthen 
105898184e3Ssthen PROTOTYPES: DISABLE
106898184e3Ssthen 
107898184e3Ssthen void
108898184e3Ssthen _fetch_rest ()
109898184e3Ssthen   PREINIT:
110898184e3Ssthen     char ** rest;
111898184e3Ssthen   PPCODE:
112b8851fccSafresh1     for (rest = (char **)UCA_rest; *rest; ++rest) {
113898184e3Ssthen 	XPUSHs(sv_2mortal(newSVpv((char *) *rest, 0)));
114898184e3Ssthen     }
115898184e3Ssthen 
116898184e3Ssthen 
117898184e3Ssthen void
118898184e3Ssthen _fetch_simple (uv)
119898184e3Ssthen     UV uv
120898184e3Ssthen   PREINIT:
121898184e3Ssthen     U8 ***plane, **row;
122898184e3Ssthen     U8* result = NULL;
123898184e3Ssthen   PPCODE:
124898184e3Ssthen     if (!OVER_UTF_MAX(uv)){
125898184e3Ssthen 	plane = (U8***)UCA_simple[uv >> 16];
126898184e3Ssthen 	if (plane) {
127898184e3Ssthen 	    row = plane[(uv >> 8) & 0xff];
128898184e3Ssthen 	    result = row ? row[uv & 0xff] : NULL;
129898184e3Ssthen 	}
130898184e3Ssthen     }
131898184e3Ssthen     if (result) {
132898184e3Ssthen 	int i;
133898184e3Ssthen 	int num = (int)*result;
134898184e3Ssthen 	++result;
135b8851fccSafresh1 	EXTEND(SP, num);
136898184e3Ssthen 	for (i = 0; i < num; ++i) {
137b8851fccSafresh1 	    PUSHs(sv_2mortal(newSVpvn((char *) result, VCE_Length)));
138898184e3Ssthen 	    result += VCE_Length;
139898184e3Ssthen 	}
140898184e3Ssthen     } else {
141b8851fccSafresh1 	PUSHs(sv_2mortal(newSViv(0)));
142898184e3Ssthen     }
143898184e3Ssthen 
144898184e3Ssthen SV*
145898184e3Ssthen _ignorable_simple (uv)
146898184e3Ssthen     UV uv
147898184e3Ssthen   ALIAS:
148898184e3Ssthen     _exists_simple = 1
149898184e3Ssthen   PREINIT:
150898184e3Ssthen     U8 ***plane, **row;
151898184e3Ssthen     int num = -1;
152898184e3Ssthen     U8* result = NULL;
153898184e3Ssthen   CODE:
154898184e3Ssthen     if (!OVER_UTF_MAX(uv)){
155898184e3Ssthen 	plane = (U8***)UCA_simple[uv >> 16];
156898184e3Ssthen 	if (plane) {
157898184e3Ssthen 	    row = plane[(uv >> 8) & 0xff];
158898184e3Ssthen 	    result = row ? row[uv & 0xff] : NULL;
159898184e3Ssthen 	}
160898184e3Ssthen 	if (result)
161898184e3Ssthen 	    num = (int)*result; /* assuming 0 <= num < 128 */
162898184e3Ssthen     }
163898184e3Ssthen 
164898184e3Ssthen     if (ix)
165898184e3Ssthen 	RETVAL = boolSV(num >0);
166898184e3Ssthen     else
167898184e3Ssthen 	RETVAL = boolSV(num==0);
168898184e3Ssthen   OUTPUT:
169898184e3Ssthen     RETVAL
170898184e3Ssthen 
171898184e3Ssthen 
172898184e3Ssthen void
173898184e3Ssthen _getHexArray (src)
174898184e3Ssthen     SV* src
175898184e3Ssthen   PREINIT:
176898184e3Ssthen     char *s, *e;
177898184e3Ssthen     STRLEN byte;
178898184e3Ssthen     UV value;
179898184e3Ssthen     bool overflowed = FALSE;
180898184e3Ssthen     const char *hexdigit;
181898184e3Ssthen   PPCODE:
182898184e3Ssthen     s = SvPV(src,byte);
183898184e3Ssthen     for (e = s + byte; s < e;) {
184898184e3Ssthen 	hexdigit = strchr((char *) PL_hexdigit, *s++);
185898184e3Ssthen 	if (! hexdigit)
186898184e3Ssthen 	    continue;
187898184e3Ssthen 	value = (hexdigit - PL_hexdigit) & 0xF;
188898184e3Ssthen 	while (*s) {
189898184e3Ssthen 	    hexdigit = strchr((char *) PL_hexdigit, *s++);
190898184e3Ssthen 	    if (! hexdigit)
191898184e3Ssthen 		break;
192898184e3Ssthen 	    if (overflowed)
193898184e3Ssthen 		continue;
194b8851fccSafresh1 	    if (value > MAX_DIV_16) {
195898184e3Ssthen 		overflowed = TRUE;
196898184e3Ssthen 		continue;
197898184e3Ssthen 	    }
198898184e3Ssthen 	    value = (value << 4) | ((hexdigit - PL_hexdigit) & 0xF);
199898184e3Ssthen 	}
200898184e3Ssthen 	XPUSHs(sv_2mortal(newSVuv(overflowed ? UV_MAX : value)));
201898184e3Ssthen     }
202898184e3Ssthen 
203898184e3Ssthen 
204898184e3Ssthen SV*
205898184e3Ssthen _isIllegal (sv)
206898184e3Ssthen     SV* sv
207898184e3Ssthen   PREINIT:
208898184e3Ssthen     UV uv;
209898184e3Ssthen   CODE:
210898184e3Ssthen     if (!sv || !SvIOK(sv))
211898184e3Ssthen 	XSRETURN_YES;
212898184e3Ssthen     uv = SvUVX(sv);
213898184e3Ssthen     RETVAL = boolSV(
214898184e3Ssthen 	   0x10FFFF < uv                   /* out of range */
2156fb12b70Safresh1 	|| ((uv & 0xFFFE) == 0xFFFE)       /* ??FFF[EF] */
216898184e3Ssthen 	|| (0xD800 <= uv && uv <= 0xDFFF)  /* unpaired surrogates */
217898184e3Ssthen 	|| (0xFDD0 <= uv && uv <= 0xFDEF)  /* other non-characters */
218898184e3Ssthen     );
219898184e3Ssthen OUTPUT:
220898184e3Ssthen     RETVAL
221898184e3Ssthen 
222898184e3Ssthen 
223898184e3Ssthen void
224898184e3Ssthen _decompHangul (code)
225898184e3Ssthen     UV code
226898184e3Ssthen   PREINIT:
227898184e3Ssthen     UV sindex, lindex, vindex, tindex;
228898184e3Ssthen   PPCODE:
229898184e3Ssthen     /* code *must* be in Hangul syllable.
230898184e3Ssthen      * Check it before you enter here. */
231898184e3Ssthen     sindex =  code - Hangul_SBase;
232898184e3Ssthen     lindex =  sindex / Hangul_NCount;
233898184e3Ssthen     vindex = (sindex % Hangul_NCount) / Hangul_TCount;
234898184e3Ssthen     tindex =  sindex % Hangul_TCount;
235898184e3Ssthen 
236b8851fccSafresh1     EXTEND(SP, tindex ? 3 : 2);
237b8851fccSafresh1     PUSHs(sv_2mortal(newSVuv(lindex + Hangul_LBase)));
238b8851fccSafresh1     PUSHs(sv_2mortal(newSVuv(vindex + Hangul_VBase)));
239898184e3Ssthen     if (tindex)
240b8851fccSafresh1 	PUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase)));
241898184e3Ssthen 
242898184e3Ssthen 
243898184e3Ssthen SV*
244898184e3Ssthen getHST (code, uca_vers = 0)
245898184e3Ssthen     UV code;
246898184e3Ssthen     IV uca_vers;
247898184e3Ssthen   PREINIT:
248898184e3Ssthen     const char * hangtype;
249898184e3Ssthen     STRLEN typelen;
250898184e3Ssthen   CODE:
251898184e3Ssthen     if (codeRange(Hangul_SIni, Hangul_SFin)) {
252898184e3Ssthen 	if ((code - Hangul_SBase) % Hangul_TCount) {
253898184e3Ssthen 	    hangtype = "LVT"; typelen = 3;
254898184e3Ssthen 	} else {
255898184e3Ssthen 	    hangtype = "LV"; typelen = 2;
256898184e3Ssthen 	}
257898184e3Ssthen     } else if (uca_vers < 20) {
258898184e3Ssthen 	if (codeRange(Hangul_LIni, Hangul_LFin) || code == Hangul_LFill) {
259898184e3Ssthen 	    hangtype = "L"; typelen = 1;
260898184e3Ssthen 	} else if (codeRange(Hangul_VIni, Hangul_VFin)) {
261898184e3Ssthen 	    hangtype = "V"; typelen = 1;
262898184e3Ssthen 	} else if (codeRange(Hangul_TIni, Hangul_TFin)) {
263898184e3Ssthen 	    hangtype = "T"; typelen = 1;
264898184e3Ssthen 	} else {
265898184e3Ssthen 	    hangtype = ""; typelen = 0;
266898184e3Ssthen 	}
267898184e3Ssthen     } else {
268898184e3Ssthen 	if        (codeRange(Hangul_LIni, Hangul_LEnd) ||
269898184e3Ssthen 		   codeRange(HangulL2Ini, HangulL2Fin)) {
270898184e3Ssthen 	    hangtype = "L"; typelen = 1;
271898184e3Ssthen 	} else if (codeRange(Hangul_VIni, Hangul_VEnd) ||
272898184e3Ssthen 		   codeRange(HangulV2Ini, HangulV2Fin)) {
273898184e3Ssthen 	    hangtype = "V"; typelen = 1;
274898184e3Ssthen 	} else if (codeRange(Hangul_TIni, Hangul_TEnd) ||
275898184e3Ssthen 		   codeRange(HangulT2Ini, HangulT2Fin)) {
276898184e3Ssthen 	    hangtype = "T"; typelen = 1;
277898184e3Ssthen 	} else {
278898184e3Ssthen 	    hangtype = ""; typelen = 0;
279898184e3Ssthen 	}
280898184e3Ssthen     }
281898184e3Ssthen 
282898184e3Ssthen     RETVAL = newSVpvn(hangtype, typelen);
283898184e3Ssthen OUTPUT:
284898184e3Ssthen     RETVAL
285898184e3Ssthen 
286898184e3Ssthen 
287898184e3Ssthen void
288898184e3Ssthen _derivCE_9 (code)
289898184e3Ssthen     UV code
290898184e3Ssthen   ALIAS:
291898184e3Ssthen     _derivCE_14 = 1
292898184e3Ssthen     _derivCE_18 = 2
293898184e3Ssthen     _derivCE_20 = 3
294898184e3Ssthen     _derivCE_22 = 4
295898184e3Ssthen     _derivCE_24 = 5
2969f11ffb7Safresh1     _derivCE_32 = 6
2979f11ffb7Safresh1     _derivCE_34 = 7
2989f11ffb7Safresh1     _derivCE_36 = 8
299*eac174f2Safresh1     _derivCE_38 = 9
300*eac174f2Safresh1     _derivCE_40 = 10
301*eac174f2Safresh1     _derivCE_43 = 11
302898184e3Ssthen   PREINIT:
303898184e3Ssthen     UV base, aaaa, bbbb;
304b8851fccSafresh1     U8 a[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00";
305b8851fccSafresh1     U8 b[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00";
306*eac174f2Safresh1     bool basic_unified = 0, tangut = 0, nushu = 0, khitan = 0;
307898184e3Ssthen   PPCODE:
3089f11ffb7Safresh1     if (codeRange(CJK_UidIni, CJK_CompFin)) {
309898184e3Ssthen 	if (codeRange(CJK_CompIni, CJK_CompFin))
310898184e3Ssthen 	    basic_unified = (bool)UnifiedCompat[code - CJK_CompIni];
311898184e3Ssthen 	else
312*eac174f2Safresh1 	    basic_unified = (ix >= 11 ? (code <= CJK_UidF130) :
313*eac174f2Safresh1 			     ix >= 9  ? (code <= CJK_UidF110) :
314*eac174f2Safresh1 			     ix == 8  ? (code <= CJK_UidF100) :
3159f11ffb7Safresh1 			     ix >= 6  ? (code <= CJK_UidF80) :
3169f11ffb7Safresh1 			     ix == 5  ? (code <= CJK_UidF61) :
317898184e3Ssthen 			     ix >= 3  ? (code <= CJK_UidF52) :
318898184e3Ssthen 			     ix == 2  ? (code <= CJK_UidF51) :
319898184e3Ssthen 			     ix == 1  ? (code <= CJK_UidF41) :
320898184e3Ssthen 				        (code <= CJK_UidFin));
3219f11ffb7Safresh1     } else {
322*eac174f2Safresh1 	if (ix >= 7) {
323*eac174f2Safresh1 	    tangut = (ix >= 11) ? (codeRange(TangIdeoIni, TangIdeo120) ||
324*eac174f2Safresh1 				   codeRange(TangCompIni, TangComp130) ||
325*eac174f2Safresh1 				   codeRange(TangSuppIni, TangSuppFin)) :
326*eac174f2Safresh1 		     (ix == 10) ? (codeRange(TangIdeoIni, TangIdeo120) ||
327*eac174f2Safresh1 				   codeRange(TangCompIni, TangCompFin)) :
328*eac174f2Safresh1 		     (ix == 9)  ? (codeRange(TangIdeoIni, TangIdeo110) ||
329*eac174f2Safresh1 				   codeRange(TangCompIni, TangCompFin)) :
330*eac174f2Safresh1 				  (codeRange(TangIdeoIni, TangIdeoFin) ||
3319f11ffb7Safresh1 				   codeRange(TangCompIni, TangCompFin));
332*eac174f2Safresh1 	}
3339f11ffb7Safresh1 	if (ix >= 8)
3349f11ffb7Safresh1 	    nushu = (codeRange(NushuIni, NushuFin));
335*eac174f2Safresh1 	if (ix >= 11)
336*eac174f2Safresh1 	    khitan = (codeRange(KhitanIni, KhitanFin));
337898184e3Ssthen     }
3389f11ffb7Safresh1     base = tangut
3399f11ffb7Safresh1 	    ? 0xFB00 :
3409f11ffb7Safresh1 	   nushu
3419f11ffb7Safresh1 	    ? 0xFB01 :
342*eac174f2Safresh1 	   khitan
343*eac174f2Safresh1 	    ? 0xFB02 :
3449f11ffb7Safresh1 	   basic_unified
345898184e3Ssthen 	    ? 0xFB40 : /* CJK */
346*eac174f2Safresh1 	   ((ix >= 11 ? codeRange(CJK_ExtAIni, CJK_ExtA130)
347*eac174f2Safresh1 		      : codeRange(CJK_ExtAIni, CJK_ExtAFin))
348898184e3Ssthen 		||
349*eac174f2Safresh1 	    (ix >= 11 ? codeRange(CJK_ExtBIni, CJK_ExtB130)
350*eac174f2Safresh1 		      : codeRange(CJK_ExtBIni, CJK_ExtBFin))
351898184e3Ssthen 		||
352898184e3Ssthen 	    (ix >= 3 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
353898184e3Ssthen 		||
3549f11ffb7Safresh1 	    (ix >= 4 && codeRange(CJK_ExtDIni, CJK_ExtDFin))
3559f11ffb7Safresh1 		||
3569f11ffb7Safresh1 	    (ix >= 6 && codeRange(CJK_ExtEIni, CJK_ExtEFin))
3579f11ffb7Safresh1 		||
358*eac174f2Safresh1 	    (ix >= 8 && codeRange(CJK_ExtFIni, CJK_ExtFFin))
359*eac174f2Safresh1 		||
360*eac174f2Safresh1 	   (ix >= 11 && codeRange(CJK_ExtGIni, CJK_ExtGFin)))
361898184e3Ssthen 	    ? 0xFB80   /* CJK ext. */
362898184e3Ssthen 	    : 0xFBC0;  /* others */
363*eac174f2Safresh1     aaaa = tangut || nushu || khitan ? base : base + (code >> 15);
3649f11ffb7Safresh1     bbbb = (tangut ? (code - TangIdeoIni) :
365*eac174f2Safresh1 	    nushu  ? (code - NushuIni) :
366*eac174f2Safresh1 	    khitan ? (code - KhitanIni) : (code & 0x7FFF)) | 0x8000;
367898184e3Ssthen     a[1] = (U8)(aaaa >> 8);
368898184e3Ssthen     a[2] = (U8)(aaaa & 0xFF);
369898184e3Ssthen     b[1] = (U8)(bbbb >> 8);
370898184e3Ssthen     b[2] = (U8)(bbbb & 0xFF);
371b8851fccSafresh1     a[4] = (U8)(0x20); /* second octet of level 2 */
372b8851fccSafresh1     a[6] = (U8)(0x02); /* second octet of level 3 */
373898184e3Ssthen     a[7] = b[7] = (U8)(code >> 8);
374898184e3Ssthen     a[8] = b[8] = (U8)(code & 0xFF);
375b8851fccSafresh1     EXTEND(SP, 2);
376b8851fccSafresh1     PUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
377b8851fccSafresh1     PUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
378898184e3Ssthen 
379898184e3Ssthen 
380898184e3Ssthen void
381898184e3Ssthen _derivCE_8 (code)
382898184e3Ssthen     UV code
383898184e3Ssthen   PREINIT:
384898184e3Ssthen     UV aaaa, bbbb;
385b8851fccSafresh1     U8 a[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00";
386b8851fccSafresh1     U8 b[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00";
387898184e3Ssthen   PPCODE:
388898184e3Ssthen     aaaa =  0xFF80 + (code >> 15);
389898184e3Ssthen     bbbb = (code & 0x7FFF) | 0x8000;
390898184e3Ssthen     a[1] = (U8)(aaaa >> 8);
391898184e3Ssthen     a[2] = (U8)(aaaa & 0xFF);
392898184e3Ssthen     b[1] = (U8)(bbbb >> 8);
393898184e3Ssthen     b[2] = (U8)(bbbb & 0xFF);
394b8851fccSafresh1     a[4] = (U8)(0x02); /* second octet of level 2 */
395b8851fccSafresh1     a[6] = (U8)(0x01); /* second octet of level 3 */
396898184e3Ssthen     a[7] = b[7] = (U8)(code >> 8);
397898184e3Ssthen     a[8] = b[8] = (U8)(code & 0xFF);
398b8851fccSafresh1     EXTEND(SP, 2);
399b8851fccSafresh1     PUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
400b8851fccSafresh1     PUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
401898184e3Ssthen 
402898184e3Ssthen 
403898184e3Ssthen void
404898184e3Ssthen _uideoCE_8 (code)
405898184e3Ssthen     UV code
406898184e3Ssthen   PREINIT:
407b8851fccSafresh1     U8 uice[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00";
408898184e3Ssthen   PPCODE:
409898184e3Ssthen     uice[1] = uice[7] = (U8)(code >> 8);
410898184e3Ssthen     uice[2] = uice[8] = (U8)(code & 0xFF);
411b8851fccSafresh1     uice[4] = (U8)(0x20); /* second octet of level 2 */
412b8851fccSafresh1     uice[6] = (U8)(0x02); /* second octet of level 3 */
413b8851fccSafresh1     PUSHs(sv_2mortal(newSVpvn((char *) uice, VCE_Length)));
414898184e3Ssthen 
415898184e3Ssthen 
416898184e3Ssthen SV*
417898184e3Ssthen _isUIdeo (code, uca_vers)
418898184e3Ssthen     UV code;
419898184e3Ssthen     IV uca_vers;
420898184e3Ssthen     bool basic_unified = 0;
421898184e3Ssthen   CODE:
422898184e3Ssthen     /* uca_vers = 0 for _uideoCE_8() */
423898184e3Ssthen     if (CJK_UidIni <= code) {
424898184e3Ssthen 	if (codeRange(CJK_CompIni, CJK_CompFin))
425898184e3Ssthen 	    basic_unified = (bool)UnifiedCompat[code - CJK_CompIni];
426898184e3Ssthen 	else
427*eac174f2Safresh1 	    basic_unified = (uca_vers >= 43 ? (code <= CJK_UidF130) :
428*eac174f2Safresh1 			     uca_vers >= 38 ? (code <= CJK_UidF110) :
429*eac174f2Safresh1 			     uca_vers >= 36 ? (code <= CJK_UidF100) :
4309f11ffb7Safresh1 			     uca_vers >= 32 ? (code <= CJK_UidF80) :
4319f11ffb7Safresh1 			     uca_vers >= 24 ? (code <= CJK_UidF61) :
432898184e3Ssthen 			     uca_vers >= 20 ? (code <= CJK_UidF52) :
433898184e3Ssthen 			     uca_vers >= 18 ? (code <= CJK_UidF51) :
434898184e3Ssthen 			     uca_vers >= 14 ? (code <= CJK_UidF41) :
435898184e3Ssthen 					      (code <= CJK_UidFin));
436898184e3Ssthen     }
437898184e3Ssthen     RETVAL = boolSV(
438898184e3Ssthen 	(basic_unified)
439898184e3Ssthen 		||
440898184e3Ssthen 	(codeRange(CJK_ExtAIni, CJK_ExtAFin))
441898184e3Ssthen 		||
442*eac174f2Safresh1 	(uca_vers >= 43 && codeRange(CJK_ExtAIni, CJK_ExtA130))
443*eac174f2Safresh1 		||
444898184e3Ssthen 	(uca_vers >=  8 && codeRange(CJK_ExtBIni, CJK_ExtBFin))
445898184e3Ssthen 		||
446*eac174f2Safresh1 	(uca_vers >= 43 && codeRange(CJK_ExtBIni, CJK_ExtB130))
447*eac174f2Safresh1 		||
448898184e3Ssthen 	(uca_vers >= 20 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
449898184e3Ssthen 		||
450898184e3Ssthen 	(uca_vers >= 22 && codeRange(CJK_ExtDIni, CJK_ExtDFin))
4519f11ffb7Safresh1 		||
4529f11ffb7Safresh1 	(uca_vers >= 32 && codeRange(CJK_ExtEIni, CJK_ExtEFin))
4539f11ffb7Safresh1 		||
4549f11ffb7Safresh1 	(uca_vers >= 36 && codeRange(CJK_ExtFIni, CJK_ExtFFin))
455*eac174f2Safresh1 		||
456*eac174f2Safresh1 	(uca_vers >= 43 && codeRange(CJK_ExtGIni, CJK_ExtGFin))
457898184e3Ssthen     );
458898184e3Ssthen OUTPUT:
459898184e3Ssthen     RETVAL
460898184e3Ssthen 
461898184e3Ssthen 
462898184e3Ssthen SV*
463898184e3Ssthen mk_SortKey (self, buf)
464898184e3Ssthen     SV* self;
465898184e3Ssthen     SV* buf;
466898184e3Ssthen   PREINIT:
467898184e3Ssthen     SV *dst, **svp;
468898184e3Ssthen     STRLEN dlen, vlen;
469898184e3Ssthen     U8 *d, *p, *e, *v, *s[MaxLevel], *eachlevel[MaxLevel];
470898184e3Ssthen     AV *bufAV;
471898184e3Ssthen     HV *selfHV;
472898184e3Ssthen     UV back_flag;
473898184e3Ssthen     I32 i, buf_len;
474898184e3Ssthen     IV  lv, level, uca_vers;
475898184e3Ssthen     bool upper_lower, kata_hira, v2i, last_is_var;
476898184e3Ssthen   CODE:
477898184e3Ssthen     if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
478898184e3Ssthen 	selfHV = (HV*)SvRV(self);
479898184e3Ssthen     else
480898184e3Ssthen 	croak("$self is not a HASHREF.");
481898184e3Ssthen 
482898184e3Ssthen     if (SvROK(buf) && SvTYPE(SvRV(buf)) == SVt_PVAV)
483898184e3Ssthen 	bufAV = (AV*)SvRV(buf);
484898184e3Ssthen     else
485898184e3Ssthen 	croak("XSUB, not an ARRAYREF.");
486898184e3Ssthen 
487898184e3Ssthen     buf_len = av_len(bufAV);
488898184e3Ssthen 
489898184e3Ssthen     if (buf_len < 0) { /* empty: -1 */
490898184e3Ssthen 	dlen = 2 * (MaxLevel - 1);
491898184e3Ssthen 	dst = newSV(dlen);
492898184e3Ssthen 	(void)SvPOK_only(dst);
493898184e3Ssthen 	d = (U8*)SvPVX(dst);
494898184e3Ssthen 	while (dlen--)
495898184e3Ssthen 	    *d++ = '\0';
496898184e3Ssthen     } else {
497898184e3Ssthen 	svp = hv_fetch(selfHV, "level", 5, FALSE);
498898184e3Ssthen 	level = svp ? SvIV(*svp) : MaxLevel;
499898184e3Ssthen 
500898184e3Ssthen 	for (lv = 0; lv < level; lv++) {
501898184e3Ssthen 	    New(0, eachlevel[lv], 2 * (1 + buf_len) + 1, U8);
502898184e3Ssthen 	    s[lv] = eachlevel[lv];
503898184e3Ssthen 	}
504898184e3Ssthen 
505898184e3Ssthen 	svp = hv_fetch(selfHV, "upper_before_lower", 18, FALSE);
506898184e3Ssthen 	upper_lower = svp ? SvTRUE(*svp) : FALSE;
507898184e3Ssthen 	svp = hv_fetch(selfHV, "katakana_before_hiragana", 24, FALSE);
508898184e3Ssthen 	kata_hira = svp ? SvTRUE(*svp) : FALSE;
509898184e3Ssthen 	svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
510898184e3Ssthen 	uca_vers = SvIV(*svp);
511898184e3Ssthen 	svp = hv_fetch(selfHV, "variable", 8, FALSE);
512898184e3Ssthen 	v2i = uca_vers >= 9 && svp /* (vers >= 9) and not (non-ignorable) */
513898184e3Ssthen 	    ? !(SvCUR(*svp) == 13 && memEQ(SvPVX(*svp), "non-ignorable", 13))
514898184e3Ssthen 	    : FALSE;
515898184e3Ssthen 
516898184e3Ssthen 	last_is_var = FALSE;
517898184e3Ssthen 	for (i = 0; i <= buf_len; i++) {
518898184e3Ssthen 	    svp = av_fetch(bufAV, i, FALSE);
519898184e3Ssthen 
520898184e3Ssthen 	    if (svp && SvPOK(*svp))
521898184e3Ssthen 		v = (U8*)SvPV(*svp, vlen);
522898184e3Ssthen 	    else
523898184e3Ssthen 		croak("not a vwt.");
524898184e3Ssthen 
525898184e3Ssthen 	    if (vlen < VCE_Length) /* ignore short VCE (unexpected) */
526898184e3Ssthen 		continue;
527898184e3Ssthen 
528898184e3Ssthen 	    /* "Ignorable (L1, L2) after Variable" since track. v. 9 */
529898184e3Ssthen 	    if (v2i) {
530898184e3Ssthen 		if (*v)
531898184e3Ssthen 		    last_is_var = TRUE;
532898184e3Ssthen 		else if (v[1] || v[2]) /* non zero primary weight */
533898184e3Ssthen 		    last_is_var = FALSE;
534898184e3Ssthen 		else if (last_is_var) /* zero primary weight; skipped */
535898184e3Ssthen 		    continue;
536898184e3Ssthen 	    }
537898184e3Ssthen 
538898184e3Ssthen 	    if (v[5] == 0) { /* tert wt < 256 */
539898184e3Ssthen 		if (upper_lower) {
540898184e3Ssthen 		    if (0x8 <= v[6] && v[6] <= 0xC) /* lower */
541898184e3Ssthen 			v[6] -= 6;
542898184e3Ssthen 		    else if (0x2 <= v[6] && v[6] <= 0x6) /* upper */
543898184e3Ssthen 			v[6] += 6;
544898184e3Ssthen 		    else if (v[6] == 0x1C) /* square upper */
545898184e3Ssthen 			v[6]++;
546898184e3Ssthen 		    else if (v[6] == 0x1D) /* square lower */
547898184e3Ssthen 			v[6]--;
548898184e3Ssthen 		}
549898184e3Ssthen 		if (kata_hira) {
550898184e3Ssthen 		    if (0x0F <= v[6] && v[6] <= 0x13) /* katakana */
551898184e3Ssthen 			v[6] -= 2;
552898184e3Ssthen 		    else if (0xD <= v[6] && v[6] <= 0xE) /* hiragana */
553898184e3Ssthen 			v[6] += 5;
554898184e3Ssthen 		}
555898184e3Ssthen 	    }
556898184e3Ssthen 
557898184e3Ssthen 	    for (lv = 0; lv < level; lv++) {
558898184e3Ssthen 		if (v[2 * lv + 1] || v[2 * lv + 2]) {
559898184e3Ssthen 		    *s[lv]++ = v[2 * lv + 1];
560898184e3Ssthen 		    *s[lv]++ = v[2 * lv + 2];
561898184e3Ssthen 		}
562898184e3Ssthen 	    }
563898184e3Ssthen 	}
564898184e3Ssthen 
565898184e3Ssthen 	dlen = 2 * (MaxLevel - 1);
566898184e3Ssthen 	for (lv = 0; lv < level; lv++)
567898184e3Ssthen 	    dlen += s[lv] - eachlevel[lv];
568898184e3Ssthen 
569898184e3Ssthen 	dst = newSV(dlen);
570898184e3Ssthen 	(void)SvPOK_only(dst);
571898184e3Ssthen 	d = (U8*)SvPVX(dst);
572898184e3Ssthen 
573898184e3Ssthen 	svp = hv_fetch(selfHV, "backwardsFlag", 13, FALSE);
574898184e3Ssthen 	back_flag = svp ? SvUV(*svp) : (UV)0;
575898184e3Ssthen 
576898184e3Ssthen 	for (lv = 0; lv < level; lv++) {
577898184e3Ssthen 	    if (back_flag & (1 << (lv + 1))) {
578898184e3Ssthen 		p = s[lv];
579898184e3Ssthen 		e = eachlevel[lv];
580898184e3Ssthen 		for ( ; e < p; p -= 2) {
581898184e3Ssthen 		    *d++ = p[-2];
582898184e3Ssthen 		    *d++ = p[-1];
583898184e3Ssthen 		}
584898184e3Ssthen 	    }
585898184e3Ssthen 	    else {
586898184e3Ssthen 		p = eachlevel[lv];
587898184e3Ssthen 		e = s[lv];
588898184e3Ssthen 		while (p < e)
589898184e3Ssthen 		    *d++ = *p++;
590898184e3Ssthen 	    }
591898184e3Ssthen 	    if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
592898184e3Ssthen 		*d++ = '\0';
593898184e3Ssthen 		*d++ = '\0';
594898184e3Ssthen 	    }
595898184e3Ssthen 	}
596898184e3Ssthen 
597898184e3Ssthen 	for (lv = level; lv < MaxLevel; lv++) {
598898184e3Ssthen 	    if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
599898184e3Ssthen 		*d++ = '\0';
600898184e3Ssthen 		*d++ = '\0';
601898184e3Ssthen 	    }
602898184e3Ssthen 	}
603898184e3Ssthen 
604898184e3Ssthen 	for (lv = 0; lv < level; lv++) {
605898184e3Ssthen 	    Safefree(eachlevel[lv]);
606898184e3Ssthen 	}
607898184e3Ssthen     }
608898184e3Ssthen     *d = '\0';
609898184e3Ssthen     SvCUR_set(dst, d - (U8*)SvPVX(dst));
610898184e3Ssthen     RETVAL = dst;
611898184e3Ssthen OUTPUT:
612898184e3Ssthen     RETVAL
613898184e3Ssthen 
614898184e3Ssthen 
615898184e3Ssthen SV*
616898184e3Ssthen varCE (self, vce)
617898184e3Ssthen     SV* self;
618898184e3Ssthen     SV* vce;
619898184e3Ssthen   PREINIT:
620898184e3Ssthen     SV *dst, *vbl, **svp;
621898184e3Ssthen     HV *selfHV;
622898184e3Ssthen     U8 *a, *v, *d;
623898184e3Ssthen     STRLEN alen, vlen;
624898184e3Ssthen     bool ig_l2;
6259f11ffb7Safresh1     IV uca_vers;
626898184e3Ssthen     UV totwt;
627898184e3Ssthen   CODE:
628898184e3Ssthen     if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
629898184e3Ssthen 	selfHV = (HV*)SvRV(self);
630898184e3Ssthen     else
631898184e3Ssthen 	croak("$self is not a HASHREF.");
632898184e3Ssthen 
633898184e3Ssthen     svp = hv_fetch(selfHV, "ignore_level2", 13, FALSE);
634898184e3Ssthen     ig_l2 = svp ? SvTRUE(*svp) : FALSE;
635898184e3Ssthen 
636898184e3Ssthen     svp = hv_fetch(selfHV, "variable", 8, FALSE);
637898184e3Ssthen     vbl = svp ? *svp : &PL_sv_no;
638898184e3Ssthen     a = (U8*)SvPV(vbl, alen);
639898184e3Ssthen     v = (U8*)SvPV(vce, vlen);
640898184e3Ssthen 
641898184e3Ssthen     dst = newSV(vlen);
642898184e3Ssthen     d = (U8*)SvPVX(dst);
643898184e3Ssthen     (void)SvPOK_only(dst);
644898184e3Ssthen     Copy(v, d, vlen, U8);
645898184e3Ssthen     SvCUR_set(dst, vlen);
646898184e3Ssthen     d[vlen] = '\0';
647898184e3Ssthen 
648898184e3Ssthen     /* primary weight == 0 && secondary weight != 0 */
649898184e3Ssthen     if (ig_l2 && !d[1] && !d[2] && (d[3] || d[4])) {
650898184e3Ssthen 	d[3] = d[4] = d[5] = d[6] = '\0';
651898184e3Ssthen     }
652898184e3Ssthen 
653898184e3Ssthen     /* variable: checked only the first char and the length,
654898184e3Ssthen        trusting checkCollator() and %VariableOK in Perl ... */
655898184e3Ssthen 
656b8851fccSafresh1     if (vlen >= VCE_Length && *a != 'n') {
657b8851fccSafresh1 	if (*v) {
658898184e3Ssthen 	    if (*a == 's') { /* shifted or shift-trimmed */
659898184e3Ssthen 		d[7] = d[1]; /* wt level 1 to 4 */
660898184e3Ssthen 		d[8] = d[2];
661898184e3Ssthen 	    } /* else blanked */
662898184e3Ssthen 	    d[1] = d[2] = d[3] = d[4] = d[5] = d[6] = '\0';
663b8851fccSafresh1 	} else if (*a == 's') { /* shifted or shift-trimmed */
664898184e3Ssthen 	    totwt = d[1] + d[2] + d[3] + d[4] + d[5] + d[6];
665898184e3Ssthen 	    if (alen == 7 && totwt != 0) { /* shifted */
66691f110e0Safresh1 		if (d[1] == 0 && d[2] == 1) { /* XXX: CollationAuxiliary-6.2.0 */
66791f110e0Safresh1 		    d[7] = d[1]; /* wt level 1 to 4 */
66891f110e0Safresh1 		    d[8] = d[2];
66991f110e0Safresh1 		} else {
6709f11ffb7Safresh1 		    svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
6719f11ffb7Safresh1 		    if (!svp)
6729f11ffb7Safresh1 			croak("Panic: no $self->{UCA_Version} in varCE");
6739f11ffb7Safresh1 		    uca_vers = SvIV(*svp);
6749f11ffb7Safresh1 
6759f11ffb7Safresh1 		    /* completely ignorable or the second derived CE */
6769f11ffb7Safresh1 		    if (uca_vers >= 36 && d[3] + d[4] + d[5] + d[6] == 0) {
6779f11ffb7Safresh1 			d[7] = d[8] = '\0';
6789f11ffb7Safresh1 		    } else {
679898184e3Ssthen 			d[7] = (U8)(Shift4Wt >> 8);
680898184e3Ssthen 			d[8] = (U8)(Shift4Wt & 0xFF);
681898184e3Ssthen 		    }
6829f11ffb7Safresh1 		}
68391f110e0Safresh1 	    } else { /* shift-trimmed or completely ignorable */
684898184e3Ssthen 		d[7] = d[8] = '\0';
685898184e3Ssthen 	    }
686b8851fccSafresh1 	} /* else blanked */
687b8851fccSafresh1     } /* else non-ignorable */
688898184e3Ssthen     RETVAL = dst;
689898184e3Ssthen OUTPUT:
690898184e3Ssthen     RETVAL
691898184e3Ssthen 
692898184e3Ssthen 
693898184e3Ssthen 
694898184e3Ssthen SV*
695898184e3Ssthen visualizeSortKey (self, key)
696898184e3Ssthen     SV * self
697898184e3Ssthen     SV * key
698898184e3Ssthen   PREINIT:
699898184e3Ssthen     HV *selfHV;
700898184e3Ssthen     SV **svp, *dst;
701898184e3Ssthen     U8 *s, *e, *d;
702898184e3Ssthen     STRLEN klen, dlen;
703898184e3Ssthen     UV uv;
70491f110e0Safresh1     IV uca_vers, sep = 0;
705b8851fccSafresh1     const char *upperhex = "0123456789ABCDEF";
706898184e3Ssthen   CODE:
707898184e3Ssthen     if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
708898184e3Ssthen 	selfHV = (HV*)SvRV(self);
709898184e3Ssthen     else
710898184e3Ssthen 	croak("$self is not a HASHREF.");
711898184e3Ssthen 
712898184e3Ssthen     svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
713898184e3Ssthen     if (!svp)
714898184e3Ssthen 	croak("Panic: no $self->{UCA_Version} in visualizeSortKey");
715898184e3Ssthen     uca_vers = SvIV(*svp);
716898184e3Ssthen 
717898184e3Ssthen     s = (U8*)SvPV(key, klen);
718898184e3Ssthen 
719898184e3Ssthen    /* slightly *longer* than the need, but I'm afraid of miscounting;
72091f110e0Safresh1       = (klen / 2) * 5 - 1
72191f110e0Safresh1              # FFFF and ' ' for each 16bit units but ' ' is less by 1;
72291f110e0Safresh1              # ' ' and '|' for level boundaries including the identical level
723898184e3Ssthen        + 2   # '[' and ']'
72491f110e0Safresh1        + 1   # '\0'
72591f110e0Safresh1        (a) if klen is odd (not expected), maybe more 5 bytes.
72691f110e0Safresh1        (b) there is not always the identical level.
727898184e3Ssthen    */
728898184e3Ssthen     dlen = (klen / 2) * 5 + MaxLevel * 2 + 2;
729898184e3Ssthen     dst = newSV(dlen);
730898184e3Ssthen     (void)SvPOK_only(dst);
731898184e3Ssthen     d = (U8*)SvPVX(dst);
732898184e3Ssthen 
733898184e3Ssthen     *d++ = '[';
734898184e3Ssthen     for (e = s + klen; s < e; s += 2) {
735898184e3Ssthen 	uv = (U16)(*s << 8 | s[1]);
73691f110e0Safresh1 	if (uv || sep >= MaxLevel) {
737898184e3Ssthen 	    if ((d[-1] != '[') && ((9 <= uca_vers) || (d[-1] != '|')))
738898184e3Ssthen 		*d++ = ' ';
739898184e3Ssthen 	    *d++ = upperhex[ (s[0] >> 4) & 0xF ];
740898184e3Ssthen 	    *d++ = upperhex[  s[0]       & 0xF ];
741898184e3Ssthen 	    *d++ = upperhex[ (s[1] >> 4) & 0xF ];
742898184e3Ssthen 	    *d++ = upperhex[  s[1]       & 0xF ];
74391f110e0Safresh1 	} else {
744898184e3Ssthen 	    if ((9 <= uca_vers) && (d[-1] != '['))
745898184e3Ssthen 		*d++ = ' ';
746898184e3Ssthen 	    *d++ = '|';
74791f110e0Safresh1 	    ++sep;
748898184e3Ssthen 	}
749898184e3Ssthen     }
750898184e3Ssthen     *d++ = ']';
751898184e3Ssthen     *d   = '\0';
752898184e3Ssthen     SvCUR_set(dst, d - (U8*)SvPVX(dst));
753898184e3Ssthen     RETVAL = dst;
754898184e3Ssthen OUTPUT:
755898184e3Ssthen     RETVAL
756