1 2 #define PERL_NO_GET_CONTEXT /* we want efficiency */ 3 4 /* I guese no private function needs pTHX_ and aTHX_ */ 5 6 #include "EXTERN.h" 7 #include "perl.h" 8 #include "XSUB.h" 9 10 /* This file is prepared by mkheader */ 11 #include "ucatbl.h" 12 13 /* Perl 5.6.1 ? */ 14 #ifndef utf8n_to_uvuni 15 #define utf8n_to_uvuni utf8_to_uv 16 #endif /* utf8n_to_uvuni */ 17 18 /* UTF8_ALLOW_BOM is used before Perl 5.8.0 */ 19 #ifndef UTF8_ALLOW_BOM 20 #define UTF8_ALLOW_BOM (0) 21 #endif /* UTF8_ALLOW_BOM */ 22 23 #ifndef UTF8_ALLOW_SURROGATE 24 #define UTF8_ALLOW_SURROGATE (0) 25 #endif /* UTF8_ALLOW_SURROGATE */ 26 27 #ifndef UTF8_ALLOW_FE_FF 28 #define UTF8_ALLOW_FE_FF (0) 29 #endif /* UTF8_ALLOW_FE_FF */ 30 31 #ifndef UTF8_ALLOW_FFFF 32 #define UTF8_ALLOW_FFFF (0) 33 #endif /* UTF8_ALLOW_FFFF */ 34 35 #define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF) 36 37 /* perl 5.6.x workaround, before 5.8.0 */ 38 #ifdef utf8n_to_uvuni 39 #define GET_UV_FOR_5_6 utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF) 40 #else 41 #define GET_UV_FOR_5_6 retlen = 1 /* avoid an infinite loop */ 42 #endif /* utf8n_to_uvuni */ 43 44 /* At present, char > 0x10ffff are unaffected without complaint, right? */ 45 #define VALID_UTF_MAX (0x10ffff) 46 #define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv)) 47 48 static const UV max_div_16 = UV_MAX / 16; 49 50 /* Supported Levels */ 51 #define MinLevel (1) 52 #define MaxLevel (4) 53 54 /* Shifted weight at 4th level */ 55 #define Shift4Wt (0xFFFF) 56 57 #define VCE_Length (9) 58 59 #define Hangul_SBase (0xAC00) 60 #define Hangul_SIni (0xAC00) 61 #define Hangul_SFin (0xD7A3) 62 #define Hangul_NCount (588) 63 #define Hangul_TCount (28) 64 #define Hangul_LBase (0x1100) 65 #define Hangul_LIni (0x1100) 66 #define Hangul_LFin (0x1159) 67 #define Hangul_LFill (0x115F) 68 #define Hangul_LEnd (0x115F) /* Unicode 5.2 */ 69 #define Hangul_VBase (0x1161) 70 #define Hangul_VIni (0x1160) /* from Vowel Filler */ 71 #define Hangul_VFin (0x11A2) 72 #define Hangul_VEnd (0x11A7) /* Unicode 5.2 */ 73 #define Hangul_TBase (0x11A7) /* from "no-final" codepoint */ 74 #define Hangul_TIni (0x11A8) 75 #define Hangul_TFin (0x11F9) 76 #define Hangul_TEnd (0x11FF) /* Unicode 5.2 */ 77 #define HangulL2Ini (0xA960) /* Unicode 5.2 */ 78 #define HangulL2Fin (0xA97C) /* Unicode 5.2 */ 79 #define HangulV2Ini (0xD7B0) /* Unicode 5.2 */ 80 #define HangulV2Fin (0xD7C6) /* Unicode 5.2 */ 81 #define HangulT2Ini (0xD7CB) /* Unicode 5.2 */ 82 #define HangulT2Fin (0xD7FB) /* Unicode 5.2 */ 83 84 #define CJK_UidIni (0x4E00) 85 #define CJK_UidFin (0x9FA5) 86 #define CJK_UidF41 (0x9FBB) 87 #define CJK_UidF51 (0x9FC3) 88 #define CJK_UidF52 (0x9FCB) 89 #define CJK_UidF61 (0x9FCC) 90 #define CJK_ExtAIni (0x3400) /* Unicode 3.0 */ 91 #define CJK_ExtAFin (0x4DB5) /* Unicode 3.0 */ 92 #define CJK_ExtBIni (0x20000) /* Unicode 3.1 */ 93 #define CJK_ExtBFin (0x2A6D6) /* Unicode 3.1 */ 94 #define CJK_ExtCIni (0x2A700) /* Unicode 5.2 */ 95 #define CJK_ExtCFin (0x2B734) /* Unicode 5.2 */ 96 #define CJK_ExtDIni (0x2B740) /* Unicode 6.0 */ 97 #define CJK_ExtDFin (0x2B81D) /* Unicode 6.0 */ 98 99 #define CJK_CompIni (0xFA0E) 100 #define CJK_CompFin (0xFA29) 101 static STDCHAR UnifiedCompat[] = { 102 1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1 103 }; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */ 104 105 #define codeRange(bcode, ecode) ((bcode) <= code && code <= (ecode)) 106 107 MODULE = Unicode::Collate PACKAGE = Unicode::Collate 108 109 PROTOTYPES: DISABLE 110 111 void 112 _fetch_rest () 113 PREINIT: 114 char ** rest; 115 PPCODE: 116 for (rest = UCA_rest; *rest; ++rest) { 117 XPUSHs(sv_2mortal(newSVpv((char *) *rest, 0))); 118 } 119 120 121 void 122 _fetch_simple (uv) 123 UV uv 124 PREINIT: 125 U8 ***plane, **row; 126 U8* result = NULL; 127 PPCODE: 128 if (!OVER_UTF_MAX(uv)){ 129 plane = (U8***)UCA_simple[uv >> 16]; 130 if (plane) { 131 row = plane[(uv >> 8) & 0xff]; 132 result = row ? row[uv & 0xff] : NULL; 133 } 134 } 135 if (result) { 136 int i; 137 int num = (int)*result; 138 ++result; 139 for (i = 0; i < num; ++i) { 140 XPUSHs(sv_2mortal(newSVpvn((char *) result, VCE_Length))); 141 result += VCE_Length; 142 } 143 } else { 144 XPUSHs(sv_2mortal(newSViv(0))); 145 } 146 147 SV* 148 _ignorable_simple (uv) 149 UV uv 150 ALIAS: 151 _exists_simple = 1 152 PREINIT: 153 U8 ***plane, **row; 154 int num = -1; 155 U8* result = NULL; 156 CODE: 157 if (!OVER_UTF_MAX(uv)){ 158 plane = (U8***)UCA_simple[uv >> 16]; 159 if (plane) { 160 row = plane[(uv >> 8) & 0xff]; 161 result = row ? row[uv & 0xff] : NULL; 162 } 163 if (result) 164 num = (int)*result; /* assuming 0 <= num < 128 */ 165 } 166 167 if (ix) 168 RETVAL = boolSV(num >0); 169 else 170 RETVAL = boolSV(num==0); 171 OUTPUT: 172 RETVAL 173 174 175 void 176 _getHexArray (src) 177 SV* src 178 PREINIT: 179 char *s, *e; 180 STRLEN byte; 181 UV value; 182 bool overflowed = FALSE; 183 const char *hexdigit; 184 PPCODE: 185 s = SvPV(src,byte); 186 for (e = s + byte; s < e;) { 187 hexdigit = strchr((char *) PL_hexdigit, *s++); 188 if (! hexdigit) 189 continue; 190 value = (hexdigit - PL_hexdigit) & 0xF; 191 while (*s) { 192 hexdigit = strchr((char *) PL_hexdigit, *s++); 193 if (! hexdigit) 194 break; 195 if (overflowed) 196 continue; 197 if (value > max_div_16) { 198 overflowed = TRUE; 199 continue; 200 } 201 value = (value << 4) | ((hexdigit - PL_hexdigit) & 0xF); 202 } 203 XPUSHs(sv_2mortal(newSVuv(overflowed ? UV_MAX : value))); 204 } 205 206 207 SV* 208 _isIllegal (sv) 209 SV* sv 210 PREINIT: 211 UV uv; 212 CODE: 213 if (!sv || !SvIOK(sv)) 214 XSRETURN_YES; 215 uv = SvUVX(sv); 216 RETVAL = boolSV( 217 0x10FFFF < uv /* out of range */ 218 || ((uv & 0xFFFE) == 0xFFFE) /* ??FFF[EF] */ 219 || (0xD800 <= uv && uv <= 0xDFFF) /* unpaired surrogates */ 220 || (0xFDD0 <= uv && uv <= 0xFDEF) /* other non-characters */ 221 ); 222 OUTPUT: 223 RETVAL 224 225 226 void 227 _decompHangul (code) 228 UV code 229 PREINIT: 230 UV sindex, lindex, vindex, tindex; 231 PPCODE: 232 /* code *must* be in Hangul syllable. 233 * Check it before you enter here. */ 234 sindex = code - Hangul_SBase; 235 lindex = sindex / Hangul_NCount; 236 vindex = (sindex % Hangul_NCount) / Hangul_TCount; 237 tindex = sindex % Hangul_TCount; 238 239 XPUSHs(sv_2mortal(newSVuv(lindex + Hangul_LBase))); 240 XPUSHs(sv_2mortal(newSVuv(vindex + Hangul_VBase))); 241 if (tindex) 242 XPUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase))); 243 244 245 SV* 246 getHST (code, uca_vers = 0) 247 UV code; 248 IV uca_vers; 249 PREINIT: 250 const char * hangtype; 251 STRLEN typelen; 252 CODE: 253 if (codeRange(Hangul_SIni, Hangul_SFin)) { 254 if ((code - Hangul_SBase) % Hangul_TCount) { 255 hangtype = "LVT"; typelen = 3; 256 } else { 257 hangtype = "LV"; typelen = 2; 258 } 259 } else if (uca_vers < 20) { 260 if (codeRange(Hangul_LIni, Hangul_LFin) || code == Hangul_LFill) { 261 hangtype = "L"; typelen = 1; 262 } else if (codeRange(Hangul_VIni, Hangul_VFin)) { 263 hangtype = "V"; typelen = 1; 264 } else if (codeRange(Hangul_TIni, Hangul_TFin)) { 265 hangtype = "T"; typelen = 1; 266 } else { 267 hangtype = ""; typelen = 0; 268 } 269 } else { 270 if (codeRange(Hangul_LIni, Hangul_LEnd) || 271 codeRange(HangulL2Ini, HangulL2Fin)) { 272 hangtype = "L"; typelen = 1; 273 } else if (codeRange(Hangul_VIni, Hangul_VEnd) || 274 codeRange(HangulV2Ini, HangulV2Fin)) { 275 hangtype = "V"; typelen = 1; 276 } else if (codeRange(Hangul_TIni, Hangul_TEnd) || 277 codeRange(HangulT2Ini, HangulT2Fin)) { 278 hangtype = "T"; typelen = 1; 279 } else { 280 hangtype = ""; typelen = 0; 281 } 282 } 283 284 RETVAL = newSVpvn(hangtype, typelen); 285 OUTPUT: 286 RETVAL 287 288 289 void 290 _derivCE_9 (code) 291 UV code 292 ALIAS: 293 _derivCE_14 = 1 294 _derivCE_18 = 2 295 _derivCE_20 = 3 296 _derivCE_22 = 4 297 _derivCE_24 = 5 298 PREINIT: 299 UV base, aaaa, bbbb; 300 U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF"; 301 U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF"; 302 bool basic_unified = 0; 303 PPCODE: 304 if (CJK_UidIni <= code) { 305 if (codeRange(CJK_CompIni, CJK_CompFin)) 306 basic_unified = (bool)UnifiedCompat[code - CJK_CompIni]; 307 else 308 basic_unified = (ix >= 5 ? (code <= CJK_UidF61) : 309 ix >= 3 ? (code <= CJK_UidF52) : 310 ix == 2 ? (code <= CJK_UidF51) : 311 ix == 1 ? (code <= CJK_UidF41) : 312 (code <= CJK_UidFin)); 313 } 314 base = (basic_unified) 315 ? 0xFB40 : /* CJK */ 316 ((codeRange(CJK_ExtAIni, CJK_ExtAFin)) 317 || 318 (codeRange(CJK_ExtBIni, CJK_ExtBFin)) 319 || 320 (ix >= 3 && codeRange(CJK_ExtCIni, CJK_ExtCFin)) 321 || 322 (ix >= 4 && codeRange(CJK_ExtDIni, CJK_ExtDFin))) 323 ? 0xFB80 /* CJK ext. */ 324 : 0xFBC0; /* others */ 325 aaaa = base + (code >> 15); 326 bbbb = (code & 0x7FFF) | 0x8000; 327 a[1] = (U8)(aaaa >> 8); 328 a[2] = (U8)(aaaa & 0xFF); 329 b[1] = (U8)(bbbb >> 8); 330 b[2] = (U8)(bbbb & 0xFF); 331 a[7] = b[7] = (U8)(code >> 8); 332 a[8] = b[8] = (U8)(code & 0xFF); 333 XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length))); 334 XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length))); 335 336 337 void 338 _derivCE_8 (code) 339 UV code 340 PREINIT: 341 UV aaaa, bbbb; 342 U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x02\x00\x01\xFF\xFF"; 343 U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF"; 344 PPCODE: 345 aaaa = 0xFF80 + (code >> 15); 346 bbbb = (code & 0x7FFF) | 0x8000; 347 a[1] = (U8)(aaaa >> 8); 348 a[2] = (U8)(aaaa & 0xFF); 349 b[1] = (U8)(bbbb >> 8); 350 b[2] = (U8)(bbbb & 0xFF); 351 a[7] = b[7] = (U8)(code >> 8); 352 a[8] = b[8] = (U8)(code & 0xFF); 353 XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length))); 354 XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length))); 355 356 357 void 358 _uideoCE_8 (code) 359 UV code 360 PREINIT: 361 U8 uice[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF"; 362 PPCODE: 363 uice[1] = uice[7] = (U8)(code >> 8); 364 uice[2] = uice[8] = (U8)(code & 0xFF); 365 XPUSHs(sv_2mortal(newSVpvn((char *) uice, VCE_Length))); 366 367 368 SV* 369 _isUIdeo (code, uca_vers) 370 UV code; 371 IV uca_vers; 372 bool basic_unified = 0; 373 CODE: 374 /* uca_vers = 0 for _uideoCE_8() */ 375 if (CJK_UidIni <= code) { 376 if (codeRange(CJK_CompIni, CJK_CompFin)) 377 basic_unified = (bool)UnifiedCompat[code - CJK_CompIni]; 378 else 379 basic_unified = (uca_vers >= 24 ? (code <= CJK_UidF61) : 380 uca_vers >= 20 ? (code <= CJK_UidF52) : 381 uca_vers >= 18 ? (code <= CJK_UidF51) : 382 uca_vers >= 14 ? (code <= CJK_UidF41) : 383 (code <= CJK_UidFin)); 384 } 385 RETVAL = boolSV( 386 (basic_unified) 387 || 388 (codeRange(CJK_ExtAIni, CJK_ExtAFin)) 389 || 390 (uca_vers >= 8 && codeRange(CJK_ExtBIni, CJK_ExtBFin)) 391 || 392 (uca_vers >= 20 && codeRange(CJK_ExtCIni, CJK_ExtCFin)) 393 || 394 (uca_vers >= 22 && codeRange(CJK_ExtDIni, CJK_ExtDFin)) 395 ); 396 OUTPUT: 397 RETVAL 398 399 400 SV* 401 mk_SortKey (self, buf) 402 SV* self; 403 SV* buf; 404 PREINIT: 405 SV *dst, **svp; 406 STRLEN dlen, vlen; 407 U8 *d, *p, *e, *v, *s[MaxLevel], *eachlevel[MaxLevel]; 408 AV *bufAV; 409 HV *selfHV; 410 UV back_flag; 411 I32 i, buf_len; 412 IV lv, level, uca_vers; 413 bool upper_lower, kata_hira, v2i, last_is_var; 414 CODE: 415 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV) 416 selfHV = (HV*)SvRV(self); 417 else 418 croak("$self is not a HASHREF."); 419 420 if (SvROK(buf) && SvTYPE(SvRV(buf)) == SVt_PVAV) 421 bufAV = (AV*)SvRV(buf); 422 else 423 croak("XSUB, not an ARRAYREF."); 424 425 buf_len = av_len(bufAV); 426 427 if (buf_len < 0) { /* empty: -1 */ 428 dlen = 2 * (MaxLevel - 1); 429 dst = newSV(dlen); 430 (void)SvPOK_only(dst); 431 d = (U8*)SvPVX(dst); 432 while (dlen--) 433 *d++ = '\0'; 434 } else { 435 svp = hv_fetch(selfHV, "level", 5, FALSE); 436 level = svp ? SvIV(*svp) : MaxLevel; 437 438 for (lv = 0; lv < level; lv++) { 439 New(0, eachlevel[lv], 2 * (1 + buf_len) + 1, U8); 440 s[lv] = eachlevel[lv]; 441 } 442 443 svp = hv_fetch(selfHV, "upper_before_lower", 18, FALSE); 444 upper_lower = svp ? SvTRUE(*svp) : FALSE; 445 svp = hv_fetch(selfHV, "katakana_before_hiragana", 24, FALSE); 446 kata_hira = svp ? SvTRUE(*svp) : FALSE; 447 svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE); 448 uca_vers = SvIV(*svp); 449 svp = hv_fetch(selfHV, "variable", 8, FALSE); 450 v2i = uca_vers >= 9 && svp /* (vers >= 9) and not (non-ignorable) */ 451 ? !(SvCUR(*svp) == 13 && memEQ(SvPVX(*svp), "non-ignorable", 13)) 452 : FALSE; 453 454 last_is_var = FALSE; 455 for (i = 0; i <= buf_len; i++) { 456 svp = av_fetch(bufAV, i, FALSE); 457 458 if (svp && SvPOK(*svp)) 459 v = (U8*)SvPV(*svp, vlen); 460 else 461 croak("not a vwt."); 462 463 if (vlen < VCE_Length) /* ignore short VCE (unexpected) */ 464 continue; 465 466 /* "Ignorable (L1, L2) after Variable" since track. v. 9 */ 467 if (v2i) { 468 if (*v) 469 last_is_var = TRUE; 470 else if (v[1] || v[2]) /* non zero primary weight */ 471 last_is_var = FALSE; 472 else if (last_is_var) /* zero primary weight; skipped */ 473 continue; 474 } 475 476 if (v[5] == 0) { /* tert wt < 256 */ 477 if (upper_lower) { 478 if (0x8 <= v[6] && v[6] <= 0xC) /* lower */ 479 v[6] -= 6; 480 else if (0x2 <= v[6] && v[6] <= 0x6) /* upper */ 481 v[6] += 6; 482 else if (v[6] == 0x1C) /* square upper */ 483 v[6]++; 484 else if (v[6] == 0x1D) /* square lower */ 485 v[6]--; 486 } 487 if (kata_hira) { 488 if (0x0F <= v[6] && v[6] <= 0x13) /* katakana */ 489 v[6] -= 2; 490 else if (0xD <= v[6] && v[6] <= 0xE) /* hiragana */ 491 v[6] += 5; 492 } 493 } 494 495 for (lv = 0; lv < level; lv++) { 496 if (v[2 * lv + 1] || v[2 * lv + 2]) { 497 *s[lv]++ = v[2 * lv + 1]; 498 *s[lv]++ = v[2 * lv + 2]; 499 } 500 } 501 } 502 503 dlen = 2 * (MaxLevel - 1); 504 for (lv = 0; lv < level; lv++) 505 dlen += s[lv] - eachlevel[lv]; 506 507 dst = newSV(dlen); 508 (void)SvPOK_only(dst); 509 d = (U8*)SvPVX(dst); 510 511 svp = hv_fetch(selfHV, "backwardsFlag", 13, FALSE); 512 back_flag = svp ? SvUV(*svp) : (UV)0; 513 514 for (lv = 0; lv < level; lv++) { 515 if (back_flag & (1 << (lv + 1))) { 516 p = s[lv]; 517 e = eachlevel[lv]; 518 for ( ; e < p; p -= 2) { 519 *d++ = p[-2]; 520 *d++ = p[-1]; 521 } 522 } 523 else { 524 p = eachlevel[lv]; 525 e = s[lv]; 526 while (p < e) 527 *d++ = *p++; 528 } 529 if (lv + 1 < MaxLevel) { /* lv + 1 == real level */ 530 *d++ = '\0'; 531 *d++ = '\0'; 532 } 533 } 534 535 for (lv = level; lv < MaxLevel; lv++) { 536 if (lv + 1 < MaxLevel) { /* lv + 1 == real level */ 537 *d++ = '\0'; 538 *d++ = '\0'; 539 } 540 } 541 542 for (lv = 0; lv < level; lv++) { 543 Safefree(eachlevel[lv]); 544 } 545 } 546 *d = '\0'; 547 SvCUR_set(dst, d - (U8*)SvPVX(dst)); 548 RETVAL = dst; 549 OUTPUT: 550 RETVAL 551 552 553 SV* 554 varCE (self, vce) 555 SV* self; 556 SV* vce; 557 PREINIT: 558 SV *dst, *vbl, **svp; 559 HV *selfHV; 560 U8 *a, *v, *d; 561 STRLEN alen, vlen; 562 bool ig_l2; 563 UV totwt; 564 CODE: 565 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV) 566 selfHV = (HV*)SvRV(self); 567 else 568 croak("$self is not a HASHREF."); 569 570 svp = hv_fetch(selfHV, "ignore_level2", 13, FALSE); 571 ig_l2 = svp ? SvTRUE(*svp) : FALSE; 572 573 svp = hv_fetch(selfHV, "variable", 8, FALSE); 574 vbl = svp ? *svp : &PL_sv_no; 575 a = (U8*)SvPV(vbl, alen); 576 v = (U8*)SvPV(vce, vlen); 577 578 dst = newSV(vlen); 579 d = (U8*)SvPVX(dst); 580 (void)SvPOK_only(dst); 581 Copy(v, d, vlen, U8); 582 SvCUR_set(dst, vlen); 583 d[vlen] = '\0'; 584 585 /* primary weight == 0 && secondary weight != 0 */ 586 if (ig_l2 && !d[1] && !d[2] && (d[3] || d[4])) { 587 d[3] = d[4] = d[5] = d[6] = '\0'; 588 } 589 590 /* variable: checked only the first char and the length, 591 trusting checkCollator() and %VariableOK in Perl ... */ 592 593 if (vlen < VCE_Length /* ignore short VCE (unexpected) */ 594 || 595 *a == 'n') /* non-ignorable */ 596 1; 597 else if (*v) { 598 if (*a == 's') { /* shifted or shift-trimmed */ 599 d[7] = d[1]; /* wt level 1 to 4 */ 600 d[8] = d[2]; 601 } /* else blanked */ 602 603 d[1] = d[2] = d[3] = d[4] = d[5] = d[6] = '\0'; 604 } 605 else if (*a == 'b') /* blanked */ 606 1; 607 else if (*a == 's') { /* shifted or shift-trimmed */ 608 totwt = d[1] + d[2] + d[3] + d[4] + d[5] + d[6]; 609 if (alen == 7 && totwt != 0) { /* shifted */ 610 if (d[1] == 0 && d[2] == 1) { /* XXX: CollationAuxiliary-6.2.0 */ 611 d[7] = d[1]; /* wt level 1 to 4 */ 612 d[8] = d[2]; 613 } else { 614 d[7] = (U8)(Shift4Wt >> 8); 615 d[8] = (U8)(Shift4Wt & 0xFF); 616 } 617 } else { /* shift-trimmed or completely ignorable */ 618 d[7] = d[8] = '\0'; 619 } 620 } 621 else 622 croak("unknown variable value '%s'", a); 623 RETVAL = dst; 624 OUTPUT: 625 RETVAL 626 627 628 629 SV* 630 visualizeSortKey (self, key) 631 SV * self 632 SV * key 633 PREINIT: 634 HV *selfHV; 635 SV **svp, *dst; 636 U8 *s, *e, *d; 637 STRLEN klen, dlen; 638 UV uv; 639 IV uca_vers, sep = 0; 640 static const char *upperhex = "0123456789ABCDEF"; 641 CODE: 642 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV) 643 selfHV = (HV*)SvRV(self); 644 else 645 croak("$self is not a HASHREF."); 646 647 svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE); 648 if (!svp) 649 croak("Panic: no $self->{UCA_Version} in visualizeSortKey"); 650 uca_vers = SvIV(*svp); 651 652 s = (U8*)SvPV(key, klen); 653 654 /* slightly *longer* than the need, but I'm afraid of miscounting; 655 = (klen / 2) * 5 - 1 656 # FFFF and ' ' for each 16bit units but ' ' is less by 1; 657 # ' ' and '|' for level boundaries including the identical level 658 + 2 # '[' and ']' 659 + 1 # '\0' 660 (a) if klen is odd (not expected), maybe more 5 bytes. 661 (b) there is not always the identical level. 662 */ 663 dlen = (klen / 2) * 5 + MaxLevel * 2 + 2; 664 dst = newSV(dlen); 665 (void)SvPOK_only(dst); 666 d = (U8*)SvPVX(dst); 667 668 *d++ = '['; 669 for (e = s + klen; s < e; s += 2) { 670 uv = (U16)(*s << 8 | s[1]); 671 if (uv || sep >= MaxLevel) { 672 if ((d[-1] != '[') && ((9 <= uca_vers) || (d[-1] != '|'))) 673 *d++ = ' '; 674 *d++ = upperhex[ (s[0] >> 4) & 0xF ]; 675 *d++ = upperhex[ s[0] & 0xF ]; 676 *d++ = upperhex[ (s[1] >> 4) & 0xF ]; 677 *d++ = upperhex[ s[1] & 0xF ]; 678 } else { 679 if ((9 <= uca_vers) && (d[-1] != '[')) 680 *d++ = ' '; 681 *d++ = '|'; 682 ++sep; 683 } 684 } 685 *d++ = ']'; 686 *d = '\0'; 687 SvCUR_set(dst, d - (U8*)SvPVX(dst)); 688 RETVAL = dst; 689 OUTPUT: 690 RETVAL 691 692 693 694 void 695 unpackUfor56 (src) 696 SV* src 697 PREINIT: 698 STRLEN srclen, retlen; 699 U8 *s, *p, *e; 700 UV uv; 701 PPCODE: 702 s = (U8*)SvPV(src,srclen); 703 if (!SvUTF8(src)) { 704 SV* tmpsv = sv_mortalcopy(src); 705 if (!SvPOK(tmpsv)) 706 (void)sv_pvn_force(tmpsv,&srclen); 707 sv_utf8_upgrade(tmpsv); 708 s = (U8*)SvPV(tmpsv,srclen); 709 } 710 e = s + srclen; 711 712 for (p = s; p < e; p += retlen) { 713 uv = GET_UV_FOR_5_6; /* perl 5.6.x workaround */ 714 if (!retlen) 715 croak("panic (Unicode::Collate): zero-length character"); 716 XPUSHs(sv_2mortal(newSVuv(uv))); 717 } 718 719