/**
 * This code handles decoding UTF strings for foreach loops.  There are 6
 * combinations of conversions between char, wchar, and dchar, and 2 of each
 * of those: a 1 argument version whose delegate receives only the element,
 * and a 2 argument version whose delegate also receives the index.
 *
 * Naming scheme: _aApply<from><to><n>, where <from> is the element type of
 * the array being iterated, <to> is the element type handed to the loop
 * body (c = char, w = wchar, d = dchar) and <n> is the number of delegate
 * arguments.
 *
 * Every function returns the first non-zero value returned by the delegate
 * (iteration stops at that point), or 0 when the whole array was visited.
 *
 * Copyright: Copyright Digital Mars 2004 - 2010.
 * License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors:   Walter Bright
 * Source:    $(DRUNTIMESRC src/rt/_aApply.d)
 */
module rt.aApply;

private import rt.util.utf : decode, toUTF8;

// printf is only needed by the optional tracing enabled with -debug=apply.
debug(apply) import core.stdc.stdio : printf;

/**********************************************/
/* 1 argument versions */

// dg is D, but _aApplycd() is C
extern (D) alias int delegate(void *) dg_t;

/**
 * Iterate a char[] (UTF-8) array, passing each element to dg as a dchar.
 *
 * Params:
 *      aa = UTF-8 code units to iterate
 *      dg = loop body; receives a pointer to the current dchar
 *
 * Returns:
 *      first non-zero result of dg, else 0
 */
extern (C) int _aApplycd1(in char[] aa, dg_t dg)
{
    int result;
    size_t len = aa.length;

    debug(apply) printf("_aApplycd1(), len = %zu\n", len);
    for (size_t i = 0; i < len; )
    {
        dchar d = aa[i];
        if (d & 0x80)
            d = decode(aa, i);  // multi-byte sequence; decode() advances i
        else
            ++i;                // plain ASCII, one code unit
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplycd1.unittest\n");

    auto s = "hello"c[];
    int i;

    foreach (dchar d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (dchar d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'a'); break;
            case 1:     assert(d == '\u1234'); break;
            case 2:     assert(d == '\U000A0456'); break;
            case 3:     assert(d == 'b'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 4);
}

/*****************************/

/**
 * Iterate a wchar[] (UTF-16) array, passing each element to dg as a dchar.
 *
 * Params:
 *      aa = UTF-16 code units to iterate
 *      dg = loop body; receives a pointer to the current dchar
 *
 * Returns:
 *      first non-zero result of dg, else 0
 */
extern (C) int _aApplywd1(in wchar[] aa, dg_t dg)
{
    int result;
    size_t len = aa.length;

    debug(apply) printf("_aApplywd1(), len = %zu\n", len);
    for (size_t i = 0; i < len; )
    {
        dchar d = aa[i];
        // Code units below 0xD800 (the start of the surrogate range) are
        // passed through unchanged; everything else goes through decode(),
        // which advances i.
        if (d >= 0xD800)
            d = decode(aa, i);
        else
            ++i;
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplywd1.unittest\n");

    auto s = "hello"w[];
    int i;

    foreach (dchar d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (dchar d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'a'); break;
            case 1:     assert(d == '\u1234'); break;
            case 2:     assert(d == '\U000A0456'); break;
            case 3:     assert(d == 'b'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 4);
}

/*****************************/

/**
 * Iterate a char[] (UTF-8) array, passing each element to dg as wchar
 * (UTF-16) code units.  A code point above 0xFFFF results in two calls to
 * dg: high surrogate first, then low surrogate.
 *
 * Params:
 *      aa = UTF-8 code units to iterate
 *      dg = loop body; receives a pointer to the current wchar
 *
 * Returns:
 *      first non-zero result of dg, else 0
 */
extern (C) int _aApplycw1(in char[] aa, dg_t dg)
{
    int result;
    size_t len = aa.length;

    debug(apply) printf("_aApplycw1(), len = %zu\n", len);
    for (size_t i = 0; i < len; )
    {
        wchar w = aa[i];
        if (w & 0x80)
        {
            dchar d = decode(aa, i);    // advances i past the sequence
            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                // Split into a surrogate pair: emit the high half here,
                // the low half is emitted by the common call below.
                w = cast(wchar)((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
                result = dg(cast(void *)&w);
                if (result)
                    break;
                w = cast(wchar)(((d - 0x10000) & 0x3FF) + 0xDC00);
            }
        }
        else
            ++i;
        result = dg(cast(void *)&w);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplycw1.unittest\n");

    auto s = "hello"c[];
    int i;

    foreach (wchar d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (wchar d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'a'); break;
            case 1:     assert(d == 0x1234); break;
            case 2:     assert(d == 0xDA41); break;
            case 3:     assert(d == 0xDC56); break;
            case 4:     assert(d == 'b'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);
}

/*****************************/

/**
 * Iterate a wchar[] (UTF-16) array, passing each element to dg as char
 * (UTF-8) code units.  A non-ASCII element results in one call to dg per
 * UTF-8 code unit produced by toUTF8().
 *
 * Params:
 *      aa = UTF-16 code units to iterate
 *      dg = loop body; receives a pointer to the current char
 *
 * Returns:
 *      first non-zero result of dg, else 0
 */
extern (C) int _aApplywc1(in wchar[] aa, dg_t dg)
{
    int result;
    size_t len = aa.length;

    debug(apply) printf("_aApplywc1(), len = %zu\n", len);
    for (size_t i = 0; i < len; )
    {
        wchar w = aa[i];
        if (w & ~0x7F)
        {
            char[4] buf = void;         // fully overwritten by toUTF8()

            dchar d = decode(aa, i);    // advances i past the sequence
            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(cast(void *)&c2);
                // return, not break: a break would only leave this inner
                // foreach and the outer loop would carry on.
                if (result)
                    return result;
            }
        }
        else
        {
            char c = cast(char)w;
            ++i;
            result = dg(cast(void *)&c);
            if (result)
                break;
        }
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplywc1.unittest\n");

    auto s = "hello"w[];
    int i;

    foreach (char d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (char d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'a'); break;
            case 1:     assert(d == 0xE1); break;
            case 2:     assert(d == 0x88); break;
            case 3:     assert(d == 0xB4); break;
            case 4:     assert(d == 0xF2); break;
            case 5:     assert(d == 0xA0); break;
            case 6:     assert(d == 0x91); break;
            case 7:     assert(d == 0x96); break;
            case 8:     assert(d == 'b'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 9);
}

/*****************************/

/**
 * Iterate a dchar[] (UTF-32) array, passing each element to dg as char
 * (UTF-8) code units.  A non-ASCII element results in one call to dg per
 * UTF-8 code unit produced by toUTF8().
 *
 * Params:
 *      aa = code points to iterate
 *      dg = loop body; receives a pointer to the current char
 *
 * Returns:
 *      first non-zero result of dg, else 0
 */
extern (C) int _aApplydc1(in dchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplydc1(), len = %zu\n", aa.length);
    foreach (dchar d; aa)
    {
        if (d & ~0x7F)
        {
            char[4] buf = void;         // fully overwritten by toUTF8()

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(cast(void *)&c2);
                // return, not break: a break would only leave this inner
                // foreach and the outer loop would carry on.
                if (result)
                    return result;
            }
        }
        else
        {
            char c = cast(char)d;
            result = dg(cast(void *)&c);
            if (result)
                break;
        }
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplydc1.unittest\n");

    auto s = "hello"d[];
    int i;

    foreach (char d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (char d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'a'); break;
            case 1:     assert(d == 0xE1); break;
            case 2:     assert(d == 0x88); break;
            case 3:     assert(d == 0xB4); break;
            case 4:     assert(d == 0xF2); break;
            case 5:     assert(d == 0xA0); break;
            case 6:     assert(d == 0x91); break;
            case 7:     assert(d == 0x96); break;
            case 8:     assert(d == 'b'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 9);
}

/*****************************/

/**
 * Iterate a dchar[] (UTF-32) array, passing each element to dg as wchar
 * (UTF-16) code units.  A code point above 0xFFFF results in two calls to
 * dg: high surrogate first, then low surrogate.
 *
 * Params:
 *      aa = code points to iterate
 *      dg = loop body; receives a pointer to the current wchar
 *
 * Returns:
 *      first non-zero result of dg, else 0
 */
extern (C) int _aApplydw1(in dchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplydw1(), len = %zu\n", aa.length);
    foreach (dchar d; aa)
    {
        wchar w;

        if (d <= 0xFFFF)
            w = cast(wchar) d;
        else
        {
            // Split into a surrogate pair: emit the high half here,
            // the low half is emitted by the common call below.
            w = cast(wchar)((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
            result = dg(cast(void *)&w);
            if (result)
                break;
            w = cast(wchar)(((d - 0x10000) & 0x3FF) + 0xDC00);
        }
        result = dg(cast(void *)&w);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplydw1.unittest\n");

    auto s = "hello"d[];
    int i;

    foreach (wchar d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (wchar d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'a'); break;
            case 1:     assert(d == 0x1234); break;
            case 2:     assert(d == 0xDA41); break;
            case 3:     assert(d == 0xDC56); break;
            case 4:     assert(d == 'b'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);
}


/****************************************************************************/
/* 2 argument versions */

// dg is D, but _aApplycd2() is C
extern (D) alias int delegate(void *, void *) dg2_t;

/**
 * Iterate a char[] (UTF-8) array, passing the index and each element (as a
 * dchar) to dg.  The index is the position in aa of the first code unit of
 * the element.
 *
 * Params:
 *      aa = UTF-8 code units to iterate
 *      dg = loop body; receives a pointer to the index (size_t) and a
 *           pointer to the current dchar
 *
 * Returns:
 *      first non-zero result of dg, else 0
 */
extern (C) int _aApplycd2(in char[] aa, dg2_t dg)
{
    int result;
    size_t len = aa.length;

    debug(apply) printf("_aApplycd2(), len = %zu\n", len);
    size_t n;                           // code units consumed by this element
    for (size_t i = 0; i < len; i += n)
    {
        dchar d = aa[i];
        if (d & 0x80)
        {
            // Decode on a copy of the index so that i still names the
            // start of the sequence when it is handed to dg.
            n = i;
            d = decode(aa, n);
            n -= i;
        }
        else
            n = 1;
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplycd2.unittest\n");

    auto s = "hello"c[];
    int i;

    foreach (k, dchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(k == i);
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (k, dchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        switch (i)
        {
            case 0:     assert(d == 'a'); assert(k == 0); break;
            case 1:     assert(d == '\u1234'); assert(k == 1); break;
            case 2:     assert(d == '\U000A0456'); assert(k == 4); break;
            case 3:     assert(d == 'b'); assert(k == 8); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 4);
}

/*****************************/

/**
 * Iterate a wchar[] (UTF-16) array, passing the index and each element (as
 * a dchar) to dg.  The index is the position in aa of the first code unit
 * of the element.
 *
 * Params:
 *      aa = UTF-16 code units to iterate
 *      dg = loop body; receives a pointer to the index (size_t) and a
 *           pointer to the current dchar
 *
 * Returns:
 *      first non-zero result of dg, else 0
 */
extern (C) int _aApplywd2(in wchar[] aa, dg2_t dg)
{
    int result;
    size_t len = aa.length;

    debug(apply) printf("_aApplywd2(), len = %zu\n", len);
    size_t n;                           // code units consumed by this element
    for (size_t i = 0; i < len; i += n)
    {
        dchar d = aa[i];
        if (d & ~0x7F)
        {
            // Decode on a copy of the index so that i still names the
            // start of the sequence when it is handed to dg.
            n = i;
            d = decode(aa, n);
            n -= i;
        }
        else
            n = 1;
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplywd2.unittest\n");

    auto s = "hello"w[];
    int i;

    foreach (k, dchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(k == i);
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (k, dchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        switch (i)
        {
            case 0:     assert(k == 0); assert(d == 'a'); break;
            case 1:     assert(k == 1); assert(d == '\u1234'); break;
            case 2:     assert(k == 2); assert(d == '\U000A0456'); break;
            case 3:     assert(k == 4); assert(d == 'b'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 4);
}

/*****************************/

/**
 * Iterate a char[] (UTF-8) array, passing the index and each element (as
 * wchar code units) to dg.  A code point above 0xFFFF results in two calls
 * to dg, one per surrogate, both with the index of the start of the UTF-8
 * sequence.
 *
 * Params:
 *      aa = UTF-8 code units to iterate
 *      dg = loop body; receives a pointer to the index (size_t) and a
 *           pointer to the current wchar
 *
 * Returns:
 *      first non-zero result of dg, else 0
 */
extern (C) int _aApplycw2(in char[] aa, dg2_t dg)
{
    int result;
    size_t len = aa.length;

    debug(apply) printf("_aApplycw2(), len = %zu\n", len);
    size_t n;                           // code units consumed by this element
    for (size_t i = 0; i < len; i += n)
    {
        wchar w = aa[i];
        if (w & 0x80)
        {
            // Decode on a copy of the index so that i still names the
            // start of the sequence when it is handed to dg.
            n = i;
            dchar d = decode(aa, n);
            n -= i;
            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                // Split into a surrogate pair: emit the high half here,
                // the low half is emitted by the common call below.
                w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
                result = dg(&i, cast(void *)&w);
                if (result)
                    break;
                w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
            }
        }
        else
            n = 1;
        result = dg(&i, cast(void *)&w);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplycw2.unittest\n");

    auto s = "hello"c[];
    int i;

    foreach (k, wchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(k == i);
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (k, wchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        switch (i)
        {
            case 0:     assert(k == 0); assert(d == 'a'); break;
            case 1:     assert(k == 1); assert(d == 0x1234); break;
            case 2:     assert(k == 4); assert(d == 0xDA41); break;
            case 3:     assert(k == 4); assert(d == 0xDC56); break;
            case 4:     assert(k == 8); assert(d == 'b'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);
}

/*****************************/

/**
 * Iterate a wchar[] (UTF-16) array, passing the index and each element (as
 * char code units) to dg.  A non-ASCII element results in one call to dg
 * per UTF-8 code unit, all with the index of the start of the UTF-16
 * sequence.
 *
 * Params:
 *      aa = UTF-16 code units to iterate
 *      dg = loop body; receives a pointer to the index (size_t) and a
 *           pointer to the current char
 *
 * Returns:
 *      first non-zero result of dg, else 0
 */
extern (C) int _aApplywc2(in wchar[] aa, dg2_t dg)
{
    int result;
    size_t len = aa.length;

    debug(apply) printf("_aApplywc2(), len = %zu\n", len);
    size_t n;                           // code units consumed by this element
    for (size_t i = 0; i < len; i += n)
    {
        wchar w = aa[i];
        if (w & ~0x7F)
        {
            char[4] buf = void;         // fully overwritten by toUTF8()

            // Decode on a copy of the index so that i still names the
            // start of the sequence when it is handed to dg.
            n = i;
            dchar d = decode(aa, n);
            n -= i;
            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(&i, cast(void *)&c2);
                // return, not break: a break would only leave this inner
                // foreach and the outer loop would carry on.
                if (result)
                    return result;
            }
        }
        else
        {
            char c = cast(char)w;
            n = 1;
            result = dg(&i, cast(void *)&c);
            if (result)
                break;
        }
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplywc2.unittest\n");

    auto s = "hello"w[];
    int i;

    foreach (k, char d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(k == i);
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (k, char d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        switch (i)
        {
            case 0:     assert(k == 0); assert(d == 'a'); break;
            case 1:     assert(k == 1); assert(d == 0xE1); break;
            case 2:     assert(k == 1); assert(d == 0x88); break;
            case 3:     assert(k == 1); assert(d == 0xB4); break;
            case 4:     assert(k == 2); assert(d == 0xF2); break;
            case 5:     assert(k == 2); assert(d == 0xA0); break;
            case 6:     assert(k == 2); assert(d == 0x91); break;
            case 7:     assert(k == 2); assert(d == 0x96); break;
            case 8:     assert(k == 4); assert(d == 'b'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 9);
}

/*****************************/

/**
 * Iterate a dchar[] (UTF-32) array, passing the index and each element (as
 * char code units) to dg.  A non-ASCII element results in one call to dg
 * per UTF-8 code unit, all with the index of that element.
 *
 * Params:
 *      aa = code points to iterate
 *      dg = loop body; receives a pointer to the index (size_t) and a
 *           pointer to the current char
 *
 * Returns:
 *      first non-zero result of dg, else 0
 */
extern (C) int _aApplydc2(in dchar[] aa, dg2_t dg)
{
    int result;
    size_t len = aa.length;

    debug(apply) printf("_aApplydc2(), len = %zu\n", len);
    for (size_t i = 0; i < len; i++)
    {
        dchar d = aa[i];
        if (d & ~0x7F)
        {
            char[4] buf = void;         // fully overwritten by toUTF8()

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(&i, cast(void *)&c2);
                // return, not break: a break would only leave this inner
                // foreach and the outer loop would carry on.
                if (result)
                    return result;
            }
        }
        else
        {
            char c = cast(char)d;
            result = dg(&i, cast(void *)&c);
            if (result)
                break;
        }
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplydc2.unittest\n");

    auto s = "hello"d[];
    int i;

    foreach (k, char d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(k == i);
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (k, char d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        switch (i)
        {
            case 0:     assert(k == 0); assert(d == 'a'); break;
            case 1:     assert(k == 1); assert(d == 0xE1); break;
            case 2:     assert(k == 1); assert(d == 0x88); break;
            case 3:     assert(k == 1); assert(d == 0xB4); break;
            case 4:     assert(k == 2); assert(d == 0xF2); break;
            case 5:     assert(k == 2); assert(d == 0xA0); break;
            case 6:     assert(k == 2); assert(d == 0x91); break;
            case 7:     assert(k == 2); assert(d == 0x96); break;
            case 8:     assert(k == 3); assert(d == 'b'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 9);
}

/*****************************/

/**
 * Iterate a dchar[] (UTF-32) array, passing the index and each element (as
 * wchar code units) to dg.  A code point above 0xFFFF results in two calls
 * to dg, one per surrogate, both with the index of that element.
 *
 * Params:
 *      aa = code points to iterate
 *      dg = loop body; receives a pointer to the index (size_t) and a
 *           pointer to the current wchar
 *
 * Returns:
 *      first non-zero result of dg, else 0
 */
extern (C) int _aApplydw2(in dchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplydw2(), len = %zu\n", aa.length);
    foreach (size_t i, dchar d; aa)
    {
        wchar w;
        auto j = i;     // dg is given the address of this copy, not of i

        if (d <= 0xFFFF)
            w = cast(wchar) d;
        else
        {
            // Split into a surrogate pair: emit the high half here,
            // the low half is emitted by the common call below.
            w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
            result = dg(&j, cast(void *)&w);
            if (result)
                break;
            w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
        }
        result = dg(&j, cast(void *)&w);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplydw2.unittest\n");

    auto s = "hello"d[];
    int i;

    foreach (k, wchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(k == i);
        switch (i)
        {
            case 0:     assert(d == 'h'); break;
            case 1:     assert(d == 'e'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'l'); break;
            case 4:     assert(d == 'o'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U000A0456b";
    i = 0;
    foreach (k, wchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        switch (i)
        {
            case 0:     assert(k == 0); assert(d == 'a'); break;
            case 1:     assert(k == 1); assert(d == 0x1234); break;
            case 2:     assert(k == 2); assert(d == 0xDA41); break;
            case 3:     assert(k == 2); assert(d == 0xDC56); break;
            case 4:     assert(k == 3); assert(d == 'b'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);
}