// Written in the D programming language.

/++
    Functions which operate on ASCII characters.

    All of the functions in std._ascii accept Unicode characters but
    effectively ignore them if they're not ASCII. All $(D isX) functions return
    $(D false) for non-ASCII characters, and all $(D toX) functions do nothing
    to non-ASCII characters.

    For functions which operate on Unicode characters, see
    $(MREF std, uni).

$(SCRIPT inhibitQuickIndex = 1;)
$(DIVC quickindex,
$(BOOKTABLE,
$(TR $(TH Category) $(TH Functions))
$(TR $(TD Validation) $(TD
        $(LREF isAlpha)
        $(LREF isAlphaNum)
        $(LREF isASCII)
        $(LREF isControl)
        $(LREF isDigit)
        $(LREF isGraphical)
        $(LREF isHexDigit)
        $(LREF isLower)
        $(LREF isOctalDigit)
        $(LREF isPrintable)
        $(LREF isPunctuation)
        $(LREF isUpper)
        $(LREF isWhite)
))
$(TR $(TD Conversions) $(TD
        $(LREF toLower)
        $(LREF toUpper)
))
$(TR $(TD Constants) $(TD
        $(LREF digits)
        $(LREF fullHexDigits)
        $(LREF hexDigits)
        $(LREF letters)
        $(LREF lowercase)
        $(LREF lowerHexDigits)
        $(LREF newline)
        $(LREF octalDigits)
        $(LREF uppercase)
        $(LREF whitespace)
))
$(TR $(TD Enums) $(TD
        $(LREF LetterCase)
))
))
    References:
        $(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table),
        $(HTTP en.wikipedia.org/wiki/Ascii, Wikipedia)

    License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
    Authors: $(HTTP digitalmars.com, Walter Bright) and Jonathan M Davis
    Source: $(PHOBOSSRC std/_ascii.d)
  +/
module std.ascii;

version (unittest)
{
    // FIXME: When dmd bug #314 is fixed, make these selective.
    import std.meta;   // : AliasSeq;
    import std.range;  // : chain;
    import std.traits; // : OriginalType;
}


immutable fullHexDigits  = "0123456789ABCDEFabcdef";     /// 0 .. 9A .. Fa .. f
immutable hexDigits      = fullHexDigits[0 .. 16];         /// 0 .. 9A .. F
immutable lowerHexDigits = "0123456789abcdef";             /// 0 .. 9a .. f
immutable digits         = hexDigits[0 .. 10];             /// 0 .. 9
immutable octalDigits    = digits[0 .. 8];                 /// 0 .. 7
immutable letters        = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; /// A .. Za .. z
immutable uppercase      = letters[0 .. 26];               /// A .. Z
immutable lowercase      = letters[26 .. 52];              /// a .. z
immutable whitespace     = " \t\v\r\n\f";                  /// ASCII _whitespace

/++
    Letter case specifier.
  +/
enum LetterCase : bool
{
    upper, /// Upper case letters
    lower  /// Lower case letters
}

///
@safe unittest
{
    import std.conv : to;

    assert(42.to!string(16, LetterCase.upper) == "2A");
    assert(42.to!string(16, LetterCase.lower) == "2a");
}

///
@system unittest
{
    import std.digest.hmac : hmac;
    import std.digest.digest : toHexString;
    import std.digest.sha : SHA1;
    import std.string : representation;

    const sha1HMAC = "A very long phrase".representation
        .hmac!SHA1("secret".representation)
        .toHexString!(LetterCase.lower);
    assert(sha1HMAC == "49f2073c7bf58577e8c9ae59fe8cfd37c9ab94e5");
}

/// Newline sequence for this system.
version (Windows)
    immutable newline = "\r\n";
else version (Posix)
    immutable newline = "\n";
else
    static assert(0, "Unsupported OS");


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a letter or a number (0 .. 9, a .. z, A .. Z).
  +/
bool isAlphaNum(dchar c) @safe pure nothrow @nogc
{
    // First bound c to '0' .. 'z'; inside that window everything up to '9' is
    // a digit, everything from 'a' up is a lowercase letter, and the only
    // other accepted run is 'A' .. 'Z'.
    return c <= 'z' && c >= '0' && (c <= '9' || c >= 'a' || (c >= 'A' && c <= 'Z'));
}

///
@safe pure nothrow @nogc unittest
{
    assert( isAlphaNum('A'));
    assert( isAlphaNum('1'));
    assert(!isAlphaNum('#'));

    // N.B.: does not return true for non-ASCII Unicode alphanumerics:
    assert(!isAlphaNum('á'));
}

@safe unittest
{
    foreach (c; chain(digits, octalDigits, fullHexDigits, letters, lowercase, uppercase))
        assert(isAlphaNum(c));

    foreach (c; whitespace)
        assert(!isAlphaNum(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is an ASCII letter (A .. Z, a .. z).
  +/
bool isAlpha(dchar c) @safe pure nothrow @nogc
{
    // Optimizer can turn this into a bitmask operation on 64 bit code
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

///
@safe pure nothrow @nogc unittest
{
    assert( isAlpha('A'));
    assert(!isAlpha('1'));
    assert(!isAlpha('#'));

    // N.B.: does not return true for non-ASCII Unicode alphabetic characters:
    assert(!isAlpha('á'));
}

@safe unittest
{
    foreach (c; chain(letters, lowercase, uppercase))
        assert(isAlpha(c));

    foreach (c; chain(digits, octalDigits, whitespace))
        assert(!isAlpha(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a lowercase ASCII letter (a .. z).
  +/
bool isLower(dchar c) @safe pure nothrow @nogc
{
    return c >= 'a' && c <= 'z';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isLower('a'));
    assert(!isLower('A'));
    assert(!isLower('#'));

    // N.B.: does not return true for non-ASCII Unicode lowercase letters
    assert(!isLower('á'));
    assert(!isLower('Á'));
}

@safe unittest
{
    foreach (c; lowercase)
        assert(isLower(c));

    foreach (c; chain(digits, uppercase, whitespace))
        assert(!isLower(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is an uppercase ASCII letter (A .. Z).
  +/
bool isUpper(dchar c) @safe pure nothrow @nogc
{
    return c <= 'Z' && 'A' <= c;
}

///
@safe pure nothrow @nogc unittest
{
    assert( isUpper('A'));
    assert(!isUpper('a'));
    assert(!isUpper('#'));

    // N.B.: does not return true for non-ASCII Unicode uppercase letters
    assert(!isUpper('á'));
    assert(!isUpper('Á'));
}

@safe unittest
{
    foreach (c; uppercase)
        assert(isUpper(c));

    foreach (c; chain(digits, lowercase, whitespace))
        assert(!isUpper(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a digit (0 .. 9).
  +/
bool isDigit(dchar c) @safe pure nothrow @nogc
{
    return '0' <= c && c <= '9';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isDigit('3'));
    assert( isDigit('8'));
    assert(!isDigit('B'));
    assert(!isDigit('#'));

    // N.B.: does not return true for non-ASCII Unicode numbers.
    // Escapes are used so the code points survive any re-encoding of this file.
    assert(!isDigit('\uFF10')); // full-width digit zero (U+FF10)
    assert(!isDigit('\uFF14')); // full-width digit four (U+FF14)
}

@safe unittest
{
    foreach (c; digits)
        assert(isDigit(c));

    foreach (c; chain(letters, whitespace))
        assert(!isDigit(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a digit in base 8 (0 .. 7).
  +/
bool isOctalDigit(dchar c) @safe pure nothrow @nogc
{
    return c >= '0' && c <= '7';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isOctalDigit('0'));
    assert( isOctalDigit('7'));
    assert(!isOctalDigit('8'));
    assert(!isOctalDigit('A'));
    assert(!isOctalDigit('#'));
}

@safe unittest
{
    foreach (c; octalDigits)
        assert(isOctalDigit(c));

    foreach (c; chain(letters, ['8', '9'], whitespace))
        assert(!isOctalDigit(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a digit in base 16 (0 .. 9, A .. F, a .. f).
  +/
bool isHexDigit(dchar c) @safe pure nothrow @nogc
{
    // Same windowing scheme as isAlphaNum, with the letter runs cut at F / f.
    return c <= 'f' && c >= '0' && (c <= '9' || c >= 'a' || (c >= 'A' && c <= 'F'));
}

///
@safe pure nothrow @nogc unittest
{
    assert( isHexDigit('0'));
    assert( isHexDigit('A'));
    assert( isHexDigit('f')); // lowercase hex digits are accepted
    assert(!isHexDigit('g'));
    assert(!isHexDigit('G'));
    assert(!isHexDigit('#'));
}

@safe unittest
{
    foreach (c; fullHexDigits)
        assert(isHexDigit(c));

    foreach (c; chain(lowercase[6 .. $], uppercase[6 .. $], whitespace))
        assert(!isHexDigit(c));
}


/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is a whitespace character. That includes the
    space, tab, vertical tab, form feed, carriage return, and linefeed
    characters.
  +/
bool isWhite(dchar c) @safe pure nothrow @nogc
{
    // 0x09 .. 0x0D is the contiguous run \t, \n, \v, \f, \r.
    return c == ' ' || (c >= 0x09 && c <= 0x0D);
}

///
@safe pure nothrow @nogc unittest
{
    assert( isWhite(' '));
    assert( isWhite('\t'));
    assert( isWhite('\n'));
    assert(!isWhite('1'));
    assert(!isWhite('a'));
    assert(!isWhite('#'));

    // N.B.: Does not return true for non-ASCII Unicode whitespace characters.
    static import std.uni;
    assert(std.uni.isWhite('\u00A0'));
    assert(!isWhite('\u00A0')); // std.ascii.isWhite
}

@safe unittest
{
    foreach (c; whitespace)
        assert(isWhite(c));

    foreach (c; chain(digits, letters))
        assert(!isWhite(c));
}


/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a control character, i.e. below 0x20 or equal
    to 0x7F.
  +/
bool isControl(dchar c) @safe pure nothrow @nogc
{
    return c < 0x20 || c == 0x7F;
}

///
@safe pure nothrow @nogc unittest
{
    assert( isControl('\0'));
    assert( isControl('\022'));
    assert( isControl('\n')); // newline is both whitespace and control
    assert(!isControl(' '));
    assert(!isControl('1'));
    assert(!isControl('a'));
    assert(!isControl('#'));

    // N.B.: non-ASCII Unicode control characters are not recognized:
    assert(!isControl('\u0080'));
    assert(!isControl('\u2028'));
    assert(!isControl('\u2029'));
}

@safe unittest
{
    foreach (dchar c; 0 .. 32)
        assert(isControl(c));
    assert(isControl(127));

    foreach (c; chain(digits, letters, [' ']))
        assert(!isControl(c));
}


/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is a punctuation character. That includes
    all ASCII characters which are not control characters, letters, digits, or
    whitespace.
  +/
bool isPunctuation(dchar c) @safe pure nothrow @nogc
{
    // '!' .. '~' is the graphical range; remove letters and digits from it.
    return c <= '~' && c >= '!' && !isAlphaNum(c);
}

///
@safe pure nothrow @nogc unittest
{
    assert( isPunctuation('.'));
    assert( isPunctuation(','));
    assert( isPunctuation(':'));
    assert( isPunctuation('!'));
    assert( isPunctuation('#'));
    assert( isPunctuation('~'));
    assert( isPunctuation('+'));
    assert( isPunctuation('_'));

    assert(!isPunctuation('1'));
    assert(!isPunctuation('a'));
    assert(!isPunctuation(' '));
    assert(!isPunctuation('\n'));
    assert(!isPunctuation('\0'));

    // N.B.: Non-ASCII Unicode punctuation characters are not recognized.
    assert(!isPunctuation('\u2012')); // (U+2012 = figure dash)
}

@safe unittest
{
    foreach (dchar c; 0 .. 128)
    {
        if (isControl(c) || isAlphaNum(c) || c == ' ')
            assert(!isPunctuation(c));
        else
            assert(isPunctuation(c));
    }
}


/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is a printable character other than the
    space character.
  +/
bool isGraphical(dchar c) @safe pure nothrow @nogc
{
    return '!' <= c && c <= '~';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isGraphical('1'));
    assert( isGraphical('a'));
    assert( isGraphical('#'));
    assert(!isGraphical(' ')); // whitespace is not graphical
    assert(!isGraphical('\n'));
    assert(!isGraphical('\0'));

    // N.B.: Unicode graphical characters are not regarded as such.
    assert(!isGraphical('á'));
}

@safe unittest
{
    foreach (dchar c; 0 .. 128)
    {
        if (isControl(c) || c == ' ')
            assert(!isGraphical(c));
        else
            assert(isGraphical(c));
    }
}


/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is a printable character - including the
    space character.
  +/
bool isPrintable(dchar c) @safe pure nothrow @nogc
{
    return c >= ' ' && c <= '~';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isPrintable(' ')); // whitespace is printable
    assert( isPrintable('1'));
    assert( isPrintable('a'));
    assert( isPrintable('#'));
    assert(!isPrintable('\0')); // control characters are not printable

    // N.B.: Printable non-ASCII Unicode characters are not recognized.
    assert(!isPrintable('á'));
}

@safe unittest
{
    foreach (dchar c; 0 .. 128)
    {
        if (isControl(c))
            assert(!isPrintable(c));
        else
            assert(isPrintable(c));
    }
}


/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is in the ASCII character set - i.e. in the
    range 0 .. 0x7F.
  +/
pragma(inline, true)
bool isASCII(dchar c) @safe pure nothrow @nogc
{
    return c <= 0x7F;
}

///
@safe pure nothrow @nogc unittest
{
    assert( isASCII('a'));
    assert(!isASCII('á'));
}

@safe unittest
{
    foreach (dchar c; 0 .. 128)
        assert(isASCII(c));

    assert(!isASCII(128));
}


/++
    Converts an ASCII letter to lowercase.

    Params: c = A character of any type that implicitly converts to $(D dchar).
    In the case where it's a built-in type, or an enum of a built-in type,
    $(D Unqual!(OriginalType!C)) is returned, whereas if it's a user-defined
    type, $(D dchar) is returned.

    Returns: The corresponding lowercase letter, if $(D c) is an uppercase
    ASCII character, otherwise $(D c) itself.
  +/
auto toLower(C)(C c)
if (is(C : dchar))
{
    import std.traits : isAggregateType, OriginalType, Unqual;

    // Pick the result type: aggregates (structs with alias this, and enums
    // based on them) collapse to dchar; everything else keeps its unqualified
    // base type.
    alias OC = OriginalType!C;
    static if (isAggregateType!OC)
        alias R = dchar;
    else
        alias R = Unqual!OC;

    return isUpper(c) ? cast(R)(cast(R) c + 'a' - 'A') : cast(R) c;
}

///
@safe pure nothrow @nogc unittest
{
    assert(toLower('a') == 'a');
    assert(toLower('A') == 'a');
    assert(toLower('#') == '#');

    // N.B.: Non-ASCII Unicode uppercase letters are not converted.
    assert(toLower('Á') == 'Á');
}

@safe pure nothrow unittest
{
    foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        foreach (i, c; uppercase)
            assert(toLower(cast(C) c) == lowercase[i]);

        foreach (C c; 0 .. 128)
        {
            if (c < 'A' || c > 'Z')
                assert(toLower(c) == c);
            else
                assert(toLower(c) != c);
        }

        foreach (C c; 128 .. C.max)
            assert(toLower(c) == c);

        // The range above excludes its upper bound, so check it explicitly.
        assert(toLower(C.max) == C.max);

        //CTFE
        static assert(toLower(cast(C)'a') == 'a');
        static assert(toLower(cast(C)'A') == 'a');
    }
}


/++
    Converts an ASCII letter to uppercase.

    Params: c = Any type which implicitly converts to $(D dchar). In the case
    where it's a built-in type, or an enum of a built-in type,
    $(D Unqual!(OriginalType!C)) is returned, whereas if it's a user-defined
    type, $(D dchar) is returned.

    Returns: The corresponding uppercase letter, if $(D c) is a lowercase ASCII
    character, otherwise $(D c) itself.
  +/
auto toUpper(C)(C c)
if (is(C : dchar))
{
    import std.traits : isAggregateType, OriginalType, Unqual;

    // Result type selection mirrors toLower.
    alias OC = OriginalType!C;
    static if (isAggregateType!OC)
        alias R = dchar;
    else
        alias R = Unqual!OC;

    return isLower(c) ? cast(R)(cast(R) c - ('a' - 'A')) : cast(R) c;
}

///
@safe pure nothrow @nogc unittest
{
    assert(toUpper('a') == 'A');
    assert(toUpper('A') == 'A');
    assert(toUpper('#') == '#');

    // N.B.: Non-ASCII Unicode lowercase letters are not converted.
    assert(toUpper('á') == 'á');
}

@safe pure nothrow unittest
{
    foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        foreach (i, c; lowercase)
            assert(toUpper(cast(C) c) == uppercase[i]);

        foreach (C c; 0 .. 128)
        {
            if (c < 'a' || c > 'z')
                assert(toUpper(c) == c);
            else
                assert(toUpper(c) != c);
        }

        foreach (C c; 128 .. C.max)
            assert(toUpper(c) == c);

        // The range above excludes its upper bound, so check it explicitly.
        assert(toUpper(C.max) == C.max);

        //CTFE
        static assert(toUpper(cast(C)'a') == 'A');
        static assert(toUpper(cast(C)'A') == 'A');
    }
}


@safe unittest //Test both toUpper and toLower with non-builtin
{
    //User Defined [Char|Wchar|Dchar]
    static struct UDC { char c; alias c this; }
    static struct UDW { wchar c; alias c this; }
    static struct UDD { dchar c; alias c this; }
    //[Char|Wchar|Dchar] Enum
    enum CE : char {a = 'a', A = 'A'}
    enum WE : wchar {a = 'a', A = 'A'}
    enum DE : dchar {a = 'a', A = 'A'}
    //User Defined [Char|Wchar|Dchar] Enum
    enum UDCE : UDC {a = UDC('a'), A = UDC('A')}
    enum UDWE : UDW {a = UDW('a'), A = UDW('A')}
    enum UDDE : UDD {a = UDD('a'), A = UDD('A')}

    //User defined types with implicit cast to dchar test.
    foreach (Char; AliasSeq!(UDC, UDW, UDD))
    {
        assert(toLower(Char('a')) == 'a');
        assert(toLower(Char('A')) == 'a');
        static assert(toLower(Char('a')) == 'a');
        static assert(toLower(Char('A')) == 'a');
        static assert(toUpper(Char('a')) == 'A');
        static assert(toUpper(Char('A')) == 'A');
    }

    //Various enum tests.
    foreach (Enum; AliasSeq!(CE, WE, DE, UDCE, UDWE, UDDE))
    {
        assert(toLower(Enum.a) == 'a');
        assert(toLower(Enum.A) == 'a');
        assert(toUpper(Enum.a) == 'A');
        assert(toUpper(Enum.A) == 'A');
        static assert(toLower(Enum.a) == 'a');
        static assert(toLower(Enum.A) == 'a');
        static assert(toUpper(Enum.a) == 'A');
        static assert(toUpper(Enum.A) == 'A');
    }

    //Return value type tests for enum of non-UDT. These should be the original type.
    foreach (T; AliasSeq!(CE, WE, DE))
    {
        alias C = OriginalType!T;
        static assert(is(typeof(toLower(T.init)) == C));
        static assert(is(typeof(toUpper(T.init)) == C));
    }

    //Return value tests for UDT and enum of UDT. These should be dchar
    foreach (T; AliasSeq!(UDC, UDW, UDD, UDCE, UDWE, UDDE))
    {
        static assert(is(typeof(toLower(T.init)) == dchar));
        static assert(is(typeof(toUpper(T.init)) == dchar));
    }
}