1 //===- unittests/Basic/CharInfoTest.cpp -- ASCII classification tests -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Basic/CharInfo.h" 10 #include "gtest/gtest.h" 11 12 using namespace llvm; 13 using namespace clang; 14 15 // Check that the CharInfo table has been constructed reasonably. 16 TEST(CharInfoTest, validateInfoTable) { 17 using namespace charinfo; 18 EXPECT_EQ((unsigned)CHAR_SPACE, InfoTable[(unsigned)' ']); 19 EXPECT_EQ((unsigned)CHAR_HORZ_WS, InfoTable[(unsigned)'\t']); 20 EXPECT_EQ((unsigned)CHAR_HORZ_WS, InfoTable[(unsigned)'\f']); // ?? 21 EXPECT_EQ((unsigned)CHAR_HORZ_WS, InfoTable[(unsigned)'\v']); // ?? 22 EXPECT_EQ((unsigned)CHAR_VERT_WS, InfoTable[(unsigned)'\n']); 23 EXPECT_EQ((unsigned)CHAR_VERT_WS, InfoTable[(unsigned)'\r']); 24 EXPECT_EQ((unsigned)CHAR_UNDER, InfoTable[(unsigned)'_']); 25 EXPECT_EQ((unsigned)CHAR_PERIOD, InfoTable[(unsigned)'.']); 26 27 for (unsigned i = 'a'; i <= 'f'; ++i) { 28 EXPECT_EQ((unsigned)CHAR_XLOWER, InfoTable[i]); 29 EXPECT_EQ((unsigned)CHAR_XUPPER, InfoTable[i+'A'-'a']); 30 } 31 32 for (unsigned i = 'g'; i <= 'z'; ++i) { 33 EXPECT_EQ((unsigned)CHAR_LOWER, InfoTable[i]); 34 EXPECT_EQ((unsigned)CHAR_UPPER, InfoTable[i+'A'-'a']); 35 } 36 37 for (unsigned i = '0'; i <= '9'; ++i) 38 EXPECT_EQ((unsigned)CHAR_DIGIT, InfoTable[i]); 39 } 40 41 // Check various predicates. 42 TEST(CharInfoTest, isASCII) { 43 EXPECT_TRUE(isASCII('\0')); 44 EXPECT_TRUE(isASCII('\n')); 45 EXPECT_TRUE(isASCII(' ')); 46 EXPECT_TRUE(isASCII('a')); 47 EXPECT_TRUE(isASCII('\x7f')); 48 EXPECT_FALSE(isASCII('\x80')); 49 EXPECT_FALSE(isASCII('\xc2')); 50 EXPECT_FALSE(isASCII('\xff')); 51 } 52 53 TEST(CharInfoTest, isIdentifierHead) { 54 EXPECT_TRUE(isIdentifierHead('a')); 55 EXPECT_TRUE(isIdentifierHead('A')); 56 EXPECT_TRUE(isIdentifierHead('z')); 57 EXPECT_TRUE(isIdentifierHead('Z')); 58 EXPECT_TRUE(isIdentifierHead('_')); 59 60 EXPECT_FALSE(isIdentifierHead('0')); 61 EXPECT_FALSE(isIdentifierHead('.')); 62 EXPECT_FALSE(isIdentifierHead('`')); 63 EXPECT_FALSE(isIdentifierHead('\0')); 64 65 EXPECT_FALSE(isIdentifierHead('$')); 66 EXPECT_TRUE(isIdentifierHead('$', /*AllowDollar=*/true)); 67 68 EXPECT_FALSE(isIdentifierHead('\x80')); 69 EXPECT_FALSE(isIdentifierHead('\xc2')); 70 EXPECT_FALSE(isIdentifierHead('\xff')); 71 } 72 73 TEST(CharInfoTest, isIdentifierBody) { 74 EXPECT_TRUE(isIdentifierBody('a')); 75 EXPECT_TRUE(isIdentifierBody('A')); 76 EXPECT_TRUE(isIdentifierBody('z')); 77 EXPECT_TRUE(isIdentifierBody('Z')); 78 EXPECT_TRUE(isIdentifierBody('_')); 79 80 EXPECT_TRUE(isIdentifierBody('0')); 81 EXPECT_FALSE(isIdentifierBody('.')); 82 EXPECT_FALSE(isIdentifierBody('`')); 83 EXPECT_FALSE(isIdentifierBody('\0')); 84 85 EXPECT_FALSE(isIdentifierBody('$')); 86 EXPECT_TRUE(isIdentifierBody('$', /*AllowDollar=*/true)); 87 88 EXPECT_FALSE(isIdentifierBody('\x80')); 89 EXPECT_FALSE(isIdentifierBody('\xc2')); 90 EXPECT_FALSE(isIdentifierBody('\xff')); 91 } 92 93 TEST(CharInfoTest, isHorizontalWhitespace) { 94 EXPECT_FALSE(isHorizontalWhitespace('a')); 95 EXPECT_FALSE(isHorizontalWhitespace('_')); 96 EXPECT_FALSE(isHorizontalWhitespace('0')); 97 EXPECT_FALSE(isHorizontalWhitespace('.')); 98 EXPECT_FALSE(isHorizontalWhitespace('`')); 99 EXPECT_FALSE(isHorizontalWhitespace('\0')); 100 EXPECT_FALSE(isHorizontalWhitespace('\x7f')); 101 102 EXPECT_TRUE(isHorizontalWhitespace(' ')); 103 EXPECT_TRUE(isHorizontalWhitespace('\t')); 104 EXPECT_TRUE(isHorizontalWhitespace('\f')); // ?? 105 EXPECT_TRUE(isHorizontalWhitespace('\v')); // ?? 106 107 EXPECT_FALSE(isHorizontalWhitespace('\n')); 108 EXPECT_FALSE(isHorizontalWhitespace('\r')); 109 110 EXPECT_FALSE(isHorizontalWhitespace('\x80')); 111 EXPECT_FALSE(isHorizontalWhitespace('\xc2')); 112 EXPECT_FALSE(isHorizontalWhitespace('\xff')); 113 } 114 115 TEST(CharInfoTest, isVerticalWhitespace) { 116 EXPECT_FALSE(isVerticalWhitespace('a')); 117 EXPECT_FALSE(isVerticalWhitespace('_')); 118 EXPECT_FALSE(isVerticalWhitespace('0')); 119 EXPECT_FALSE(isVerticalWhitespace('.')); 120 EXPECT_FALSE(isVerticalWhitespace('`')); 121 EXPECT_FALSE(isVerticalWhitespace('\0')); 122 EXPECT_FALSE(isVerticalWhitespace('\x7f')); 123 124 EXPECT_FALSE(isVerticalWhitespace(' ')); 125 EXPECT_FALSE(isVerticalWhitespace('\t')); 126 EXPECT_FALSE(isVerticalWhitespace('\f')); // ?? 127 EXPECT_FALSE(isVerticalWhitespace('\v')); // ?? 128 129 EXPECT_TRUE(isVerticalWhitespace('\n')); 130 EXPECT_TRUE(isVerticalWhitespace('\r')); 131 132 EXPECT_FALSE(isVerticalWhitespace('\x80')); 133 EXPECT_FALSE(isVerticalWhitespace('\xc2')); 134 EXPECT_FALSE(isVerticalWhitespace('\xff')); 135 } 136 137 TEST(CharInfoTest, isWhitespace) { 138 EXPECT_FALSE(isWhitespace('a')); 139 EXPECT_FALSE(isWhitespace('_')); 140 EXPECT_FALSE(isWhitespace('0')); 141 EXPECT_FALSE(isWhitespace('.')); 142 EXPECT_FALSE(isWhitespace('`')); 143 EXPECT_FALSE(isWhitespace('\0')); 144 EXPECT_FALSE(isWhitespace('\x7f')); 145 146 EXPECT_TRUE(isWhitespace(' ')); 147 EXPECT_TRUE(isWhitespace('\t')); 148 EXPECT_TRUE(isWhitespace('\f')); 149 EXPECT_TRUE(isWhitespace('\v')); 150 151 EXPECT_TRUE(isWhitespace('\n')); 152 EXPECT_TRUE(isWhitespace('\r')); 153 154 EXPECT_FALSE(isWhitespace('\x80')); 155 EXPECT_FALSE(isWhitespace('\xc2')); 156 EXPECT_FALSE(isWhitespace('\xff')); 157 } 158 159 TEST(CharInfoTest, isDigit) { 160 EXPECT_TRUE(isDigit('0')); 161 EXPECT_TRUE(isDigit('9')); 162 163 EXPECT_FALSE(isDigit('a')); 164 EXPECT_FALSE(isDigit('A')); 165 166 EXPECT_FALSE(isDigit('z')); 167 EXPECT_FALSE(isDigit('Z')); 168 169 EXPECT_FALSE(isDigit('.')); 170 EXPECT_FALSE(isDigit('_')); 171 172 EXPECT_FALSE(isDigit('/')); 173 EXPECT_FALSE(isDigit('\0')); 174 175 EXPECT_FALSE(isDigit('\x80')); 176 EXPECT_FALSE(isDigit('\xc2')); 177 EXPECT_FALSE(isDigit('\xff')); 178 } 179 180 TEST(CharInfoTest, isHexDigit) { 181 EXPECT_TRUE(isHexDigit('0')); 182 EXPECT_TRUE(isHexDigit('9')); 183 184 EXPECT_TRUE(isHexDigit('a')); 185 EXPECT_TRUE(isHexDigit('A')); 186 187 EXPECT_FALSE(isHexDigit('z')); 188 EXPECT_FALSE(isHexDigit('Z')); 189 190 EXPECT_FALSE(isHexDigit('.')); 191 EXPECT_FALSE(isHexDigit('_')); 192 193 EXPECT_FALSE(isHexDigit('/')); 194 EXPECT_FALSE(isHexDigit('\0')); 195 196 EXPECT_FALSE(isHexDigit('\x80')); 197 EXPECT_FALSE(isHexDigit('\xc2')); 198 EXPECT_FALSE(isHexDigit('\xff')); 199 } 200 201 TEST(CharInfoTest, isLetter) { 202 EXPECT_FALSE(isLetter('0')); 203 EXPECT_FALSE(isLetter('9')); 204 205 EXPECT_TRUE(isLetter('a')); 206 EXPECT_TRUE(isLetter('A')); 207 208 EXPECT_TRUE(isLetter('z')); 209 EXPECT_TRUE(isLetter('Z')); 210 211 EXPECT_FALSE(isLetter('.')); 212 EXPECT_FALSE(isLetter('_')); 213 214 EXPECT_FALSE(isLetter('/')); 215 EXPECT_FALSE(isLetter('(')); 216 EXPECT_FALSE(isLetter('\0')); 217 218 EXPECT_FALSE(isLetter('\x80')); 219 EXPECT_FALSE(isLetter('\xc2')); 220 EXPECT_FALSE(isLetter('\xff')); 221 } 222 223 TEST(CharInfoTest, isLowercase) { 224 EXPECT_FALSE(isLowercase('0')); 225 EXPECT_FALSE(isLowercase('9')); 226 227 EXPECT_TRUE(isLowercase('a')); 228 EXPECT_FALSE(isLowercase('A')); 229 230 EXPECT_TRUE(isLowercase('z')); 231 EXPECT_FALSE(isLowercase('Z')); 232 233 EXPECT_FALSE(isLowercase('.')); 234 EXPECT_FALSE(isLowercase('_')); 235 236 EXPECT_FALSE(isLowercase('/')); 237 EXPECT_FALSE(isLowercase('(')); 238 EXPECT_FALSE(isLowercase('\0')); 239 240 EXPECT_FALSE(isLowercase('\x80')); 241 EXPECT_FALSE(isLowercase('\xc2')); 242 EXPECT_FALSE(isLowercase('\xff')); 243 } 244 245 TEST(CharInfoTest, isUppercase) { 246 EXPECT_FALSE(isUppercase('0')); 247 EXPECT_FALSE(isUppercase('9')); 248 249 EXPECT_FALSE(isUppercase('a')); 250 EXPECT_TRUE(isUppercase('A')); 251 252 EXPECT_FALSE(isUppercase('z')); 253 EXPECT_TRUE(isUppercase('Z')); 254 255 EXPECT_FALSE(isUppercase('.')); 256 EXPECT_FALSE(isUppercase('_')); 257 258 EXPECT_FALSE(isUppercase('/')); 259 EXPECT_FALSE(isUppercase('(')); 260 EXPECT_FALSE(isUppercase('\0')); 261 262 EXPECT_FALSE(isUppercase('\x80')); 263 EXPECT_FALSE(isUppercase('\xc2')); 264 EXPECT_FALSE(isUppercase('\xff')); 265 } 266 267 TEST(CharInfoTest, isAlphanumeric) { 268 EXPECT_TRUE(isAlphanumeric('0')); 269 EXPECT_TRUE(isAlphanumeric('9')); 270 271 EXPECT_TRUE(isAlphanumeric('a')); 272 EXPECT_TRUE(isAlphanumeric('A')); 273 274 EXPECT_TRUE(isAlphanumeric('z')); 275 EXPECT_TRUE(isAlphanumeric('Z')); 276 277 EXPECT_FALSE(isAlphanumeric('.')); 278 EXPECT_FALSE(isAlphanumeric('_')); 279 280 EXPECT_FALSE(isAlphanumeric('/')); 281 EXPECT_FALSE(isAlphanumeric('(')); 282 EXPECT_FALSE(isAlphanumeric('\0')); 283 284 EXPECT_FALSE(isAlphanumeric('\x80')); 285 EXPECT_FALSE(isAlphanumeric('\xc2')); 286 EXPECT_FALSE(isAlphanumeric('\xff')); 287 } 288 289 TEST(CharInfoTest, isPunctuation) { 290 EXPECT_FALSE(isPunctuation('0')); 291 EXPECT_FALSE(isPunctuation('9')); 292 293 EXPECT_FALSE(isPunctuation('a')); 294 EXPECT_FALSE(isPunctuation('A')); 295 296 EXPECT_FALSE(isPunctuation('z')); 297 EXPECT_FALSE(isPunctuation('Z')); 298 299 EXPECT_TRUE(isPunctuation('.')); 300 EXPECT_TRUE(isPunctuation('_')); 301 302 EXPECT_TRUE(isPunctuation('/')); 303 EXPECT_TRUE(isPunctuation('(')); 304 305 EXPECT_FALSE(isPunctuation(' ')); 306 EXPECT_FALSE(isPunctuation('\n')); 307 EXPECT_FALSE(isPunctuation('\0')); 308 309 EXPECT_FALSE(isPunctuation('\x80')); 310 EXPECT_FALSE(isPunctuation('\xc2')); 311 EXPECT_FALSE(isPunctuation('\xff')); 312 } 313 314 TEST(CharInfoTest, isPrintable) { 315 EXPECT_TRUE(isPrintable('0')); 316 EXPECT_TRUE(isPrintable('9')); 317 318 EXPECT_TRUE(isPrintable('a')); 319 EXPECT_TRUE(isPrintable('A')); 320 321 EXPECT_TRUE(isPrintable('z')); 322 EXPECT_TRUE(isPrintable('Z')); 323 324 EXPECT_TRUE(isPrintable('.')); 325 EXPECT_TRUE(isPrintable('_')); 326 327 EXPECT_TRUE(isPrintable('/')); 328 EXPECT_TRUE(isPrintable('(')); 329 330 EXPECT_TRUE(isPrintable(' ')); 331 EXPECT_FALSE(isPrintable('\t')); 332 EXPECT_FALSE(isPrintable('\n')); 333 EXPECT_FALSE(isPrintable('\0')); 334 335 EXPECT_FALSE(isPrintable('\x80')); 336 EXPECT_FALSE(isPrintable('\xc2')); 337 EXPECT_FALSE(isPrintable('\xff')); 338 } 339 340 TEST(CharInfoTest, isPreprocessingNumberBody) { 341 EXPECT_TRUE(isPreprocessingNumberBody('0')); 342 EXPECT_TRUE(isPreprocessingNumberBody('9')); 343 344 EXPECT_TRUE(isPreprocessingNumberBody('a')); 345 EXPECT_TRUE(isPreprocessingNumberBody('A')); 346 347 EXPECT_TRUE(isPreprocessingNumberBody('z')); 348 EXPECT_TRUE(isPreprocessingNumberBody('Z')); 349 EXPECT_TRUE(isPreprocessingNumberBody('.')); 350 EXPECT_TRUE(isPreprocessingNumberBody('_')); 351 352 EXPECT_FALSE(isPreprocessingNumberBody('/')); 353 EXPECT_FALSE(isPreprocessingNumberBody('(')); 354 EXPECT_FALSE(isPreprocessingNumberBody('\0')); 355 356 EXPECT_FALSE(isPreprocessingNumberBody('\x80')); 357 EXPECT_FALSE(isPreprocessingNumberBody('\xc2')); 358 EXPECT_FALSE(isPreprocessingNumberBody('\xff')); 359 } 360 361 TEST(CharInfoTest, isRawStringDelimBody) { 362 EXPECT_TRUE(isRawStringDelimBody('0')); 363 EXPECT_TRUE(isRawStringDelimBody('9')); 364 365 EXPECT_TRUE(isRawStringDelimBody('a')); 366 EXPECT_TRUE(isRawStringDelimBody('A')); 367 368 EXPECT_TRUE(isRawStringDelimBody('z')); 369 EXPECT_TRUE(isRawStringDelimBody('Z')); 370 EXPECT_TRUE(isRawStringDelimBody('.')); 371 EXPECT_TRUE(isRawStringDelimBody('_')); 372 373 EXPECT_TRUE(isRawStringDelimBody('/')); 374 EXPECT_FALSE(isRawStringDelimBody('(')); 375 EXPECT_FALSE(isRawStringDelimBody('\0')); 376 377 EXPECT_FALSE(isRawStringDelimBody('\x80')); 378 EXPECT_FALSE(isRawStringDelimBody('\xc2')); 379 EXPECT_FALSE(isRawStringDelimBody('\xff')); 380 } 381 382 TEST(CharInfoTest, toLowercase) { 383 EXPECT_EQ('0', toLowercase('0')); 384 EXPECT_EQ('9', toLowercase('9')); 385 386 EXPECT_EQ('a', toLowercase('a')); 387 EXPECT_EQ('a', toLowercase('A')); 388 389 EXPECT_EQ('z', toLowercase('z')); 390 EXPECT_EQ('z', toLowercase('Z')); 391 392 EXPECT_EQ('.', toLowercase('.')); 393 EXPECT_EQ('_', toLowercase('_')); 394 395 EXPECT_EQ('/', toLowercase('/')); 396 EXPECT_EQ('\0', toLowercase('\0')); 397 } 398 399 TEST(CharInfoTest, toUppercase) { 400 EXPECT_EQ('0', toUppercase('0')); 401 EXPECT_EQ('9', toUppercase('9')); 402 403 EXPECT_EQ('A', toUppercase('a')); 404 EXPECT_EQ('A', toUppercase('A')); 405 406 EXPECT_EQ('Z', toUppercase('z')); 407 EXPECT_EQ('Z', toUppercase('Z')); 408 409 EXPECT_EQ('.', toUppercase('.')); 410 EXPECT_EQ('_', toUppercase('_')); 411 412 EXPECT_EQ('/', toUppercase('/')); 413 EXPECT_EQ('\0', toUppercase('\0')); 414 } 415 416 TEST(CharInfoTest, isValidIdentifier) { 417 EXPECT_FALSE(isValidIdentifier("")); 418 419 // 1 character 420 EXPECT_FALSE(isValidIdentifier(".")); 421 EXPECT_FALSE(isValidIdentifier("\n")); 422 EXPECT_FALSE(isValidIdentifier(" ")); 423 EXPECT_FALSE(isValidIdentifier("\x80")); 424 EXPECT_FALSE(isValidIdentifier("\xc2")); 425 EXPECT_FALSE(isValidIdentifier("\xff")); 426 EXPECT_FALSE(isValidIdentifier("$")); 427 EXPECT_FALSE(isValidIdentifier("1")); 428 429 EXPECT_TRUE(isValidIdentifier("_")); 430 EXPECT_TRUE(isValidIdentifier("a")); 431 EXPECT_TRUE(isValidIdentifier("z")); 432 EXPECT_TRUE(isValidIdentifier("A")); 433 EXPECT_TRUE(isValidIdentifier("Z")); 434 EXPECT_TRUE(isValidIdentifier("$", /*AllowDollar=*/true)); 435 436 // 2 characters, '_' suffix 437 EXPECT_FALSE(isValidIdentifier("._")); 438 EXPECT_FALSE(isValidIdentifier("\n_")); 439 EXPECT_FALSE(isValidIdentifier(" _")); 440 EXPECT_FALSE(isValidIdentifier("\x80_")); 441 EXPECT_FALSE(isValidIdentifier("\xc2_")); 442 EXPECT_FALSE(isValidIdentifier("\xff_")); 443 EXPECT_FALSE(isValidIdentifier("$_")); 444 EXPECT_FALSE(isValidIdentifier("1_")); 445 446 EXPECT_TRUE(isValidIdentifier("__")); 447 EXPECT_TRUE(isValidIdentifier("a_")); 448 EXPECT_TRUE(isValidIdentifier("z_")); 449 EXPECT_TRUE(isValidIdentifier("A_")); 450 EXPECT_TRUE(isValidIdentifier("Z_")); 451 EXPECT_TRUE(isValidIdentifier("$_", /*AllowDollar=*/true)); 452 453 // 2 characters, '_' prefix 454 EXPECT_FALSE(isValidIdentifier("_.")); 455 EXPECT_FALSE(isValidIdentifier("_\n")); 456 EXPECT_FALSE(isValidIdentifier("_ ")); 457 EXPECT_FALSE(isValidIdentifier("_\x80")); 458 EXPECT_FALSE(isValidIdentifier("_\xc2")); 459 EXPECT_FALSE(isValidIdentifier("_\xff")); 460 EXPECT_FALSE(isValidIdentifier("_$")); 461 EXPECT_TRUE(isValidIdentifier("_1")); 462 463 EXPECT_TRUE(isValidIdentifier("__")); 464 EXPECT_TRUE(isValidIdentifier("_a")); 465 EXPECT_TRUE(isValidIdentifier("_z")); 466 EXPECT_TRUE(isValidIdentifier("_A")); 467 EXPECT_TRUE(isValidIdentifier("_Z")); 468 EXPECT_TRUE(isValidIdentifier("_$", /*AllowDollar=*/true)); 469 470 // 3 characters, '__' prefix 471 EXPECT_FALSE(isValidIdentifier("__.")); 472 EXPECT_FALSE(isValidIdentifier("__\n")); 473 EXPECT_FALSE(isValidIdentifier("__ ")); 474 EXPECT_FALSE(isValidIdentifier("__\x80")); 475 EXPECT_FALSE(isValidIdentifier("__\xc2")); 476 EXPECT_FALSE(isValidIdentifier("__\xff")); 477 EXPECT_FALSE(isValidIdentifier("__$")); 478 EXPECT_TRUE(isValidIdentifier("__1")); 479 480 EXPECT_TRUE(isValidIdentifier("___")); 481 EXPECT_TRUE(isValidIdentifier("__a")); 482 EXPECT_TRUE(isValidIdentifier("__z")); 483 EXPECT_TRUE(isValidIdentifier("__A")); 484 EXPECT_TRUE(isValidIdentifier("__Z")); 485 EXPECT_TRUE(isValidIdentifier("__$", /*AllowDollar=*/true)); 486 487 // 3 characters, '_' prefix and suffix 488 EXPECT_FALSE(isValidIdentifier("_._")); 489 EXPECT_FALSE(isValidIdentifier("_\n_")); 490 EXPECT_FALSE(isValidIdentifier("_ _")); 491 EXPECT_FALSE(isValidIdentifier("_\x80_")); 492 EXPECT_FALSE(isValidIdentifier("_\xc2_")); 493 EXPECT_FALSE(isValidIdentifier("_\xff_")); 494 EXPECT_FALSE(isValidIdentifier("_$_")); 495 EXPECT_TRUE(isValidIdentifier("_1_")); 496 497 EXPECT_TRUE(isValidIdentifier("___")); 498 EXPECT_TRUE(isValidIdentifier("_a_")); 499 EXPECT_TRUE(isValidIdentifier("_z_")); 500 EXPECT_TRUE(isValidIdentifier("_A_")); 501 EXPECT_TRUE(isValidIdentifier("_Z_")); 502 EXPECT_TRUE(isValidIdentifier("_$_", /*AllowDollar=*/true)); 503 } 504