1 /*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 #include "test.h" 26 27 #include <locale.h> 28 29 /* 30 * Pax interchange is supposed to encode filenames into 31 * UTF-8. Of course, that's not always possible. This 32 * test is intended to verify that filenames always get 33 * stored and restored correctly, regardless of the encodings. 34 */ 35 36 /* 37 * Read a manually-created archive that has filenames that are 38 * stored in binary instead of UTF-8 and verify that we get 39 * the right filename returned and that we get a warning only 40 * if the header isn't marked as binary. 41 */ 42 static void 43 test_pax_filename_encoding_1(void) 44 { 45 static const char testname[] = "test_pax_filename_encoding.tar"; 46 /* 47 * \314\214 is a valid 2-byte UTF-8 sequence. 48 * \374 is invalid in UTF-8. 49 */ 50 char filename[] = "abc\314\214mno\374xyz"; 51 struct archive *a; 52 struct archive_entry *entry; 53 54 /* 55 * Read an archive that has non-UTF8 pax filenames in it. 56 */ 57 extract_reference_file(testname); 58 a = archive_read_new(); 59 assertEqualInt(ARCHIVE_OK, archive_read_support_format_tar(a)); 60 assertEqualInt(ARCHIVE_OK, archive_read_support_filter_all(a)); 61 assertEqualInt(ARCHIVE_OK, 62 archive_read_open_filename(a, testname, 10240)); 63 /* 64 * First entry in this test archive has an invalid UTF-8 sequence 65 * in it, but the header is not marked as hdrcharset=BINARY, so that 66 * requires a warning. 67 */ 68 failure("Invalid UTF8 in a pax archive pathname should cause a warning"); 69 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 70 assertEqualString(filename, archive_entry_pathname(entry)); 71 /* 72 * Second entry is identical except that it does have 73 * hdrcharset=BINARY, so no warning should be generated. 74 */ 75 failure("A pathname with hdrcharset=BINARY can have invalid UTF8\n" 76 " characters in it without generating a warning"); 77 assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry)); 78 assertEqualString(filename, archive_entry_pathname(entry)); 79 archive_read_free(a); 80 } 81 82 /* 83 * Set the locale and write a pathname containing invalid characters. 84 * This should work; the underlying implementation should automatically 85 * fall back to storing the pathname in binary. 86 */ 87 static void 88 test_pax_filename_encoding_2(void) 89 { 90 char filename[] = "abc\314\214mno\374xyz"; 91 struct archive *a; 92 struct archive_entry *entry; 93 char buff[65536]; 94 char longname[] = "abc\314\214mno\374xyz" 95 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 96 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 97 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 98 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 99 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 100 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 101 ; 102 size_t used; 103 104 /* 105 * We need a starting locale which has invalid sequences. 106 * en_US.UTF-8 seems to be commonly supported. 107 */ 108 /* If it doesn't exist, just warn and return. */ 109 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 110 skipping("invalid encoding tests require a suitable locale;" 111 " en_US.UTF-8 not available on this system"); 112 return; 113 } 114 115 assert((a = archive_write_new()) != NULL); 116 assertEqualIntA(a, 0, archive_write_set_format_pax(a)); 117 assertEqualIntA(a, 0, archive_write_add_filter_none(a)); 118 assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0)); 119 assertEqualInt(0, 120 archive_write_open_memory(a, buff, sizeof(buff), &used)); 121 122 assert((entry = archive_entry_new()) != NULL); 123 /* Set pathname, gname, uname, hardlink to nonconvertible values. */ 124 archive_entry_copy_pathname(entry, filename); 125 archive_entry_copy_gname(entry, filename); 126 archive_entry_copy_uname(entry, filename); 127 archive_entry_copy_hardlink(entry, filename); 128 archive_entry_set_filetype(entry, AE_IFREG); 129 failure("This should generate a warning for nonconvertible names."); 130 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 131 archive_entry_free(entry); 132 133 assert((entry = archive_entry_new()) != NULL); 134 /* Set path, gname, uname, and symlink to nonconvertible values. */ 135 archive_entry_copy_pathname(entry, filename); 136 archive_entry_copy_gname(entry, filename); 137 archive_entry_copy_uname(entry, filename); 138 archive_entry_copy_symlink(entry, filename); 139 archive_entry_set_filetype(entry, AE_IFLNK); 140 failure("This should generate a warning for nonconvertible names."); 141 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 142 archive_entry_free(entry); 143 144 assert((entry = archive_entry_new()) != NULL); 145 /* Set pathname to a very long nonconvertible value. */ 146 archive_entry_copy_pathname(entry, longname); 147 archive_entry_set_filetype(entry, AE_IFREG); 148 failure("This should generate a warning for nonconvertible names."); 149 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 150 archive_entry_free(entry); 151 152 assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); 153 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 154 155 /* 156 * Now read the entries back. 157 */ 158 159 assert((a = archive_read_new()) != NULL); 160 assertEqualInt(0, archive_read_support_format_tar(a)); 161 assertEqualInt(0, archive_read_open_memory(a, buff, used)); 162 163 assertEqualInt(0, archive_read_next_header(a, &entry)); 164 assertEqualString(filename, archive_entry_pathname(entry)); 165 assertEqualString(filename, archive_entry_gname(entry)); 166 assertEqualString(filename, archive_entry_uname(entry)); 167 assertEqualString(filename, archive_entry_hardlink(entry)); 168 169 assertEqualInt(0, archive_read_next_header(a, &entry)); 170 assertEqualString(filename, archive_entry_pathname(entry)); 171 assertEqualString(filename, archive_entry_gname(entry)); 172 assertEqualString(filename, archive_entry_uname(entry)); 173 assertEqualString(filename, archive_entry_symlink(entry)); 174 175 assertEqualInt(0, archive_read_next_header(a, &entry)); 176 assertEqualString(longname, archive_entry_pathname(entry)); 177 178 assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); 179 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 180 } 181 182 #if 0 /* Disable this until Tim check out it. */ 183 184 /* 185 * Create an entry starting from a wide-character Unicode pathname, 186 * read it back into "C" locale, which doesn't support the name. 187 * TODO: Figure out the "right" behavior here. 188 */ 189 static void 190 test_pax_filename_encoding_3(void) 191 { 192 wchar_t badname[] = L"xxxAyyyBzzz"; 193 const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz"; 194 struct archive *a; 195 struct archive_entry *entry; 196 char buff[65536]; 197 size_t used; 198 199 badname[3] = 0x1234; 200 badname[7] = 0x5678; 201 202 /* If it doesn't exist, just warn and return. */ 203 if (NULL == setlocale(LC_ALL, "C")) { 204 skipping("Can't set \"C\" locale, so can't exercise " 205 "certain character-conversion failures"); 206 return; 207 } 208 209 /* If wctomb is broken, warn and return. */ 210 if (wctomb(buff, 0x1234) > 0) { 211 skipping("Cannot test conversion failures because \"C\" " 212 "locale on this system has no invalid characters."); 213 return; 214 } 215 216 /* If wctomb is broken, warn and return. */ 217 if (wctomb(buff, 0x1234) > 0) { 218 skipping("Cannot test conversion failures because \"C\" " 219 "locale on this system has no invalid characters."); 220 return; 221 } 222 223 /* Skip test if archive_entry_update_pathname_utf8() is broken. */ 224 /* In particular, this is currently broken on Win32 because 225 * setlocale() does not set the default encoding for CP_ACP. */ 226 entry = archive_entry_new(); 227 if (archive_entry_update_pathname_utf8(entry, badname_utf8)) { 228 archive_entry_free(entry); 229 skipping("Cannot test conversion failures."); 230 return; 231 } 232 archive_entry_free(entry); 233 234 assert((a = archive_write_new()) != NULL); 235 assertEqualIntA(a, 0, archive_write_set_format_pax(a)); 236 assertEqualIntA(a, 0, archive_write_add_filter_none(a)); 237 assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0)); 238 assertEqualInt(0, 239 archive_write_open_memory(a, buff, sizeof(buff), &used)); 240 241 assert((entry = archive_entry_new()) != NULL); 242 /* Set pathname to non-convertible wide value. */ 243 archive_entry_copy_pathname_w(entry, badname); 244 archive_entry_set_filetype(entry, AE_IFREG); 245 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 246 archive_entry_free(entry); 247 248 assert((entry = archive_entry_new()) != NULL); 249 archive_entry_copy_pathname_w(entry, L"abc"); 250 /* Set gname to non-convertible wide value. */ 251 archive_entry_copy_gname_w(entry, badname); 252 archive_entry_set_filetype(entry, AE_IFREG); 253 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 254 archive_entry_free(entry); 255 256 assert((entry = archive_entry_new()) != NULL); 257 archive_entry_copy_pathname_w(entry, L"abc"); 258 /* Set uname to non-convertible wide value. */ 259 archive_entry_copy_uname_w(entry, badname); 260 archive_entry_set_filetype(entry, AE_IFREG); 261 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 262 archive_entry_free(entry); 263 264 assert((entry = archive_entry_new()) != NULL); 265 archive_entry_copy_pathname_w(entry, L"abc"); 266 /* Set hardlink to non-convertible wide value. */ 267 archive_entry_copy_hardlink_w(entry, badname); 268 archive_entry_set_filetype(entry, AE_IFREG); 269 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 270 archive_entry_free(entry); 271 272 assert((entry = archive_entry_new()) != NULL); 273 archive_entry_copy_pathname_w(entry, L"abc"); 274 /* Set symlink to non-convertible wide value. */ 275 archive_entry_copy_symlink_w(entry, badname); 276 archive_entry_set_filetype(entry, AE_IFLNK); 277 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 278 archive_entry_free(entry); 279 280 assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); 281 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 282 283 /* 284 * Now read the entries back. 285 */ 286 287 assert((a = archive_read_new()) != NULL); 288 assertEqualInt(0, archive_read_support_format_tar(a)); 289 assertEqualInt(0, archive_read_open_memory(a, buff, used)); 290 291 failure("A non-convertible pathname should cause a warning."); 292 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 293 assertEqualWString(badname, archive_entry_pathname_w(entry)); 294 failure("If native locale can't convert, we should get UTF-8 back."); 295 assertEqualString(badname_utf8, archive_entry_pathname(entry)); 296 297 failure("A non-convertible gname should cause a warning."); 298 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 299 assertEqualWString(badname, archive_entry_gname_w(entry)); 300 failure("If native locale can't convert, we should get UTF-8 back."); 301 assertEqualString(badname_utf8, archive_entry_gname(entry)); 302 303 failure("A non-convertible uname should cause a warning."); 304 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 305 assertEqualWString(badname, archive_entry_uname_w(entry)); 306 failure("If native locale can't convert, we should get UTF-8 back."); 307 assertEqualString(badname_utf8, archive_entry_uname(entry)); 308 309 failure("A non-convertible hardlink should cause a warning."); 310 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 311 assertEqualWString(badname, archive_entry_hardlink_w(entry)); 312 failure("If native locale can't convert, we should get UTF-8 back."); 313 assertEqualString(badname_utf8, archive_entry_hardlink(entry)); 314 315 failure("A non-convertible symlink should cause a warning."); 316 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 317 assertEqualWString(badname, archive_entry_symlink_w(entry)); 318 assertEqualWString(NULL, archive_entry_hardlink_w(entry)); 319 failure("If native locale can't convert, we should get UTF-8 back."); 320 assertEqualString(badname_utf8, archive_entry_symlink(entry)); 321 322 assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry)); 323 324 assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); 325 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 326 } 327 #else 328 static void 329 test_pax_filename_encoding_3(void) 330 { 331 } 332 #endif 333 334 /* 335 * Verify that KOI8-R filenames are correctly translated to Unicode and UTF-8. 336 */ 337 DEFINE_TEST(test_pax_filename_encoding_KOI8R) 338 { 339 struct archive *a; 340 struct archive_entry *entry; 341 char buff[4096]; 342 size_t used; 343 344 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 345 skipping("KOI8-R locale not available on this system."); 346 return; 347 } 348 349 /* Check if the platform completely supports the string conversion. */ 350 a = archive_write_new(); 351 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 352 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 353 skipping("This system cannot convert character-set" 354 " from KOI8-R to UTF-8."); 355 archive_write_free(a); 356 return; 357 } 358 archive_write_free(a); 359 360 /* Re-create a write archive object since filenames should be written 361 * in UTF-8 by default. */ 362 a = archive_write_new(); 363 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 364 assertEqualInt(ARCHIVE_OK, 365 archive_write_open_memory(a, buff, sizeof(buff), &used)); 366 367 entry = archive_entry_new2(a); 368 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 369 archive_entry_set_filetype(entry, AE_IFREG); 370 archive_entry_set_size(entry, 0); 371 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 372 archive_entry_free(entry); 373 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 374 375 /* Above three characters in KOI8-R should translate to the following 376 * three characters (two bytes each) in UTF-8. */ 377 assertEqualMem(buff + 512, "15 path=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15); 378 } 379 380 /* 381 * Verify that CP1251 filenames are correctly translated to Unicode and UTF-8. 382 */ 383 DEFINE_TEST(test_pax_filename_encoding_CP1251) 384 { 385 struct archive *a; 386 struct archive_entry *entry; 387 char buff[4096]; 388 size_t used; 389 390 if (NULL == setlocale(LC_ALL, "Russian_Russia") && 391 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 392 skipping("KOI8-R locale not available on this system."); 393 return; 394 } 395 396 /* Check if the platform completely supports the string conversion. */ 397 a = archive_write_new(); 398 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 399 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 400 skipping("This system cannot convert character-set" 401 " from KOI8-R to UTF-8."); 402 archive_write_free(a); 403 return; 404 } 405 archive_write_free(a); 406 407 /* Re-create a write archive object since filenames should be written 408 * in UTF-8 by default. */ 409 a = archive_write_new(); 410 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 411 assertEqualInt(ARCHIVE_OK, 412 archive_write_open_memory(a, buff, sizeof(buff), &used)); 413 414 entry = archive_entry_new2(a); 415 archive_entry_set_pathname(entry, "\xef\xf0\xe8"); 416 archive_entry_set_filetype(entry, AE_IFREG); 417 archive_entry_set_size(entry, 0); 418 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 419 archive_entry_free(entry); 420 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 421 422 /* Above three characters in KOI8-R should translate to the following 423 * three characters (two bytes each) in UTF-8. */ 424 assertEqualMem(buff + 512, "15 path=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15); 425 } 426 427 /* 428 * Verify that EUC-JP filenames are correctly translated to Unicode and UTF-8. 429 */ 430 DEFINE_TEST(test_pax_filename_encoding_EUCJP) 431 { 432 struct archive *a; 433 struct archive_entry *entry; 434 char buff[4096]; 435 size_t used; 436 437 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 438 skipping("eucJP locale not available on this system."); 439 return; 440 } 441 442 /* Check if the platform completely supports the string conversion. */ 443 a = archive_write_new(); 444 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 445 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 446 skipping("This system cannot convert character-set" 447 " from eucJP to UTF-8."); 448 archive_write_free(a); 449 return; 450 } 451 archive_write_free(a); 452 453 /* Re-create a write archive object since filenames should be written 454 * in UTF-8 by default. */ 455 a = archive_write_new(); 456 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 457 assertEqualInt(ARCHIVE_OK, 458 archive_write_open_memory(a, buff, sizeof(buff), &used)); 459 460 entry = archive_entry_new2(a); 461 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 462 /* Check the Unicode version. */ 463 archive_entry_set_filetype(entry, AE_IFREG); 464 archive_entry_set_size(entry, 0); 465 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 466 archive_entry_free(entry); 467 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 468 469 /* Check UTF-8 version. */ 470 assertEqualMem(buff + 512, "16 path=\xE8\xA1\xA8.txt\x0A", 16); 471 472 } 473 474 /* 475 * Verify that CP932/SJIS filenames are correctly translated to Unicode and UTF-8. 476 */ 477 DEFINE_TEST(test_pax_filename_encoding_CP932) 478 { 479 struct archive *a; 480 struct archive_entry *entry; 481 char buff[4096]; 482 size_t used; 483 484 if (NULL == setlocale(LC_ALL, "Japanese_Japan") && 485 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { 486 skipping("eucJP locale not available on this system."); 487 return; 488 } 489 490 /* Check if the platform completely supports the string conversion. */ 491 a = archive_write_new(); 492 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 493 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 494 skipping("This system cannot convert character-set" 495 " from CP932/SJIS to UTF-8."); 496 archive_write_free(a); 497 return; 498 } 499 archive_write_free(a); 500 501 /* Re-create a write archive object since filenames should be written 502 * in UTF-8 by default. */ 503 a = archive_write_new(); 504 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 505 assertEqualInt(ARCHIVE_OK, 506 archive_write_open_memory(a, buff, sizeof(buff), &used)); 507 508 entry = archive_entry_new2(a); 509 archive_entry_set_pathname(entry, "\x95\x5C.txt"); 510 /* Check the Unicode version. */ 511 archive_entry_set_filetype(entry, AE_IFREG); 512 archive_entry_set_size(entry, 0); 513 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 514 archive_entry_free(entry); 515 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 516 517 /* Check UTF-8 version. */ 518 assertEqualMem(buff + 512, "16 path=\xE8\xA1\xA8.txt\x0A", 16); 519 520 } 521 522 /* 523 * Verify that KOI8-R filenames are not translated to Unicode and UTF-8 524 * when using hdrcharset=BINARY option. 525 */ 526 DEFINE_TEST(test_pax_filename_encoding_KOI8R_BINARY) 527 { 528 struct archive *a; 529 struct archive_entry *entry; 530 char buff[4096]; 531 size_t used; 532 533 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 534 skipping("KOI8-R locale not available on this system."); 535 return; 536 } 537 538 a = archive_write_new(); 539 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 540 /* BINARY mode should be accepted. */ 541 assertEqualInt(ARCHIVE_OK, 542 archive_write_set_options(a, "hdrcharset=BINARY")); 543 assertEqualInt(ARCHIVE_OK, 544 archive_write_open_memory(a, buff, sizeof(buff), &used)); 545 546 entry = archive_entry_new2(a); 547 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 548 archive_entry_set_filetype(entry, AE_IFREG); 549 archive_entry_set_size(entry, 0); 550 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 551 archive_entry_free(entry); 552 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 553 554 /* "hdrcharset=BINARY" pax attribute should be written. */ 555 assertEqualMem(buff + 512, "21 hdrcharset=BINARY\x0A", 21); 556 /* Above three characters in KOI8-R should not translate to any 557 * character-set. */ 558 assertEqualMem(buff + 512+21, "12 path=\xD0\xD2\xC9\x0A", 12); 559 } 560 561 /* 562 * Pax format writer only accepts both BINARY and UTF-8. 563 * If other character-set name is specified, you will get ARCHIVE_FAILED. 564 */ 565 DEFINE_TEST(test_pax_filename_encoding_KOI8R_CP1251) 566 { 567 struct archive *a; 568 569 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 570 skipping("KOI8-R locale not available on this system."); 571 return; 572 } 573 574 a = archive_write_new(); 575 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 576 /* pax format writer only accepts both BINARY and UTF-8. */ 577 assertEqualInt(ARCHIVE_FAILED, 578 archive_write_set_options(a, "hdrcharset=CP1251")); 579 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 580 } 581 582 /* 583 * Verify that unicode filenames are correctly preserved on Windows 584 */ 585 DEFINE_TEST(test_pax_filename_encoding_UTF16_win) 586 { 587 #if !defined(_WIN32) || defined(__CYGWIN__) 588 skipping("This test is meant to verify unicode string handling" 589 " on Windows with UTF-16 names"); 590 return; 591 #else 592 struct archive *a; 593 struct archive_entry *entry; 594 char buff[0x2000]; 595 char *p; 596 size_t used; 597 598 /* 599 * Don't call setlocale because we're verifying that the '_w' functions 600 * work as expected when 'hdrcharset' is UTF-8 601 */ 602 603 /* Check if the platform completely supports the string conversion. */ 604 a = archive_write_new(); 605 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 606 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 607 skipping("This system cannot convert character-set" 608 " from UTF-16 to UTF-8."); 609 archive_write_free(a); 610 return; 611 } 612 archive_write_free(a); 613 614 /* 615 * Create a new archive handle with default charset handling 616 */ 617 a = archive_write_new(); 618 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 619 assertEqualInt(ARCHIVE_OK, 620 archive_write_open_memory(a, buff, sizeof(buff), &used)); 621 622 /* Part 1: file */ 623 entry = archive_entry_new2(a); 624 archive_entry_copy_pathname_w(entry, L"\u4f60\u597d.txt"); 625 archive_entry_set_filetype(entry, AE_IFREG); 626 archive_entry_set_size(entry, 0); 627 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 628 629 /* Part 2: directory */ 630 /* NOTE: Explicitly not adding trailing slash to test that code path */ 631 archive_entry_copy_pathname_w(entry, L"\u043f\u0440\u0438"); 632 archive_entry_set_filetype(entry, AE_IFDIR); 633 archive_entry_set_size(entry, 0); 634 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 635 636 /* Part 3: symlink */ 637 archive_entry_copy_pathname_w(entry, L"\u518d\u89c1.txt"); 638 archive_entry_copy_symlink_w(entry, L"\u4f60\u597d.txt"); 639 archive_entry_set_filetype(entry, AE_IFLNK); 640 archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); 641 archive_entry_set_size(entry, 0); 642 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 643 644 /* Part 4: hardlink */ 645 archive_entry_copy_pathname_w(entry, L"\u665a\u5b89.txt"); 646 archive_entry_copy_hardlink_w(entry, L"\u4f60\u597d.txt"); 647 archive_entry_set_filetype(entry, AE_IFREG); 648 archive_entry_set_size(entry, 0); 649 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 650 651 archive_entry_free(entry); 652 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 653 654 /* 655 * Examine the bytes to ensure the filenames ended up UTF-8 656 * encoded as we expect. 657 */ 658 659 /* Part 1: file */ 660 p = buff + 0; 661 assertEqualString(p + 0, "PaxHeader/\xE4\xBD\xA0\xE5\xA5\xBD.txt"); /* File name */ 662 assertEqualInt(p[156], 'x'); /* Pax extension header */ 663 p += 512; /* Pax extension body */ 664 assertEqualString(p + 0, "19 path=\xE4\xBD\xA0\xE5\xA5\xBD.txt\n"); 665 p += 512; /* Ustar header */ 666 assertEqualString(p + 0, "\xE4\xBD\xA0\xE5\xA5\xBD.txt"); /* File name */ 667 assertEqualInt(p[156], '0'); 668 669 /* Part 2: directory */ 670 p += 512; /* Pax extension header */ 671 assertEqualString(p + 0, "PaxHeader/\xD0\xBF\xD1\x80\xD0\xB8"); /* File name */ 672 assertEqualInt(p[156], 'x'); 673 p += 512; /* Pax extension body */ 674 assertEqualString(p + 0, "16 path=\xD0\xBF\xD1\x80\xD0\xB8/\n"); 675 p += 512; /* Ustar header */ 676 assertEqualString(p + 0, "\xD0\xBF\xD1\x80\xD0\xB8/"); /* File name */ 677 assertEqualInt(p[156], '5'); /* directory */ 678 679 /* Part 3: symlink */ 680 p += 512; /* Pax Extension Header */ 681 assertEqualString(p + 0, "PaxHeader/\xE5\x86\x8D\xE8\xA7\x81.txt"); /* File name */ 682 p += 512; /* Pax extension body */ 683 assertEqualString(p + 0, 684 "19 path=\xE5\x86\x8D\xE8\xA7\x81.txt\n" 685 "23 linkpath=\xE4\xBD\xA0\xE5\xA5\xBD.txt\n" 686 "31 LIBARCHIVE.symlinktype=file\n"); 687 p += 512; /* Ustar header */ 688 assertEqualString(p + 0, "\xE5\x86\x8D\xE8\xA7\x81.txt"); /* File name */ 689 assertEqualInt(p[156], '2'); /* symlink */ 690 assertEqualString(p + 157, "\xE4\xBD\xA0\xE5\xA5\xBD.txt"); /* link name */ 691 692 /* Part 4: hardlink */ 693 p += 512; /* Pax extension header */ 694 assertEqualString(p + 0, "PaxHeader/\xE6\x99\x9A\xE5\xAE\x89.txt"); /* File name */ 695 p += 512; /* Pax extension body */ 696 assertEqualString(p + 0, 697 "19 path=\xE6\x99\x9A\xE5\xAE\x89.txt\n" 698 "23 linkpath=\xE4\xBD\xA0\xE5\xA5\xBD.txt\n" 699 "31 LIBARCHIVE.symlinktype=file\n"); 700 p += 512; /* Ustar header */ 701 assertEqualString(p + 0, "\xE6\x99\x9A\xE5\xAE\x89.txt"); /* File name */ 702 assertEqualInt(p[156], '1'); /* hard link */ 703 assertEqualString(p + 157, "\xE4\xBD\xA0\xE5\xA5\xBD.txt"); /* link name */ 704 705 /* 706 * Read back the archive to see if we get the original names 707 */ 708 a = archive_read_new(); 709 archive_read_support_format_all(a); 710 archive_read_support_filter_all(a); 711 assertEqualInt(0, archive_read_open_memory(a, buff, used)); 712 713 /* Read part 1: file */ 714 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); 715 assertEqualWString(L"\u4f60\u597d.txt", archive_entry_pathname_w(entry)); 716 717 /* Read part 2: directory */ 718 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); 719 assertEqualWString(L"\u043f\u0440\u0438/", archive_entry_pathname_w(entry)); 720 721 /* Read part 3: symlink */ 722 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); 723 assertEqualWString(L"\u518d\u89c1.txt", archive_entry_pathname_w(entry)); 724 assertEqualWString(L"\u4f60\u597d.txt", archive_entry_symlink_w(entry)); 725 726 /* Read part 4: hardlink */ 727 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry)); 728 assertEqualWString(L"\u665a\u5b89.txt", archive_entry_pathname_w(entry)); 729 assertEqualWString(L"\u4f60\u597d.txt", archive_entry_hardlink_w(entry)); 730 731 archive_free(a); 732 #endif 733 } 734 735 DEFINE_TEST(test_pax_filename_encoding) 736 { 737 test_pax_filename_encoding_1(); 738 test_pax_filename_encoding_2(); 739 test_pax_filename_encoding_3(); 740 } 741