1 /*- 2 * Copyright (c) 2011 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 #include "test.h" 26 #include <locale.h> 27 28 DEFINE_TEST(test_gnutar_filename_encoding_UTF8_CP866) 29 { 30 struct archive *a; 31 struct archive_entry *entry; 32 char buff[4096]; 33 size_t used; 34 35 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 36 skipping("en_US.UTF-8 locale not available on this system."); 37 return; 38 } 39 40 /* 41 * Verify that UTF-8 filenames are correctly translated into CP866 42 * and stored with hdrcharset=CP866 option. 43 */ 44 a = archive_write_new(); 45 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 46 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 47 skipping("This system cannot convert character-set" 48 " from UTF-8 to CP866."); 49 archive_write_free(a); 50 return; 51 } 52 assertEqualInt(ARCHIVE_OK, 53 archive_write_open_memory(a, buff, sizeof(buff), &used)); 54 55 entry = archive_entry_new2(a); 56 /* Set a UTF-8 filename. */ 57 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); 58 archive_entry_set_filetype(entry, AE_IFREG); 59 archive_entry_set_size(entry, 0); 60 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 61 archive_entry_free(entry); 62 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 63 64 /* Above three characters in UTF-8 should translate to the following 65 * three characters in CP866. */ 66 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 67 } 68 69 DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_UTF8) 70 { 71 struct archive *a; 72 struct archive_entry *entry; 73 char buff[4096]; 74 size_t used; 75 76 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 77 skipping("KOI8-R locale not available on this system."); 78 return; 79 } 80 81 /* 82 * Verify that KOI8-R filenames are correctly translated into UTF-8 83 * and stored with hdrcharset=UTF-8 option. 84 */ 85 a = archive_write_new(); 86 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 87 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 88 skipping("This system cannot convert character-set" 89 " from KOI8-R to UTF-8."); 90 archive_write_free(a); 91 return; 92 } 93 assertEqualInt(ARCHIVE_OK, 94 archive_write_open_memory(a, buff, sizeof(buff), &used)); 95 96 entry = archive_entry_new2(a); 97 /* Set a KOI8-R filename. */ 98 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 99 archive_entry_set_filetype(entry, AE_IFREG); 100 archive_entry_set_size(entry, 0); 101 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 102 archive_entry_free(entry); 103 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 104 105 /* Above three characters in KOI8-R should translate to the following 106 * three characters (two bytes each) in UTF-8. */ 107 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 108 } 109 110 DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_CP866) 111 { 112 struct archive *a; 113 struct archive_entry *entry; 114 char buff[4096]; 115 size_t used; 116 117 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 118 skipping("KOI8-R locale not available on this system."); 119 return; 120 } 121 122 /* 123 * Verify that KOI8-R filenames are correctly translated into CP866 124 * and stored with hdrcharset=CP866 option. 125 */ 126 a = archive_write_new(); 127 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 128 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 129 skipping("This system cannot convert character-set" 130 " from KOI8-R to CP866."); 131 archive_write_free(a); 132 return; 133 } 134 assertEqualInt(ARCHIVE_OK, 135 archive_write_open_memory(a, buff, sizeof(buff), &used)); 136 137 entry = archive_entry_new2(a); 138 /* Set a KOI8-R filename. */ 139 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 140 archive_entry_set_filetype(entry, AE_IFREG); 141 archive_entry_set_size(entry, 0); 142 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 143 archive_entry_free(entry); 144 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 145 146 /* Above three characters in KOI8-R should translate to the following 147 * three characters in CP866. */ 148 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 149 } 150 151 DEFINE_TEST(test_gnutar_filename_encoding_CP1251_UTF8) 152 { 153 struct archive *a; 154 struct archive_entry *entry; 155 char buff[4096]; 156 size_t used; 157 158 if (NULL == setlocale(LC_ALL, "Russian_Russia") && 159 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 160 skipping("KOI8-R locale not available on this system."); 161 return; 162 } 163 164 /* 165 * Verify that CP1251 filenames are correctly translated into UTF-8 166 * and stored with hdrcharset=UTF-8 option. 167 */ 168 a = archive_write_new(); 169 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 170 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 171 skipping("This system cannot convert character-set" 172 " from KOI8-R to UTF-8."); 173 archive_write_free(a); 174 return; 175 } 176 assertEqualInt(ARCHIVE_OK, 177 archive_write_open_memory(a, buff, sizeof(buff), &used)); 178 179 entry = archive_entry_new2(a); 180 /* Set a KOI8-R filename. */ 181 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 182 archive_entry_set_filetype(entry, AE_IFREG); 183 archive_entry_set_size(entry, 0); 184 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 185 archive_entry_free(entry); 186 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 187 188 /* Above three characters in CP1251 should translate to the following 189 * three characters (two bytes each) in UTF-8. */ 190 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 191 } 192 193 /* 194 * Do not translate CP1251 into CP866 if non Windows platform. 195 */ 196 DEFINE_TEST(test_gnutar_filename_encoding_ru_RU_CP1251) 197 { 198 struct archive *a; 199 struct archive_entry *entry; 200 char buff[4096]; 201 size_t used; 202 203 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 204 skipping("KOI8-R locale not available on this system."); 205 return; 206 } 207 208 /* 209 * Verify that CP1251 filenames are not translated into any 210 * other character-set, in particular, CP866. 211 */ 212 a = archive_write_new(); 213 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 214 assertEqualInt(ARCHIVE_OK, 215 archive_write_open_memory(a, buff, sizeof(buff), &used)); 216 217 entry = archive_entry_new2(a); 218 /* Set a KOI8-R filename. */ 219 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 220 archive_entry_set_filetype(entry, AE_IFREG); 221 archive_entry_set_size(entry, 0); 222 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 223 archive_entry_free(entry); 224 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 225 226 /* Above three characters in CP1251 should not translate to 227 * any other character-set. */ 228 assertEqualMem(buff, "\xEF\xF0\xE8", 3); 229 } 230 231 /* 232 * Other archiver applications on Windows translate CP1251 filenames 233 * into CP866 filenames and store it in the gnutar file. 234 * Test above behavior works well. 235 */ 236 DEFINE_TEST(test_gnutar_filename_encoding_Russian_Russia) 237 { 238 struct archive *a; 239 struct archive_entry *entry; 240 char buff[4096]; 241 size_t used; 242 243 if (NULL == setlocale(LC_ALL, "Russian_Russia")) { 244 skipping("Russian_Russia locale not available on this system."); 245 return; 246 } 247 248 /* 249 * Verify that Russian_Russia(CP1251) filenames are correctly translated 250 * to CP866. 251 */ 252 a = archive_write_new(); 253 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 254 assertEqualInt(ARCHIVE_OK, 255 archive_write_open_memory(a, buff, sizeof(buff), &used)); 256 257 entry = archive_entry_new2(a); 258 /* Set a CP1251 filename. */ 259 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 260 archive_entry_set_filetype(entry, AE_IFREG); 261 archive_entry_set_size(entry, 0); 262 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 263 archive_entry_free(entry); 264 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 265 266 /* Above three characters in CP1251 should translate to the following 267 * three characters in CP866. */ 268 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 269 } 270 271 DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_UTF8) 272 { 273 struct archive *a; 274 struct archive_entry *entry; 275 char buff[4096]; 276 size_t used; 277 278 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 279 skipping("eucJP locale not available on this system."); 280 return; 281 } 282 283 /* 284 * Verify that EUC-JP filenames are correctly translated to UTF-8. 285 */ 286 a = archive_write_new(); 287 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 288 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 289 skipping("This system cannot convert character-set" 290 " from eucJP to UTF-8."); 291 archive_write_free(a); 292 return; 293 } 294 assertEqualInt(ARCHIVE_OK, 295 archive_write_open_memory(a, buff, sizeof(buff), &used)); 296 297 entry = archive_entry_new2(a); 298 /* Set an EUC-JP filename. */ 299 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 300 /* Check the Unicode version. */ 301 archive_entry_set_filetype(entry, AE_IFREG); 302 archive_entry_set_size(entry, 0); 303 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 304 archive_entry_free(entry); 305 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 306 307 /* Check UTF-8 version. */ 308 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 309 } 310 311 DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_CP932) 312 { 313 struct archive *a; 314 struct archive_entry *entry; 315 char buff[4096]; 316 size_t used; 317 318 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 319 skipping("eucJP locale not available on this system."); 320 return; 321 } 322 323 /* 324 * Verify that EUC-JP filenames are correctly translated to CP932. 325 */ 326 a = archive_write_new(); 327 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 328 if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) { 329 skipping("This system cannot convert character-set" 330 " from eucJP to CP932."); 331 archive_write_free(a); 332 return; 333 } 334 assertEqualInt(ARCHIVE_OK, 335 archive_write_open_memory(a, buff, sizeof(buff), &used)); 336 337 entry = archive_entry_new2(a); 338 /* Set an EUC-JP filename. */ 339 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 340 /* Check the Unicode version. */ 341 archive_entry_set_filetype(entry, AE_IFREG); 342 archive_entry_set_size(entry, 0); 343 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 344 archive_entry_free(entry); 345 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 346 347 /* Check CP932 version. */ 348 assertEqualMem(buff, "\x95\x5C.txt", 6); 349 } 350 351 DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8) 352 { 353 struct archive *a; 354 struct archive_entry *entry; 355 char buff[4096]; 356 size_t used; 357 358 if (NULL == setlocale(LC_ALL, "Japanese_Japan") && 359 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { 360 skipping("CP932/SJIS locale not available on this system."); 361 return; 362 } 363 364 /* 365 * Verify that CP932/SJIS filenames are correctly translated to UTF-8. 366 */ 367 a = archive_write_new(); 368 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 369 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 370 skipping("This system cannot convert character-set" 371 " from CP932/SJIS to UTF-8."); 372 archive_write_free(a); 373 return; 374 } 375 assertEqualInt(ARCHIVE_OK, 376 archive_write_open_memory(a, buff, sizeof(buff), &used)); 377 378 entry = archive_entry_new2(a); 379 /* Set an CP932/SJIS filename. */ 380 archive_entry_set_pathname(entry, "\x95\x5C.txt"); 381 /* Check the Unicode version. */ 382 archive_entry_set_filetype(entry, AE_IFREG); 383 archive_entry_set_size(entry, 0); 384 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 385 archive_entry_free(entry); 386 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 387 388 /* Check UTF-8 version. */ 389 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 390 } 391 392 DEFINE_TEST(test_gnutar_filename_encoding_UTF16_win) 393 { 394 #if !defined(_WIN32) || defined(__CYGWIN__) 395 skipping("This test is meant to verify unicode string handling" 396 " on Windows with UTF-16 names"); 397 return; 398 #else 399 struct archive *a; 400 struct archive_entry *entry; 401 char buff[4096]; 402 size_t used; 403 404 /* 405 * Don't call setlocale because we're verifying that the '_w' functions 406 * work as expected when 'hdrcharset' is UTF-8 407 */ 408 409 /* Part 1: file */ 410 a = archive_write_new(); 411 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 412 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 413 skipping("This system cannot convert character-set" 414 " from UTF-16 to UTF-8."); 415 archive_write_free(a); 416 return; 417 } 418 assertEqualInt(ARCHIVE_OK, 419 archive_write_open_memory(a, buff, sizeof(buff), &used)); 420 421 entry = archive_entry_new2(a); 422 /* Set the filename using a UTF-16 string */ 423 archive_entry_copy_pathname_w(entry, L"\u8868.txt"); 424 archive_entry_set_filetype(entry, AE_IFREG); 425 archive_entry_set_size(entry, 0); 426 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 427 archive_entry_free(entry); 428 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 429 430 /* Check UTF-8 version. */ 431 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 432 433 /* Part 2: directory */ 434 a = archive_write_new(); 435 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 436 assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); 437 assertEqualInt(ARCHIVE_OK, 438 archive_write_open_memory(a, buff, sizeof(buff), &used)); 439 440 entry = archive_entry_new2(a); 441 /* Set the directory name using a UTF-16 string */ 442 /* NOTE: Explicitly not adding trailing slash to test that code path */ 443 archive_entry_copy_pathname_w(entry, L"\u8868"); 444 archive_entry_set_filetype(entry, AE_IFDIR); 445 archive_entry_set_size(entry, 0); 446 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 447 archive_entry_free(entry); 448 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 449 450 /* Check UTF-8 version. */ 451 assertEqualMem(buff, "\xE8\xA1\xA8/", 4); 452 453 /* Part 3: symlink */ 454 a = archive_write_new(); 455 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 456 assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); 457 assertEqualInt(ARCHIVE_OK, 458 archive_write_open_memory(a, buff, sizeof(buff), &used)); 459 460 entry = archive_entry_new2(a); 461 /* Set the symlink target using a UTF-16 string */ 462 archive_entry_set_pathname(entry, "link.txt"); 463 archive_entry_copy_symlink_w(entry, L"\u8868.txt"); 464 archive_entry_set_filetype(entry, AE_IFLNK); 465 archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE); 466 archive_entry_set_size(entry, 0); 467 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 468 archive_entry_free(entry); 469 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 470 471 /* Check UTF-8 version. */ 472 assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7); 473 474 /* Part 4: hardlink */ 475 a = archive_write_new(); 476 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 477 assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8")); 478 assertEqualInt(ARCHIVE_OK, 479 archive_write_open_memory(a, buff, sizeof(buff), &used)); 480 481 entry = archive_entry_new2(a); 482 /* Set the symlink target using a UTF-16 string */ 483 archive_entry_set_pathname(entry, "link.txt"); 484 archive_entry_copy_hardlink_w(entry, L"\u8868.txt"); 485 archive_entry_set_size(entry, 0); 486 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 487 archive_entry_free(entry); 488 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 489 490 /* Check UTF-8 version. */ 491 assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7); 492 #endif 493 } 494