1 /*-
2 * Copyright (c) 2011 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 #include "test.h"
26
27 #include <locale.h>
28
/*
 * The sample tar file was made in LANG=KOI8-R and contains two files
 * whose filenames are stored in different charsets:
 * - the filename of the first file is stored in BINARY mode.
 * - the filename of the second file is stored in UTF-8.
 *
 * Whether or not the hdrcharset option is specified, we will correctly
 * read the filename of the second file, which is stored in UTF-8 by
 * default.
 */
38
39 static void
test_read_format_tar_filename_KOI8R_CP866(const char * refname)40 test_read_format_tar_filename_KOI8R_CP866(const char *refname)
41 {
42 struct archive *a;
43 struct archive_entry *ae;
44
45 /*
46 * Read filename in ru_RU.CP866 with "hdrcharset=KOI8-R" option.
47 * We should correctly read two filenames.
48 */
49 if (NULL == setlocale(LC_ALL, "Russian_Russia.866") &&
50 NULL == setlocale(LC_ALL, "ru_RU.CP866")) {
51 skipping("ru_RU.CP866 locale not available on this system.");
52 return;
53 }
54
55 /* Test if the platform can convert from UTF-8. */
56 assert((a = archive_read_new()) != NULL);
57 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a));
58 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) {
59 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
60 skipping("This system cannot convert character-set"
61 " from UTF-8 to CP866.");
62 return;
63 }
64 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
65
66 assert((a = archive_read_new()) != NULL);
67 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
68 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
69 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
70 skipping("This system cannot convert character-set"
71 " from KOI8-R to CP866.");
72 goto next_test;
73 }
74 assertEqualIntA(a, ARCHIVE_OK,
75 archive_read_open_filename(a, refname, 10240));
76
77 /* Verify regular first file. */
78 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
79 assertEqualString("\x8f\x90\x88\x82\x85\x92",
80 archive_entry_pathname(ae));
81 assertEqualInt(6, archive_entry_size(ae));
82 assertEqualInt(archive_entry_is_encrypted(ae), 0);
83 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
84
85 /* Verify regular second file. */
86 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
87 assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
88 archive_entry_pathname(ae));
89 assertEqualInt(6, archive_entry_size(ae));
90 assertEqualInt(archive_entry_is_encrypted(ae), 0);
91 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
92
93
94 /* End of archive. */
95 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
96
97 /* Verify archive format. */
98 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
99 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
100 archive_format(a));
101
102 /* Close the archive. */
103 assertEqualInt(ARCHIVE_OK, archive_read_close(a));
104 next_test:
105 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
106
107
108 /*
109 * Read filename in ru_RU.CP866 without "hdrcharset=KOI8-R" option.
110 * The filename we can properly read is only second file.
111 */
112
113 assert((a = archive_read_new()) != NULL);
114 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
115 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
116 assertEqualIntA(a, ARCHIVE_OK,
117 archive_read_open_filename(a, refname, 10240));
118
119 /*
120 * Verify regular first file.
121 * The filename is not translated to CP866 because hdrcharset
122 * attribute is BINARY and there is not way to know its charset.
123 */
124 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
125 /* A filename is in KOI8-R. */
126 assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
127 archive_entry_pathname(ae));
128 assertEqualInt(6, archive_entry_size(ae));
129 assertEqualInt(archive_entry_is_encrypted(ae), 0);
130 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
131
132 /*
133 * Verify regular second file.
134 * The filename is translated from UTF-8 to CP866
135 */
136 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
137 assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
138 archive_entry_pathname(ae));
139 assertEqualInt(6, archive_entry_size(ae));
140 assertEqualInt(archive_entry_is_encrypted(ae), 0);
141 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
142
143
144 /* End of archive. */
145 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
146
147 /* Verify archive format. */
148 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
149 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
150 archive_format(a));
151
152 /* Close the archive. */
153 assertEqualInt(ARCHIVE_OK, archive_read_close(a));
154 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
155 }
156
157 static void
test_read_format_tar_filename_KOI8R_UTF8(const char * refname)158 test_read_format_tar_filename_KOI8R_UTF8(const char *refname)
159 {
160 struct archive *a;
161 struct archive_entry *ae;
162
163 /*
164 * Read filename in en_US.UTF-8 with "hdrcharset=KOI8-R" option.
165 * We should correctly read two filenames.
166 */
167 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
168 skipping("en_US.UTF-8 locale not available on this system.");
169 return;
170 }
171
172 assert((a = archive_read_new()) != NULL);
173 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
174 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
175 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
176 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
177 skipping("This system cannot convert character-set"
178 " from KOI8-R to UTF-8.");
179 return;
180 }
181 assertEqualIntA(a, ARCHIVE_OK,
182 archive_read_open_filename(a, refname, 10240));
183
184 /* Verify regular file. */
185 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
186 assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2",
187 archive_entry_pathname(ae));
188 assertEqualInt(6, archive_entry_size(ae));
189 assertEqualInt(archive_entry_is_encrypted(ae), 0);
190 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
191
192 /* Verify regular file. */
193 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
194 assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
195 archive_entry_pathname(ae));
196 assertEqualInt(6, archive_entry_size(ae));
197 assertEqualInt(archive_entry_is_encrypted(ae), 0);
198 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
199
200 /* Verify encryption status */
201 assertEqualInt(archive_entry_is_encrypted(ae), 0);
202 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
203
204 /* End of archive. */
205 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
206
207 /* Verify archive format. */
208 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
209 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
210 archive_format(a));
211
212 /* Verify encryption status */
213 assertEqualInt(archive_entry_is_encrypted(ae), 0);
214 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
215
216 /* Close the archive. */
217 assertEqualInt(ARCHIVE_OK, archive_read_close(a));
218 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
219
220 /*
221 * Read filename in en_US.UTF-8 without "hdrcharset=KOI8-R" option.
222 * The filename we can properly read is only second file.
223 */
224
225 assert((a = archive_read_new()) != NULL);
226 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
227 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
228 assertEqualIntA(a, ARCHIVE_OK,
229 archive_read_open_filename(a, refname, 10240));
230
231 /*
232 * Verify regular first file.
233 * The filename is not translated to UTF-8 because hdrcharset
234 * attribute is BINARY and there is not way to know its charset.
235 */
236 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
237 /* A filename is in KOI8-R. */
238 assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
239 archive_entry_pathname(ae));
240 assertEqualInt(6, archive_entry_size(ae));
241
242 /* Verify encryption status */
243 assertEqualInt(archive_entry_is_encrypted(ae), 0);
244 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
245
246 /*
247 * Verify regular second file.
248 */
249 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
250 assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
251 archive_entry_pathname(ae));
252 assertEqualInt(6, archive_entry_size(ae));
253
254
255 /* End of archive. */
256 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
257
258 /* Verify archive format. */
259 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
260 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
261 archive_format(a));
262
263 /* Close the archive. */
264 assertEqualInt(ARCHIVE_OK, archive_read_close(a));
265 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
266 }
267
268 static void
test_read_format_tar_filename_KOI8R_CP1251(const char * refname)269 test_read_format_tar_filename_KOI8R_CP1251(const char *refname)
270 {
271 struct archive *a;
272 struct archive_entry *ae;
273
274 /*
275 * Read filename in CP1251 with "hdrcharset=KOI8-R" option.
276 * We should correctly read two filenames.
277 */
278 if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
279 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
280 skipping("CP1251 locale not available on this system.");
281 return;
282 }
283
284 /* Test if the platform can convert from UTF-8. */
285 assert((a = archive_read_new()) != NULL);
286 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a));
287 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) {
288 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
289 skipping("This system cannot convert character-set"
290 " from UTF-8 to CP1251.");
291 return;
292 }
293 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
294
295 assert((a = archive_read_new()) != NULL);
296 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
297 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
298 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
299 skipping("This system cannot convert character-set"
300 " from KOI8-R to CP1251.");
301 goto next_test;
302 }
303 assertEqualIntA(a, ARCHIVE_OK,
304 archive_read_open_filename(a, refname, 10240));
305
306 /* Verify regular first file. */
307 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
308 assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2",
309 archive_entry_pathname(ae));
310 assertEqualInt(6, archive_entry_size(ae));
311 assertEqualInt(archive_entry_is_encrypted(ae), 0);
312 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
313
314 /* Verify regular second file. */
315 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
316 assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
317 archive_entry_pathname(ae));
318 assertEqualInt(6, archive_entry_size(ae));
319 assertEqualInt(archive_entry_is_encrypted(ae), 0);
320 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
321
322
323 /* End of archive. */
324 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
325
326 /* Verify archive format. */
327 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
328 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
329 archive_format(a));
330
331 /* Close the archive. */
332 assertEqualInt(ARCHIVE_OK, archive_read_close(a));
333 next_test:
334 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
335
336 /*
337 * Read filename in CP1251 without "hdrcharset=KOI8-R" option.
338 * The filename we can properly read is only second file.
339 */
340
341 assert((a = archive_read_new()) != NULL);
342 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
343 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
344 assertEqualIntA(a, ARCHIVE_OK,
345 archive_read_open_filename(a, refname, 10240));
346
347 /*
348 * Verify regular first file.
349 * The filename is not translated to CP1251 because hdrcharset
350 * attribute is BINARY and there is not way to know its charset.
351 */
352 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
353 /* A filename is in KOI8-R. */
354 assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
355 archive_entry_pathname(ae));
356 assertEqualInt(6, archive_entry_size(ae));
357 assertEqualInt(archive_entry_is_encrypted(ae), 0);
358 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
359
360 /*
361 * Verify regular second file.
362 */
363 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
364 assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
365 archive_entry_pathname(ae));
366 assertEqualInt(6, archive_entry_size(ae));
367 assertEqualInt(archive_entry_is_encrypted(ae), 0);
368 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
369
370
371 /* End of archive. */
372 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
373
374 /* Verify archive format. */
375 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
376 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
377 archive_format(a));
378
379 /* Close the archive. */
380 assertEqualInt(ARCHIVE_OK, archive_read_close(a));
381 assertEqualInt(ARCHIVE_OK, archive_read_free(a));
382 }
383
384
/*
 * Test entry point: extract the KOI8-R reference archive once, then run
 * the CP866, UTF-8 and CP1251 locale variants against it, in that order.
 */
DEFINE_TEST(test_read_format_tar_filename)
{
	static const char refname[] =
	    "test_read_format_tar_filename_koi8r.tar.Z";

	extract_reference_file(refname);

	test_read_format_tar_filename_KOI8R_CP866(refname);
	test_read_format_tar_filename_KOI8R_UTF8(refname);
	test_read_format_tar_filename_KOI8R_CP1251(refname);
}
394