1 /* $NetBSD: unxz.c,v 1.9 2024/05/04 13:17:03 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christos Zoulas.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31 #include <sys/cdefs.h>
32 __RCSID("$NetBSD: unxz.c,v 1.9 2024/05/04 13:17:03 christos Exp $");
33
34 #include <stdarg.h>
35 #include <errno.h>
36 #include <stdio.h>
37 #include <unistd.h>
38 #include <lzma.h>
39
40 static off_t
unxz(int i,int o,char * pre,size_t prelen,off_t * bytes_in)41 unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
42 {
43 lzma_stream strm = LZMA_STREAM_INIT;
44 static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED;
45 lzma_ret ret;
46 lzma_action action = LZMA_RUN;
47 off_t bytes_out, bp;
48 uint8_t ibuf[BUFSIZ];
49 uint8_t obuf[BUFSIZ];
50
51 if (bytes_in == NULL)
52 bytes_in = &bp;
53
54 strm.next_in = ibuf;
55 memcpy(ibuf, pre, prelen);
56 strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen);
57 if (strm.avail_in == (size_t)-1)
58 maybe_err("read failed");
59 infile_newdata(strm.avail_in);
60 strm.avail_in += prelen;
61 *bytes_in = strm.avail_in;
62
63 if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK)
64 maybe_errx("Can't initialize decoder (%d)", ret);
65
66 strm.next_out = NULL;
67 strm.avail_out = 0;
68 if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK)
69 maybe_errx("Can't read headers (%d)", ret);
70
71 bytes_out = 0;
72 strm.next_out = obuf;
73 strm.avail_out = sizeof(obuf);
74
75 for (;;) {
76 check_siginfo();
77 if (strm.avail_in == 0) {
78 strm.next_in = ibuf;
79 strm.avail_in = read(i, ibuf, sizeof(ibuf));
80 switch (strm.avail_in) {
81 case (size_t)-1:
82 maybe_err("read failed");
83 /*NOTREACHED*/
84 case 0:
85 action = LZMA_FINISH;
86 break;
87 default:
88 infile_newdata(strm.avail_in);
89 *bytes_in += strm.avail_in;
90 break;
91 }
92 }
93
94 ret = lzma_code(&strm, action);
95
96 // Write and check write error before checking decoder error.
97 // This way as much data as possible gets written to output
98 // even if decoder detected an error.
99 if (strm.avail_out == 0 || ret != LZMA_OK) {
100 const size_t write_size = sizeof(obuf) - strm.avail_out;
101
102 if (!tflag &&
103 write(o, obuf, write_size) != (ssize_t)write_size)
104 maybe_err("write failed");
105
106 strm.next_out = obuf;
107 strm.avail_out = sizeof(obuf);
108 bytes_out += write_size;
109 }
110
111 if (ret != LZMA_OK) {
112 if (ret == LZMA_STREAM_END) {
113 // Check that there's no trailing garbage.
114 if (strm.avail_in != 0 || read(i, ibuf, 1))
115 ret = LZMA_DATA_ERROR;
116 else {
117 lzma_end(&strm);
118 return bytes_out;
119 }
120 }
121
122 const char *msg;
123 switch (ret) {
124 case LZMA_MEM_ERROR:
125 msg = strerror(ENOMEM);
126 break;
127
128 case LZMA_FORMAT_ERROR:
129 msg = "File format not recognized";
130 break;
131
132 case LZMA_OPTIONS_ERROR:
133 // FIXME: Better message?
134 msg = "Unsupported compression options";
135 break;
136
137 case LZMA_DATA_ERROR:
138 msg = "File is corrupt";
139 break;
140
141 case LZMA_BUF_ERROR:
142 msg = "Unexpected end of input";
143 break;
144
145 case LZMA_MEMLIMIT_ERROR:
146 msg = "Reached memory limit";
147 break;
148
149 default:
150 maybe_errx("Unknown error (%d)", ret);
151 break;
152 }
153 maybe_errx("%s", msg);
154
155 }
156 }
157 }
158
159 #include <stdbool.h>
160
161 /*
162 * Copied various bits and pieces from xz support code or brute force
163 * replacements.
164 */
165
/* Smaller of two values; note that each argument is evaluated twice. */
#define my_min(lhs, rhs)	(((lhs) < (rhs)) ? (lhs) : (rhs))
167
// IO_BUFFER_SIZE is BUFSIZ rounded down to a multiple of 8
// (sizeof(uint64_t)) when BUFSIZ is larger than 1024; systems with a
// smaller (suboptimal) BUFSIZ get a fixed 8192 instead.
#if BUFSIZ > 1024
#	define IO_BUFFER_SIZE (BUFSIZ & ~7U)
#else
#	define IO_BUFFER_SIZE 8192
#endif

/// I/O buffer that can be viewed as bytes, 32-bit words or 64-bit words.
/// Declaring it as a union gives the byte array the alignment required
/// by the widest member, so the word views can be used directly.
typedef union {
	uint8_t u8[IO_BUFFER_SIZE];
	uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
	uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
} io_buf;
183
184
185 static bool
io_pread(int fd,io_buf * buf,size_t size,off_t pos)186 io_pread(int fd, io_buf *buf, size_t size, off_t pos)
187 {
188 // Using lseek() and read() is more portable than pread() and
189 // for us it is as good as real pread().
190 if (lseek(fd, pos, SEEK_SET) != pos) {
191 return true;
192 }
193
194 const size_t amount = read(fd, buf, size);
195 if (amount == SIZE_MAX)
196 return true;
197
198 if (amount != size) {
199 return true;
200 }
201
202 return false;
203 }
204
205 /*
206 * Most of the following is copied (mostly verbatim) from the xz
207 * distribution, from file src/xz/list.c
208 */
209
210 ///////////////////////////////////////////////////////////////////////////////
211 //
212 /// \file list.c
213 /// \brief Listing information about .xz files
214 //
215 // Author: Lasse Collin
216 //
217 // This file has been put into the public domain.
218 // You can do whatever you want with this file.
219 //
220 ///////////////////////////////////////////////////////////////////////////////
221
222
223 /// Information about a .xz file
224 typedef struct {
225 /// Combined Index of all Streams in the file
226 lzma_index *idx;
227
228 /// Total amount of Stream Padding
229 uint64_t stream_padding;
230
231 /// Highest memory usage so far
232 uint64_t memusage_max;
233
234 /// True if all Blocks so far have Compressed Size and
235 /// Uncompressed Size fields
236 bool all_have_sizes;
237
238 /// Oldest XZ Utils version that will decompress the file
239 uint32_t min_version;
240
241 } xz_file_info;
242
243 #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
244
245
246 /// \brief Parse the Index(es) from the given .xz file
247 ///
248 /// \param xfi Pointer to structure where the decoded information
249 /// is stored.
250 /// \param pair Input file
251 ///
252 /// \return On success, false is returned. On error, true is returned.
253 ///
254 // TODO: This function is pretty big. liblzma should have a function that
255 // takes a callback function to parse the Index(es) from a .xz file to make
256 // it easy for applications.
257 static bool
parse_indexes(xz_file_info * xfi,int src_fd)258 parse_indexes(xz_file_info *xfi, int src_fd)
259 {
260 struct stat st;
261
262 fstat(src_fd, &st);
263 if (st.st_size <= 0) {
264 return true;
265 }
266
267 if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
268 return true;
269 }
270
271 io_buf buf;
272 lzma_stream_flags header_flags;
273 lzma_stream_flags footer_flags;
274 lzma_ret ret;
275
276 // lzma_stream for the Index decoder
277 lzma_stream strm = LZMA_STREAM_INIT;
278
279 // All Indexes decoded so far
280 lzma_index *combined_index = NULL;
281
282 // The Index currently being decoded
283 lzma_index *this_index = NULL;
284
285 // Current position in the file. We parse the file backwards so
286 // initialize it to point to the end of the file.
287 off_t pos = st.st_size;
288
289 // Each loop iteration decodes one Index.
290 do {
291 // Check that there is enough data left to contain at least
292 // the Stream Header and Stream Footer. This check cannot
293 // fail in the first pass of this loop.
294 if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
295 goto error;
296 }
297
298 pos -= LZMA_STREAM_HEADER_SIZE;
299 lzma_vli stream_padding = 0;
300
301 // Locate the Stream Footer. There may be Stream Padding which
302 // we must skip when reading backwards.
303 while (true) {
304 if (pos < LZMA_STREAM_HEADER_SIZE) {
305 goto error;
306 }
307
308 if (io_pread(src_fd, &buf,
309 LZMA_STREAM_HEADER_SIZE, pos))
310 goto error;
311
312 // Stream Padding is always a multiple of four bytes.
313 int i = 2;
314 if (buf.u32[i] != 0)
315 break;
316
317 // To avoid calling io_pread() for every four bytes
318 // of Stream Padding, take advantage that we read
319 // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
320 // check them too before calling io_pread() again.
321 do {
322 stream_padding += 4;
323 pos -= 4;
324 --i;
325 } while (i >= 0 && buf.u32[i] == 0);
326 }
327
328 // Decode the Stream Footer.
329 ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
330 if (ret != LZMA_OK) {
331 goto error;
332 }
333
334 // Check that the Stream Footer doesn't specify something
335 // that we don't support. This can only happen if the xz
336 // version is older than liblzma and liblzma supports
337 // something new.
338 //
339 // It is enough to check Stream Footer. Stream Header must
340 // match when it is compared against Stream Footer with
341 // lzma_stream_flags_compare().
342 if (footer_flags.version != 0) {
343 goto error;
344 }
345
346 // Check that the size of the Index field looks sane.
347 lzma_vli index_size = footer_flags.backward_size;
348 if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
349 goto error;
350 }
351
352 // Set pos to the beginning of the Index.
353 pos -= index_size;
354
355 // Decode the Index.
356 ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
357 if (ret != LZMA_OK) {
358 goto error;
359 }
360
361 do {
362 // Don't give the decoder more input than the
363 // Index size.
364 strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
365 if (io_pread(src_fd, &buf, strm.avail_in, pos))
366 goto error;
367
368 pos += strm.avail_in;
369 index_size -= strm.avail_in;
370
371 strm.next_in = buf.u8;
372 ret = lzma_code(&strm, LZMA_RUN);
373
374 } while (ret == LZMA_OK);
375
376 // If the decoding seems to be successful, check also that
377 // the Index decoder consumed as much input as indicated
378 // by the Backward Size field.
379 if (ret == LZMA_STREAM_END)
380 if (index_size != 0 || strm.avail_in != 0)
381 ret = LZMA_DATA_ERROR;
382
383 if (ret != LZMA_STREAM_END) {
384 // LZMA_BUFFER_ERROR means that the Index decoder
385 // would have liked more input than what the Index
386 // size should be according to Stream Footer.
387 // The message for LZMA_DATA_ERROR makes more
388 // sense in that case.
389 if (ret == LZMA_BUF_ERROR)
390 ret = LZMA_DATA_ERROR;
391
392 goto error;
393 }
394
395 // Decode the Stream Header and check that its Stream Flags
396 // match the Stream Footer.
397 pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
398 if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
399 goto error;
400 }
401
402 pos -= lzma_index_total_size(this_index);
403 if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
404 goto error;
405
406 ret = lzma_stream_header_decode(&header_flags, buf.u8);
407 if (ret != LZMA_OK) {
408 goto error;
409 }
410
411 ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
412 if (ret != LZMA_OK) {
413 goto error;
414 }
415
416 // Store the decoded Stream Flags into this_index. This is
417 // needed so that we can print which Check is used in each
418 // Stream.
419 ret = lzma_index_stream_flags(this_index, &footer_flags);
420 if (ret != LZMA_OK)
421 goto error;
422
423 // Store also the size of the Stream Padding field. It is
424 // needed to show the offsets of the Streams correctly.
425 ret = lzma_index_stream_padding(this_index, stream_padding);
426 if (ret != LZMA_OK)
427 goto error;
428
429 if (combined_index != NULL) {
430 // Append the earlier decoded Indexes
431 // after this_index.
432 ret = lzma_index_cat(
433 this_index, combined_index, NULL);
434 if (ret != LZMA_OK) {
435 goto error;
436 }
437 }
438
439 combined_index = this_index;
440 this_index = NULL;
441
442 xfi->stream_padding += stream_padding;
443
444 } while (pos > 0);
445
446 lzma_end(&strm);
447
448 // All OK. Make combined_index available to the caller.
449 xfi->idx = combined_index;
450 return false;
451
452 error:
453 // Something went wrong, free the allocated memory.
454 lzma_end(&strm);
455 lzma_index_end(combined_index, NULL);
456 lzma_index_end(this_index, NULL);
457 return true;
458 }
459
/***************** end of copy from list.c *************************/
461
462 /*
463 * Small wrapper to extract total length of a file
464 */
465 off_t
unxz_len(int fd)466 unxz_len(int fd)
467 {
468 xz_file_info xfi = XZ_FILE_INFO_INIT;
469 if (!parse_indexes(&xfi, fd)) {
470 off_t res = lzma_index_uncompressed_size(xfi.idx);
471 lzma_index_end(xfi.idx, NULL);
472 return res;
473 }
474 return 0;
475 }
476
477