1 /* $NetBSD: unxz.c,v 1.9 2024/05/04 13:17:03 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christos Zoulas. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 #include <sys/cdefs.h> 32 __RCSID("$NetBSD: unxz.c,v 1.9 2024/05/04 13:17:03 christos Exp $"); 33 34 #include <stdarg.h> 35 #include <errno.h> 36 #include <stdio.h> 37 #include <unistd.h> 38 #include <lzma.h> 39 40 static off_t 41 unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in) 42 { 43 lzma_stream strm = LZMA_STREAM_INIT; 44 static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED; 45 lzma_ret ret; 46 lzma_action action = LZMA_RUN; 47 off_t bytes_out, bp; 48 uint8_t ibuf[BUFSIZ]; 49 uint8_t obuf[BUFSIZ]; 50 51 if (bytes_in == NULL) 52 bytes_in = &bp; 53 54 strm.next_in = ibuf; 55 memcpy(ibuf, pre, prelen); 56 strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen); 57 if (strm.avail_in == (size_t)-1) 58 maybe_err("read failed"); 59 infile_newdata(strm.avail_in); 60 strm.avail_in += prelen; 61 *bytes_in = strm.avail_in; 62 63 if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK) 64 maybe_errx("Can't initialize decoder (%d)", ret); 65 66 strm.next_out = NULL; 67 strm.avail_out = 0; 68 if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK) 69 maybe_errx("Can't read headers (%d)", ret); 70 71 bytes_out = 0; 72 strm.next_out = obuf; 73 strm.avail_out = sizeof(obuf); 74 75 for (;;) { 76 check_siginfo(); 77 if (strm.avail_in == 0) { 78 strm.next_in = ibuf; 79 strm.avail_in = read(i, ibuf, sizeof(ibuf)); 80 switch (strm.avail_in) { 81 case (size_t)-1: 82 maybe_err("read failed"); 83 /*NOTREACHED*/ 84 case 0: 85 action = LZMA_FINISH; 86 break; 87 default: 88 infile_newdata(strm.avail_in); 89 *bytes_in += strm.avail_in; 90 break; 91 } 92 } 93 94 ret = lzma_code(&strm, action); 95 96 // Write and check write error before checking decoder error. 97 // This way as much data as possible gets written to output 98 // even if decoder detected an error. 99 if (strm.avail_out == 0 || ret != LZMA_OK) { 100 const size_t write_size = sizeof(obuf) - strm.avail_out; 101 102 if (!tflag && 103 write(o, obuf, write_size) != (ssize_t)write_size) 104 maybe_err("write failed"); 105 106 strm.next_out = obuf; 107 strm.avail_out = sizeof(obuf); 108 bytes_out += write_size; 109 } 110 111 if (ret != LZMA_OK) { 112 if (ret == LZMA_STREAM_END) { 113 // Check that there's no trailing garbage. 114 if (strm.avail_in != 0 || read(i, ibuf, 1)) 115 ret = LZMA_DATA_ERROR; 116 else { 117 lzma_end(&strm); 118 return bytes_out; 119 } 120 } 121 122 const char *msg; 123 switch (ret) { 124 case LZMA_MEM_ERROR: 125 msg = strerror(ENOMEM); 126 break; 127 128 case LZMA_FORMAT_ERROR: 129 msg = "File format not recognized"; 130 break; 131 132 case LZMA_OPTIONS_ERROR: 133 // FIXME: Better message? 134 msg = "Unsupported compression options"; 135 break; 136 137 case LZMA_DATA_ERROR: 138 msg = "File is corrupt"; 139 break; 140 141 case LZMA_BUF_ERROR: 142 msg = "Unexpected end of input"; 143 break; 144 145 case LZMA_MEMLIMIT_ERROR: 146 msg = "Reached memory limit"; 147 break; 148 149 default: 150 maybe_errx("Unknown error (%d)", ret); 151 break; 152 } 153 maybe_errx("%s", msg); 154 155 } 156 } 157 } 158 159 #include <stdbool.h> 160 161 /* 162 * Copied various bits and pieces from xz support code or brute force 163 * replacements. 164 */ 165 166 #define my_min(A,B) ((A)<(B)?(A):(B)) 167 168 // Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. 169 // We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) 170 #if BUFSIZ <= 1024 171 # define IO_BUFFER_SIZE 8192 172 #else 173 # define IO_BUFFER_SIZE (BUFSIZ & ~7U) 174 #endif 175 176 /// is_sparse() accesses the buffer as uint64_t for maximum speed. 177 /// Use an union to make sure that the buffer is properly aligned. 178 typedef union { 179 uint8_t u8[IO_BUFFER_SIZE]; 180 uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; 181 uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; 182 } io_buf; 183 184 185 static bool 186 io_pread(int fd, io_buf *buf, size_t size, off_t pos) 187 { 188 // Using lseek() and read() is more portable than pread() and 189 // for us it is as good as real pread(). 190 if (lseek(fd, pos, SEEK_SET) != pos) { 191 return true; 192 } 193 194 const size_t amount = read(fd, buf, size); 195 if (amount == SIZE_MAX) 196 return true; 197 198 if (amount != size) { 199 return true; 200 } 201 202 return false; 203 } 204 205 /* 206 * Most of the following is copied (mostly verbatim) from the xz 207 * distribution, from file src/xz/list.c 208 */ 209 210 /////////////////////////////////////////////////////////////////////////////// 211 // 212 /// \file list.c 213 /// \brief Listing information about .xz files 214 // 215 // Author: Lasse Collin 216 // 217 // This file has been put into the public domain. 218 // You can do whatever you want with this file. 219 // 220 /////////////////////////////////////////////////////////////////////////////// 221 222 223 /// Information about a .xz file 224 typedef struct { 225 /// Combined Index of all Streams in the file 226 lzma_index *idx; 227 228 /// Total amount of Stream Padding 229 uint64_t stream_padding; 230 231 /// Highest memory usage so far 232 uint64_t memusage_max; 233 234 /// True if all Blocks so far have Compressed Size and 235 /// Uncompressed Size fields 236 bool all_have_sizes; 237 238 /// Oldest XZ Utils version that will decompress the file 239 uint32_t min_version; 240 241 } xz_file_info; 242 243 #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } 244 245 246 /// \brief Parse the Index(es) from the given .xz file 247 /// 248 /// \param xfi Pointer to structure where the decoded information 249 /// is stored. 250 /// \param pair Input file 251 /// 252 /// \return On success, false is returned. On error, true is returned. 253 /// 254 // TODO: This function is pretty big. liblzma should have a function that 255 // takes a callback function to parse the Index(es) from a .xz file to make 256 // it easy for applications. 257 static bool 258 parse_indexes(xz_file_info *xfi, int src_fd) 259 { 260 struct stat st; 261 262 fstat(src_fd, &st); 263 if (st.st_size <= 0) { 264 return true; 265 } 266 267 if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { 268 return true; 269 } 270 271 io_buf buf; 272 lzma_stream_flags header_flags; 273 lzma_stream_flags footer_flags; 274 lzma_ret ret; 275 276 // lzma_stream for the Index decoder 277 lzma_stream strm = LZMA_STREAM_INIT; 278 279 // All Indexes decoded so far 280 lzma_index *combined_index = NULL; 281 282 // The Index currently being decoded 283 lzma_index *this_index = NULL; 284 285 // Current position in the file. We parse the file backwards so 286 // initialize it to point to the end of the file. 287 off_t pos = st.st_size; 288 289 // Each loop iteration decodes one Index. 290 do { 291 // Check that there is enough data left to contain at least 292 // the Stream Header and Stream Footer. This check cannot 293 // fail in the first pass of this loop. 294 if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { 295 goto error; 296 } 297 298 pos -= LZMA_STREAM_HEADER_SIZE; 299 lzma_vli stream_padding = 0; 300 301 // Locate the Stream Footer. There may be Stream Padding which 302 // we must skip when reading backwards. 303 while (true) { 304 if (pos < LZMA_STREAM_HEADER_SIZE) { 305 goto error; 306 } 307 308 if (io_pread(src_fd, &buf, 309 LZMA_STREAM_HEADER_SIZE, pos)) 310 goto error; 311 312 // Stream Padding is always a multiple of four bytes. 313 int i = 2; 314 if (buf.u32[i] != 0) 315 break; 316 317 // To avoid calling io_pread() for every four bytes 318 // of Stream Padding, take advantage that we read 319 // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and 320 // check them too before calling io_pread() again. 321 do { 322 stream_padding += 4; 323 pos -= 4; 324 --i; 325 } while (i >= 0 && buf.u32[i] == 0); 326 } 327 328 // Decode the Stream Footer. 329 ret = lzma_stream_footer_decode(&footer_flags, buf.u8); 330 if (ret != LZMA_OK) { 331 goto error; 332 } 333 334 // Check that the Stream Footer doesn't specify something 335 // that we don't support. This can only happen if the xz 336 // version is older than liblzma and liblzma supports 337 // something new. 338 // 339 // It is enough to check Stream Footer. Stream Header must 340 // match when it is compared against Stream Footer with 341 // lzma_stream_flags_compare(). 342 if (footer_flags.version != 0) { 343 goto error; 344 } 345 346 // Check that the size of the Index field looks sane. 347 lzma_vli index_size = footer_flags.backward_size; 348 if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { 349 goto error; 350 } 351 352 // Set pos to the beginning of the Index. 353 pos -= index_size; 354 355 // Decode the Index. 356 ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX); 357 if (ret != LZMA_OK) { 358 goto error; 359 } 360 361 do { 362 // Don't give the decoder more input than the 363 // Index size. 364 strm.avail_in = my_min(IO_BUFFER_SIZE, index_size); 365 if (io_pread(src_fd, &buf, strm.avail_in, pos)) 366 goto error; 367 368 pos += strm.avail_in; 369 index_size -= strm.avail_in; 370 371 strm.next_in = buf.u8; 372 ret = lzma_code(&strm, LZMA_RUN); 373 374 } while (ret == LZMA_OK); 375 376 // If the decoding seems to be successful, check also that 377 // the Index decoder consumed as much input as indicated 378 // by the Backward Size field. 379 if (ret == LZMA_STREAM_END) 380 if (index_size != 0 || strm.avail_in != 0) 381 ret = LZMA_DATA_ERROR; 382 383 if (ret != LZMA_STREAM_END) { 384 // LZMA_BUFFER_ERROR means that the Index decoder 385 // would have liked more input than what the Index 386 // size should be according to Stream Footer. 387 // The message for LZMA_DATA_ERROR makes more 388 // sense in that case. 389 if (ret == LZMA_BUF_ERROR) 390 ret = LZMA_DATA_ERROR; 391 392 goto error; 393 } 394 395 // Decode the Stream Header and check that its Stream Flags 396 // match the Stream Footer. 397 pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; 398 if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { 399 goto error; 400 } 401 402 pos -= lzma_index_total_size(this_index); 403 if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos)) 404 goto error; 405 406 ret = lzma_stream_header_decode(&header_flags, buf.u8); 407 if (ret != LZMA_OK) { 408 goto error; 409 } 410 411 ret = lzma_stream_flags_compare(&header_flags, &footer_flags); 412 if (ret != LZMA_OK) { 413 goto error; 414 } 415 416 // Store the decoded Stream Flags into this_index. This is 417 // needed so that we can print which Check is used in each 418 // Stream. 419 ret = lzma_index_stream_flags(this_index, &footer_flags); 420 if (ret != LZMA_OK) 421 goto error; 422 423 // Store also the size of the Stream Padding field. It is 424 // needed to show the offsets of the Streams correctly. 425 ret = lzma_index_stream_padding(this_index, stream_padding); 426 if (ret != LZMA_OK) 427 goto error; 428 429 if (combined_index != NULL) { 430 // Append the earlier decoded Indexes 431 // after this_index. 432 ret = lzma_index_cat( 433 this_index, combined_index, NULL); 434 if (ret != LZMA_OK) { 435 goto error; 436 } 437 } 438 439 combined_index = this_index; 440 this_index = NULL; 441 442 xfi->stream_padding += stream_padding; 443 444 } while (pos > 0); 445 446 lzma_end(&strm); 447 448 // All OK. Make combined_index available to the caller. 449 xfi->idx = combined_index; 450 return false; 451 452 error: 453 // Something went wrong, free the allocated memory. 454 lzma_end(&strm); 455 lzma_index_end(combined_index, NULL); 456 lzma_index_end(this_index, NULL); 457 return true; 458 } 459 460 /***************** end of copy form list.c *************************/ 461 462 /* 463 * Small wrapper to extract total length of a file 464 */ 465 off_t 466 unxz_len(int fd) 467 { 468 xz_file_info xfi = XZ_FILE_INFO_INIT; 469 if (!parse_indexes(&xfi, fd)) { 470 off_t res = lzma_index_uncompressed_size(xfi.idx); 471 lzma_index_end(xfi.idx, NULL); 472 return res; 473 } 474 return 0; 475 } 476 477