1 /* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */ 2 3 /*- 4 * Copyright (c) 2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christos Zoulas. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 #include <sys/cdefs.h> 32 __RCSID("$NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $"); 33 34 #include <stdarg.h> 35 #include <errno.h> 36 #include <stdio.h> 37 #include <unistd.h> 38 #include <lzma.h> 39 40 static off_t 41 unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in) 42 { 43 lzma_stream strm = LZMA_STREAM_INIT; 44 static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED; 45 lzma_ret ret; 46 lzma_action action = LZMA_RUN; 47 off_t bytes_out, bp; 48 uint8_t ibuf[BUFSIZ]; 49 uint8_t obuf[BUFSIZ]; 50 51 if (bytes_in == NULL) 52 bytes_in = &bp; 53 54 strm.next_in = ibuf; 55 memcpy(ibuf, pre, prelen); 56 strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen); 57 if (strm.avail_in == (size_t)-1) 58 maybe_err("read failed"); 59 infile_newdata(strm.avail_in); 60 strm.avail_in += prelen; 61 *bytes_in = strm.avail_in; 62 63 if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK) 64 maybe_errx("Can't initialize decoder (%d)", ret); 65 66 strm.next_out = NULL; 67 strm.avail_out = 0; 68 if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK) 69 maybe_errx("Can't read headers (%d)", ret); 70 71 bytes_out = 0; 72 strm.next_out = obuf; 73 strm.avail_out = sizeof(obuf); 74 75 for (;;) { 76 check_siginfo(); 77 if (strm.avail_in == 0) { 78 strm.next_in = ibuf; 79 strm.avail_in = read(i, ibuf, sizeof(ibuf)); 80 switch (strm.avail_in) { 81 case (size_t)-1: 82 maybe_err("read failed"); 83 /*NOTREACHED*/ 84 case 0: 85 action = LZMA_FINISH; 86 break; 87 default: 88 infile_newdata(strm.avail_in); 89 *bytes_in += strm.avail_in; 90 break; 91 } 92 } 93 94 ret = lzma_code(&strm, action); 95 96 // Write and check write error before checking decoder error. 97 // This way as much data as possible gets written to output 98 // even if decoder detected an error. 99 if (strm.avail_out == 0 || ret != LZMA_OK) { 100 const size_t write_size = sizeof(obuf) - strm.avail_out; 101 102 if (write(o, obuf, write_size) != (ssize_t)write_size) 103 maybe_err("write failed"); 104 105 strm.next_out = obuf; 106 strm.avail_out = sizeof(obuf); 107 bytes_out += write_size; 108 } 109 110 if (ret != LZMA_OK) { 111 if (ret == LZMA_STREAM_END) { 112 // Check that there's no trailing garbage. 113 if (strm.avail_in != 0 || read(i, ibuf, 1)) 114 ret = LZMA_DATA_ERROR; 115 else { 116 lzma_end(&strm); 117 return bytes_out; 118 } 119 } 120 121 const char *msg; 122 switch (ret) { 123 case LZMA_MEM_ERROR: 124 msg = strerror(ENOMEM); 125 break; 126 127 case LZMA_FORMAT_ERROR: 128 msg = "File format not recognized"; 129 break; 130 131 case LZMA_OPTIONS_ERROR: 132 // FIXME: Better message? 133 msg = "Unsupported compression options"; 134 break; 135 136 case LZMA_DATA_ERROR: 137 msg = "File is corrupt"; 138 break; 139 140 case LZMA_BUF_ERROR: 141 msg = "Unexpected end of input"; 142 break; 143 144 case LZMA_MEMLIMIT_ERROR: 145 msg = "Reached memory limit"; 146 break; 147 148 default: 149 maybe_errx("Unknown error (%d)", ret); 150 break; 151 } 152 maybe_errx("%s", msg); 153 154 } 155 } 156 } 157 158 #include <stdbool.h> 159 160 /* 161 * Copied various bits and pieces from xz support code or brute force 162 * replacements. 163 */ 164 165 #define my_min(A,B) ((A)<(B)?(A):(B)) 166 167 // Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. 168 // We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) 169 #if BUFSIZ <= 1024 170 # define IO_BUFFER_SIZE 8192 171 #else 172 # define IO_BUFFER_SIZE (BUFSIZ & ~7U) 173 #endif 174 175 /// is_sparse() accesses the buffer as uint64_t for maximum speed. 176 /// Use an union to make sure that the buffer is properly aligned. 177 typedef union { 178 uint8_t u8[IO_BUFFER_SIZE]; 179 uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; 180 uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; 181 } io_buf; 182 183 184 static bool 185 io_pread(int fd, io_buf *buf, size_t size, off_t pos) 186 { 187 // Using lseek() and read() is more portable than pread() and 188 // for us it is as good as real pread(). 189 if (lseek(fd, pos, SEEK_SET) != pos) { 190 return true; 191 } 192 193 const size_t amount = read(fd, buf, size); 194 if (amount == SIZE_MAX) 195 return true; 196 197 if (amount != size) { 198 return true; 199 } 200 201 return false; 202 } 203 204 /* 205 * Most of the following is copied (mostly verbatim) from the xz 206 * distribution, from file src/xz/list.c 207 */ 208 209 /////////////////////////////////////////////////////////////////////////////// 210 // 211 /// \file list.c 212 /// \brief Listing information about .xz files 213 // 214 // Author: Lasse Collin 215 // 216 // This file has been put into the public domain. 217 // You can do whatever you want with this file. 218 // 219 /////////////////////////////////////////////////////////////////////////////// 220 221 222 /// Information about a .xz file 223 typedef struct { 224 /// Combined Index of all Streams in the file 225 lzma_index *idx; 226 227 /// Total amount of Stream Padding 228 uint64_t stream_padding; 229 230 /// Highest memory usage so far 231 uint64_t memusage_max; 232 233 /// True if all Blocks so far have Compressed Size and 234 /// Uncompressed Size fields 235 bool all_have_sizes; 236 237 /// Oldest XZ Utils version that will decompress the file 238 uint32_t min_version; 239 240 } xz_file_info; 241 242 #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } 243 244 245 /// \brief Parse the Index(es) from the given .xz file 246 /// 247 /// \param xfi Pointer to structure where the decoded information 248 /// is stored. 249 /// \param pair Input file 250 /// 251 /// \return On success, false is returned. On error, true is returned. 252 /// 253 // TODO: This function is pretty big. liblzma should have a function that 254 // takes a callback function to parse the Index(es) from a .xz file to make 255 // it easy for applications. 256 static bool 257 parse_indexes(xz_file_info *xfi, int src_fd) 258 { 259 struct stat st; 260 261 fstat(src_fd, &st); 262 if (st.st_size <= 0) { 263 return true; 264 } 265 266 if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { 267 return true; 268 } 269 270 io_buf buf; 271 lzma_stream_flags header_flags; 272 lzma_stream_flags footer_flags; 273 lzma_ret ret; 274 275 // lzma_stream for the Index decoder 276 lzma_stream strm = LZMA_STREAM_INIT; 277 278 // All Indexes decoded so far 279 lzma_index *combined_index = NULL; 280 281 // The Index currently being decoded 282 lzma_index *this_index = NULL; 283 284 // Current position in the file. We parse the file backwards so 285 // initialize it to point to the end of the file. 286 off_t pos = st.st_size; 287 288 // Each loop iteration decodes one Index. 289 do { 290 // Check that there is enough data left to contain at least 291 // the Stream Header and Stream Footer. This check cannot 292 // fail in the first pass of this loop. 293 if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { 294 goto error; 295 } 296 297 pos -= LZMA_STREAM_HEADER_SIZE; 298 lzma_vli stream_padding = 0; 299 300 // Locate the Stream Footer. There may be Stream Padding which 301 // we must skip when reading backwards. 302 while (true) { 303 if (pos < LZMA_STREAM_HEADER_SIZE) { 304 goto error; 305 } 306 307 if (io_pread(src_fd, &buf, 308 LZMA_STREAM_HEADER_SIZE, pos)) 309 goto error; 310 311 // Stream Padding is always a multiple of four bytes. 312 int i = 2; 313 if (buf.u32[i] != 0) 314 break; 315 316 // To avoid calling io_pread() for every four bytes 317 // of Stream Padding, take advantage that we read 318 // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and 319 // check them too before calling io_pread() again. 320 do { 321 stream_padding += 4; 322 pos -= 4; 323 --i; 324 } while (i >= 0 && buf.u32[i] == 0); 325 } 326 327 // Decode the Stream Footer. 328 ret = lzma_stream_footer_decode(&footer_flags, buf.u8); 329 if (ret != LZMA_OK) { 330 goto error; 331 } 332 333 // Check that the Stream Footer doesn't specify something 334 // that we don't support. This can only happen if the xz 335 // version is older than liblzma and liblzma supports 336 // something new. 337 // 338 // It is enough to check Stream Footer. Stream Header must 339 // match when it is compared against Stream Footer with 340 // lzma_stream_flags_compare(). 341 if (footer_flags.version != 0) { 342 goto error; 343 } 344 345 // Check that the size of the Index field looks sane. 346 lzma_vli index_size = footer_flags.backward_size; 347 if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { 348 goto error; 349 } 350 351 // Set pos to the beginning of the Index. 352 pos -= index_size; 353 354 // Decode the Index. 355 ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX); 356 if (ret != LZMA_OK) { 357 goto error; 358 } 359 360 do { 361 // Don't give the decoder more input than the 362 // Index size. 363 strm.avail_in = my_min(IO_BUFFER_SIZE, index_size); 364 if (io_pread(src_fd, &buf, strm.avail_in, pos)) 365 goto error; 366 367 pos += strm.avail_in; 368 index_size -= strm.avail_in; 369 370 strm.next_in = buf.u8; 371 ret = lzma_code(&strm, LZMA_RUN); 372 373 } while (ret == LZMA_OK); 374 375 // If the decoding seems to be successful, check also that 376 // the Index decoder consumed as much input as indicated 377 // by the Backward Size field. 378 if (ret == LZMA_STREAM_END) 379 if (index_size != 0 || strm.avail_in != 0) 380 ret = LZMA_DATA_ERROR; 381 382 if (ret != LZMA_STREAM_END) { 383 // LZMA_BUFFER_ERROR means that the Index decoder 384 // would have liked more input than what the Index 385 // size should be according to Stream Footer. 386 // The message for LZMA_DATA_ERROR makes more 387 // sense in that case. 388 if (ret == LZMA_BUF_ERROR) 389 ret = LZMA_DATA_ERROR; 390 391 goto error; 392 } 393 394 // Decode the Stream Header and check that its Stream Flags 395 // match the Stream Footer. 396 pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; 397 if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { 398 goto error; 399 } 400 401 pos -= lzma_index_total_size(this_index); 402 if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos)) 403 goto error; 404 405 ret = lzma_stream_header_decode(&header_flags, buf.u8); 406 if (ret != LZMA_OK) { 407 goto error; 408 } 409 410 ret = lzma_stream_flags_compare(&header_flags, &footer_flags); 411 if (ret != LZMA_OK) { 412 goto error; 413 } 414 415 // Store the decoded Stream Flags into this_index. This is 416 // needed so that we can print which Check is used in each 417 // Stream. 418 ret = lzma_index_stream_flags(this_index, &footer_flags); 419 if (ret != LZMA_OK) 420 goto error; 421 422 // Store also the size of the Stream Padding field. It is 423 // needed to show the offsets of the Streams correctly. 424 ret = lzma_index_stream_padding(this_index, stream_padding); 425 if (ret != LZMA_OK) 426 goto error; 427 428 if (combined_index != NULL) { 429 // Append the earlier decoded Indexes 430 // after this_index. 431 ret = lzma_index_cat( 432 this_index, combined_index, NULL); 433 if (ret != LZMA_OK) { 434 goto error; 435 } 436 } 437 438 combined_index = this_index; 439 this_index = NULL; 440 441 xfi->stream_padding += stream_padding; 442 443 } while (pos > 0); 444 445 lzma_end(&strm); 446 447 // All OK. Make combined_index available to the caller. 448 xfi->idx = combined_index; 449 return false; 450 451 error: 452 // Something went wrong, free the allocated memory. 453 lzma_end(&strm); 454 lzma_index_end(combined_index, NULL); 455 lzma_index_end(this_index, NULL); 456 return true; 457 } 458 459 /***************** end of copy form list.c *************************/ 460 461 /* 462 * Small wrapper to extract total length of a file 463 */ 464 off_t 465 unxz_len(int fd) 466 { 467 xz_file_info xfi = XZ_FILE_INFO_INIT; 468 if (!parse_indexes(&xfi, fd)) { 469 off_t res = lzma_index_uncompressed_size(xfi.idx); 470 lzma_index_end(xfi.idx, NULL); 471 return res; 472 } 473 return 0; 474 } 475 476