1 /*- 2 * Copyright (c) 2008 Joerg Sonnenberger 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 /*- 27 * Copyright (c) 1985, 1986, 1992, 1993 28 * The Regents of the University of California. All rights reserved. 29 * 30 * This code is derived from software contributed to Berkeley by 31 * Diomidis Spinellis and James A. Woods, derived from original 32 * work by Spencer Thomas and Joseph Orost. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. Neither the name of the University nor the names of its contributors 43 * may be used to endorse or promote products derived from this software 44 * without specific prior written permission. 45 * 46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 56 * SUCH DAMAGE. 57 */ 58 59 #include "archive_platform.h" 60 61 #ifdef HAVE_ERRNO_H 62 #include <errno.h> 63 #endif 64 #ifdef HAVE_STDLIB_H 65 #include <stdlib.h> 66 #endif 67 #ifdef HAVE_STRING_H 68 #include <string.h> 69 #endif 70 71 #include "archive.h" 72 #include "archive_private.h" 73 #include "archive_write_private.h" 74 75 #define HSIZE 69001 /* 95% occupancy */ 76 #define HSHIFT 8 /* 8 - trunc(log2(HSIZE / 65536)) */ 77 #define CHECK_GAP 10000 /* Ratio check interval. */ 78 79 #define MAXCODE(bits) ((1 << (bits)) - 1) 80 81 /* 82 * the next two codes should not be changed lightly, as they must not 83 * lie within the contiguous general code space. 84 */ 85 #define FIRST 257 /* First free entry. */ 86 #define CLEAR 256 /* Table clear output code. */ 87 88 struct private_data { 89 int64_t in_count, out_count, checkpoint; 90 91 int code_len; /* Number of bits/code. */ 92 int cur_maxcode; /* Maximum code, given n_bits. */ 93 int max_maxcode; /* Should NEVER generate this code. */ 94 int hashtab [HSIZE]; 95 unsigned short codetab [HSIZE]; 96 int first_free; /* First unused entry. */ 97 int compress_ratio; 98 99 int cur_code, cur_fcode; 100 101 int bit_offset; 102 unsigned char bit_buf; 103 104 unsigned char *compressed; 105 size_t compressed_buffer_size; 106 size_t compressed_offset; 107 }; 108 109 static int archive_compressor_compress_open(struct archive_write_filter *); 110 static int archive_compressor_compress_write(struct archive_write_filter *, 111 const void *, size_t); 112 static int archive_compressor_compress_close(struct archive_write_filter *); 113 static int archive_compressor_compress_free(struct archive_write_filter *); 114 115 #if ARCHIVE_VERSION_NUMBER < 4000000 116 int 117 archive_write_set_compression_compress(struct archive *a) 118 { 119 __archive_write_filters_free(a); 120 return (archive_write_add_filter_compress(a)); 121 } 122 #endif 123 124 /* 125 * Add a compress filter to this write handle. 126 */ 127 int 128 archive_write_add_filter_compress(struct archive *_a) 129 { 130 struct archive_write *a = (struct archive_write *)_a; 131 struct archive_write_filter *f = __archive_write_allocate_filter(_a); 132 133 archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC, 134 ARCHIVE_STATE_NEW, "archive_write_add_filter_compress"); 135 f->open = &archive_compressor_compress_open; 136 f->code = ARCHIVE_FILTER_COMPRESS; 137 f->name = "compress"; 138 return (ARCHIVE_OK); 139 } 140 141 /* 142 * Setup callback. 143 */ 144 static int 145 archive_compressor_compress_open(struct archive_write_filter *f) 146 { 147 struct private_data *state; 148 size_t bs = 65536, bpb; 149 150 f->code = ARCHIVE_FILTER_COMPRESS; 151 f->name = "compress"; 152 153 state = calloc(1, sizeof(*state)); 154 if (state == NULL) { 155 archive_set_error(f->archive, ENOMEM, 156 "Can't allocate data for compression"); 157 return (ARCHIVE_FATAL); 158 } 159 160 if (f->archive->magic == ARCHIVE_WRITE_MAGIC) { 161 /* Buffer size should be a multiple number of the bytes 162 * per block for performance. */ 163 bpb = archive_write_get_bytes_per_block(f->archive); 164 if (bpb > bs) 165 bs = bpb; 166 else if (bpb != 0) 167 bs -= bs % bpb; 168 } 169 state->compressed_buffer_size = bs; 170 state->compressed = malloc(state->compressed_buffer_size); 171 172 if (state->compressed == NULL) { 173 archive_set_error(f->archive, ENOMEM, 174 "Can't allocate data for compression buffer"); 175 free(state); 176 return (ARCHIVE_FATAL); 177 } 178 179 f->write = archive_compressor_compress_write; 180 f->close = archive_compressor_compress_close; 181 f->free = archive_compressor_compress_free; 182 183 state->max_maxcode = 0x10000; /* Should NEVER generate this code. */ 184 state->in_count = 0; /* Length of input. */ 185 state->bit_buf = 0; 186 state->bit_offset = 0; 187 state->out_count = 3; /* Includes 3-byte header mojo. */ 188 state->compress_ratio = 0; 189 state->checkpoint = CHECK_GAP; 190 state->code_len = 9; 191 state->cur_maxcode = MAXCODE(state->code_len); 192 state->first_free = FIRST; 193 194 memset(state->hashtab, 0xff, sizeof(state->hashtab)); 195 196 /* Prime output buffer with a gzip header. */ 197 state->compressed[0] = 0x1f; /* Compress */ 198 state->compressed[1] = 0x9d; 199 state->compressed[2] = 0x90; /* Block mode, 16bit max */ 200 state->compressed_offset = 3; 201 202 f->data = state; 203 return (0); 204 } 205 206 /*- 207 * Output the given code. 208 * Inputs: 209 * code: A n_bits-bit integer. If == -1, then EOF. This assumes 210 * that n_bits <= (long)wordsize - 1. 211 * Outputs: 212 * Outputs code to the file. 213 * Assumptions: 214 * Chars are 8 bits long. 215 * Algorithm: 216 * Maintain a BITS character long buffer (so that 8 codes will 217 * fit in it exactly). Use the VAX insv instruction to insert each 218 * code in turn. When the buffer fills up empty it and start over. 219 */ 220 221 static const unsigned char rmask[9] = 222 {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 223 224 static int 225 output_byte(struct archive_write_filter *f, unsigned char c) 226 { 227 struct private_data *state = f->data; 228 229 state->compressed[state->compressed_offset++] = c; 230 ++state->out_count; 231 232 if (state->compressed_buffer_size == state->compressed_offset) { 233 int ret = __archive_write_filter(f->next_filter, 234 state->compressed, state->compressed_buffer_size); 235 if (ret != ARCHIVE_OK) 236 return ARCHIVE_FATAL; 237 state->compressed_offset = 0; 238 } 239 240 return ARCHIVE_OK; 241 } 242 243 static int 244 output_code(struct archive_write_filter *f, int ocode) 245 { 246 struct private_data *state = f->data; 247 int bits, ret, clear_flg, bit_offset; 248 249 clear_flg = ocode == CLEAR; 250 251 /* 252 * Since ocode is always >= 8 bits, only need to mask the first 253 * hunk on the left. 254 */ 255 bit_offset = state->bit_offset % 8; 256 state->bit_buf |= (ocode << bit_offset) & 0xff; 257 output_byte(f, state->bit_buf); 258 259 bits = state->code_len - (8 - bit_offset); 260 ocode >>= 8 - bit_offset; 261 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 262 if (bits >= 8) { 263 output_byte(f, ocode & 0xff); 264 ocode >>= 8; 265 bits -= 8; 266 } 267 /* Last bits. */ 268 state->bit_offset += state->code_len; 269 state->bit_buf = ocode & rmask[bits]; 270 if (state->bit_offset == state->code_len * 8) 271 state->bit_offset = 0; 272 273 /* 274 * If the next entry is going to be too big for the ocode size, 275 * then increase it, if possible. 276 */ 277 if (clear_flg || state->first_free > state->cur_maxcode) { 278 /* 279 * Write the whole buffer, because the input side won't 280 * discover the size increase until after it has read it. 281 */ 282 if (state->bit_offset > 0) { 283 while (state->bit_offset < state->code_len * 8) { 284 ret = output_byte(f, state->bit_buf); 285 if (ret != ARCHIVE_OK) 286 return ret; 287 state->bit_offset += 8; 288 state->bit_buf = 0; 289 } 290 } 291 state->bit_buf = 0; 292 state->bit_offset = 0; 293 294 if (clear_flg) { 295 state->code_len = 9; 296 state->cur_maxcode = MAXCODE(state->code_len); 297 } else { 298 state->code_len++; 299 if (state->code_len == 16) 300 state->cur_maxcode = state->max_maxcode; 301 else 302 state->cur_maxcode = MAXCODE(state->code_len); 303 } 304 } 305 306 return (ARCHIVE_OK); 307 } 308 309 static int 310 output_flush(struct archive_write_filter *f) 311 { 312 struct private_data *state = f->data; 313 int ret; 314 315 /* At EOF, write the rest of the buffer. */ 316 if (state->bit_offset % 8) { 317 state->code_len = (state->bit_offset % 8 + 7) / 8; 318 ret = output_byte(f, state->bit_buf); 319 if (ret != ARCHIVE_OK) 320 return ret; 321 } 322 323 return (ARCHIVE_OK); 324 } 325 326 /* 327 * Write data to the compressed stream. 328 */ 329 static int 330 archive_compressor_compress_write(struct archive_write_filter *f, 331 const void *buff, size_t length) 332 { 333 struct private_data *state = (struct private_data *)f->data; 334 int i; 335 int ratio; 336 int c, disp, ret; 337 const unsigned char *bp; 338 339 if (length == 0) 340 return ARCHIVE_OK; 341 342 bp = buff; 343 344 if (state->in_count == 0) { 345 state->cur_code = *bp++; 346 ++state->in_count; 347 --length; 348 } 349 350 while (length--) { 351 c = *bp++; 352 state->in_count++; 353 state->cur_fcode = (c << 16) | state->cur_code; 354 i = ((c << HSHIFT) ^ state->cur_code); /* Xor hashing. */ 355 356 if (state->hashtab[i] == state->cur_fcode) { 357 state->cur_code = state->codetab[i]; 358 continue; 359 } 360 if (state->hashtab[i] < 0) /* Empty slot. */ 361 goto nomatch; 362 /* Secondary hash (after G. Knott). */ 363 if (i == 0) 364 disp = 1; 365 else 366 disp = HSIZE - i; 367 probe: 368 if ((i -= disp) < 0) 369 i += HSIZE; 370 371 if (state->hashtab[i] == state->cur_fcode) { 372 state->cur_code = state->codetab[i]; 373 continue; 374 } 375 if (state->hashtab[i] >= 0) 376 goto probe; 377 nomatch: 378 ret = output_code(f, state->cur_code); 379 if (ret != ARCHIVE_OK) 380 return ret; 381 state->cur_code = c; 382 if (state->first_free < state->max_maxcode) { 383 state->codetab[i] = state->first_free++; /* code -> hashtable */ 384 state->hashtab[i] = state->cur_fcode; 385 continue; 386 } 387 if (state->in_count < state->checkpoint) 388 continue; 389 390 state->checkpoint = state->in_count + CHECK_GAP; 391 392 if (state->in_count <= 0x007fffff && state->out_count != 0) 393 ratio = (int)(state->in_count * 256 / state->out_count); 394 else if ((ratio = (int)(state->out_count / 256)) == 0) 395 ratio = 0x7fffffff; 396 else 397 ratio = (int)(state->in_count / ratio); 398 399 if (ratio > state->compress_ratio) 400 state->compress_ratio = ratio; 401 else { 402 state->compress_ratio = 0; 403 memset(state->hashtab, 0xff, sizeof(state->hashtab)); 404 state->first_free = FIRST; 405 ret = output_code(f, CLEAR); 406 if (ret != ARCHIVE_OK) 407 return ret; 408 } 409 } 410 411 return (ARCHIVE_OK); 412 } 413 414 415 /* 416 * Finish the compression... 417 */ 418 static int 419 archive_compressor_compress_close(struct archive_write_filter *f) 420 { 421 struct private_data *state = (struct private_data *)f->data; 422 int ret; 423 424 ret = output_code(f, state->cur_code); 425 if (ret != ARCHIVE_OK) 426 return ret; 427 ret = output_flush(f); 428 if (ret != ARCHIVE_OK) 429 return ret; 430 431 /* Write the last block */ 432 ret = __archive_write_filter(f->next_filter, 433 state->compressed, state->compressed_offset); 434 return (ret); 435 } 436 437 static int 438 archive_compressor_compress_free(struct archive_write_filter *f) 439 { 440 struct private_data *state = (struct private_data *)f->data; 441 442 free(state->compressed); 443 free(state); 444 return (ARCHIVE_OK); 445 } 446