1 /*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "archive_platform.h" 27 28 #ifdef HAVE_SYS_STAT_H 29 #include <sys/stat.h> 30 #endif 31 #ifdef HAVE_ERRNO_H 32 #include <errno.h> 33 #endif 34 #include <stdio.h> 35 #ifdef HAVE_STDLIB_H 36 #include <stdlib.h> 37 #endif 38 #ifdef HAVE_STRING_H 39 #include <string.h> 40 #endif 41 42 #include "archive.h" 43 #include "archive_entry.h" 44 45 /* 46 * This is mostly a pretty straightforward hash table implementation. 47 * The only interesting bit is the different strategies used to 48 * match up links. These strategies match those used by various 49 * archiving formats: 50 * tar - content stored with first link, remainder refer back to it. 51 * This requires us to match each subsequent link up with the 52 * first appearance. 53 * cpio - Old cpio just stored body with each link, match-ups were 54 * implicit. This is trivial. 55 * new cpio - New cpio only stores body with last link, match-ups 56 * are implicit. This is actually quite tricky; see the notes 57 * below. 58 */ 59 60 /* Users pass us a format code, we translate that into a strategy here. */ 61 #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0 62 #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1 63 #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2 64 #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3 65 66 /* Initial size of link cache. */ 67 #define links_cache_initial_size 1024 68 69 struct links_entry { 70 struct links_entry *next; 71 struct links_entry *previous; 72 struct archive_entry *canonical; 73 struct archive_entry *entry; 74 size_t hash; 75 unsigned int links; /* # links not yet seen */ 76 }; 77 78 struct archive_entry_linkresolver { 79 struct links_entry **buckets; 80 struct links_entry *spare; 81 unsigned long number_entries; 82 size_t number_buckets; 83 int strategy; 84 }; 85 86 #define NEXT_ENTRY_DEFERRED 1 87 #define NEXT_ENTRY_PARTIAL 2 88 #define NEXT_ENTRY_ALL (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL) 89 90 static struct links_entry *find_entry(struct archive_entry_linkresolver *, 91 struct archive_entry *); 92 static void grow_hash(struct archive_entry_linkresolver *); 93 static struct links_entry *insert_entry(struct archive_entry_linkresolver *, 94 struct archive_entry *); 95 static struct links_entry *next_entry(struct archive_entry_linkresolver *, 96 int); 97 98 struct archive_entry_linkresolver * 99 archive_entry_linkresolver_new(void) 100 { 101 struct archive_entry_linkresolver *res; 102 103 /* Check for positive power-of-two */ 104 if (links_cache_initial_size == 0 || 105 (links_cache_initial_size & (links_cache_initial_size - 1)) != 0) 106 return (NULL); 107 108 res = calloc(1, sizeof(struct archive_entry_linkresolver)); 109 if (res == NULL) 110 return (NULL); 111 res->number_buckets = links_cache_initial_size; 112 res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0])); 113 if (res->buckets == NULL) { 114 free(res); 115 return (NULL); 116 } 117 return (res); 118 } 119 120 void 121 archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res, 122 int fmt) 123 { 124 int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK; 125 126 switch (fmtbase) { 127 case ARCHIVE_FORMAT_7ZIP: 128 case ARCHIVE_FORMAT_AR: 129 case ARCHIVE_FORMAT_ZIP: 130 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 131 break; 132 case ARCHIVE_FORMAT_CPIO: 133 switch (fmt) { 134 case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC: 135 case ARCHIVE_FORMAT_CPIO_SVR4_CRC: 136 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO; 137 break; 138 default: 139 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 140 break; 141 } 142 break; 143 case ARCHIVE_FORMAT_MTREE: 144 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE; 145 break; 146 case ARCHIVE_FORMAT_ISO9660: 147 case ARCHIVE_FORMAT_SHAR: 148 case ARCHIVE_FORMAT_TAR: 149 case ARCHIVE_FORMAT_XAR: 150 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; 151 break; 152 default: 153 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 154 break; 155 } 156 } 157 158 void 159 archive_entry_linkresolver_free(struct archive_entry_linkresolver *res) 160 { 161 struct links_entry *le; 162 163 if (res == NULL) 164 return; 165 166 while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL) 167 archive_entry_free(le->entry); 168 free(res->buckets); 169 free(res); 170 } 171 172 void 173 archive_entry_linkify(struct archive_entry_linkresolver *res, 174 struct archive_entry **e, struct archive_entry **f) 175 { 176 struct links_entry *le; 177 struct archive_entry *t; 178 179 *f = NULL; /* Default: Don't return a second entry. */ 180 181 if (*e == NULL) { 182 le = next_entry(res, NEXT_ENTRY_DEFERRED); 183 if (le != NULL) { 184 *e = le->entry; 185 le->entry = NULL; 186 } 187 return; 188 } 189 190 /* If it has only one link, then we're done. */ 191 if (archive_entry_nlink(*e) == 1) 192 return; 193 /* Directories, devices never have hardlinks. */ 194 if (archive_entry_filetype(*e) == AE_IFDIR 195 || archive_entry_filetype(*e) == AE_IFBLK 196 || archive_entry_filetype(*e) == AE_IFCHR) 197 return; 198 199 switch (res->strategy) { 200 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR: 201 le = find_entry(res, *e); 202 if (le != NULL) { 203 archive_entry_unset_size(*e); 204 #if defined(_WIN32) && !defined(__CYGWIN__) 205 archive_entry_copy_hardlink_w(*e, 206 archive_entry_pathname_w(le->canonical)); 207 #else 208 archive_entry_copy_hardlink(*e, 209 archive_entry_pathname(le->canonical)); 210 #endif 211 } else 212 insert_entry(res, *e); 213 return; 214 case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE: 215 le = find_entry(res, *e); 216 if (le != NULL) { 217 #if defined(_WIN32) && !defined(__CYGWIN__) 218 archive_entry_copy_hardlink_w(*e, 219 archive_entry_pathname_w(le->canonical)); 220 #else 221 archive_entry_copy_hardlink(*e, 222 archive_entry_pathname(le->canonical)); 223 #endif 224 } else 225 insert_entry(res, *e); 226 return; 227 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO: 228 /* This one is trivial. */ 229 return; 230 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO: 231 le = find_entry(res, *e); 232 if (le != NULL) { 233 /* 234 * Put the new entry in le, return the 235 * old entry from le. 236 */ 237 t = *e; 238 *e = le->entry; 239 le->entry = t; 240 /* Make the old entry into a hardlink. */ 241 archive_entry_unset_size(*e); 242 #if defined(_WIN32) && !defined(__CYGWIN__) 243 archive_entry_copy_hardlink_w(*e, 244 archive_entry_pathname_w(le->canonical)); 245 #else 246 archive_entry_copy_hardlink(*e, 247 archive_entry_pathname(le->canonical)); 248 #endif 249 /* If we ran out of links, return the 250 * final entry as well. */ 251 if (le->links == 0) { 252 *f = le->entry; 253 le->entry = NULL; 254 } 255 } else { 256 /* 257 * If we haven't seen it, tuck it away 258 * for future use. 259 */ 260 le = insert_entry(res, *e); 261 if (le == NULL) 262 /* XXX We should return an error code XXX */ 263 return; 264 le->entry = *e; 265 *e = NULL; 266 } 267 return; 268 default: 269 break; 270 } 271 return; 272 } 273 274 static struct links_entry * 275 find_entry(struct archive_entry_linkresolver *res, 276 struct archive_entry *entry) 277 { 278 struct links_entry *le; 279 size_t hash, bucket; 280 dev_t dev; 281 int64_t ino; 282 283 /* Free a held entry. */ 284 if (res->spare != NULL) { 285 archive_entry_free(res->spare->canonical); 286 archive_entry_free(res->spare->entry); 287 free(res->spare); 288 res->spare = NULL; 289 } 290 291 dev = archive_entry_dev(entry); 292 ino = archive_entry_ino64(entry); 293 hash = (size_t)(dev ^ ino); 294 295 /* Try to locate this entry in the links cache. */ 296 bucket = hash & (res->number_buckets - 1); 297 for (le = res->buckets[bucket]; le != NULL; le = le->next) { 298 if (le->hash == hash 299 && dev == archive_entry_dev(le->canonical) 300 && ino == archive_entry_ino64(le->canonical)) { 301 /* 302 * Decrement link count each time and release 303 * the entry if it hits zero. This saves 304 * memory and is necessary for detecting 305 * missed links. 306 */ 307 --le->links; 308 if (le->links > 0) 309 return (le); 310 /* Remove it from this hash bucket. */ 311 if (le->previous != NULL) 312 le->previous->next = le->next; 313 if (le->next != NULL) 314 le->next->previous = le->previous; 315 if (res->buckets[bucket] == le) 316 res->buckets[bucket] = le->next; 317 res->number_entries--; 318 /* Defer freeing this entry. */ 319 res->spare = le; 320 return (le); 321 } 322 } 323 return (NULL); 324 } 325 326 static struct links_entry * 327 next_entry(struct archive_entry_linkresolver *res, int mode) 328 { 329 struct links_entry *le; 330 size_t bucket; 331 332 /* Free a held entry. */ 333 if (res->spare != NULL) { 334 archive_entry_free(res->spare->canonical); 335 archive_entry_free(res->spare->entry); 336 free(res->spare); 337 res->spare = NULL; 338 } 339 340 /* Look for next non-empty bucket in the links cache. */ 341 for (bucket = 0; bucket < res->number_buckets; bucket++) { 342 for (le = res->buckets[bucket]; le != NULL; le = le->next) { 343 if (le->entry != NULL && 344 (mode & NEXT_ENTRY_DEFERRED) == 0) 345 continue; 346 if (le->entry == NULL && 347 (mode & NEXT_ENTRY_PARTIAL) == 0) 348 continue; 349 /* Remove it from this hash bucket. */ 350 if (le->next != NULL) 351 le->next->previous = le->previous; 352 if (le->previous != NULL) 353 le->previous->next = le->next; 354 else 355 res->buckets[bucket] = le->next; 356 res->number_entries--; 357 /* Defer freeing this entry. */ 358 res->spare = le; 359 return (le); 360 } 361 } 362 return (NULL); 363 } 364 365 static struct links_entry * 366 insert_entry(struct archive_entry_linkresolver *res, 367 struct archive_entry *entry) 368 { 369 struct links_entry *le; 370 size_t hash, bucket; 371 372 /* Add this entry to the links cache. */ 373 le = calloc(1, sizeof(struct links_entry)); 374 if (le == NULL) 375 return (NULL); 376 le->canonical = archive_entry_clone(entry); 377 378 /* If the links cache is getting too full, enlarge the hash table. */ 379 if (res->number_entries > res->number_buckets * 2) 380 grow_hash(res); 381 382 hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry)); 383 bucket = hash & (res->number_buckets - 1); 384 385 /* If we could allocate the entry, record it. */ 386 if (res->buckets[bucket] != NULL) 387 res->buckets[bucket]->previous = le; 388 res->number_entries++; 389 le->next = res->buckets[bucket]; 390 le->previous = NULL; 391 res->buckets[bucket] = le; 392 le->hash = hash; 393 le->links = archive_entry_nlink(entry) - 1; 394 return (le); 395 } 396 397 static void 398 grow_hash(struct archive_entry_linkresolver *res) 399 { 400 struct links_entry *le, **new_buckets; 401 size_t new_size; 402 size_t i, bucket; 403 404 /* Try to enlarge the bucket list. */ 405 new_size = res->number_buckets * 2; 406 if (new_size < res->number_buckets) 407 return; 408 new_buckets = calloc(new_size, sizeof(struct links_entry *)); 409 410 if (new_buckets == NULL) 411 return; 412 413 for (i = 0; i < res->number_buckets; i++) { 414 while (res->buckets[i] != NULL) { 415 /* Remove entry from old bucket. */ 416 le = res->buckets[i]; 417 res->buckets[i] = le->next; 418 419 /* Add entry to new bucket. */ 420 bucket = le->hash & (new_size - 1); 421 422 if (new_buckets[bucket] != NULL) 423 new_buckets[bucket]->previous = le; 424 le->next = new_buckets[bucket]; 425 le->previous = NULL; 426 new_buckets[bucket] = le; 427 } 428 } 429 free(res->buckets); 430 res->buckets = new_buckets; 431 res->number_buckets = new_size; 432 } 433 434 struct archive_entry * 435 archive_entry_partial_links(struct archive_entry_linkresolver *res, 436 unsigned int *links) 437 { 438 struct archive_entry *e; 439 struct links_entry *le; 440 441 /* Free a held entry. */ 442 if (res->spare != NULL) { 443 archive_entry_free(res->spare->canonical); 444 archive_entry_free(res->spare->entry); 445 free(res->spare); 446 res->spare = NULL; 447 } 448 449 le = next_entry(res, NEXT_ENTRY_PARTIAL); 450 if (le != NULL) { 451 e = le->canonical; 452 if (links != NULL) 453 *links = le->links; 454 le->canonical = NULL; 455 } else { 456 e = NULL; 457 if (links != NULL) 458 *links = 0; 459 } 460 return (e); 461 } 462