1 /* $NetBSD: pat_rep.c,v 1.18 2003/02/02 10:21:14 wiz Exp $ */ 2 3 /*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #include <sys/cdefs.h> 41 #if defined(__RCSID) && !defined(lint) 42 #if 0 43 static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94"; 44 #else 45 __RCSID("$NetBSD: pat_rep.c,v 1.18 2003/02/02 10:21:14 wiz Exp $"); 46 #endif 47 #endif /* not lint */ 48 49 #include <sys/types.h> 50 #include <sys/time.h> 51 #include <sys/stat.h> 52 #include <sys/param.h> 53 #include <stdio.h> 54 #include <ctype.h> 55 #include <string.h> 56 #include <unistd.h> 57 #include <stdlib.h> 58 #ifdef NET2_REGEX 59 #include <regexp.h> 60 #else 61 #include <regex.h> 62 #endif 63 #include "pax.h" 64 #include "pat_rep.h" 65 #include "extern.h" 66 67 /* 68 * routines to handle pattern matching, name modification (regular expression 69 * substitution and interactive renames), and destination name modification for 70 * copy (-rw). Both file name and link names are adjusted as required in these 71 * routines. 72 */ 73 74 #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */ 75 static PATTERN *pathead = NULL; /* file pattern match list head */ 76 static PATTERN *pattail = NULL; /* file pattern match list tail */ 77 static REPLACE *rephead = NULL; /* replacement string list head */ 78 static REPLACE *reptail = NULL; /* replacement string list tail */ 79 80 static int rep_name(char *, size_t, int *, int); 81 static int tty_rename(ARCHD *); 82 static int fix_path(char *, int *, char *, int); 83 static int fn_match(char *, char *, char **); 84 static char * range_match(char *, int); 85 static int checkdotdot(const char *); 86 #ifdef NET2_REGEX 87 static int resub(regexp *, char *, char *, char *); 88 #else 89 static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *); 90 #endif 91 92 /* 93 * rep_add() 94 * parses the -s replacement string; compiles the regular expression 95 * and stores the compiled value and it's replacement string together in 96 * replacement string list. Input to this function is of the form: 97 * /old/new/pg 98 * The first char in the string specifies the delimiter used by this 99 * replacement string. "Old" is a regular expression in "ed" format which 100 * is compiled by regcomp() and is applied to filenames. "new" is the 101 * substitution string; p and g are options flags for printing and global 102 * replacement (over the single filename) 103 * Return: 104 * 0 if a proper replacement string and regular expression was added to 105 * the list of replacement patterns; -1 otherwise. 106 */ 107 108 int 109 rep_add(char *str) 110 { 111 char *pt1; 112 char *pt2; 113 REPLACE *rep; 114 #ifndef NET2_REGEX 115 int res; 116 char rebuf[BUFSIZ]; 117 #endif 118 119 /* 120 * throw out the bad parameters 121 */ 122 if ((str == NULL) || (*str == '\0')) { 123 tty_warn(1, "Empty replacement string"); 124 return(-1); 125 } 126 127 /* 128 * first character in the string specifies what the delimiter is for 129 * this expression. 130 */ 131 for (pt1 = str+1; *pt1; pt1++) { 132 if (*pt1 == '\\') { 133 pt1++; 134 continue; 135 } 136 if (*pt1 == *str) 137 break; 138 } 139 if (pt1 == NULL) { 140 tty_warn(1, "Invalid replacement string %s", str); 141 return(-1); 142 } 143 144 /* 145 * allocate space for the node that handles this replacement pattern 146 * and split out the regular expression and try to compile it 147 */ 148 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) { 149 tty_warn(1, "Unable to allocate memory for replacement string"); 150 return(-1); 151 } 152 153 *pt1 = '\0'; 154 #ifdef NET2_REGEX 155 if ((rep->rcmp = regcomp(str+1)) == NULL) { 156 #else 157 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { 158 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); 159 tty_warn(1, "%s while compiling regular expression %s", rebuf, 160 str); 161 #endif 162 (void)free((char *)rep); 163 return(-1); 164 } 165 166 /* 167 * put the delimiter back in case we need an error message and 168 * locate the delimiter at the end of the replacement string 169 * we then point the node at the new substitution string 170 */ 171 *pt1++ = *str; 172 for (pt2 = pt1; *pt2; pt2++) { 173 if (*pt2 == '\\') { 174 pt2++; 175 continue; 176 } 177 if (*pt2 == *str) 178 break; 179 } 180 if (pt2 == NULL) { 181 #ifdef NET2_REGEX 182 (void)free((char *)rep->rcmp); 183 #else 184 regfree(&(rep->rcmp)); 185 #endif 186 (void)free((char *)rep); 187 tty_warn(1, "Invalid replacement string %s", str); 188 return(-1); 189 } 190 191 *pt2 = '\0'; 192 193 /* Make sure to dup replacement, who knows where it came from! */ 194 if ((rep->nstr = strdup(pt1)) == NULL) { 195 #ifdef NET2_REGEX 196 (void)free((char *)rep->rcmp); 197 #else 198 regfree(&(rep->rcmp)); 199 #endif 200 (void)free((char *)rep); 201 tty_warn(1, "Unable to allocate memory for replacement string"); 202 return(-1); 203 } 204 205 pt1 = pt2++; 206 rep->flgs = 0; 207 208 /* 209 * set the options if any 210 */ 211 while (*pt2 != '\0') { 212 switch(*pt2) { 213 case 'g': 214 case 'G': 215 rep->flgs |= GLOB; 216 break; 217 case 'p': 218 case 'P': 219 rep->flgs |= PRNT; 220 break; 221 default: 222 #ifdef NET2_REGEX 223 (void)free((char *)rep->rcmp); 224 #else 225 regfree(&(rep->rcmp)); 226 #endif 227 (void)free((char *)rep); 228 *pt1 = *str; 229 tty_warn(1, "Invalid replacement string option %s", 230 str); 231 return(-1); 232 } 233 ++pt2; 234 } 235 236 /* 237 * all done, link it in at the end 238 */ 239 rep->fow = NULL; 240 if (rephead == NULL) { 241 reptail = rephead = rep; 242 return(0); 243 } 244 reptail->fow = rep; 245 reptail = rep; 246 return(0); 247 } 248 249 /* 250 * pat_add() 251 * add a pattern match to the pattern match list. Pattern matches are used 252 * to select which archive members are extracted. (They appear as 253 * arguments to pax in the list and read modes). If no patterns are 254 * supplied to pax, all members in the archive will be selected (and the 255 * pattern match list is empty). 256 * 257 * Return: 258 * 0 if the pattern was added to the list, -1 otherwise 259 */ 260 261 int 262 pat_add(char *str, char *chdn) 263 { 264 PATTERN *pt; 265 266 /* 267 * throw out the junk 268 */ 269 if ((str == NULL) || (*str == '\0')) { 270 tty_warn(1, "Empty pattern string"); 271 return(-1); 272 } 273 274 /* 275 * allocate space for the pattern and store the pattern. the pattern is 276 * part of argv so do not bother to copy it, just point at it. Add the 277 * node to the end of the pattern list 278 */ 279 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) { 280 tty_warn(1, "Unable to allocate memory for pattern string"); 281 return(-1); 282 } 283 284 pt->pstr = str; 285 pt->pend = NULL; 286 pt->plen = strlen(str); 287 pt->fow = NULL; 288 pt->flgs = 0; 289 pt->chdname = chdn; 290 if (pathead == NULL) { 291 pattail = pathead = pt; 292 return(0); 293 } 294 pattail->fow = pt; 295 pattail = pt; 296 return(0); 297 } 298 299 /* 300 * pat_chk() 301 * complain if any the user supplied pattern did not result in a match to 302 * a selected archive member. 303 */ 304 305 void 306 pat_chk(void) 307 { 308 PATTERN *pt; 309 int wban = 0; 310 311 /* 312 * walk down the list checking the flags to make sure MTCH was set, 313 * if not complain 314 */ 315 for (pt = pathead; pt != NULL; pt = pt->fow) { 316 if (pt->flgs & MTCH) 317 continue; 318 if (!wban) { 319 tty_warn(1, "WARNING! These patterns were not matched:"); 320 ++wban; 321 } 322 (void)fprintf(stderr, "%s\n", pt->pstr); 323 } 324 } 325 326 /* 327 * pat_sel() 328 * the archive member which matches a pattern was selected. Mark the 329 * pattern as having selected an archive member. arcn->pat points at the 330 * pattern that was matched. arcn->pat is set in pat_match() 331 * 332 * NOTE: When the -c option is used, we are called when there was no match 333 * by pat_match() (that means we did match before the inverted sense of 334 * the logic). Now this seems really strange at first, but with -c we 335 * need to keep track of those patterns that cause a archive member to NOT 336 * be selected (it found an archive member with a specified pattern) 337 * Return: 338 * 0 if the pattern pointed at by arcn->pat was tagged as creating a 339 * match, -1 otherwise. 340 */ 341 342 int 343 pat_sel(ARCHD *arcn) 344 { 345 PATTERN *pt; 346 PATTERN **ppt; 347 int len; 348 349 /* 350 * if no patterns just return 351 */ 352 if ((pathead == NULL) || ((pt = arcn->pat) == NULL)) 353 return(0); 354 355 /* 356 * when we are NOT limited to a single match per pattern mark the 357 * pattern and return 358 */ 359 if (!nflag) { 360 pt->flgs |= MTCH; 361 return(0); 362 } 363 364 /* 365 * we reach this point only when we allow a single selected match per 366 * pattern, if the pattern matches a directory and we do not have -d 367 * (dflag) we are done with this pattern. We may also be handed a file 368 * in the subtree of a directory. in that case when we are operating 369 * with -d, this pattern was already selected and we are done 370 */ 371 if (pt->flgs & DIR_MTCH) 372 return(0); 373 374 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) { 375 /* 376 * ok we matched a directory and we are allowing 377 * subtree matches but because of the -n only its children will 378 * match. This is tagged as a DIR_MTCH type. 379 * WATCH IT, the code assumes that pt->pend points 380 * into arcn->name and arcn->name has not been modified. 381 * If not we will have a big mess. Yup this is another kludge 382 */ 383 384 /* 385 * if this was a prefix match, remove trailing part of path 386 * so we can copy it. Future matches will be exact prefix match 387 */ 388 if (pt->pend != NULL) 389 *pt->pend = '\0'; 390 391 if ((pt->pstr = strdup(arcn->name)) == NULL) { 392 tty_warn(1, "Pattern select out of memory"); 393 if (pt->pend != NULL) 394 *pt->pend = '/'; 395 pt->pend = NULL; 396 return(-1); 397 } 398 399 /* 400 * put the trailing / back in the source string 401 */ 402 if (pt->pend != NULL) { 403 *pt->pend = '/'; 404 pt->pend = NULL; 405 } 406 pt->plen = strlen(pt->pstr); 407 408 /* 409 * strip off any trailing /, this should really never happen 410 */ 411 len = pt->plen - 1; 412 if (*(pt->pstr + len) == '/') { 413 *(pt->pstr + len) = '\0'; 414 pt->plen = len; 415 } 416 pt->flgs = DIR_MTCH | MTCH; 417 arcn->pat = pt; 418 return(0); 419 } 420 421 /* 422 * we are then done with this pattern, so we delete it from the list 423 * because it can never be used for another match. 424 * Seems kind of strange to do for a -c, but the pax spec is really 425 * vague on the interaction of -c, -n, and -d. We assume that when -c 426 * and the pattern rejects a member (i.e. it matched it) it is done. 427 * In effect we place the order of the flags as having -c last. 428 */ 429 pt = pathead; 430 ppt = &pathead; 431 while ((pt != NULL) && (pt != arcn->pat)) { 432 ppt = &(pt->fow); 433 pt = pt->fow; 434 } 435 436 if (pt == NULL) { 437 /* 438 * should never happen.... 439 */ 440 tty_warn(1, "Pattern list inconsistant"); 441 return(-1); 442 } 443 *ppt = pt->fow; 444 (void)free((char *)pt); 445 arcn->pat = NULL; 446 return(0); 447 } 448 449 /* 450 * pat_match() 451 * see if this archive member matches any supplied pattern, if a match 452 * is found, arcn->pat is set to point at the potential pattern. Later if 453 * this archive member is "selected" we process and mark the pattern as 454 * one which matched a selected archive member (see pat_sel()) 455 * Return: 456 * 0 if this archive member should be processed, 1 if it should be 457 * skipped and -1 if we are done with all patterns (and pax should quit 458 * looking for more members) 459 */ 460 461 int 462 pat_match(ARCHD *arcn) 463 { 464 PATTERN *pt; 465 466 arcn->pat = NULL; 467 468 /* 469 * if there are no more patterns and we have -n (and not -c) we are 470 * done. otherwise with no patterns to match, matches all 471 */ 472 if (pathead == NULL) { 473 if (nflag && !cflag) 474 return(-1); 475 return(0); 476 } 477 478 /* 479 * have to search down the list one at a time looking for a match. 480 */ 481 pt = pathead; 482 while (pt != NULL) { 483 /* 484 * check for a file name match unless we have DIR_MTCH set in 485 * this pattern then we want a prefix match 486 */ 487 if (pt->flgs & DIR_MTCH) { 488 /* 489 * this pattern was matched before to a directory 490 * as we must have -n set for this (but not -d). We can 491 * only match CHILDREN of that directory so we must use 492 * an exact prefix match (no wildcards). 493 */ 494 if ((arcn->name[pt->plen] == '/') && 495 (strncmp(pt->pstr, arcn->name, pt->plen) == 0)) 496 break; 497 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0) 498 break; 499 pt = pt->fow; 500 } 501 502 /* 503 * return the result, remember that cflag (-c) inverts the sense of a 504 * match 505 */ 506 if (pt == NULL) 507 return(cflag ? 0 : 1); 508 509 /* 510 * we had a match, now when we invert the sense (-c) we reject this 511 * member. However we have to tag the pattern a being successful, (in a 512 * match, not in selecting a archive member) so we call pat_sel() here. 513 */ 514 arcn->pat = pt; 515 if (!cflag) 516 return(0); 517 518 if (pat_sel(arcn) < 0) 519 return(-1); 520 arcn->pat = NULL; 521 return(1); 522 } 523 524 /* 525 * fn_match() 526 * Return: 527 * 0 if this archive member should be processed, 1 if it should be 528 * skipped and -1 if we are done with all patterns (and pax should quit 529 * looking for more members) 530 * Note: *pend may be changed to show where the prefix ends. 531 */ 532 533 static int 534 fn_match(char *pattern, char *string, char **pend) 535 { 536 char c; 537 char test; 538 539 *pend = NULL; 540 for (;;) { 541 switch (c = *pattern++) { 542 case '\0': 543 /* 544 * Ok we found an exact match 545 */ 546 if (*string == '\0') 547 return(0); 548 549 /* 550 * Check if it is a prefix match 551 */ 552 if ((dflag == 1) || (*string != '/')) 553 return(-1); 554 555 /* 556 * It is a prefix match, remember where the trailing 557 * / is located 558 */ 559 *pend = string; 560 return(0); 561 case '?': 562 if ((test = *string++) == '\0') 563 return (-1); 564 break; 565 case '*': 566 c = *pattern; 567 /* 568 * Collapse multiple *'s. 569 */ 570 while (c == '*') 571 c = *++pattern; 572 573 /* 574 * Optimized hack for pattern with a * at the end 575 */ 576 if (c == '\0') 577 return (0); 578 579 /* 580 * General case, use recursion. 581 */ 582 while ((test = *string) != '\0') { 583 if (!fn_match(pattern, string, pend)) 584 return (0); 585 ++string; 586 } 587 return (-1); 588 case '[': 589 /* 590 * range match 591 */ 592 if (((test = *string++) == '\0') || 593 ((pattern = range_match(pattern, test)) == NULL)) 594 return (-1); 595 break; 596 case '\\': 597 default: 598 if (c != *string++) 599 return (-1); 600 break; 601 } 602 } 603 /* NOTREACHED */ 604 } 605 606 static char * 607 range_match(char *pattern, int test) 608 { 609 char c; 610 char c2; 611 int negate; 612 int ok = 0; 613 614 if ((negate = (*pattern == '!')) != 0) 615 ++pattern; 616 617 while ((c = *pattern++) != ']') { 618 /* 619 * Illegal pattern 620 */ 621 if (c == '\0') 622 return (NULL); 623 624 if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') && 625 (c2 != ']')) { 626 if ((c <= test) && (test <= c2)) 627 ok = 1; 628 pattern += 2; 629 } else if (c == test) 630 ok = 1; 631 } 632 return (ok == negate ? NULL : pattern); 633 } 634 635 /* 636 * mod_name() 637 * modify a selected file name. first attempt to apply replacement string 638 * expressions, then apply interactive file rename. We apply replacement 639 * string expressions to both filenames and file links (if we didn't the 640 * links would point to the wrong place, and we could never be able to 641 * move an archive that has a file link in it). When we rename files 642 * interactively, we store that mapping (old name to user input name) so 643 * if we spot any file links to the old file name in the future, we will 644 * know exactly how to fix the file link. 645 * Return: 646 * 0 continue to process file, 1 skip this file, -1 pax is finished 647 */ 648 649 int 650 mod_name(ARCHD *arcn) 651 { 652 int res = 0; 653 654 /* 655 * Strip off leading '/' if appropriate. 656 * Currently, this option is only set for the tar format. 657 */ 658 if (rmleadslash && arcn->name[0] == '/') { 659 if (arcn->name[1] == '\0') { 660 arcn->name[0] = '.'; 661 } else { 662 (void)memmove(arcn->name, &arcn->name[1], 663 strlen(arcn->name)); 664 arcn->nlen--; 665 } 666 if (rmleadslash < 2) { 667 rmleadslash = 2; 668 tty_warn(0, "Removing leading / from absolute path names in the archive"); 669 } 670 } 671 if (rmleadslash && arcn->ln_name[0] == '/' && 672 (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) { 673 if (arcn->ln_name[1] == '\0') { 674 arcn->ln_name[0] = '.'; 675 } else { 676 (void)memmove(arcn->ln_name, &arcn->ln_name[1], 677 strlen(arcn->ln_name)); 678 arcn->ln_nlen--; 679 } 680 if (rmleadslash < 2) { 681 rmleadslash = 2; 682 tty_warn(0, "Removing leading / from absolute path names in the archive"); 683 } 684 } 685 686 if (secure) { 687 if (checkdotdot(arcn->name)) { 688 tty_warn(0, "Ignoring file containing `..' (%s)", 689 arcn->name); 690 return 1; 691 } 692 #ifdef notdef 693 if (checkdotdot(arcn->ln_name)) { 694 tty_warn(0, "Ignoring link containing `..' (%s)", 695 arcn->ln_name); 696 return 1; 697 } 698 #endif 699 } 700 701 /* 702 * IMPORTANT: We have a problem. what do we do with symlinks? 703 * Modifying a hard link name makes sense, as we know the file it 704 * points at should have been seen already in the archive (and if it 705 * wasn't seen because of a read error or a bad archive, we lose 706 * anyway). But there are no such requirements for symlinks. On one 707 * hand the symlink that refers to a file in the archive will have to 708 * be modified to so it will still work at its new location in the 709 * file system. On the other hand a symlink that points elsewhere (and 710 * should continue to do so) should not be modified. There is clearly 711 * no perfect solution here. So we handle them like hardlinks. Clearly 712 * a replacement made by the interactive rename mapping is very likely 713 * to be correct since it applies to a single file and is an exact 714 * match. The regular expression replacements are a little harder to 715 * justify though. We claim that the symlink name is only likely 716 * to be replaced when it points within the file tree being moved and 717 * in that case it should be modified. what we really need to do is to 718 * call an oracle here. :) 719 */ 720 if (rephead != NULL) { 721 /* 722 * we have replacement strings, modify the name and the link 723 * name if any. 724 */ 725 if ((res = rep_name(arcn->name, sizeof(arcn->name), 726 &(arcn->nlen), 1)) != 0) 727 return(res); 728 729 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 730 (arcn->type == PAX_HRG)) && 731 ((res = rep_name(arcn->ln_name, sizeof(arcn->ln_name), 732 &(arcn->ln_nlen), 0)) != 0)) 733 return(res); 734 } 735 736 if (iflag) { 737 /* 738 * perform interactive file rename, then map the link if any 739 */ 740 if ((res = tty_rename(arcn)) != 0) 741 return(res); 742 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 743 (arcn->type == PAX_HRG)) 744 sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name)); 745 } 746 return(res); 747 } 748 749 /* 750 * tty_rename() 751 * Prompt the user for a replacement file name. A "." keeps the old name, 752 * a empty line skips the file, and an EOF on reading the tty, will cause 753 * pax to stop processing and exit. Otherwise the file name input, replaces 754 * the old one. 755 * Return: 756 * 0 process this file, 1 skip this file, -1 we need to exit pax 757 */ 758 759 static int 760 tty_rename(ARCHD *arcn) 761 { 762 char tmpname[PAXPATHLEN+2]; 763 int res; 764 765 /* 766 * prompt user for the replacement name for a file, keep trying until 767 * we get some reasonable input. Archives may have more than one file 768 * on them with the same name (from updates etc). We print verbose info 769 * on the file so the user knows what is up. 770 */ 771 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); 772 773 for (;;) { 774 ls_tty(arcn); 775 tty_prnt("Input new name, or a \".\" to keep the old name, "); 776 tty_prnt("or a \"return\" to skip this file.\n"); 777 tty_prnt("Input > "); 778 if (tty_read(tmpname, sizeof(tmpname)) < 0) 779 return(-1); 780 if (strcmp(tmpname, "..") == 0) { 781 tty_prnt("Try again, illegal file name: ..\n"); 782 continue; 783 } 784 if (strlen(tmpname) > PAXPATHLEN) { 785 tty_prnt("Try again, file name too long\n"); 786 continue; 787 } 788 break; 789 } 790 791 /* 792 * empty file name, skips this file. a "." leaves it alone 793 */ 794 if (tmpname[0] == '\0') { 795 tty_prnt("Skipping file.\n"); 796 return(1); 797 } 798 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { 799 tty_prnt("Processing continues, name unchanged.\n"); 800 return(0); 801 } 802 803 /* 804 * ok the name changed. We may run into links that point at this 805 * file later. we have to remember where the user sent the file 806 * in order to repair any links. 807 */ 808 tty_prnt("Processing continues, name changed to: %s\n", tmpname); 809 res = add_name(arcn->name, arcn->nlen, tmpname); 810 arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name)); 811 if (res < 0) 812 return(-1); 813 return(0); 814 } 815 816 /* 817 * set_dest() 818 * fix up the file name and the link name (if any) so this file will land 819 * in the destination directory (used during copy() -rw). 820 * Return: 821 * 0 if ok, -1 if failure (name too long) 822 */ 823 824 int 825 set_dest(ARCHD *arcn, char *dest_dir, int dir_len) 826 { 827 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) 828 return(-1); 829 830 /* 831 * It is really hard to deal with symlinks here, we cannot be sure 832 * if the name they point was moved (or will be moved). It is best to 833 * leave them alone. 834 */ 835 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG)) 836 return(0); 837 838 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) 839 return(-1); 840 return(0); 841 } 842 843 /* 844 * fix_path 845 * concatenate dir_name and or_name and store the result in or_name (if 846 * it fits). This is one ugly function. 847 * Return: 848 * 0 if ok, -1 if the final name is too long 849 */ 850 851 static int 852 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len) 853 { 854 char *src; 855 char *dest; 856 char *start; 857 int len; 858 859 /* 860 * we shift the or_name to the right enough to tack in the dir_name 861 * at the front. We make sure we have enough space for it all before 862 * we start. since dest always ends in a slash, we skip of or_name 863 * if it also starts with one. 864 */ 865 start = or_name; 866 src = start + *or_len; 867 dest = src + dir_len; 868 if (*start == '/') { 869 ++start; 870 --dest; 871 } 872 if ((len = dest - or_name) > PAXPATHLEN) { 873 tty_warn(1, "File name %s/%s, too long", dir_name, start); 874 return(-1); 875 } 876 *or_len = len; 877 878 /* 879 * enough space, shift 880 */ 881 while (src >= start) 882 *dest-- = *src--; 883 src = dir_name + dir_len - 1; 884 885 /* 886 * splice in the destination directory name 887 */ 888 while (src >= dir_name) 889 *dest-- = *src--; 890 891 *(or_name + len) = '\0'; 892 return(0); 893 } 894 895 /* 896 * rep_name() 897 * walk down the list of replacement strings applying each one in order. 898 * when we find one with a successful substitution, we modify the name 899 * as specified. if required, we print the results. if the resulting name 900 * is empty, we will skip this archive member. We use the regexp(3) 901 * routines (regexp() ought to win a prize as having the most cryptic 902 * library function manual page). 903 * --Parameters-- 904 * name is the file name we are going to apply the regular expressions to 905 * (and may be modified) 906 * namelen the size of the name buffer. 907 * nlen is the length of this name (and is modified to hold the length of 908 * the final string). 909 * prnt is a flag that says whether to print the final result. 910 * Return: 911 * 0 if substitution was successful, 1 if we are to skip the file (the name 912 * ended up empty) 913 */ 914 915 static int 916 rep_name(char *name, size_t namelen, int *nlen, int prnt) 917 { 918 REPLACE *pt; 919 char *inpt; 920 char *outpt; 921 char *endpt; 922 char *rpt; 923 int found = 0; 924 int res; 925 #ifndef NET2_REGEX 926 regmatch_t pm[MAXSUBEXP]; 927 #endif 928 char nname[PAXPATHLEN+1]; /* final result of all replacements */ 929 char buf1[PAXPATHLEN+1]; /* where we work on the name */ 930 931 /* 932 * copy the name into buf1, where we will work on it. We need to keep 933 * the orig string around so we can print out the result of the final 934 * replacement. We build up the final result in nname. inpt points at 935 * the string we apply the regular expression to. prnt is used to 936 * suppress printing when we handle replacements on the link field 937 * (the user already saw that substitution go by) 938 */ 939 pt = rephead; 940 (void)strcpy(buf1, name); 941 inpt = buf1; 942 outpt = nname; 943 endpt = outpt + PAXPATHLEN; 944 945 /* 946 * try each replacement string in order 947 */ 948 while (pt != NULL) { 949 do { 950 /* 951 * check for a successful substitution, if not go to 952 * the next pattern, or cleanup if we were global 953 */ 954 #ifdef NET2_REGEX 955 if (regexec(pt->rcmp, inpt) == 0) 956 #else 957 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0) 958 #endif 959 break; 960 961 /* 962 * ok we found one. We have three parts, the prefix 963 * which did not match, the section that did and the 964 * tail (that also did not match). Copy the prefix to 965 * the final output buffer (watching to make sure we 966 * do not create a string too long). 967 */ 968 found = 1; 969 #ifdef NET2_REGEX 970 rpt = pt->rcmp->startp[0]; 971 #else 972 rpt = inpt + pm[0].rm_so; 973 #endif 974 975 while ((inpt < rpt) && (outpt < endpt)) 976 *outpt++ = *inpt++; 977 if (outpt == endpt) 978 break; 979 980 /* 981 * for the second part (which matched the regular 982 * expression) apply the substitution using the 983 * replacement string and place it the prefix in the 984 * final output. If we have problems, skip it. 985 */ 986 if ((res = 987 #ifdef NET2_REGEX 988 resub(pt->rcmp,pt->nstr,outpt,endpt) 989 #else 990 resub(&(pt->rcmp),pm,pt->nstr,inpt, outpt,endpt) 991 #endif 992 ) < 0) { 993 if (prnt) 994 tty_warn(1, "Replacement name error %s", 995 name); 996 return(1); 997 } 998 outpt += res; 999 1000 /* 1001 * we set up to look again starting at the first 1002 * character in the tail (of the input string right 1003 * after the last character matched by the regular 1004 * expression (inpt always points at the first char in 1005 * the string to process). If we are not doing a global 1006 * substitution, we will use inpt to copy the tail to 1007 * the final result. Make sure we do not overrun the 1008 * output buffer 1009 */ 1010 #ifdef NET2_REGEX 1011 inpt = pt->rcmp->endp[0]; 1012 #else 1013 inpt += pm[0].rm_eo - pm[0].rm_so; 1014 #endif 1015 1016 if ((outpt == endpt) || (*inpt == '\0')) 1017 break; 1018 1019 /* 1020 * if the user wants global we keep trying to 1021 * substitute until it fails, then we are done. 1022 */ 1023 } while (pt->flgs & GLOB); 1024 1025 if (found) 1026 break; 1027 1028 /* 1029 * a successful substitution did NOT occur, try the next one 1030 */ 1031 pt = pt->fow; 1032 } 1033 1034 if (found) { 1035 /* 1036 * we had a substitution, copy the last tail piece (if there is 1037 * room) to the final result 1038 */ 1039 while ((outpt < endpt) && (*inpt != '\0')) 1040 *outpt++ = *inpt++; 1041 1042 *outpt = '\0'; 1043 if ((outpt == endpt) && (*inpt != '\0')) { 1044 if (prnt) 1045 tty_warn(1,"Replacement name too long %s >> %s", 1046 name, nname); 1047 return(1); 1048 } 1049 1050 /* 1051 * inform the user of the result if wanted 1052 */ 1053 if (prnt && (pt->flgs & PRNT)) { 1054 if (*nname == '\0') 1055 (void)fprintf(stderr,"%s >> <empty string>\n", 1056 name); 1057 else 1058 (void)fprintf(stderr,"%s >> %s\n", name, nname); 1059 } 1060 1061 /* 1062 * if empty inform the caller this file is to be skipped 1063 * otherwise copy the new name over the orig name and return 1064 */ 1065 if (*nname == '\0') 1066 return(1); 1067 *nlen = strlcpy(name, nname, namelen); 1068 } 1069 return(0); 1070 } 1071 1072 1073 /* 1074 * checkdotdot() 1075 * Return true if a component of the name contains a reference to ".." 1076 */ 1077 static int 1078 checkdotdot(const char *name) 1079 { 1080 const char *p; 1081 /* 1. "..{[/],}" */ 1082 if (name[0] == '.' && name[1] == '.' && 1083 (name[2] == '/' || name[2] == '\0')) 1084 return 1; 1085 1086 /* 2. "*[/]..[/]*" */ 1087 if (strstr(name, "/../") != NULL) 1088 return 1; 1089 1090 /* 3. "*[/].." */ 1091 for (p = name; *p; p++) 1092 continue; 1093 if (p - name < 3) 1094 return 0; 1095 if (p[-1] == '.' && p[-2] == '.' && p[-3] == '/') 1096 return 1; 1097 1098 return 0; 1099 } 1100 1101 #ifdef NET2_REGEX 1102 /* 1103 * resub() 1104 * apply the replacement to the matched expression. expand out the old 1105 * style ed(1) subexpression expansion. 1106 * Return: 1107 * -1 if error, or the number of characters added to the destination. 1108 */ 1109 1110 static int 1111 resub(regexp *prog, char *src, char *dest, char *destend) 1112 { 1113 char *spt; 1114 char *dpt; 1115 char c; 1116 int no; 1117 int len; 1118 1119 spt = src; 1120 dpt = dest; 1121 while ((dpt < destend) && ((c = *spt++) != '\0')) { 1122 if (c == '&') 1123 no = 0; 1124 else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) 1125 no = *spt++ - '0'; 1126 else { 1127 if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) 1128 c = *spt++; 1129 *dpt++ = c; 1130 continue; 1131 } 1132 if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) || 1133 ((len = prog->endp[no] - prog->startp[no]) <= 0)) 1134 continue; 1135 1136 /* 1137 * copy the subexpression to the destination. 1138 * fail if we run out of space or the match string is damaged 1139 */ 1140 if (len > (destend - dpt)) 1141 return (-1); 1142 strncpy(dpt, prog->startp[no], len); 1143 dpt += len; 1144 } 1145 return(dpt - dest); 1146 } 1147 1148 #else 1149 1150 /* 1151 * resub() 1152 * apply the replacement to the matched expression. expand out the old 1153 * style ed(1) subexpression expansion. 1154 * Return: 1155 * -1 if error, or the number of characters added to the destination. 1156 */ 1157 1158 static int 1159 resub(regex_t *rp, regmatch_t *pm, char *src, char *txt, char *dest, 1160 char *destend) 1161 { 1162 char *spt; 1163 char *dpt; 1164 char c; 1165 regmatch_t *pmpt; 1166 int len; 1167 int subexcnt; 1168 1169 spt = src; 1170 dpt = dest; 1171 subexcnt = rp->re_nsub; 1172 while ((dpt < destend) && ((c = *spt++) != '\0')) { 1173 /* 1174 * see if we just have an ordinary replacement character 1175 * or we refer to a subexpression. 1176 */ 1177 if (c == '&') { 1178 pmpt = pm; 1179 } else if ((c == '\\') && (*spt >= '1') && (*spt <= '9')) { 1180 /* 1181 * make sure there is a subexpression as specified 1182 */ 1183 if ((len = *spt++ - '0') > subexcnt) 1184 return(-1); 1185 pmpt = pm + len; 1186 } else { 1187 /* 1188 * Ordinary character, just copy it 1189 */ 1190 if ((c == '\\') && ((*spt == '\\') || (*spt == '&'))) 1191 c = *spt++; 1192 *dpt++ = c; 1193 continue; 1194 } 1195 1196 /* 1197 * continue if the subexpression is bogus 1198 */ 1199 if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) || 1200 ((len = pmpt->rm_eo - pmpt->rm_so) <= 0)) 1201 continue; 1202 1203 /* 1204 * copy the subexpression to the destination. 1205 * fail if we run out of space or the match string is damaged 1206 */ 1207 if (len > (destend - dpt)) 1208 return -1; 1209 strncpy(dpt, txt + pmpt->rm_so, len); 1210 dpt += len; 1211 } 1212 return(dpt - dest); 1213 } 1214 #endif 1215