1*2b9c0f9fSriastradh /* $NetBSD: pat_rep.c,v 1.32 2024/08/05 13:37:27 riastradh Exp $ */ 2b5b29542Sagc 3b5b29542Sagc /*- 4ed6ed8e6Sagc * Copyright (c) 1992 Keith Muller. 5b5b29542Sagc * Copyright (c) 1992, 1993 6b5b29542Sagc * The Regents of the University of California. All rights reserved. 7b5b29542Sagc * 8b5b29542Sagc * This code is derived from software contributed to Berkeley by 9b5b29542Sagc * Keith Muller of the University of California, San Diego. 10b5b29542Sagc * 11b5b29542Sagc * Redistribution and use in source and binary forms, with or without 12b5b29542Sagc * modification, are permitted provided that the following conditions 13b5b29542Sagc * are met: 14b5b29542Sagc * 1. Redistributions of source code must retain the above copyright 15b5b29542Sagc * notice, this list of conditions and the following disclaimer. 16b5b29542Sagc * 2. Redistributions in binary form must reproduce the above copyright 17b5b29542Sagc * notice, this list of conditions and the following disclaimer in the 18b5b29542Sagc * documentation and/or other materials provided with the distribution. 19b5b29542Sagc * 3. Neither the name of the University nor the names of its contributors 20b5b29542Sagc * may be used to endorse or promote products derived from this software 21b5b29542Sagc * without specific prior written permission. 22b5b29542Sagc * 23b5b29542Sagc * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24b5b29542Sagc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25b5b29542Sagc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26b5b29542Sagc * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27b5b29542Sagc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28b5b29542Sagc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29b5b29542Sagc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30b5b29542Sagc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31b5b29542Sagc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32b5b29542Sagc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33b5b29542Sagc * SUCH DAMAGE. 34b5b29542Sagc */ 3549f0ad86Scgd 36171d6532Slukem #if HAVE_NBTOOL_CONFIG_H 37171d6532Slukem #include "nbtool_config.h" 38171d6532Slukem #endif 39171d6532Slukem 40f3cd6022Schristos #include <sys/cdefs.h> 41171d6532Slukem #if !defined(lint) 4249f0ad86Scgd #if 0 4349f0ad86Scgd static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94"; 4449f0ad86Scgd #else 45*2b9c0f9fSriastradh __RCSID("$NetBSD: pat_rep.c,v 1.32 2024/08/05 13:37:27 riastradh Exp $"); 4649f0ad86Scgd #endif 478b35abe2Sjtc #endif /* not lint */ 488b35abe2Sjtc 498b35abe2Sjtc #include <sys/types.h> 508b35abe2Sjtc #include <sys/time.h> 518b35abe2Sjtc #include <sys/stat.h> 528b35abe2Sjtc #include <sys/param.h> 538b35abe2Sjtc #include <stdio.h> 548b35abe2Sjtc #include <ctype.h> 558b35abe2Sjtc #include <string.h> 568b35abe2Sjtc #include <unistd.h> 578b35abe2Sjtc #include <stdlib.h> 588b35abe2Sjtc #include "pax.h" 598b35abe2Sjtc #include "pat_rep.h" 608b35abe2Sjtc #include "extern.h" 618b35abe2Sjtc 628b35abe2Sjtc /* 638b35abe2Sjtc * routines to handle pattern matching, name modification (regular expression 648b35abe2Sjtc * substitution and interactive renames), and destination name modification for 658b35abe2Sjtc * copy (-rw). Both file name and link names are adjusted as required in these 668b35abe2Sjtc * routines. 
678b35abe2Sjtc */ 688b35abe2Sjtc 698b35abe2Sjtc #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */ 708b35abe2Sjtc static PATTERN *pathead = NULL; /* file pattern match list head */ 718b35abe2Sjtc static PATTERN *pattail = NULL; /* file pattern match list tail */ 728b35abe2Sjtc static REPLACE *rephead = NULL; /* replacement string list head */ 738b35abe2Sjtc static REPLACE *reptail = NULL; /* replacement string list tail */ 748b35abe2Sjtc 750c612021Schristos static int rep_name(char *, size_t, int *, int); 76c1bd745cSlukem static int tty_rename(ARCHD *); 77c1bd745cSlukem static int fix_path(char *, int *, char *, int); 78a992ea79Sperry static int fn_match(char *, char *, char **, int); 79c1bd745cSlukem static char * range_match(char *, int); 80ca541391Schristos static int checkdotdot(const char *); 81c1bd745cSlukem static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *); 828b35abe2Sjtc 838b35abe2Sjtc /* 848b35abe2Sjtc * rep_add() 858b35abe2Sjtc * parses the -s replacement string; compiles the regular expression 86a640fe8cSsnj * and stores the compiled value and its replacement string together in 878b35abe2Sjtc * replacement string list. Input to this function is of the form: 8801205dd5Sgutteridge * /old/new/gps 898b35abe2Sjtc * The first char in the string specifies the delimiter used by this 908b35abe2Sjtc * replacement string. "Old" is a regular expression in "ed" format which 918b35abe2Sjtc * is compiled by regcomp() and is applied to filenames. "new" is the 9201205dd5Sgutteridge * substitution string; g, p, and s are options flags for global 9301205dd5Sgutteridge * replacement (over the single filename), printing, and preventing 9401205dd5Sgutteridge * substitutions on symbolic link destinations. 958b35abe2Sjtc * Return: 968b35abe2Sjtc * 0 if a proper replacement string and regular expression was added to 978b35abe2Sjtc * the list of replacement patterns; -1 otherwise. 
988b35abe2Sjtc */ 998b35abe2Sjtc 1008b35abe2Sjtc int 10148250187Stls rep_add(char *str) 1028b35abe2Sjtc { 10348250187Stls char *pt1; 10448250187Stls char *pt2; 10548250187Stls REPLACE *rep; 10648250187Stls int res; 1078b35abe2Sjtc char rebuf[BUFSIZ]; 1088b35abe2Sjtc 1098b35abe2Sjtc /* 1108b35abe2Sjtc * throw out the bad parameters 1118b35abe2Sjtc */ 1128b35abe2Sjtc if ((str == NULL) || (*str == '\0')) { 113f3cd6022Schristos tty_warn(1, "Empty replacement string"); 114cdec4ac1Sdsl return -1; 1158b35abe2Sjtc } 1168b35abe2Sjtc 1178b35abe2Sjtc /* 1188b35abe2Sjtc * first character in the string specifies what the delimiter is for 1190c612021Schristos * this expression. 1208b35abe2Sjtc */ 1210c612021Schristos for (pt1 = str+1; *pt1; pt1++) { 1220c612021Schristos if (*pt1 == '\\') { 1230c612021Schristos pt1++; 1240c612021Schristos continue; 1250c612021Schristos } 1260c612021Schristos if (*pt1 == *str) 1270c612021Schristos break; 1280c612021Schristos } 1293b8851c8Sdsl if (*pt1 == 0) { 130f3cd6022Schristos tty_warn(1, "Invalid replacement string %s", str); 131cdec4ac1Sdsl return -1; 1328b35abe2Sjtc } 1338b35abe2Sjtc 1348b35abe2Sjtc /* 1358b35abe2Sjtc * allocate space for the node that handles this replacement pattern 1368b35abe2Sjtc * and split out the regular expression and try to compile it 1378b35abe2Sjtc */ 1388b35abe2Sjtc if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) { 139f3cd6022Schristos tty_warn(1, "Unable to allocate memory for replacement string"); 140cdec4ac1Sdsl return -1; 1418b35abe2Sjtc } 1428b35abe2Sjtc 1438b35abe2Sjtc *pt1 = '\0'; 1448b35abe2Sjtc if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) { 1458b35abe2Sjtc regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf)); 146f3cd6022Schristos tty_warn(1, "%s while compiling regular expression %s", rebuf, 147f3cd6022Schristos str); 1488b35abe2Sjtc (void)free((char *)rep); 149cdec4ac1Sdsl return -1; 1508b35abe2Sjtc } 1518b35abe2Sjtc 1528b35abe2Sjtc /* 1538b35abe2Sjtc * put the delimiter back in case we need 
an error message and 1548b35abe2Sjtc * locate the delimiter at the end of the replacement string 1558b35abe2Sjtc * we then point the node at the new substitution string 1568b35abe2Sjtc */ 1578b35abe2Sjtc *pt1++ = *str; 1580c612021Schristos for (pt2 = pt1; *pt2; pt2++) { 1590c612021Schristos if (*pt2 == '\\') { 1600c612021Schristos pt2++; 1610c612021Schristos continue; 1620c612021Schristos } 1630c612021Schristos if (*pt2 == *str) 1640c612021Schristos break; 1650c612021Schristos } 1663b8851c8Sdsl if (*pt2 == 0) { 1670c612021Schristos regfree(&(rep->rcmp)); 1680c612021Schristos (void)free((char *)rep); 1690c612021Schristos tty_warn(1, "Invalid replacement string %s", str); 170cdec4ac1Sdsl return -1; 1710c612021Schristos } 1720c612021Schristos 1730c612021Schristos *pt2 = '\0'; 1747d4cb139Srafal 1753ac7ce18Swiz /* Make sure to dup replacement, who knows where it came from! */ 1767d4cb139Srafal if ((rep->nstr = strdup(pt1)) == NULL) { 1777d4cb139Srafal regfree(&(rep->rcmp)); 1787d4cb139Srafal (void)free((char *)rep); 1797d4cb139Srafal tty_warn(1, "Unable to allocate memory for replacement string"); 180cdec4ac1Sdsl return -1; 1817d4cb139Srafal } 1827d4cb139Srafal 1838b35abe2Sjtc pt1 = pt2++; 1848b35abe2Sjtc rep->flgs = 0; 1858b35abe2Sjtc 1868b35abe2Sjtc /* 1878b35abe2Sjtc * set the options if any 1888b35abe2Sjtc */ 1898b35abe2Sjtc while (*pt2 != '\0') { 1908b35abe2Sjtc switch(*pt2) { 1918b35abe2Sjtc case 'g': 1928b35abe2Sjtc case 'G': 1938b35abe2Sjtc rep->flgs |= GLOB; 1948b35abe2Sjtc break; 1958b35abe2Sjtc case 'p': 1968b35abe2Sjtc case 'P': 1978b35abe2Sjtc rep->flgs |= PRNT; 1988b35abe2Sjtc break; 199206f4182Schristos case 's': 200206f4182Schristos case 'S': 201206f4182Schristos rep->flgs |= SYML; 202206f4182Schristos break; 2038b35abe2Sjtc default: 2048b35abe2Sjtc regfree(&(rep->rcmp)); 2058b35abe2Sjtc (void)free((char *)rep); 2068b35abe2Sjtc *pt1 = *str; 207f3cd6022Schristos tty_warn(1, "Invalid replacement string option %s", 208f3cd6022Schristos str); 209cdec4ac1Sdsl 
return -1; 2108b35abe2Sjtc } 2118b35abe2Sjtc ++pt2; 2128b35abe2Sjtc } 2138b35abe2Sjtc 2148b35abe2Sjtc /* 2158b35abe2Sjtc * all done, link it in at the end 2168b35abe2Sjtc */ 2178b35abe2Sjtc rep->fow = NULL; 2188b35abe2Sjtc if (rephead == NULL) { 2198b35abe2Sjtc reptail = rephead = rep; 220cdec4ac1Sdsl return 0; 2218b35abe2Sjtc } 2228b35abe2Sjtc reptail->fow = rep; 2238b35abe2Sjtc reptail = rep; 224cdec4ac1Sdsl return 0; 2258b35abe2Sjtc } 2268b35abe2Sjtc 2278b35abe2Sjtc /* 2288b35abe2Sjtc * pat_add() 2298b35abe2Sjtc * add a pattern match to the pattern match list. Pattern matches are used 2308b35abe2Sjtc * to select which archive members are extracted. (They appear as 2318b35abe2Sjtc * arguments to pax in the list and read modes). If no patterns are 2328b35abe2Sjtc * supplied to pax, all members in the archive will be selected (and the 2338b35abe2Sjtc * pattern match list is empty). 234e413a4ffSis * 2358b35abe2Sjtc * Return: 2368b35abe2Sjtc * 0 if the pattern was added to the list, -1 otherwise 2378b35abe2Sjtc */ 2388b35abe2Sjtc 2398b35abe2Sjtc int 240a992ea79Sperry pat_add(char *str, char *chdn, int flags) 2418b35abe2Sjtc { 24248250187Stls PATTERN *pt; 2438b35abe2Sjtc 2448b35abe2Sjtc /* 2458b35abe2Sjtc * throw out the junk 2468b35abe2Sjtc */ 2478b35abe2Sjtc if ((str == NULL) || (*str == '\0')) { 248f3cd6022Schristos tty_warn(1, "Empty pattern string"); 249cdec4ac1Sdsl return -1; 2508b35abe2Sjtc } 2518b35abe2Sjtc 2528b35abe2Sjtc /* 2538b35abe2Sjtc * allocate space for the pattern and store the pattern. the pattern is 2548b35abe2Sjtc * part of argv so do not bother to copy it, just point at it. 
Add the 2558b35abe2Sjtc * node to the end of the pattern list 2568b35abe2Sjtc */ 2578b35abe2Sjtc if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) { 258f3cd6022Schristos tty_warn(1, "Unable to allocate memory for pattern string"); 259cdec4ac1Sdsl return -1; 2608b35abe2Sjtc } 2618b35abe2Sjtc 2628b35abe2Sjtc pt->pstr = str; 2638b35abe2Sjtc pt->pend = NULL; 2648b35abe2Sjtc pt->plen = strlen(str); 2658b35abe2Sjtc pt->fow = NULL; 266a992ea79Sperry pt->flgs = flags; 2670c612021Schristos pt->chdname = chdn; 2688b35abe2Sjtc if (pathead == NULL) { 2698b35abe2Sjtc pattail = pathead = pt; 270cdec4ac1Sdsl return 0; 2718b35abe2Sjtc } 2728b35abe2Sjtc pattail->fow = pt; 2738b35abe2Sjtc pattail = pt; 274cdec4ac1Sdsl return 0; 2758b35abe2Sjtc } 2768b35abe2Sjtc 2778b35abe2Sjtc /* 2788b35abe2Sjtc * pat_chk() 2798b35abe2Sjtc * complain if any the user supplied pattern did not result in a match to 2808b35abe2Sjtc * a selected archive member. 2818b35abe2Sjtc */ 2828b35abe2Sjtc 2838b35abe2Sjtc void 2848b35abe2Sjtc pat_chk(void) 2858b35abe2Sjtc { 28648250187Stls PATTERN *pt; 28748250187Stls int wban = 0; 2888b35abe2Sjtc 2898b35abe2Sjtc /* 2908b35abe2Sjtc * walk down the list checking the flags to make sure MTCH was set, 2918b35abe2Sjtc * if not complain 2928b35abe2Sjtc */ 2938b35abe2Sjtc for (pt = pathead; pt != NULL; pt = pt->fow) { 2940c612021Schristos if (pt->flgs & MTCH) 2958b35abe2Sjtc continue; 2968b35abe2Sjtc if (!wban) { 297f3cd6022Schristos tty_warn(1, "WARNING! These patterns were not matched:"); 2988b35abe2Sjtc ++wban; 2998b35abe2Sjtc } 3008b35abe2Sjtc (void)fprintf(stderr, "%s\n", pt->pstr); 3018b35abe2Sjtc } 3028b35abe2Sjtc } 3038b35abe2Sjtc 3048b35abe2Sjtc /* 3058b35abe2Sjtc * pat_sel() 3068b35abe2Sjtc * the archive member which matches a pattern was selected. Mark the 3078b35abe2Sjtc * pattern as having selected an archive member. arcn->pat points at the 3088b35abe2Sjtc * pattern that was matched. 
arcn->pat is set in pat_match() 3098b35abe2Sjtc * 3108b35abe2Sjtc * NOTE: When the -c option is used, we are called when there was no match 3118b35abe2Sjtc * by pat_match() (that means we did match before the inverted sense of 3128b35abe2Sjtc * the logic). Now this seems really strange at first, but with -c we 3138ce1f4ffSmsaitoh * need to keep track of those patterns that cause an archive member to 3148ce1f4ffSmsaitoh * NOT be selected (it found an archive member with a specified pattern) 3158b35abe2Sjtc * Return: 3168b35abe2Sjtc * 0 if the pattern pointed at by arcn->pat was tagged as creating a 3178b35abe2Sjtc * match, -1 otherwise. 3188b35abe2Sjtc */ 3198b35abe2Sjtc 3208b35abe2Sjtc int 32148250187Stls pat_sel(ARCHD *arcn) 3228b35abe2Sjtc { 32348250187Stls PATTERN *pt; 32448250187Stls PATTERN **ppt; 32548250187Stls int len; 3268b35abe2Sjtc 3278b35abe2Sjtc /* 3288b35abe2Sjtc * if no patterns just return 3298b35abe2Sjtc */ 3308b35abe2Sjtc if ((pathead == NULL) || ((pt = arcn->pat) == NULL)) 331cdec4ac1Sdsl return 0; 3328b35abe2Sjtc 3338b35abe2Sjtc /* 3348b35abe2Sjtc * when we are NOT limited to a single match per pattern mark the 3358b35abe2Sjtc * pattern and return 3368b35abe2Sjtc */ 3378b35abe2Sjtc if (!nflag) { 3388b35abe2Sjtc pt->flgs |= MTCH; 339cdec4ac1Sdsl return 0; 3408b35abe2Sjtc } 3418b35abe2Sjtc 3428b35abe2Sjtc /* 3438b35abe2Sjtc * we reach this point only when we allow a single selected match per 3448b35abe2Sjtc * pattern, if the pattern matches a directory and we do not have -d 3458b35abe2Sjtc * (dflag) we are done with this pattern. We may also be handed a file 3468b35abe2Sjtc * in the subtree of a directory. 
in that case when we are operating 3478b35abe2Sjtc * with -d, this pattern was already selected and we are done 3488b35abe2Sjtc */ 3498b35abe2Sjtc if (pt->flgs & DIR_MTCH) 350cdec4ac1Sdsl return 0; 3518b35abe2Sjtc 3528b35abe2Sjtc if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) { 3538b35abe2Sjtc /* 3548b35abe2Sjtc * ok we matched a directory and we are allowing 3558b35abe2Sjtc * subtree matches but because of the -n only its children will 3568b35abe2Sjtc * match. This is tagged as a DIR_MTCH type. 3578b35abe2Sjtc * WATCH IT, the code assumes that pt->pend points 3588b35abe2Sjtc * into arcn->name and arcn->name has not been modified. 3598b35abe2Sjtc * If not we will have a big mess. Yup this is another kludge 3608b35abe2Sjtc */ 3618b35abe2Sjtc 3628b35abe2Sjtc /* 3638b35abe2Sjtc * if this was a prefix match, remove trailing part of path 3648b35abe2Sjtc * so we can copy it. Future matches will be exact prefix match 3658b35abe2Sjtc */ 3668b35abe2Sjtc if (pt->pend != NULL) 3678b35abe2Sjtc *pt->pend = '\0'; 3688b35abe2Sjtc 3698b35abe2Sjtc if ((pt->pstr = strdup(arcn->name)) == NULL) { 370f3cd6022Schristos tty_warn(1, "Pattern select out of memory"); 3718b35abe2Sjtc if (pt->pend != NULL) 3728b35abe2Sjtc *pt->pend = '/'; 3738b35abe2Sjtc pt->pend = NULL; 374cdec4ac1Sdsl return -1; 3758b35abe2Sjtc } 3768b35abe2Sjtc 3778b35abe2Sjtc /* 3788b35abe2Sjtc * put the trailing / back in the source string 3798b35abe2Sjtc */ 3808b35abe2Sjtc if (pt->pend != NULL) { 3818b35abe2Sjtc *pt->pend = '/'; 3828b35abe2Sjtc pt->pend = NULL; 3838b35abe2Sjtc } 3848b35abe2Sjtc pt->plen = strlen(pt->pstr); 3858b35abe2Sjtc 3868b35abe2Sjtc /* 3878b35abe2Sjtc * strip off any trailing /, this should really never happen 3888b35abe2Sjtc */ 3898b35abe2Sjtc len = pt->plen - 1; 3908b35abe2Sjtc if (*(pt->pstr + len) == '/') { 3918b35abe2Sjtc *(pt->pstr + len) = '\0'; 3928b35abe2Sjtc pt->plen = len; 3938b35abe2Sjtc } 3948b35abe2Sjtc pt->flgs = DIR_MTCH | MTCH; 3958b35abe2Sjtc arcn->pat = pt; 
396cdec4ac1Sdsl return 0; 3978b35abe2Sjtc } 3988b35abe2Sjtc 3998b35abe2Sjtc /* 4008b35abe2Sjtc * we are then done with this pattern, so we delete it from the list 4018b35abe2Sjtc * because it can never be used for another match. 4028b35abe2Sjtc * Seems kind of strange to do for a -c, but the pax spec is really 4033ac7ce18Swiz * vague on the interaction of -c, -n, and -d. We assume that when -c 4048b35abe2Sjtc * and the pattern rejects a member (i.e. it matched it) it is done. 4058b35abe2Sjtc * In effect we place the order of the flags as having -c last. 4068b35abe2Sjtc */ 4078b35abe2Sjtc pt = pathead; 4088b35abe2Sjtc ppt = &pathead; 4098b35abe2Sjtc while ((pt != NULL) && (pt != arcn->pat)) { 4108b35abe2Sjtc ppt = &(pt->fow); 4118b35abe2Sjtc pt = pt->fow; 4128b35abe2Sjtc } 4138b35abe2Sjtc 4148b35abe2Sjtc if (pt == NULL) { 4158b35abe2Sjtc /* 4168b35abe2Sjtc * should never happen.... 4178b35abe2Sjtc */ 4188ce1f4ffSmsaitoh tty_warn(1, "Pattern list inconsistent"); 419cdec4ac1Sdsl return -1; 4208b35abe2Sjtc } 4218b35abe2Sjtc *ppt = pt->fow; 4228b35abe2Sjtc (void)free((char *)pt); 4238b35abe2Sjtc arcn->pat = NULL; 424cdec4ac1Sdsl return 0; 4258b35abe2Sjtc } 4268b35abe2Sjtc 4278b35abe2Sjtc /* 4288b35abe2Sjtc * pat_match() 4298b35abe2Sjtc * see if this archive member matches any supplied pattern, if a match 4308b35abe2Sjtc * is found, arcn->pat is set to point at the potential pattern. 
Later if 4318b35abe2Sjtc * this archive member is "selected" we process and mark the pattern as 4328b35abe2Sjtc * one which matched a selected archive member (see pat_sel()) 4338b35abe2Sjtc * Return: 4348b35abe2Sjtc * 0 if this archive member should be processed, 1 if it should be 4358b35abe2Sjtc * skipped and -1 if we are done with all patterns (and pax should quit 4368b35abe2Sjtc * looking for more members) 4378b35abe2Sjtc */ 4388b35abe2Sjtc 4398b35abe2Sjtc int 44048250187Stls pat_match(ARCHD *arcn) 4418b35abe2Sjtc { 44248250187Stls PATTERN *pt; 4438b35abe2Sjtc 4448b35abe2Sjtc arcn->pat = NULL; 4458b35abe2Sjtc 4468b35abe2Sjtc /* 4478b35abe2Sjtc * if there are no more patterns and we have -n (and not -c) we are 4488b35abe2Sjtc * done. otherwise with no patterns to match, matches all 4498b35abe2Sjtc */ 4508b35abe2Sjtc if (pathead == NULL) { 4518b35abe2Sjtc if (nflag && !cflag) 452cdec4ac1Sdsl return -1; 453cdec4ac1Sdsl return 0; 4548b35abe2Sjtc } 4558b35abe2Sjtc 4568b35abe2Sjtc /* 4578b35abe2Sjtc * have to search down the list one at a time looking for a match. 4588b35abe2Sjtc */ 4598b35abe2Sjtc pt = pathead; 4608b35abe2Sjtc while (pt != NULL) { 4618b35abe2Sjtc /* 4628b35abe2Sjtc * check for a file name match unless we have DIR_MTCH set in 4638b35abe2Sjtc * this pattern then we want a prefix match 4648b35abe2Sjtc */ 4658b35abe2Sjtc if (pt->flgs & DIR_MTCH) { 4668b35abe2Sjtc /* 4678b35abe2Sjtc * this pattern was matched before to a directory 4688b35abe2Sjtc * as we must have -n set for this (but not -d). We can 4698b35abe2Sjtc * only match CHILDREN of that directory so we must use 4708b35abe2Sjtc * an exact prefix match (no wildcards). 
4718b35abe2Sjtc */ 4728b35abe2Sjtc if ((arcn->name[pt->plen] == '/') && 4738b35abe2Sjtc (strncmp(pt->pstr, arcn->name, pt->plen) == 0)) 4748b35abe2Sjtc break; 475a992ea79Sperry } else if (fn_match(pt->pstr, arcn->name, &pt->pend, 476a992ea79Sperry pt->flgs & NOGLOB_MTCH) == 0) 4778b35abe2Sjtc break; 4788b35abe2Sjtc pt = pt->fow; 4798b35abe2Sjtc } 4808b35abe2Sjtc 4818b35abe2Sjtc /* 4828b35abe2Sjtc * return the result, remember that cflag (-c) inverts the sense of a 4838b35abe2Sjtc * match 4848b35abe2Sjtc */ 4858b35abe2Sjtc if (pt == NULL) 486cdec4ac1Sdsl return cflag ? 0 : 1; 4878b35abe2Sjtc 4888b35abe2Sjtc /* 4898b35abe2Sjtc * we had a match, now when we invert the sense (-c) we reject this 4908b35abe2Sjtc * member. However we have to tag the pattern a being successful, (in a 4918ce1f4ffSmsaitoh * match, not in selecting an archive member) so we call pat_sel() 4928ce1f4ffSmsaitoh * here. 4938b35abe2Sjtc */ 4948b35abe2Sjtc arcn->pat = pt; 4958b35abe2Sjtc if (!cflag) 496cdec4ac1Sdsl return 0; 4978b35abe2Sjtc 4988b35abe2Sjtc if (pat_sel(arcn) < 0) 499cdec4ac1Sdsl return -1; 5008b35abe2Sjtc arcn->pat = NULL; 501cdec4ac1Sdsl return 1; 5028b35abe2Sjtc } 5038b35abe2Sjtc 5048b35abe2Sjtc /* 5058b35abe2Sjtc * fn_match() 5068b35abe2Sjtc * Return: 5078b35abe2Sjtc * 0 if this archive member should be processed, 1 if it should be 5088b35abe2Sjtc * skipped and -1 if we are done with all patterns (and pax should quit 5098b35abe2Sjtc * looking for more members) 5108b35abe2Sjtc * Note: *pend may be changed to show where the prefix ends. 
5118b35abe2Sjtc */ 5128b35abe2Sjtc 5138b35abe2Sjtc static int 514a992ea79Sperry fn_match(char *pattern, char *string, char **pend, int noglob) 5158b35abe2Sjtc { 51648250187Stls char c; 5178b35abe2Sjtc char test; 5188b35abe2Sjtc 5198b35abe2Sjtc *pend = NULL; 5208b35abe2Sjtc for (;;) { 5218b35abe2Sjtc switch (c = *pattern++) { 5228b35abe2Sjtc case '\0': 5238b35abe2Sjtc /* 5248b35abe2Sjtc * Ok we found an exact match 5258b35abe2Sjtc */ 5268b35abe2Sjtc if (*string == '\0') 527cdec4ac1Sdsl return 0; 5288b35abe2Sjtc 5298b35abe2Sjtc /* 5308b35abe2Sjtc * Check if it is a prefix match 5318b35abe2Sjtc */ 5328b35abe2Sjtc if ((dflag == 1) || (*string != '/')) 533cdec4ac1Sdsl return -1; 5348b35abe2Sjtc 5358b35abe2Sjtc /* 5368b35abe2Sjtc * It is a prefix match, remember where the trailing 5378b35abe2Sjtc * / is located 5388b35abe2Sjtc */ 5398b35abe2Sjtc *pend = string; 540cdec4ac1Sdsl return 0; 5418b35abe2Sjtc case '?': 542a992ea79Sperry if (noglob) 543a992ea79Sperry goto regular; 5448b35abe2Sjtc if ((test = *string++) == '\0') 5458b35abe2Sjtc return (-1); 5468b35abe2Sjtc break; 5478b35abe2Sjtc case '*': 548a992ea79Sperry if (noglob) 549a992ea79Sperry goto regular; 5508b35abe2Sjtc c = *pattern; 5518b35abe2Sjtc /* 5528b35abe2Sjtc * Collapse multiple *'s. 5538b35abe2Sjtc */ 5548b35abe2Sjtc while (c == '*') 5558b35abe2Sjtc c = *++pattern; 5568b35abe2Sjtc 5578b35abe2Sjtc /* 5588b35abe2Sjtc * Optimized hack for pattern with a * at the end 5598b35abe2Sjtc */ 5608b35abe2Sjtc if (c == '\0') 5618b35abe2Sjtc return (0); 5628b35abe2Sjtc 5638b35abe2Sjtc /* 5648b35abe2Sjtc * General case, use recursion. 
5658b35abe2Sjtc */ 5668b35abe2Sjtc while ((test = *string) != '\0') { 567a992ea79Sperry if (!fn_match(pattern, string, pend, noglob)) 5688b35abe2Sjtc return (0); 5698b35abe2Sjtc ++string; 5708b35abe2Sjtc } 5718b35abe2Sjtc return (-1); 5728b35abe2Sjtc case '[': 573a992ea79Sperry if (noglob) 574a992ea79Sperry goto regular; 5758b35abe2Sjtc /* 5768b35abe2Sjtc * range match 5778b35abe2Sjtc */ 5788b35abe2Sjtc if (((test = *string++) == '\0') || 5798b35abe2Sjtc ((pattern = range_match(pattern, test)) == NULL)) 5808b35abe2Sjtc return (-1); 5818b35abe2Sjtc break; 5828b35abe2Sjtc case '\\': 5838b35abe2Sjtc default: 584a992ea79Sperry regular: 5858b35abe2Sjtc if (c != *string++) 5868b35abe2Sjtc return (-1); 5878b35abe2Sjtc break; 5888b35abe2Sjtc } 5898b35abe2Sjtc } 5908b35abe2Sjtc /* NOTREACHED */ 5918b35abe2Sjtc } 5928b35abe2Sjtc 5938b35abe2Sjtc static char * 59448250187Stls range_match(char *pattern, int test) 5958b35abe2Sjtc { 59648250187Stls char c; 59748250187Stls char c2; 5988b35abe2Sjtc int negate; 5998b35abe2Sjtc int ok = 0; 6008b35abe2Sjtc 601f3cd6022Schristos if ((negate = (*pattern == '!')) != 0) 6028b35abe2Sjtc ++pattern; 6038b35abe2Sjtc 6048b35abe2Sjtc while ((c = *pattern++) != ']') { 6058b35abe2Sjtc /* 6068b35abe2Sjtc * Illegal pattern 6078b35abe2Sjtc */ 6088b35abe2Sjtc if (c == '\0') 6098b35abe2Sjtc return (NULL); 6108b35abe2Sjtc 6118b35abe2Sjtc if ((*pattern == '-') && ((c2 = pattern[1]) != '\0') && 6128b35abe2Sjtc (c2 != ']')) { 6138b35abe2Sjtc if ((c <= test) && (test <= c2)) 6148b35abe2Sjtc ok = 1; 6158b35abe2Sjtc pattern += 2; 6168b35abe2Sjtc } else if (c == test) 6178b35abe2Sjtc ok = 1; 6188b35abe2Sjtc } 6198b35abe2Sjtc return (ok == negate ? NULL : pattern); 6208b35abe2Sjtc } 6218b35abe2Sjtc 6228b35abe2Sjtc /* 6238b35abe2Sjtc * mod_name() 6248b35abe2Sjtc * modify a selected file name. first attempt to apply replacement string 6258b35abe2Sjtc * expressions, then apply interactive file rename. 
We apply replacement 6268b35abe2Sjtc * string expressions to both filenames and file links (if we didn't the 6278b35abe2Sjtc * links would point to the wrong place, and we could never be able to 6288b35abe2Sjtc * move an archive that has a file link in it). When we rename files 6298b35abe2Sjtc * interactively, we store that mapping (old name to user input name) so 6308b35abe2Sjtc * if we spot any file links to the old file name in the future, we will 6318b35abe2Sjtc * know exactly how to fix the file link. 6328b35abe2Sjtc * Return: 6338b35abe2Sjtc * 0 continue to process file, 1 skip this file, -1 pax is finished 6348b35abe2Sjtc */ 6358b35abe2Sjtc 6368b35abe2Sjtc int 637206f4182Schristos mod_name(ARCHD *arcn, int flags) 6388b35abe2Sjtc { 63948250187Stls int res = 0; 6408b35abe2Sjtc 641ca541391Schristos if (secure) { 642ca541391Schristos if (checkdotdot(arcn->name)) { 643ca541391Schristos tty_warn(0, "Ignoring file containing `..' (%s)", 644ca541391Schristos arcn->name); 645ca541391Schristos return 1; 646ca541391Schristos } 64718c0c995Schristos #ifdef notdef 648ca541391Schristos if (checkdotdot(arcn->ln_name)) { 649ca541391Schristos tty_warn(0, "Ignoring link containing `..' (%s)", 650ca541391Schristos arcn->ln_name); 651ca541391Schristos return 1; 652ca541391Schristos } 65318c0c995Schristos #endif 654ca541391Schristos } 655ca541391Schristos 6560c612021Schristos /* 6578b35abe2Sjtc * IMPORTANT: We have a problem. what do we do with symlinks? 6588b35abe2Sjtc * Modifying a hard link name makes sense, as we know the file it 6598b35abe2Sjtc * points at should have been seen already in the archive (and if it 6608b35abe2Sjtc * wasn't seen because of a read error or a bad archive, we lose 6618b35abe2Sjtc * anyway). But there are no such requirements for symlinks. On one 6628b35abe2Sjtc * hand the symlink that refers to a file in the archive will have to 6638b35abe2Sjtc * be modified to so it will still work at its new location in the 6648b35abe2Sjtc * file system. 
On the other hand a symlink that points elsewhere (and 6658b35abe2Sjtc * should continue to do so) should not be modified. There is clearly 6668b35abe2Sjtc * no perfect solution here. So we handle them like hardlinks. Clearly 6678b35abe2Sjtc * a replacement made by the interactive rename mapping is very likely 6688b35abe2Sjtc * to be correct since it applies to a single file and is an exact 6698b35abe2Sjtc * match. The regular expression replacements are a little harder to 6708b35abe2Sjtc * justify though. We claim that the symlink name is only likely 6718b35abe2Sjtc * to be replaced when it points within the file tree being moved and 6728b35abe2Sjtc * in that case it should be modified. what we really need to do is to 6738b35abe2Sjtc * call an oracle here. :) 6748b35abe2Sjtc */ 6758b35abe2Sjtc if (rephead != NULL) { 676206f4182Schristos flags |= (flags & RENM) ? PRNT : 0; 6778b35abe2Sjtc /* 6788b35abe2Sjtc * we have replacement strings, modify the name and the link 6798b35abe2Sjtc * name if any. 6808b35abe2Sjtc */ 6810c612021Schristos if ((res = rep_name(arcn->name, sizeof(arcn->name), 682206f4182Schristos &(arcn->nlen), flags)) != 0) 683cdec4ac1Sdsl return res; 6848b35abe2Sjtc 6858b35abe2Sjtc if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 6868b35abe2Sjtc (arcn->type == PAX_HRG)) && 687206f4182Schristos ((res = rep_name(arcn->ln_name, 688206f4182Schristos sizeof(arcn->ln_name), &(arcn->ln_nlen), 689206f4182Schristos flags | (arcn->type == PAX_SLK ? 
SYML : 0))) != 0)) 690cdec4ac1Sdsl return res; 6918b35abe2Sjtc } 6928b35abe2Sjtc 6938b35abe2Sjtc if (iflag) { 6948b35abe2Sjtc /* 6958b35abe2Sjtc * perform interactive file rename, then map the link if any 6968b35abe2Sjtc */ 6978b35abe2Sjtc if ((res = tty_rename(arcn)) != 0) 698cdec4ac1Sdsl return res; 6998b35abe2Sjtc if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 7008b35abe2Sjtc (arcn->type == PAX_HRG)) 7010c612021Schristos sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name)); 7028b35abe2Sjtc } 70394eaa317Sjmc 70494eaa317Sjmc /* 70594eaa317Sjmc * Strip off leading '/' if appropriate. 70694eaa317Sjmc * Currently, this option is only set for the tar format. 70794eaa317Sjmc */ 70894eaa317Sjmc if (rmleadslash && arcn->name[0] == '/') { 70994eaa317Sjmc if (arcn->name[1] == '\0') { 71094eaa317Sjmc arcn->name[0] = '.'; 71194eaa317Sjmc } else { 71294eaa317Sjmc (void)memmove(arcn->name, &arcn->name[1], 71394eaa317Sjmc strlen(arcn->name)); 71494eaa317Sjmc arcn->nlen--; 71594eaa317Sjmc } 71694eaa317Sjmc if (rmleadslash < 2) { 71794eaa317Sjmc rmleadslash = 2; 71894eaa317Sjmc tty_warn(0, "Removing leading / from absolute path names in the archive"); 71994eaa317Sjmc } 72094eaa317Sjmc } 72194eaa317Sjmc if (rmleadslash && arcn->ln_name[0] == '/' && 72294eaa317Sjmc (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) { 72394eaa317Sjmc if (arcn->ln_name[1] == '\0') { 72494eaa317Sjmc arcn->ln_name[0] = '.'; 72594eaa317Sjmc } else { 72694eaa317Sjmc (void)memmove(arcn->ln_name, &arcn->ln_name[1], 72794eaa317Sjmc strlen(arcn->ln_name)); 72894eaa317Sjmc arcn->ln_nlen--; 72994eaa317Sjmc } 73094eaa317Sjmc if (rmleadslash < 2) { 73194eaa317Sjmc rmleadslash = 2; 73294eaa317Sjmc tty_warn(0, "Removing leading / from absolute path names in the archive"); 73394eaa317Sjmc } 73494eaa317Sjmc } 73594eaa317Sjmc 736cdec4ac1Sdsl return res; 7378b35abe2Sjtc } 7388b35abe2Sjtc 7398b35abe2Sjtc /* 7408b35abe2Sjtc * tty_rename() 7418b35abe2Sjtc * Prompt the user for a replacement file 
name. A "." keeps the old name, 7428b35abe2Sjtc * a empty line skips the file, and an EOF on reading the tty, will cause 7438b35abe2Sjtc * pax to stop processing and exit. Otherwise the file name input, replaces 7448b35abe2Sjtc * the old one. 7458b35abe2Sjtc * Return: 7468b35abe2Sjtc * 0 process this file, 1 skip this file, -1 we need to exit pax 7478b35abe2Sjtc */ 7488b35abe2Sjtc 7498b35abe2Sjtc static int 75048250187Stls tty_rename(ARCHD *arcn) 7518b35abe2Sjtc { 7528b35abe2Sjtc char tmpname[PAXPATHLEN+2]; 7538b35abe2Sjtc int res; 7548b35abe2Sjtc 7558b35abe2Sjtc /* 7568b35abe2Sjtc * prompt user for the replacement name for a file, keep trying until 7578b35abe2Sjtc * we get some reasonable input. Archives may have more than one file 7588b35abe2Sjtc * on them with the same name (from updates etc). We print verbose info 7598b35abe2Sjtc * on the file so the user knows what is up. 7608b35abe2Sjtc */ 7618b35abe2Sjtc tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0); 7628b35abe2Sjtc 7638b35abe2Sjtc for (;;) { 7648b35abe2Sjtc ls_tty(arcn); 7658b35abe2Sjtc tty_prnt("Input new name, or a \".\" to keep the old name, "); 7668b35abe2Sjtc tty_prnt("or a \"return\" to skip this file.\n"); 7678b35abe2Sjtc tty_prnt("Input > "); 7688b35abe2Sjtc if (tty_read(tmpname, sizeof(tmpname)) < 0) 769cdec4ac1Sdsl return -1; 7708b35abe2Sjtc if (strcmp(tmpname, "..") == 0) { 7718b35abe2Sjtc tty_prnt("Try again, illegal file name: ..\n"); 7728b35abe2Sjtc continue; 7738b35abe2Sjtc } 7748b35abe2Sjtc if (strlen(tmpname) > PAXPATHLEN) { 7758b35abe2Sjtc tty_prnt("Try again, file name too long\n"); 7768b35abe2Sjtc continue; 7778b35abe2Sjtc } 7788b35abe2Sjtc break; 7798b35abe2Sjtc } 7808b35abe2Sjtc 7818b35abe2Sjtc /* 7828b35abe2Sjtc * empty file name, skips this file. a "." 
leaves it alone 7838b35abe2Sjtc */ 7848b35abe2Sjtc if (tmpname[0] == '\0') { 7858b35abe2Sjtc tty_prnt("Skipping file.\n"); 786cdec4ac1Sdsl return 1; 7878b35abe2Sjtc } 7888b35abe2Sjtc if ((tmpname[0] == '.') && (tmpname[1] == '\0')) { 7898b35abe2Sjtc tty_prnt("Processing continues, name unchanged.\n"); 790cdec4ac1Sdsl return 0; 7918b35abe2Sjtc } 7928b35abe2Sjtc 7938b35abe2Sjtc /* 7948b35abe2Sjtc * ok the name changed. We may run into links that point at this 7958b35abe2Sjtc * file later. we have to remember where the user sent the file 7968b35abe2Sjtc * in order to repair any links. 7978b35abe2Sjtc */ 7988b35abe2Sjtc tty_prnt("Processing continues, name changed to: %s\n", tmpname); 7998b35abe2Sjtc res = add_name(arcn->name, arcn->nlen, tmpname); 8000c612021Schristos arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name)); 8018b35abe2Sjtc if (res < 0) 802cdec4ac1Sdsl return -1; 803cdec4ac1Sdsl return 0; 8048b35abe2Sjtc } 8058b35abe2Sjtc 8068b35abe2Sjtc /* 8078b35abe2Sjtc * set_dest() 8088b35abe2Sjtc * fix up the file name and the link name (if any) so this file will land 8098b35abe2Sjtc * in the destination directory (used during copy() -rw). 8108b35abe2Sjtc * Return: 8118b35abe2Sjtc * 0 if ok, -1 if failure (name too long) 8128b35abe2Sjtc */ 8138b35abe2Sjtc 8148b35abe2Sjtc int 81548250187Stls set_dest(ARCHD *arcn, char *dest_dir, int dir_len) 8168b35abe2Sjtc { 8178b35abe2Sjtc if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0) 818cdec4ac1Sdsl return -1; 8198b35abe2Sjtc 8208b35abe2Sjtc /* 8218b35abe2Sjtc * It is really hard to deal with symlinks here, we cannot be sure 8228b35abe2Sjtc * if the name they point was moved (or will be moved). It is best to 8238b35abe2Sjtc * leave them alone. 
8248b35abe2Sjtc */ 8258b35abe2Sjtc if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG)) 826cdec4ac1Sdsl return 0; 8278b35abe2Sjtc 8288b35abe2Sjtc if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0) 829cdec4ac1Sdsl return -1; 830cdec4ac1Sdsl return 0; 8318b35abe2Sjtc } 8328b35abe2Sjtc 8338b35abe2Sjtc /* 8348b35abe2Sjtc * fix_path 8358b35abe2Sjtc * concatenate dir_name and or_name and store the result in or_name (if 8368b35abe2Sjtc * it fits). This is one ugly function. 8378b35abe2Sjtc * Return: 8388b35abe2Sjtc * 0 if ok, -1 if the final name is too long 8398b35abe2Sjtc */ 8408b35abe2Sjtc 8418b35abe2Sjtc static int 8428b35abe2Sjtc fix_path( char *or_name, int *or_len, char *dir_name, int dir_len) 8438b35abe2Sjtc { 84448250187Stls char *src; 84548250187Stls char *dest; 84648250187Stls char *start; 8478b35abe2Sjtc int len; 8488b35abe2Sjtc 8498b35abe2Sjtc /* 8508b35abe2Sjtc * we shift the or_name to the right enough to tack in the dir_name 8518b35abe2Sjtc * at the front. We make sure we have enough space for it all before 8528b35abe2Sjtc * we start. since dest always ends in a slash, we skip of or_name 8538b35abe2Sjtc * if it also starts with one. 
8548b35abe2Sjtc */ 8558b35abe2Sjtc start = or_name; 8568b35abe2Sjtc src = start + *or_len; 8578b35abe2Sjtc dest = src + dir_len; 8588b35abe2Sjtc if (*start == '/') { 8598b35abe2Sjtc ++start; 8608b35abe2Sjtc --dest; 8618b35abe2Sjtc } 8628b35abe2Sjtc if ((len = dest - or_name) > PAXPATHLEN) { 863f3cd6022Schristos tty_warn(1, "File name %s/%s, too long", dir_name, start); 864cdec4ac1Sdsl return -1; 8658b35abe2Sjtc } 8668b35abe2Sjtc *or_len = len; 8678b35abe2Sjtc 8688b35abe2Sjtc /* 8698b35abe2Sjtc * enough space, shift 8708b35abe2Sjtc */ 8718b35abe2Sjtc while (src >= start) 8728b35abe2Sjtc *dest-- = *src--; 8738b35abe2Sjtc src = dir_name + dir_len - 1; 8748b35abe2Sjtc 8758b35abe2Sjtc /* 8768b35abe2Sjtc * splice in the destination directory name 8778b35abe2Sjtc */ 8788b35abe2Sjtc while (src >= dir_name) 8798b35abe2Sjtc *dest-- = *src--; 8808b35abe2Sjtc 8818b35abe2Sjtc *(or_name + len) = '\0'; 882cdec4ac1Sdsl return 0; 8838b35abe2Sjtc } 8848b35abe2Sjtc 8858b35abe2Sjtc /* 8868b35abe2Sjtc * rep_name() 8878b35abe2Sjtc * walk down the list of replacement strings applying each one in order. 8888b35abe2Sjtc * when we find one with a successful substitution, we modify the name 8898b35abe2Sjtc * as specified. if required, we print the results. if the resulting name 8908b35abe2Sjtc * is empty, we will skip this archive member. We use the regexp(3) 8918b35abe2Sjtc * routines (regexp() ought to win a prize as having the most cryptic 8928b35abe2Sjtc * library function manual page). 8938b35abe2Sjtc * --Parameters-- 8948b35abe2Sjtc * name is the file name we are going to apply the regular expressions to 8958b35abe2Sjtc * (and may be modified) 8960c612021Schristos * namelen the size of the name buffer. 8978b35abe2Sjtc * nlen is the length of this name (and is modified to hold the length of 8988b35abe2Sjtc * the final string). 89901205dd5Sgutteridge * flags contains various options to control behavior. 
9008b35abe2Sjtc * Return: 9018b35abe2Sjtc * 0 if substitution was successful, 1 if we are to skip the file (the name 9028b35abe2Sjtc * ended up empty) 9038b35abe2Sjtc */ 9048b35abe2Sjtc 9058b35abe2Sjtc static int 906206f4182Schristos rep_name(char *name, size_t namelen, int *nlen, int flags) 9078b35abe2Sjtc { 90848250187Stls REPLACE *pt; 90948250187Stls char *inpt; 91048250187Stls char *outpt; 91148250187Stls char *endpt; 91248250187Stls char *rpt; 91348250187Stls int found = 0; 91448250187Stls int res; 9158b35abe2Sjtc regmatch_t pm[MAXSUBEXP]; 9168b35abe2Sjtc char nname[PAXPATHLEN+1]; /* final result of all replacements */ 9178b35abe2Sjtc char buf1[PAXPATHLEN+1]; /* where we work on the name */ 9188b35abe2Sjtc 9198b35abe2Sjtc /* 9208b35abe2Sjtc * copy the name into buf1, where we will work on it. We need to keep 9218b35abe2Sjtc * the orig string around so we can print out the result of the final 9228b35abe2Sjtc * replacement. We build up the final result in nname. inpt points at 9238b35abe2Sjtc * the string we apply the regular expression to. 
prnt is used to 9248b35abe2Sjtc * suppress printing when we handle replacements on the link field 9258b35abe2Sjtc * (the user already saw that substitution go by) 9268b35abe2Sjtc */ 9278b35abe2Sjtc pt = rephead; 928448c6863Schristos (void)strlcpy(buf1, name, sizeof(buf1)); 9298b35abe2Sjtc inpt = buf1; 9308b35abe2Sjtc outpt = nname; 9318b35abe2Sjtc endpt = outpt + PAXPATHLEN; 9328b35abe2Sjtc 9338b35abe2Sjtc /* 9348b35abe2Sjtc * try each replacement string in order 9358b35abe2Sjtc */ 9368b35abe2Sjtc while (pt != NULL) { 9378b35abe2Sjtc do { 938206f4182Schristos if ((flags & SYML) && (pt->flgs & SYML)) 939206f4182Schristos continue; 9408b35abe2Sjtc /* 9418b35abe2Sjtc * check for a successful substitution, if not go to 9428b35abe2Sjtc * the next pattern, or cleanup if we were global 9438b35abe2Sjtc */ 9448b35abe2Sjtc if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0) 9458b35abe2Sjtc break; 9468b35abe2Sjtc 9478b35abe2Sjtc /* 9488b35abe2Sjtc * ok we found one. We have three parts, the prefix 9498b35abe2Sjtc * which did not match, the section that did and the 9508b35abe2Sjtc * tail (that also did not match). Copy the prefix to 9518b35abe2Sjtc * the final output buffer (watching to make sure we 9528b35abe2Sjtc * do not create a string too long). 9538b35abe2Sjtc */ 9548b35abe2Sjtc found = 1; 9558b35abe2Sjtc rpt = inpt + pm[0].rm_so; 9568b35abe2Sjtc 9578b35abe2Sjtc while ((inpt < rpt) && (outpt < endpt)) 9588b35abe2Sjtc *outpt++ = *inpt++; 9598b35abe2Sjtc if (outpt == endpt) 9608b35abe2Sjtc break; 9618b35abe2Sjtc 9628b35abe2Sjtc /* 9638b35abe2Sjtc * for the second part (which matched the regular 9648b35abe2Sjtc * expression) apply the substitution using the 9658b35abe2Sjtc * replacement string and place it the prefix in the 9668b35abe2Sjtc * final output. If we have problems, skip it. 
9678b35abe2Sjtc */ 968c1bd745cSlukem if ((res = 969c1bd745cSlukem resub(&(pt->rcmp),pm,pt->nstr,inpt, outpt,endpt) 970c1bd745cSlukem ) < 0) { 971206f4182Schristos if (flags & PRNT) 972f3cd6022Schristos tty_warn(1, "Replacement name error %s", 9738b35abe2Sjtc name); 974cdec4ac1Sdsl return 1; 9758b35abe2Sjtc } 9768b35abe2Sjtc outpt += res; 9778b35abe2Sjtc 9788b35abe2Sjtc /* 9798b35abe2Sjtc * we set up to look again starting at the first 9808b35abe2Sjtc * character in the tail (of the input string right 9818b35abe2Sjtc * after the last character matched by the regular 9828b35abe2Sjtc * expression (inpt always points at the first char in 9838b35abe2Sjtc * the string to process). If we are not doing a global 9848b35abe2Sjtc * substitution, we will use inpt to copy the tail to 9858b35abe2Sjtc * the final result. Make sure we do not overrun the 9868b35abe2Sjtc * output buffer 9878b35abe2Sjtc */ 988bd05c38cSmycroft inpt += pm[0].rm_eo - pm[0].rm_so; 9898b35abe2Sjtc 9908b35abe2Sjtc if ((outpt == endpt) || (*inpt == '\0')) 9918b35abe2Sjtc break; 9928b35abe2Sjtc 9938b35abe2Sjtc /* 9948b35abe2Sjtc * if the user wants global we keep trying to 9958b35abe2Sjtc * substitute until it fails, then we are done. 
9968b35abe2Sjtc */ 9978b35abe2Sjtc } while (pt->flgs & GLOB); 9988b35abe2Sjtc 9998b35abe2Sjtc if (found) 10008b35abe2Sjtc break; 10018b35abe2Sjtc 10028b35abe2Sjtc /* 10038b35abe2Sjtc * a successful substitution did NOT occur, try the next one 10048b35abe2Sjtc */ 10058b35abe2Sjtc pt = pt->fow; 10068b35abe2Sjtc } 10078b35abe2Sjtc 10088b35abe2Sjtc if (found) { 10098b35abe2Sjtc /* 10108b35abe2Sjtc * we had a substitution, copy the last tail piece (if there is 10118b35abe2Sjtc * room) to the final result 10128b35abe2Sjtc */ 10138b35abe2Sjtc while ((outpt < endpt) && (*inpt != '\0')) 10148b35abe2Sjtc *outpt++ = *inpt++; 10158b35abe2Sjtc 10168b35abe2Sjtc *outpt = '\0'; 10178b35abe2Sjtc if ((outpt == endpt) && (*inpt != '\0')) { 1018206f4182Schristos if (flags & PRNT) 1019f3cd6022Schristos tty_warn(1,"Replacement name too long %s >> %s", 10208b35abe2Sjtc name, nname); 1021cdec4ac1Sdsl return 1; 10228b35abe2Sjtc } 10238b35abe2Sjtc 10248b35abe2Sjtc /* 10258b35abe2Sjtc * inform the user of the result if wanted 10268b35abe2Sjtc */ 1027206f4182Schristos if ((flags & PRNT) && (pt->flgs & PRNT)) { 10288b35abe2Sjtc if (*nname == '\0') 10298b35abe2Sjtc (void)fprintf(stderr,"%s >> <empty string>\n", 10308b35abe2Sjtc name); 10318b35abe2Sjtc else 10328b35abe2Sjtc (void)fprintf(stderr,"%s >> %s\n", name, nname); 10338b35abe2Sjtc } 10348b35abe2Sjtc 10358b35abe2Sjtc /* 10368b35abe2Sjtc * if empty inform the caller this file is to be skipped 10378b35abe2Sjtc * otherwise copy the new name over the orig name and return 10388b35abe2Sjtc */ 10398b35abe2Sjtc if (*nname == '\0') 1040cdec4ac1Sdsl return 1; 1041206f4182Schristos if (flags & RENM) 10420c612021Schristos *nlen = strlcpy(name, nname, namelen); 10438b35abe2Sjtc } 1044cdec4ac1Sdsl return 0; 10458b35abe2Sjtc } 10468b35abe2Sjtc 1047ca541391Schristos 1048ca541391Schristos /* 1049ca541391Schristos * checkdotdot() 1050ca541391Schristos * Return true if a component of the name contains a reference to ".." 
/*
 * checkdotdot()
 *	Return true if a component of the name contains a reference to ".."
 */
static int
checkdotdot(const char *name)
{
	size_t n;

	/* 1. the name is ".." or starts with "../" */
	if (strncmp(name, "..", 2) == 0 &&
	    (name[2] == '/' || name[2] == '\0'))
		return 1;

	/* 2. a "/../" somewhere in the middle */
	if (strstr(name, "/../") != NULL)
		return 1;

	/* 3. the name ends in "/.." */
	n = strlen(name);
	if (n < 3)
		return 0;
	return name[n - 3] == '/' && name[n - 2] == '.' && name[n - 1] == '.';
}


/*
 * resub()
 *	apply the replacement to the matched expression. expand out the old
 *	style ed(1) subexpression expansion: "&" stands for the whole match,
 *	"\1" to "\9" for a subexpression, "\\" and "\&" for a literal
 *	backslash or ampersand.
 * --Parameters--
 *	rp is the compiled expression (only its subexpression count is used)
 *	pm holds the match offsets, which are taken relative to txt
 *	src is the replacement string
 *	txt is the text the offsets in pm refer to
 *	dest is where the expansion is written (it is NOT NUL terminated)
 *	destend marks the end of the space available at dest
 * Return:
 *	-1 if error (reference to a subexpression that does not exist, or a
 *	piece of matched text that does not fit), or the number of characters
 *	added to the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *txt, char *dest,
	char *destend)
{
	const int nsub = rp->re_nsub;
	char *in = src;
	char *out = dest;

	while (out < destend) {
		regmatch_t *m;
		int span;
		char ch;

		if ((ch = *in++) == '\0')
			break;

		if (ch == '&') {
			/* the whole match */
			m = pm;
		} else if (ch == '\\' && *in >= '1' && *in <= '9') {
			/*
			 * a numbered subexpression; it has to exist in the
			 * compiled expression
			 */
			int idx = *in++ - '0';

			if (idx > nsub)
				return -1;
			m = pm + idx;
		} else {
			/*
			 * plain character; a backslash in front of another
			 * backslash or of an ampersand is dropped
			 */
			if (ch == '\\' && (*in == '\\' || *in == '&'))
				ch = *in++;
			*out++ = ch;
			continue;
		}

		/*
		 * nothing to copy for a subexpression that did not take part
		 * in the match or that is empty
		 */
		if (m->rm_so < 0 || m->rm_eo < 0)
			continue;
		span = m->rm_eo - m->rm_so;
		if (span <= 0)
			continue;

		/*
		 * copy the matched text, giving up if it does not fit
		 */
		if (span > (destend - out))
			return -1;
		strncpy(out, txt + m->rm_so, span);
		out += span;
	}
	return out - dest;
}