xref: /netbsd-src/bin/rm/rm.c (revision e5548b402ae4c44fb816de42c7bba9581ce23ef5)
1 /* $NetBSD: rm.c,v 1.42 2004/01/11 09:41:55 wiz Exp $ */
2 
3 /*-
4  * Copyright (c) 1990, 1993, 1994, 2003
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1990, 1993, 1994\n\
35 	The Regents of the University of California.  All rights reserved.\n");
36 #endif /* not lint */
37 
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)rm.c	8.8 (Berkeley) 4/27/95";
41 #else
42 __RCSID("$NetBSD: rm.c,v 1.42 2004/01/11 09:41:55 wiz Exp $");
43 #endif
44 #endif /* not lint */
45 
46 #include <sys/param.h>
47 #include <sys/stat.h>
48 #include <sys/types.h>
49 
50 #include <err.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <fts.h>
54 #include <grp.h>
55 #include <locale.h>
56 #include <pwd.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61 
/*
 * Option flags.  Each is set by main() while parsing the command line and
 * read by the removal routines below.
 */
int	dflag;		/* -d given */
int	fflag;		/* -f given; selecting it clears iflag */
int	iflag;		/* -i given; selecting it clears fflag */
int	Pflag;		/* -P given: overwrite regular files before unlinking */
int	vflag;		/* -v given: print each name that was removed */
int	Wflag;		/* -W given: operate on whiteouts (undelete) */

/* Exit status handed to exit() by main(); set to 1 when a removal fails. */
int	eval;

/* Result of isatty(STDIN_FILENO): non-zero when the user can be prompted. */
int	stdin_ok;

int	main(int argc, char *argv[]);
void	usage(void);
void	checkdot(char **argv);
void	rm_file(char **argv);
void	rm_tree(char **argv);
void	rm_overwrite(char *file, struct stat *sbp);
int	check(char *path, char *name, struct stat *sp);
71 
/*
 * Used to implement `-f': evaluates to 1 when the error number x says an
 * operation failed because the file does not exist, either outright (ENOENT)
 * or because the path given could never name a file (ENAMETOOLONG, ENOTDIR),
 * and to 0 for any other value.
 */
#define NONEXISTENT(x) \
    (!((x) != ENOENT && (x) != ENAMETOOLONG && (x) != ENOTDIR))
79 
80 /*
81  * rm --
82  *	This rm is different from historic rm's, but is expected to match
83  *	POSIX 1003.2 behavior.  The most visible difference is that -f
84  *	has two specific effects now, ignore non-existent files and force
85  * 	file removal.
86  */
87 int
88 main(int argc, char *argv[])
89 {
90 	int ch, rflag;
91 
92 	setprogname(argv[0]);
93 	(void)setlocale(LC_ALL, "");
94 
95 	Pflag = rflag = 0;
96 	while ((ch = getopt(argc, argv, "dfiPRrvW")) != -1)
97 		switch (ch) {
98 		case 'd':
99 			dflag = 1;
100 			break;
101 		case 'f':
102 			fflag = 1;
103 			iflag = 0;
104 			break;
105 		case 'i':
106 			fflag = 0;
107 			iflag = 1;
108 			break;
109 		case 'P':
110 			Pflag = 1;
111 			break;
112 		case 'R':
113 		case 'r':			/* Compatibility. */
114 			rflag = 1;
115 			break;
116 		case 'v':
117 			vflag = 1;
118 			break;
119 		case 'W':
120 			Wflag = 1;
121 			break;
122 		case '?':
123 		default:
124 			usage();
125 		}
126 	argc -= optind;
127 	argv += optind;
128 
129 	if (argc < 1)
130 		usage();
131 
132 	checkdot(argv);
133 
134 	if (*argv) {
135 		stdin_ok = isatty(STDIN_FILENO);
136 
137 		if (rflag)
138 			rm_tree(argv);
139 		else
140 			rm_file(argv);
141 	}
142 
143 	exit(eval);
144 	/* NOTREACHED */
145 }
146 
147 void
148 rm_tree(char **argv)
149 {
150 	FTS *fts;
151 	FTSENT *p;
152 	int flags, needstat, rval;
153 
154 	/*
155 	 * Remove a file hierarchy.  If forcing removal (-f), or interactive
156 	 * (-i) or can't ask anyway (stdin_ok), don't stat the file.
157 	 */
158 	needstat = !fflag && !iflag && stdin_ok;
159 
160 	/*
161 	 * If the -i option is specified, the user can skip on the pre-order
162 	 * visit.  The fts_number field flags skipped directories.
163 	 */
164 #define	SKIPPED	1
165 
166 	flags = FTS_PHYSICAL;
167 	if (!needstat)
168 		flags |= FTS_NOSTAT;
169 	if (Wflag)
170 		flags |= FTS_WHITEOUT;
171 	if (!(fts = fts_open(argv, flags,
172 	    (int (*)(const FTSENT **, const FTSENT **))NULL)))
173 		err(1, NULL);
174 	while ((p = fts_read(fts)) != NULL) {
175 
176 		switch (p->fts_info) {
177 		case FTS_DNR:
178 			if (!fflag || p->fts_errno != ENOENT) {
179 				warnx("%s: %s", p->fts_path,
180 						strerror(p->fts_errno));
181 				eval = 1;
182 			}
183 			continue;
184 		case FTS_ERR:
185 			errx(EXIT_FAILURE, "%s: %s", p->fts_path,
186 					strerror(p->fts_errno));
187 			/* NOTREACHED */
188 		case FTS_NS:
189 			/*
190 			 * FTS_NS: assume that if can't stat the file, it
191 			 * can't be unlinked.
192 			 */
193 			if (fflag && NONEXISTENT(p->fts_errno))
194 				continue;
195 			if (needstat) {
196 				warnx("%s: %s", p->fts_path,
197 						strerror(p->fts_errno));
198 				eval = 1;
199 				continue;
200 			}
201 			break;
202 		case FTS_D:
203 			/* Pre-order: give user chance to skip. */
204 			if (!fflag && !check(p->fts_path, p->fts_accpath,
205 			    p->fts_statp)) {
206 				(void)fts_set(fts, p, FTS_SKIP);
207 				p->fts_number = SKIPPED;
208 			}
209 			continue;
210 		case FTS_DP:
211 			/* Post-order: see if user skipped. */
212 			if (p->fts_number == SKIPPED)
213 				continue;
214 			break;
215 		default:
216 			if (!fflag &&
217 			    !check(p->fts_path, p->fts_accpath, p->fts_statp))
218 				continue;
219 		}
220 
221 		rval = 0;
222 		/*
223 		 * If we can't read or search the directory, may still be
224 		 * able to remove it.  Don't print out the un{read,search}able
225 		 * message unless the remove fails.
226 		 */
227 		switch (p->fts_info) {
228 		case FTS_DP:
229 		case FTS_DNR:
230 			rval = rmdir(p->fts_accpath);
231 			if (rval != 0 && fflag && errno == ENOENT)
232 				continue;
233 			break;
234 
235 		case FTS_W:
236 			rval = undelete(p->fts_accpath);
237 			if (rval != 0 && fflag && errno == ENOENT)
238 				continue;
239 			break;
240 
241 		default:
242 			if (Pflag)
243 				rm_overwrite(p->fts_accpath, NULL);
244 			rval = unlink(p->fts_accpath);
245 			if (rval != 0 && fflag && NONEXISTENT(errno))
246 				continue;
247 			break;
248 		}
249 		if (rval != 0) {
250 			warn("%s", p->fts_path);
251 			eval = 1;
252 		} else if (vflag)
253 			(void)printf("%s\n", p->fts_path);
254 	}
255 	if (errno)
256 		err(1, "fts_read");
257 }
258 
259 void
260 rm_file(char **argv)
261 {
262 	struct stat sb;
263 	int rval;
264 	char *f;
265 
266 	/*
267 	 * Remove a file.  POSIX 1003.2 states that, by default, attempting
268 	 * to remove a directory is an error, so must always stat the file.
269 	 */
270 	while ((f = *argv++) != NULL) {
271 		/* Assume if can't stat the file, can't unlink it. */
272 		if (lstat(f, &sb)) {
273 			if (Wflag) {
274 				sb.st_mode = S_IFWHT|S_IWUSR|S_IRUSR;
275 			} else {
276 				if (!fflag || !NONEXISTENT(errno)) {
277 					warn("%s", f);
278 					eval = 1;
279 				}
280 				continue;
281 			}
282 		} else if (Wflag) {
283 			warnx("%s: %s", f, strerror(EEXIST));
284 			eval = 1;
285 			continue;
286 		}
287 
288 		if (S_ISDIR(sb.st_mode) && !dflag) {
289 			warnx("%s: is a directory", f);
290 			eval = 1;
291 			continue;
292 		}
293 		if (!fflag && !S_ISWHT(sb.st_mode) && !check(f, f, &sb))
294 			continue;
295 		if (S_ISWHT(sb.st_mode))
296 			rval = undelete(f);
297 		else if (S_ISDIR(sb.st_mode))
298 			rval = rmdir(f);
299 		else {
300 			if (Pflag)
301 				rm_overwrite(f, &sb);
302 			rval = unlink(f);
303 		}
304 		if (rval && (!fflag || !NONEXISTENT(errno))) {
305 			warn("%s", f);
306 			eval = 1;
307 		}
308 		if (vflag && rval == 0)
309 			(void)printf("%s\n", f);
310 	}
311 }
312 
313 /*
314  * rm_overwrite --
 *	Overwrite the file several times with varying bit patterns (four
 *	write passes, with a read-back verification of the third).
316  *
317  * This is an expensive way to keep people from recovering files from your
318  * non-snapshotted FFS filesystems using fsdb(8).  Really.  No more.  Only
319  * regular files are deleted, directories (and therefore names) will remain.
320  * Also, this assumes a fixed-block file system (like FFS, or a V7 or a
321  * System V file system).  In a logging file system, you'll have to have
322  * kernel support.
323  *
324  * A note on standards:  U.S. DoD 5220.22-M "National Industrial Security
325  * Program Operating Manual" ("NISPOM") is often cited as a reference
326  * for clearing and sanitizing magnetic media.  In fact, a matrix of
327  * "clearing" and "sanitization" methods for various media was given in
328  * Chapter 8 of the original 1995 version of NISPOM.  However, that
329  * matrix was *removed from the document* when Chapter 8 was rewritten
330  * in Change 2 to the document in 2001.  Recently, the Defense Security
331  * Service has made a revised clearing and sanitization matrix available
332  * in Microsoft Word format on the DSS web site.  The standardization
333  * status of this matrix is unclear.  Furthermore, one must be very
334  * careful when referring to this matrix: it is intended for the "clearing"
335  * prior to reuse or "sanitization" prior to disposal of *entire media*,
336  * not individual files and the only non-physically-destructive method of
337  * "sanitization" that is permitted for magnetic disks of any kind is
338  * specifically noted to be prohibited for media that have contained
339  * Top Secret data.
340  *
341  * It is impossible to actually conform to the exact procedure given in
342  * the matrix if one is overwriting a file, not an entire disk, because
343  * the procedure requires examination and comparison of the disk's defect
344  * lists.  Any program that claims to securely erase *files* while
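 * conforming to the standard, then, is not correct.  We do as much of
 * what the standard requires as can actually be done when erasing a
 * file, rather than an entire disk; but that does not make us conformant.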
346  *
347  * Furthermore, the presence of track caches, disk and controller write
348  * caches, and so forth make it extremely difficult to ensure that data
349  * have actually been written to the disk, particularly when one tries
350  * to repeatedly overwrite the same sectors in quick succession.  We call
351  * fsync(), but controllers with nonvolatile cache, as well as IDE disks
352  * that just plain lie about the stable storage of data, will defeat this.
353  *
354  * Finally, widely respected research suggests that the given procedure
355  * is nowhere near sufficient to prevent the recovery of data using special
356  * forensic equipment and techniques that are well-known.  This is
357  * presumably one reason that the matrix requires physical media destruction,
358  * rather than any technique of the sort attempted here, for secret data.
359  *
360  * Caveat Emptor.
361  */
362 
363 void
364 rm_overwrite(char *file, struct stat *sbp)
365 {
366 	struct stat sb;
367 	int fd, randint;
368 	char randchar;
369 
370 	fd = -1;
371 	if (sbp == NULL) {
372 		if (lstat(file, &sb))
373 			goto err;
374 		sbp = &sb;
375 	}
376 	if (!S_ISREG(sbp->st_mode))
377 		return;
378 
379 	/* flags to try to defeat hidden caching by forcing seeks */
380 	if ((fd = open(file, O_RDWR|O_SYNC|O_RSYNC, 0)) == -1)
381 		goto err;
382 
383 #define RAND_BYTES	1
384 #define THIS_BYTE	0
385 
386 #define	WRITE_PASS(mode, byte) do {					\
387 	off_t len;							\
388 	int wlen, i;							\
389 	char buf[8 * 1024];						\
390 									\
391 	if (fsync(fd) || lseek(fd, (off_t)0, SEEK_SET))			\
392 		goto err;						\
393 									\
394 	if (mode == THIS_BYTE)						\
395 		memset(buf, byte, sizeof(buf));				\
396 	for (len = sbp->st_size; len > 0; len -= wlen) {		\
397 		if (mode == RAND_BYTES) {				\
398 			for (i = 0; i < sizeof(buf); 			\
399 			    i+= sizeof(u_int32_t))			\
400 				*(int *)(buf + i) = arc4random();	\
401 		}							\
402 		wlen = len < sizeof(buf) ? len : sizeof(buf);		\
403 		if (write(fd, buf, wlen) != wlen)			\
404 			goto err;					\
405 	}								\
406 	sync();		/* another poke at hidden caches */		\
407 } while (/* CONSTCOND */ 0)
408 
409 #define READ_PASS(byte) do {						\
410 	off_t len;							\
411 	int rlen;							\
412 	char pattern[8 * 1024];						\
413 	char buf[8 * 1024];						\
414 									\
415 	if (fsync(fd) || lseek(fd, (off_t)0, SEEK_SET))			\
416 		goto err;						\
417 									\
418 	memset(pattern, byte, sizeof(pattern));				\
419 	for(len = sbp->st_size; len > 0; len -= rlen) {			\
420 		rlen = len < sizeof(buf) ? len : sizeof(buf);		\
421 		if(read(fd, buf, rlen) != rlen)				\
422 			goto err;					\
423 		if(memcmp(buf, pattern, rlen))				\
424 			goto err;					\
425 	}								\
426 	sync();		/* another poke at hidden caches */		\
427 } while (/* CONSTCOND */ 0)
428 
429 	/*
430 	 * DSS sanitization matrix "clear" for magnetic disks:
431 	 * option 'c' "Overwrite all addressable locations with a single
432 	 * character."
433 	 */
434 	randint = arc4random();
435 	randchar = *(char *)&randint;
436 	WRITE_PASS(THIS_BYTE, randchar);
437 
438 	/*
439 	 * DSS sanitization matrix "sanitize" for magnetic disks:
440 	 * option 'd', sub 2 "Overwrite all addressable locations with a
441 	 * character, then its complement.  Verify "complement" character
442 	 * was written successfully to all addressable locations, then
443 	 * overwrite all addressable locations with random characters; or
444 	 * verify third overwrite of random characters."  The rest of the
445 	 * text in d-sub-2 specifies requirements for overwriting spared
446 	 * sectors; we cannot conform to it when erasing only a file, thus
447 	 * we do not conform to the standard.
448 	 */
449 
450 	/* 1. "a character" */
451 	WRITE_PASS(THIS_BYTE, 0xff);
452 
453 	/* 2. "its complement" */
454 	WRITE_PASS(THIS_BYTE, 0x00);
455 
456 	/* 3. "Verify 'complement' character" */
457 	READ_PASS(0x00);
458 
459 	/* 4. "overwrite all addressable locations with random characters" */
460 
461 	WRITE_PASS(RAND_BYTES, 0x00);
462 
463 	/*
464 	 * As the file might be huge, and we note that this revision of
465 	 * the matrix says "random characters", not "a random character"
466 	 * as the original did, we do not verify the random-character
467 	 * write; the "or" in the standard allows this.
468 	 */
469 
470 	if (!close(fd))
471 		return;
472 
473 err:	eval = 1;
474 	warn("%s", file);
475 }
476 
477 int
478 check(char *path, char *name, struct stat *sp)
479 {
480 	int ch, first;
481 	char modep[15];
482 
483 	/* Check -i first. */
484 	if (iflag)
485 		(void)fprintf(stderr, "remove '%s'? ", path);
486 	else {
487 		/*
488 		 * If it's not a symbolic link and it's unwritable and we're
489 		 * talking to a terminal, ask.  Symbolic links are excluded
490 		 * because their permissions are meaningless.  Check stdin_ok
491 		 * first because we may not have stat'ed the file.
492 		 */
493 		if (!stdin_ok || S_ISLNK(sp->st_mode) ||
494 		    !(access(name, W_OK) && (errno != ETXTBSY)))
495 			return (1);
496 		strmode(sp->st_mode, modep);
497 		(void)fprintf(stderr, "override %s%s%s/%s for '%s'? ",
498 		    modep + 1, modep[9] == ' ' ? "" : " ",
499 		    user_from_uid(sp->st_uid, 0),
500 		    group_from_gid(sp->st_gid, 0), path);
501 	}
502 	(void)fflush(stderr);
503 
504 	first = ch = getchar();
505 	while (ch != '\n' && ch != EOF)
506 		ch = getchar();
507 	return (first == 'y' || first == 'Y');
508 }
509 
510 /*
511  * POSIX.2 requires that if "." or ".." are specified as the basename
512  * portion of an operand, a diagnostic message be written to standard
513  * error and nothing more be done with such operands.
514  *
515  * Since POSIX.2 defines basename as the final portion of a path after
516  * trailing slashes have been removed, we'll remove them here.
517  */
518 #define ISDOT(a) ((a)[0] == '.' && (!(a)[1] || ((a)[1] == '.' && !(a)[2])))
519 void
520 checkdot(char **argv)
521 {
522 	char *p, **save, **t;
523 	int complained;
524 
525 	complained = 0;
526 	for (t = argv; *t;) {
527 		/* strip trailing slashes */
528 		p = strrchr(*t, '\0');
529 		while (--p > *t && *p == '/')
530 			*p = '\0';
531 
532 		/* extract basename */
533 		if ((p = strrchr(*t, '/')) != NULL)
534 			++p;
535 		else
536 			p = *t;
537 
538 		if (ISDOT(p)) {
539 			if (!complained++)
540 				warnx("\".\" and \"..\" may not be removed");
541 			eval = 1;
542 			for (save = t; (t[0] = t[1]) != NULL; ++t)
543 				continue;
544 			t = save;
545 		} else
546 			++t;
547 	}
548 }
549 
/*
 * usage --
 *	Print the synopsis, prefixed with the program name, on standard
 *	error and terminate with exit status 1.  Does not return.
 */
void
usage(void)
{
	const char *prog;

	prog = getprogname();
	(void)fprintf(stderr, "usage: %s [-f|-i] [-dPRrvW] file ...\n", prog);
	exit(1);
	/* NOTREACHED */
}
559