1 // SPDX-License-Identifier: 0BSD
2
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file file_io.c
6 /// \brief File opening, unlinking, and closing
7 //
8 // Author: Lasse Collin
9 //
10 ///////////////////////////////////////////////////////////////////////////////
11
12 #include "private.h"
13
14 #include <fcntl.h>
15
16 #ifdef TUKLIB_DOSLIKE
17 # include <io.h>
18 #else
19 # include <poll.h>
20 static bool warn_fchown;
21 #endif
22
23 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
24 # include <sys/time.h>
25 #elif defined(HAVE__FUTIME)
26 # include <sys/utime.h>
27 #elif defined(HAVE_UTIME)
28 # include <utime.h>
29 #endif
30
31 #include "tuklib_open_stdxxx.h"
32
// Compatibility definitions for Microsoft's compiler. The rest of this file
// is written against POSIX-style names, so the types, mode bits, and
// functions used here are mapped to the underscore-prefixed names below.
#ifdef _MSC_VER
// ssize_t is a 64-bit type when _WIN64 is defined and plain int otherwise.
# ifdef _WIN64
typedef __int64 ssize_t;
# else
typedef int ssize_t;
# endif

typedef int mode_t;
# define S_IRUSR _S_IREAD
# define S_IWUSR _S_IWRITE

# define setmode _setmode
# define open _open
# define close _close
# define lseek _lseeki64
# define unlink _unlink

// The casts are to silence warnings.
// The sizes are known to be small enough.
# define read(fd, buf, size) _read(fd, buf, (unsigned int)(size))
# define write(fd, buf, size) _write(fd, buf, (unsigned int)(size))

// File type tests built from the _S_IFMT mask.
# define S_ISDIR(m) (((m) & _S_IFMT) == _S_IFDIR)
# define S_ISREG(m) (((m) & _S_IFMT) == _S_IFREG)
#endif
58
// Where O_BINARY is not defined, make it zero so that it can be ORed into
// the open() flags unconditionally.
#ifndef O_BINARY
# define O_BINARY 0
#endif

// Same for O_NOCTTY: a zero value has no effect when ORed into the flags.
#ifndef O_NOCTTY
# define O_NOCTTY 0
#endif

// Using this macro to silence a warning from gcc -Wlogical-op.
// When both names expand to the same value, a single comparison is enough;
// otherwise both values have to be tested.
#if EAGAIN == EWOULDBLOCK
# define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN)
#else
# define IS_EAGAIN_OR_EWOULDBLOCK(e) \
	((e) == EAGAIN || (e) == EWOULDBLOCK)
#endif
74
75
/// Return values of io_wait()
typedef enum {
	IO_WAIT_MORE,    // Reading or writing is possible.
	IO_WAIT_ERROR,   // Error or user_abort
	IO_WAIT_TIMEOUT, // poll() timed out
} io_wait_ret;
81
82
/// If true, try to create sparse files when decompressing.
/// io_no_sparse() sets this to false.
static bool try_sparse = true;

#ifndef TUKLIB_DOSLIKE
/// File status flags of standard input. This is used by io_open_src()
/// and io_close_src().
static int stdin_flags;

/// True when O_NONBLOCK has been added to standard input and
/// io_close_src() has to write stdin_flags back.
static bool restore_stdin_flags = false;

/// Original file status flags of standard output. This is used by
/// io_open_dest() and io_close_dest() to save and restore the flags.
static int stdout_flags;

/// True when the flags of standard output have been modified (O_NONBLOCK
/// added and/or O_APPEND removed) and io_close_dest() has to write
/// stdout_flags back.
static bool restore_stdout_flags = false;

/// Self-pipe used together with the user_abort variable to avoid
/// race conditions with signal handling. Index 0 is the end that io_wait()
/// polls; index 1 is the end that io_write_to_user_abort_pipe() writes to.
static int user_abort_pipe[2];
#endif
101
102
103 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size);
104
105
106 extern void
io_init(void)107 io_init(void)
108 {
109 // Make sure that stdin, stdout, and stderr are connected to
110 // a valid file descriptor. Exit immediately with exit code ERROR
111 // if we cannot make the file descriptors valid. Maybe we should
112 // print an error message, but our stderr could be screwed anyway.
113 tuklib_open_stdxxx(E_ERROR);
114
115 #ifndef TUKLIB_DOSLIKE
116 // If fchown() fails setting the owner, we warn about it only if
117 // we are root.
118 warn_fchown = geteuid() == 0;
119
120 // Create a pipe for the self-pipe trick.
121 if (pipe(user_abort_pipe))
122 message_fatal(_("Error creating a pipe: %s"),
123 strerror(errno));
124
125 // Make both ends of the pipe non-blocking.
126 for (unsigned i = 0; i < 2; ++i) {
127 int flags = fcntl(user_abort_pipe[i], F_GETFL);
128 if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL,
129 flags | O_NONBLOCK) == -1)
130 message_fatal(_("Error creating a pipe: %s"),
131 strerror(errno));
132 }
133 #endif
134
135 #ifdef __DJGPP__
136 // Avoid doing useless things when statting files.
137 // This isn't important but doesn't hurt.
138 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
139 #endif
140
141 return;
142 }
143
144
145 #ifndef TUKLIB_DOSLIKE
146 extern void
io_write_to_user_abort_pipe(void)147 io_write_to_user_abort_pipe(void)
148 {
149 // If the write() fails, it's probably due to the pipe being full.
150 // Failing in that case is fine. If the reason is something else,
151 // there's not much we can do since this is called in a signal
152 // handler. So ignore the errors and try to avoid warnings with
153 // GCC and glibc when _FORTIFY_SOURCE=2 is used.
154 uint8_t b = '\0';
155 const ssize_t ret = write(user_abort_pipe[1], &b, 1);
156 (void)ret;
157 return;
158 }
159 #endif
160
161
162 extern void
io_no_sparse(void)163 io_no_sparse(void)
164 {
165 try_sparse = false;
166 return;
167 }
168
169
170 #ifndef TUKLIB_DOSLIKE
171 /// \brief Waits for input or output to become available or for a signal
172 ///
173 /// This uses the self-pipe trick to avoid a race condition that can occur
174 /// if a signal is caught after user_abort has been checked but before e.g.
175 /// read() has been called. In that situation read() could block unless
176 /// non-blocking I/O is used. With non-blocking I/O something like select()
177 /// or poll() is needed to avoid a busy-wait loop, and the same race condition
178 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in
179 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is
180 /// old and very portable.
181 static io_wait_ret
io_wait(file_pair * pair,int timeout,bool is_reading)182 io_wait(file_pair *pair, int timeout, bool is_reading)
183 {
184 struct pollfd pfd[2];
185
186 if (is_reading) {
187 pfd[0].fd = pair->src_fd;
188 pfd[0].events = POLLIN;
189 } else {
190 pfd[0].fd = pair->dest_fd;
191 pfd[0].events = POLLOUT;
192 }
193
194 pfd[1].fd = user_abort_pipe[0];
195 pfd[1].events = POLLIN;
196
197 while (true) {
198 const int ret = poll(pfd, 2, timeout);
199
200 if (user_abort)
201 return IO_WAIT_ERROR;
202
203 if (ret == -1) {
204 if (errno == EINTR || errno == EAGAIN)
205 continue;
206
207 message_error(_("%s: poll() failed: %s"),
208 is_reading ? pair->src_name
209 : pair->dest_name,
210 strerror(errno));
211 return IO_WAIT_ERROR;
212 }
213
214 if (ret == 0)
215 return IO_WAIT_TIMEOUT;
216
217 if (pfd[0].revents != 0)
218 return IO_WAIT_MORE;
219 }
220 }
221 #endif
222
223
224 /// \brief Unlink a file
225 ///
226 /// This tries to verify that the file being unlinked really is the file that
227 /// we want to unlink by verifying device and inode numbers. There's still
228 /// a small unavoidable race, but this is much better than nothing (the file
229 /// could have been moved/replaced even hours earlier).
230 static void
io_unlink(const char * name,const struct stat * known_st)231 io_unlink(const char *name, const struct stat *known_st)
232 {
233 #if defined(TUKLIB_DOSLIKE)
234 // On DOS-like systems, st_ino is meaningless, so don't bother
235 // testing it. Just silence a compiler warning.
236 (void)known_st;
237 #else
238 struct stat new_st;
239
240 // If --force was used, use stat() instead of lstat(). This way
241 // (de)compressing symlinks works correctly. However, it also means
242 // that xz cannot detect if a regular file foo is renamed to bar
243 // and then a symlink foo -> bar is created. Because of stat()
244 // instead of lstat(), xz will think that foo hasn't been replaced
245 // with another file. Thus, xz will remove foo even though it no
246 // longer is the same file that xz used when it started compressing.
247 // Probably it's not too bad though, so this doesn't need a more
248 // complex fix.
249 const int stat_ret = opt_force
250 ? stat(name, &new_st) : lstat(name, &new_st);
251
252 if (stat_ret
253 # ifdef __VMS
254 // st_ino is an array, and we don't want to
255 // compare st_dev at all.
256 || memcmp(&new_st.st_ino, &known_st->st_ino,
257 sizeof(new_st.st_ino)) != 0
258 # else
259 // Typical POSIX-like system
260 || new_st.st_dev != known_st->st_dev
261 || new_st.st_ino != known_st->st_ino
262 # endif
263 )
264 // TRANSLATORS: When compression or decompression finishes,
265 // and xz is going to remove the source file, xz first checks
266 // if the source file still exists, and if it does, does its
267 // device and inode numbers match what xz saw when it opened
268 // the source file. If these checks fail, this message is
269 // shown, %s being the filename, and the file is not deleted.
270 // The check for device and inode numbers is there, because
271 // it is possible that the user has put a new file in place
272 // of the original file, and in that case it obviously
273 // shouldn't be removed.
274 message_warning(_("%s: File seems to have been moved, "
275 "not removing"), name);
276 else
277 #endif
278 // There's a race condition between lstat() and unlink()
279 // but at least we have tried to avoid removing wrong file.
280 if (unlink(name))
281 message_warning(_("%s: Cannot remove: %s"),
282 name, strerror(errno));
283
284 return;
285 }
286
287
/// \brief      Copies owner/group, permissions, and timestamps
///
/// The attributes are read from pair->src_st and applied to the destination
/// file. Failures to set the owner, group, or permissions produce warnings
/// (the owner warning only when warn_fchown is set); the return values of
/// the timestamp functions are ignored. On DOS-like systems only the
/// timestamps are copied.
///
/// \param      pair    File pair with a valid src_st and dest_st and an
///                     open dest_fd. dest_name is used instead of dest_fd
///                     when only a name-based timestamp function exists.
///
/// \todo       ACL and EA support
///
static void
io_copy_attrs(const file_pair *pair)
{
	// Skip chown and chmod on Windows.
#ifndef TUKLIB_DOSLIKE
	// This function is more tricky than you may think at first.
	// Blindly copying permissions may permit users to access the
	// destination file who didn't have permission to access the
	// source file.

	// Try changing the owner of the file. If we aren't root or the owner
	// isn't already us, fchown() probably doesn't succeed. We warn
	// about failing fchown() only if we are root.
	if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1))
			&& warn_fchown)
		message_warning(_("%s: Cannot set the file owner: %s"),
				pair->dest_name, strerror(errno));

	// Permission bits to give to the destination file. Which value it
	// gets depends on whether the group could be set below.
	mode_t mode;

	// With BSD semantics the new dest file may have a group that
	// does not belong to the user. If the src file has the same gid
	// nothing has to be done. Nevertheless OpenBSD fchown(2) fails
	// in this case which seems to be POSIX compliant. As there is
	// nothing to do, skip the system call.
	if (pair->dest_st.st_gid != pair->src_st.st_gid
			&& fchown(pair->dest_fd, (uid_t)(-1),
				pair->src_st.st_gid)) {
		message_warning(_("%s: Cannot set the file group: %s"),
				pair->dest_name, strerror(errno));
		// We can still safely copy some additional permissions:
		// 'group' must be at least as strict as 'other' and
		// also vice versa.
		//
		// NOTE: After this, the owner of the source file may
		// get additional permissions. This shouldn't be too bad,
		// because the owner would have had permission to chmod
		// the original file anyway.
		//
		// The first statement takes the bits that are set for both
		// 'group' and 'other'. The second one keeps the 'user' bits
		// as they are and uses that common set for both 'group'
		// and 'other'.
		mode = ((pair->src_st.st_mode & 0070) >> 3)
				& (pair->src_st.st_mode & 0007);
		mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode;
	} else {
		// Drop the setuid, setgid, and sticky bits.
		mode = pair->src_st.st_mode & 0777;
	}

	if (fchmod(pair->dest_fd, mode))
		message_warning(_("%s: Cannot set the file permissions: %s"),
				pair->dest_name, strerror(errno));
#endif

	// Copy the timestamps. We have several possible ways to do this, of
	// which some are better in both security and precision.
	//
	// First, get the nanosecond part of the timestamps. As of writing,
	// it's not standardized by POSIX, and there are several names for
	// the same thing in struct stat.
	long atime_nsec;
	long mtime_nsec;

# if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
	// GNU and Solaris
	atime_nsec = pair->src_st.st_atim.tv_nsec;
	mtime_nsec = pair->src_st.st_mtim.tv_nsec;

# elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
	// BSD
	atime_nsec = pair->src_st.st_atimespec.tv_nsec;
	mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;

# elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
	// GNU and BSD without extensions
	atime_nsec = pair->src_st.st_atimensec;
	mtime_nsec = pair->src_st.st_mtimensec;

# elif defined(HAVE_STRUCT_STAT_ST_UATIME)
	// Tru64
	// NOTE(review): the factor 1000 suggests that these fields hold
	// microseconds; confirm against the Tru64 documentation.
	atime_nsec = pair->src_st.st_uatime * 1000;
	mtime_nsec = pair->src_st.st_umtime * 1000;

# elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
	// UnixWare
	atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
	mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;

# else
	// Safe fallback
	atime_nsec = 0;
	mtime_nsec = 0;
# endif

	// Construct a structure to hold the timestamps and call appropriate
	// function to set the timestamps.
	// In both arrays below, index 0 is the access time and index 1 is
	// the modification time.
#if defined(HAVE_FUTIMENS)
	// Use nanosecond precision.
	struct timespec tv[2];
	tv[0].tv_sec = pair->src_st.st_atime;
	tv[0].tv_nsec = atime_nsec;
	tv[1].tv_sec = pair->src_st.st_mtime;
	tv[1].tv_nsec = mtime_nsec;

	(void)futimens(pair->dest_fd, tv);

#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
	// Use microsecond precision.
	struct timeval tv[2];
	tv[0].tv_sec = pair->src_st.st_atime;
	tv[0].tv_usec = atime_nsec / 1000;
	tv[1].tv_sec = pair->src_st.st_mtime;
	tv[1].tv_usec = mtime_nsec / 1000;

# if defined(HAVE_FUTIMES)
	(void)futimes(pair->dest_fd, tv);
# elif defined(HAVE_FUTIMESAT)
	(void)futimesat(pair->dest_fd, NULL, tv);
# else
	// Argh, no function to use a file descriptor to set the timestamp.
	(void)utimes(pair->dest_name, tv);
# endif

#elif defined(HAVE__FUTIME)
	// Use one-second precision with Windows-specific _futime().
	// We could use utime() too except that for some reason the
	// timestamp will get reset at close(). With _futime() it works.
	// This struct cannot be const as _futime() takes a non-const pointer.
	struct _utimbuf buf = {
		.actime = pair->src_st.st_atime,
		.modtime = pair->src_st.st_mtime,
	};

	// Avoid warnings.
	(void)atime_nsec;
	(void)mtime_nsec;

	(void)_futime(pair->dest_fd, &buf);

#elif defined(HAVE_UTIME)
	// Use one-second precision. utime() doesn't support using file
	// descriptor either. Some systems have broken utime() prototype
	// so don't make this const.
	struct utimbuf buf = {
		.actime = pair->src_st.st_atime,
		.modtime = pair->src_st.st_mtime,
	};

	// Avoid warnings.
	(void)atime_nsec;
	(void)mtime_nsec;

	(void)utime(pair->dest_name, &buf);
#endif

	return;
}
446
447
/// \brief      Opens the source file
///
/// For standard input no file is opened: src_fd is set to STDIN_FILENO and
/// an attempt is made to switch it to non-blocking mode. Otherwise the
/// file is opened, its stat information is stored in pair->src_st, and
/// the file is rejected with a message if it is a directory or, depending
/// on the options, a symlink, a non-regular file, a setuid/setgid/sticky
/// file, or a file with more than one hard link.
///
/// \param      pair    File pair with src_name set. On success src_fd is
///                     valid. On failure after open() has succeeded, the
///                     file descriptor has been closed again.
///
/// \return     false on success, true on error (a message has been
///             printed except when io_wait() stopped due to user_abort)
static bool
io_open_src_real(file_pair *pair)
{
	// There's nothing to open when reading from stdin.
	// This is a pointer comparison, not a string comparison.
	if (pair->src_name == stdin_filename) {
		pair->src_fd = STDIN_FILENO;
#ifdef TUKLIB_DOSLIKE
		setmode(STDIN_FILENO, O_BINARY);
#else
		// Try to set stdin to non-blocking mode. It won't work
		// e.g. on OpenBSD if stdout is e.g. /dev/null. In such
		// case we proceed as if stdin were non-blocking anyway
		// (in case of /dev/null it will be in practice). The
		// same applies to stdout in io_open_dest_real().
		stdin_flags = fcntl(STDIN_FILENO, F_GETFL);
		if (stdin_flags == -1) {
			message_error(_("Error getting the file status flags "
					"from standard input: %s"),
					strerror(errno));
			return true;
		}

		// Remember to restore the flags only if they were
		// actually changed here.
		if ((stdin_flags & O_NONBLOCK) == 0
				&& fcntl(STDIN_FILENO, F_SETFL,
					stdin_flags | O_NONBLOCK) != -1)
			restore_stdin_flags = true;
#endif
#ifdef HAVE_POSIX_FADVISE
		// It will fail if stdin is a pipe and that's fine.
		(void)posix_fadvise(STDIN_FILENO, 0, 0,
				opt_mode == MODE_LIST
					? POSIX_FADV_RANDOM
					: POSIX_FADV_SEQUENTIAL);
#endif
		return false;
	}

	// Symlinks are not followed unless writing to stdout or --force
	// or --keep was used.
	const bool follow_symlinks
			= opt_stdout || opt_force || opt_keep_original;

	// We accept only regular files if we are writing the output
	// to disk too. bzip2 allows overriding this with --force but
	// gzip and xz don't.
	const bool reg_files_only = !opt_stdout;

	// Flags for open()
	int flags = O_RDONLY | O_BINARY | O_NOCTTY;

#ifndef TUKLIB_DOSLIKE
	// Use non-blocking I/O:
	//   - It prevents blocking when opening FIFOs and some other
	//     special files, which is good if we want to accept only
	//     regular files.
	//   - It can help avoiding some race conditions with signal handling.
	flags |= O_NONBLOCK;
#endif

#if defined(O_NOFOLLOW)
	if (!follow_symlinks)
		flags |= O_NOFOLLOW;
#elif !defined(TUKLIB_DOSLIKE)
	// Some POSIX-like systems lack O_NOFOLLOW (it's not required
	// by POSIX). Check for symlinks with a separate lstat() on
	// these systems.
	if (!follow_symlinks) {
		struct stat st;
		if (lstat(pair->src_name, &st)) {
			message_error(_("%s: %s"), pair->src_name,
					strerror(errno));
			return true;

		} else if (S_ISLNK(st.st_mode)) {
			message_warning(_("%s: Is a symbolic link, "
					"skipping"), pair->src_name);
			return true;
		}
	}
#else
	// Avoid warnings.
	(void)follow_symlinks;
#endif

	// Try to open the file. Signals have been blocked so EINTR shouldn't
	// be possible.
	pair->src_fd = open(pair->src_name, flags);

	if (pair->src_fd == -1) {
		// Signals (that have a signal handler) have been blocked.
		assert(errno != EINTR);

#ifdef O_NOFOLLOW
		// Give an understandable error message if the reason
		// for failing was that the file was a symbolic link.
		//
		// Note that at least Linux, OpenBSD, Solaris, and Darwin
		// use ELOOP to indicate that O_NOFOLLOW was the reason
		// that open() failed. Because there may be
		// directories in the pathname, ELOOP may occur also
		// because of a symlink loop in the directory part.
		// So ELOOP doesn't tell us what actually went wrong,
		// and this stupidity went into POSIX-1.2008 too.
		//
		// FreeBSD associates EMLINK with O_NOFOLLOW and
		// Tru64 uses ENOTSUP. We use these directly here
		// and skip the lstat() call and the associated race.
		// I want to hear if there are other kernels that
		// fail with something else than ELOOP with O_NOFOLLOW.
		bool was_symlink = false;

# if defined(__FreeBSD__) || defined(__DragonFly__)
		if (errno == EMLINK)
			was_symlink = true;

# elif defined(__digital__) && defined(__unix__)
		if (errno == ENOTSUP)
			was_symlink = true;

# elif defined(__NetBSD__)
		if (errno == EFTYPE)
			was_symlink = true;

# else
		if (errno == ELOOP && !follow_symlinks) {
			// lstat() may change errno, but the value from
			// open() is needed for the error message below.
			const int saved_errno = errno;
			struct stat st;
			if (lstat(pair->src_name, &st) == 0
					&& S_ISLNK(st.st_mode))
				was_symlink = true;

			errno = saved_errno;
		}
# endif

		if (was_symlink)
			message_warning(_("%s: Is a symbolic link, "
					"skipping"), pair->src_name);
		else
#endif
			// Something else than O_NOFOLLOW failing
			// (assuming that the race conditions didn't
			// confuse us).
			message_error(_("%s: %s"), pair->src_name,
					strerror(errno));

		return true;
	}

	// Stat the source file. We need the result also when we copy
	// the permissions, and when unlinking.
	//
	// NOTE: Use stat() instead of fstat() with DJGPP, because
	// then we have a better chance to get st_ino value that can
	// be used in io_open_dest_real() to prevent overwriting the
	// source file.
#ifdef __DJGPP__
	if (stat(pair->src_name, &pair->src_st))
		goto error_msg;
#else
	if (fstat(pair->src_fd, &pair->src_st))
		goto error_msg;
#endif

	if (S_ISDIR(pair->src_st.st_mode)) {
		message_warning(_("%s: Is a directory, skipping"),
				pair->src_name);
		goto error;
	}

	if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) {
		message_warning(_("%s: Not a regular file, skipping"),
				pair->src_name);
		goto error;
	}

#ifndef TUKLIB_DOSLIKE
	if (reg_files_only && !opt_force && !opt_keep_original) {
		if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
			// gzip rejects setuid and setgid files even
			// when --force was used. bzip2 doesn't check
			// for them, but calls fchown() after fchmod(),
			// and many systems automatically drop setuid
			// and setgid bits there.
			//
			// We accept setuid and setgid files if
			// --force or --keep was used. We drop these bits
			// explicitly in io_copy_attrs().
			message_warning(_("%s: File has setuid or "
					"setgid bit set, skipping"),
					pair->src_name);
			goto error;
		}

		if (pair->src_st.st_mode & S_ISVTX) {
			message_warning(_("%s: File has sticky bit "
					"set, skipping"),
					pair->src_name);
			goto error;
		}

		if (pair->src_st.st_nlink > 1) {
			message_warning(_("%s: Input file has more "
					"than one hard link, "
					"skipping"), pair->src_name);
			goto error;
		}
	}

	// If it is something else than a regular file, wait until
	// there is input available. This way reading from FIFOs
	// will work when open() is used with O_NONBLOCK.
	if (!S_ISREG(pair->src_st.st_mode)) {
		// The caller has blocked the signals. Unblock them for
		// the duration of the wait.
		signals_unblock();
		const io_wait_ret ret = io_wait(pair, -1, true);
		signals_block();

		if (ret != IO_WAIT_MORE)
			goto error;
	}
#endif

#ifdef HAVE_POSIX_FADVISE
	// It will fail with some special files like FIFOs but that is fine.
	(void)posix_fadvise(pair->src_fd, 0, 0,
			opt_mode == MODE_LIST
				? POSIX_FADV_RANDOM
				: POSIX_FADV_SEQUENTIAL);
#endif

	return false;

	// error_msg prints strerror(errno) and falls through to error, which
	// closes the already opened file.
error_msg:
	message_error(_("%s: %s"), pair->src_name, strerror(errno));
error:
	(void)close(pair->src_fd);
	return true;
}
687
688
689 extern file_pair *
io_open_src(const char * src_name)690 io_open_src(const char *src_name)
691 {
692 if (src_name[0] == '\0') {
693 message_error(_("Empty filename, skipping"));
694 return NULL;
695 }
696
697 // Since we have only one file open at a time, we can use
698 // a statically allocated structure.
699 static file_pair pair;
700
701 // This implicitly also initializes src_st.st_size to zero
702 // which is expected to be <= 0 by default. fstat() isn't
703 // called when reading from standard input but src_st.st_size
704 // is still read.
705 pair = (file_pair){
706 .src_name = src_name,
707 .dest_name = NULL,
708 .src_fd = -1,
709 .dest_fd = -1,
710 .src_eof = false,
711 .src_has_seen_input = false,
712 .flush_needed = false,
713 .dest_try_sparse = false,
714 .dest_pending_sparse = 0,
715 };
716
717 // Block the signals, for which we have a custom signal handler, so
718 // that we don't need to worry about EINTR.
719 signals_block();
720 const bool error = io_open_src_real(&pair);
721 signals_unblock();
722
723 #ifdef ENABLE_SANDBOX
724 if (!error)
725 sandbox_enable_strict_if_allowed(pair.src_fd,
726 user_abort_pipe[0], user_abort_pipe[1]);
727 #endif
728
729 return error ? NULL : &pair;
730 }
731
732
733 /// \brief Closes source file of the file_pair structure
734 ///
735 /// \param pair File whose src_fd should be closed
736 /// \param success If true, the file will be removed from the disk if
737 /// closing succeeds and --keep hasn't been used.
738 static void
io_close_src(file_pair * pair,bool success)739 io_close_src(file_pair *pair, bool success)
740 {
741 #ifndef TUKLIB_DOSLIKE
742 if (restore_stdin_flags) {
743 assert(pair->src_fd == STDIN_FILENO);
744
745 restore_stdin_flags = false;
746
747 if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1)
748 message_error(_("Error restoring the status flags "
749 "to standard input: %s"),
750 strerror(errno));
751 }
752 #endif
753
754 if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
755 // Close the file before possibly unlinking it. On DOS-like
756 // systems this is always required since unlinking will fail
757 // if the file is open. On POSIX systems it usually works
758 // to unlink open files, but in some cases it doesn't and
759 // one gets EBUSY in errno.
760 //
761 // xz 5.2.2 and older unlinked the file before closing it
762 // (except on DOS-like systems). The old code didn't handle
763 // EBUSY and could fail e.g. on some CIFS shares. The
764 // advantage of unlinking before closing is negligible
765 // (avoids a race between close() and stat()/lstat() and
766 // unlink()), so let's keep this simple.
767 (void)close(pair->src_fd);
768
769 if (success && !opt_keep_original)
770 io_unlink(pair->src_name, &pair->src_st);
771 }
772
773 return;
774 }
775
776
/// \brief      Opens the destination file
///
/// Standard output is used when opt_stdout is set or the source is
/// standard input. Otherwise the destination name is derived from the
/// source name with suffix_get_dest_name() and a new file is created with
/// O_EXCL (after unlinking an existing one if --force was used). On
/// non-DOS-like systems this also decides whether sparse output will be
/// attempted and sets pair->dest_try_sparse accordingly.
///
/// \param      pair    File pair with an opened source. dest_name,
///                     dest_fd, and dest_st are filled in here. On the
///                     error paths of a regular destination file,
///                     dest_name has already been freed.
///
/// \return     false on success, true on error (a message has been
///             printed, except when suffix_get_dest_name() returned NULL;
///             that function isn't visible here)
static bool
io_open_dest_real(file_pair *pair)
{
	if (opt_stdout || pair->src_fd == STDIN_FILENO) {
		// We don't modify or free() this.
		pair->dest_name = (char *)"(stdout)";
		pair->dest_fd = STDOUT_FILENO;
#ifdef TUKLIB_DOSLIKE
		setmode(STDOUT_FILENO, O_BINARY);
#else
		// Try to set O_NONBLOCK if it isn't already set.
		// If it fails, we assume that stdout is non-blocking
		// in practice. See the comments in io_open_src_real()
		// for similar situation with stdin.
		//
		// NOTE: O_APPEND may be unset later in this function
		// and it relies on stdout_flags being set here.
		stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
		if (stdout_flags == -1) {
			message_error(_("Error getting the file status flags "
					"from standard output: %s"),
					strerror(errno));
			return true;
		}

		if ((stdout_flags & O_NONBLOCK) == 0
				&& fcntl(STDOUT_FILENO, F_SETFL,
					stdout_flags | O_NONBLOCK) != -1)
				restore_stdout_flags = true;
#endif
	} else {
		pair->dest_name = suffix_get_dest_name(pair->src_name);
		if (pair->dest_name == NULL)
			return true;

#ifdef __DJGPP__
		struct stat st;
		if (stat(pair->dest_name, &st) == 0) {
			// Check that it isn't a special file like "prn".
			if (st.st_dev == -1) {
				message_error("%s: Refusing to write to "
						"a DOS special file",
						pair->dest_name);
				free(pair->dest_name);
				return true;
			}

			// Check that we aren't overwriting the source file.
			if (st.st_dev == pair->src_st.st_dev
					&& st.st_ino == pair->src_st.st_ino) {
				message_error("%s: Output file is the same "
						"as the input file",
						pair->dest_name);
				free(pair->dest_name);
				return true;
			}
		}
#endif

		// If --force was used, unlink the target file first.
		// A missing target (ENOENT) is not an error.
		if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
			message_error(_("%s: Cannot remove: %s"),
					pair->dest_name, strerror(errno));
			free(pair->dest_name);
			return true;
		}

		// Open the file.
		// O_CREAT | O_EXCL makes open() fail if the file already
		// exists.
		int flags = O_WRONLY | O_BINARY | O_NOCTTY
				| O_CREAT | O_EXCL;
#ifndef TUKLIB_DOSLIKE
		flags |= O_NONBLOCK;
#endif
		// Only the owner gets read and write permission here.
		const mode_t mode = S_IRUSR | S_IWUSR;
		pair->dest_fd = open(pair->dest_name, flags, mode);

		if (pair->dest_fd == -1) {
			message_error(_("%s: %s"), pair->dest_name,
					strerror(errno));
			free(pair->dest_name);
			return true;
		}
	}

	if (fstat(pair->dest_fd, &pair->dest_st)) {
		// If fstat() really fails, we have a safe fallback here.
#if defined(__VMS)
		pair->dest_st.st_ino[0] = 0;
		pair->dest_st.st_ino[1] = 0;
		pair->dest_st.st_ino[2] = 0;
#else
		pair->dest_st.st_dev = 0;
		pair->dest_st.st_ino = 0;
#endif
	}
	// The "else if" in both branches below continues the fstat() check
	// above, so these branches run only when fstat() succeeded.
#if defined(TUKLIB_DOSLIKE) && !defined(__DJGPP__)
	// Check that the output file is a regular file. We open with O_EXCL
	// but that doesn't prevent open()/_open() on Windows from opening
	// files like "con" or "nul".
	//
	// With DJGPP this check is done with stat() even before opening
	// the output file. That method or a variant of it doesn't work on
	// Windows because on Windows stat()/_stat64() sets st.st_mode so
	// that S_ISREG(st.st_mode) will be true even for special files.
	// With fstat()/_fstat64() it works.
	else if (pair->dest_fd != STDOUT_FILENO
			&& !S_ISREG(pair->dest_st.st_mode)) {
		message_error("%s: Destination is not a regular file",
				pair->dest_name);

		// dest_fd needs to be reset to -1 to keep io_close() working.
		(void)close(pair->dest_fd);
		pair->dest_fd = -1;

		free(pair->dest_name);
		return true;
	}
#elif !defined(TUKLIB_DOSLIKE)
	else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
		// When writing to standard output, we need to be extra
		// careful:
		//  - It may be connected to something else than
		//    a regular file.
		//  - We aren't necessarily writing to a new empty file
		//    or to the end of an existing file.
		//  - O_APPEND may be active.
		//
		// TODO: I'm keeping this disabled for DOS-like systems
		// for now. FAT doesn't support sparse files, but NTFS
		// does, so maybe this should be enabled on Windows after
		// some testing.
		//
		// Every "return false" inside this block means success
		// without sparse output: dest_try_sparse keeps its
		// current value.
		if (pair->dest_fd == STDOUT_FILENO) {
			if (!S_ISREG(pair->dest_st.st_mode))
				return false;

			if (stdout_flags & O_APPEND) {
				// Creating a sparse file is not possible
				// when O_APPEND is active (it's used by
				// shell's >> redirection). As I understand
				// it, it is safe to temporarily disable
				// O_APPEND in xz, because if someone
				// happened to write to the same file at the
				// same time, results would be bad anyway
				// (users shouldn't assume that xz uses any
				// specific block size when writing data).
				//
				// The write position may be something else
				// than the end of the file, so we must fix
				// it to start writing at the end of the file
				// to imitate O_APPEND.
				if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
					return false;

				// Construct the new file status flags.
				// If O_NONBLOCK was set earlier in this
				// function, it must be kept here too.
				int flags = stdout_flags & ~O_APPEND;
				if (restore_stdout_flags)
					flags |= O_NONBLOCK;

				// If this fcntl() fails, we continue but won't
				// try to create sparse output. The original
				// flags will still be restored if needed (to
				// unset O_NONBLOCK) when the file is finished.
				if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1)
					return false;

				// Disabling O_APPEND succeeded. Mark
				// that the flags should be restored
				// in io_close_dest(). (This may have already
				// been set when enabling O_NONBLOCK.)
				restore_stdout_flags = true;

			} else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
					!= pair->dest_st.st_size) {
				// Writing won't start exactly at the end
				// of the file. We cannot use sparse output,
				// because it would probably corrupt the file.
				return false;
			}
		}

		pair->dest_try_sparse = true;
	}
#endif

	return false;
}
965
966
967 extern bool
io_open_dest(file_pair * pair)968 io_open_dest(file_pair *pair)
969 {
970 signals_block();
971 const bool ret = io_open_dest_real(pair);
972 signals_unblock();
973 return ret;
974 }
975
976
977 /// \brief Closes destination file of the file_pair structure
978 ///
979 /// \param pair File whose dest_fd should be closed
980 /// \param success If false, the file will be removed from the disk.
981 ///
982 /// \return Zero if closing succeeds. On error, -1 is returned and
983 /// error message printed.
984 static bool
io_close_dest(file_pair * pair,bool success)985 io_close_dest(file_pair *pair, bool success)
986 {
987 #ifndef TUKLIB_DOSLIKE
988 // If io_open_dest() has disabled O_APPEND, restore it here.
989 if (restore_stdout_flags) {
990 assert(pair->dest_fd == STDOUT_FILENO);
991
992 restore_stdout_flags = false;
993
994 if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
995 message_error(_("Error restoring the O_APPEND flag "
996 "to standard output: %s"),
997 strerror(errno));
998 return true;
999 }
1000 }
1001 #endif
1002
1003 if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
1004 return false;
1005
1006 if (close(pair->dest_fd)) {
1007 message_error(_("%s: Closing the file failed: %s"),
1008 pair->dest_name, strerror(errno));
1009
1010 // Closing destination file failed, so we cannot trust its
1011 // contents. Get rid of junk:
1012 io_unlink(pair->dest_name, &pair->dest_st);
1013 free(pair->dest_name);
1014 return true;
1015 }
1016
1017 // If the operation using this file wasn't successful, we git rid
1018 // of the junk file.
1019 if (!success)
1020 io_unlink(pair->dest_name, &pair->dest_st);
1021
1022 free(pair->dest_name);
1023
1024 return false;
1025 }
1026
1027
1028 extern void
io_close(file_pair * pair,bool success)1029 io_close(file_pair *pair, bool success)
1030 {
1031 // Take care of sparseness at the end of the output file.
1032 if (success && pair->dest_try_sparse
1033 && pair->dest_pending_sparse > 0) {
1034 // Seek forward one byte less than the size of the pending
1035 // hole, then write one zero-byte. This way the file grows
1036 // to its correct size. An alternative would be to use
1037 // ftruncate() but that isn't portable enough (e.g. it
1038 // doesn't work with FAT on Linux; FAT isn't that important
1039 // since it doesn't support sparse files anyway, but we don't
1040 // want to create corrupt files on it).
1041 if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
1042 SEEK_CUR) == -1) {
1043 message_error(_("%s: Seeking failed when trying "
1044 "to create a sparse file: %s"),
1045 pair->dest_name, strerror(errno));
1046 success = false;
1047 } else {
1048 const uint8_t zero[1] = { '\0' };
1049 if (io_write_buf(pair, zero, 1))
1050 success = false;
1051 }
1052 }
1053
1054 signals_block();
1055
1056 // Copy the file attributes. We need to skip this if destination
1057 // file isn't open or it is standard output.
1058 if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
1059 io_copy_attrs(pair);
1060
1061 // Close the destination first. If it fails, we must not remove
1062 // the source file!
1063 if (io_close_dest(pair, success))
1064 success = false;
1065
1066 // Close the source file, and unlink it if the operation using this
1067 // file pair was successful and we haven't requested to keep the
1068 // source file.
1069 io_close_src(pair, success);
1070
1071 signals_unblock();
1072
1073 return;
1074 }
1075
1076
1077 extern void
io_fix_src_pos(file_pair * pair,size_t rewind_size)1078 io_fix_src_pos(file_pair *pair, size_t rewind_size)
1079 {
1080 assert(rewind_size <= IO_BUFFER_SIZE);
1081
1082 if (rewind_size > 0) {
1083 // This doesn't need to work on unseekable file descriptors,
1084 // so just ignore possible errors.
1085 (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
1086 }
1087
1088 return;
1089 }
1090
1091
1092 extern size_t
io_read(file_pair * pair,io_buf * buf,size_t size)1093 io_read(file_pair *pair, io_buf *buf, size_t size)
1094 {
1095 assert(size <= IO_BUFFER_SIZE);
1096
1097 size_t pos = 0;
1098
1099 while (pos < size) {
1100 const ssize_t amount = read(
1101 pair->src_fd, buf->u8 + pos, size - pos);
1102
1103 if (amount == 0) {
1104 pair->src_eof = true;
1105 break;
1106 }
1107
1108 if (amount == -1) {
1109 if (errno == EINTR) {
1110 if (user_abort)
1111 return SIZE_MAX;
1112
1113 continue;
1114 }
1115
1116 #ifndef TUKLIB_DOSLIKE
1117 if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
1118 // Disable the flush-timeout if no input has
1119 // been seen since the previous flush and thus
1120 // there would be nothing to flush after the
1121 // timeout expires (avoids busy waiting).
1122 const int timeout = pair->src_has_seen_input
1123 ? mytime_get_flush_timeout()
1124 : -1;
1125
1126 switch (io_wait(pair, timeout, true)) {
1127 case IO_WAIT_MORE:
1128 continue;
1129
1130 case IO_WAIT_ERROR:
1131 return SIZE_MAX;
1132
1133 case IO_WAIT_TIMEOUT:
1134 pair->flush_needed = true;
1135 return pos;
1136
1137 default:
1138 message_bug();
1139 }
1140 }
1141 #endif
1142
1143 message_error(_("%s: Read error: %s"),
1144 pair->src_name, strerror(errno));
1145
1146 return SIZE_MAX;
1147 }
1148
1149 pos += (size_t)(amount);
1150
1151 if (!pair->src_has_seen_input) {
1152 pair->src_has_seen_input = true;
1153 mytime_set_flush_time();
1154 }
1155 }
1156
1157 return pos;
1158 }
1159
1160
1161 extern bool
io_seek_src(file_pair * pair,uint64_t pos)1162 io_seek_src(file_pair *pair, uint64_t pos)
1163 {
1164 // Caller must not attempt to seek past the end of the input file
1165 // (seeking to 100 in a 100-byte file is seeking to the end of
1166 // the file, not past the end of the file, and thus that is allowed).
1167 //
1168 // This also validates that pos can be safely cast to off_t.
1169 if (pos > (uint64_t)(pair->src_st.st_size))
1170 message_bug();
1171
1172 if (lseek(pair->src_fd, (off_t)(pos), SEEK_SET) == -1) {
1173 message_error(_("%s: Error seeking the file: %s"),
1174 pair->src_name, strerror(errno));
1175 return true;
1176 }
1177
1178 pair->src_eof = false;
1179
1180 return false;
1181 }
1182
1183
1184 extern bool
io_pread(file_pair * pair,io_buf * buf,size_t size,uint64_t pos)1185 io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos)
1186 {
1187 // Using lseek() and read() is more portable than pread() and
1188 // for us it is as good as real pread().
1189 if (io_seek_src(pair, pos))
1190 return true;
1191
1192 const size_t amount = io_read(pair, buf, size);
1193 if (amount == SIZE_MAX)
1194 return true;
1195
1196 if (amount != size) {
1197 message_error(_("%s: Unexpected end of file"),
1198 pair->src_name);
1199 return true;
1200 }
1201
1202 return false;
1203 }
1204
1205
1206 static bool
is_sparse(const io_buf * buf)1207 is_sparse(const io_buf *buf)
1208 {
1209 assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0);
1210
1211 for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i)
1212 if (buf->u64[i] != 0)
1213 return false;
1214
1215 return true;
1216 }
1217
1218
1219 static bool
io_write_buf(file_pair * pair,const uint8_t * buf,size_t size)1220 io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
1221 {
1222 assert(size <= IO_BUFFER_SIZE);
1223
1224 while (size > 0) {
1225 const ssize_t amount = write(pair->dest_fd, buf, size);
1226 if (amount == -1) {
1227 if (errno == EINTR) {
1228 if (user_abort)
1229 return true;
1230
1231 continue;
1232 }
1233
1234 #ifndef TUKLIB_DOSLIKE
1235 if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
1236 if (io_wait(pair, -1, false) == IO_WAIT_MORE)
1237 continue;
1238
1239 return true;
1240 }
1241 #endif
1242
1243 // Handle broken pipe specially. gzip and bzip2
1244 // don't print anything on SIGPIPE. In addition,
1245 // gzip --quiet uses exit status 2 (warning) on
1246 // broken pipe instead of whatever raise(SIGPIPE)
1247 // would make it return. It is there to hide "Broken
1248 // pipe" message on some old shells (probably old
1249 // GNU bash).
1250 //
1251 // We don't do anything special with --quiet, which
1252 // is what bzip2 does too. If we get SIGPIPE, we
1253 // will handle it like other signals by setting
1254 // user_abort, and get EPIPE here.
1255 if (errno != EPIPE)
1256 message_error(_("%s: Write error: %s"),
1257 pair->dest_name, strerror(errno));
1258
1259 return true;
1260 }
1261
1262 buf += (size_t)(amount);
1263 size -= (size_t)(amount);
1264 }
1265
1266 return false;
1267 }
1268
1269
1270 extern bool
io_write(file_pair * pair,const io_buf * buf,size_t size)1271 io_write(file_pair *pair, const io_buf *buf, size_t size)
1272 {
1273 assert(size <= IO_BUFFER_SIZE);
1274
1275 if (pair->dest_try_sparse) {
1276 // Check if the block is sparse (contains only zeros). If it
1277 // sparse, we just store the amount and return. We will take
1278 // care of actually skipping over the hole when we hit the
1279 // next data block or close the file.
1280 //
1281 // Since io_close() requires that dest_pending_sparse > 0
1282 // if the file ends with sparse block, we must also return
1283 // if size == 0 to avoid doing the lseek().
1284 if (size == IO_BUFFER_SIZE) {
1285 // Even if the block was sparse, treat it as non-sparse
1286 // if the pending sparse amount is large compared to
1287 // the size of off_t. In practice this only matters
1288 // on 32-bit systems where off_t isn't always 64 bits.
1289 const off_t pending_max
1290 = (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2);
1291 if (is_sparse(buf) && pair->dest_pending_sparse
1292 < pending_max) {
1293 pair->dest_pending_sparse += (off_t)(size);
1294 return false;
1295 }
1296 } else if (size == 0) {
1297 return false;
1298 }
1299
1300 // This is not a sparse block. If we have a pending hole,
1301 // skip it now.
1302 if (pair->dest_pending_sparse > 0) {
1303 if (lseek(pair->dest_fd, pair->dest_pending_sparse,
1304 SEEK_CUR) == -1) {
1305 message_error(_("%s: Seeking failed when "
1306 "trying to create a sparse "
1307 "file: %s"), pair->dest_name,
1308 strerror(errno));
1309 return true;
1310 }
1311
1312 pair->dest_pending_sparse = 0;
1313 }
1314 }
1315
1316 return io_write_buf(pair, buf->u8, size);
1317 }
1318