1*e15daa8bSchristos /* $NetBSD: is_tar.c,v 1.1.1.11 2023/08/18 18:36:49 christos Exp $ */
23c9d2f35Schristos
31b108b8bSchristos /*
41b108b8bSchristos * Copyright (c) Ian F. Darwin 1986-1995.
51b108b8bSchristos * Software written by Ian F. Darwin and others;
61b108b8bSchristos * maintained 1995-present by Christos Zoulas and others.
71b108b8bSchristos *
81b108b8bSchristos * Redistribution and use in source and binary forms, with or without
91b108b8bSchristos * modification, are permitted provided that the following conditions
101b108b8bSchristos * are met:
111b108b8bSchristos * 1. Redistributions of source code must retain the above copyright
121b108b8bSchristos * notice immediately at the beginning of the file, without modification,
131b108b8bSchristos * this list of conditions, and the following disclaimer.
141b108b8bSchristos * 2. Redistributions in binary form must reproduce the above copyright
151b108b8bSchristos * notice, this list of conditions and the following disclaimer in the
161b108b8bSchristos * documentation and/or other materials provided with the distribution.
171b108b8bSchristos *
181b108b8bSchristos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
191b108b8bSchristos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
201b108b8bSchristos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
211b108b8bSchristos * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
221b108b8bSchristos * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
231b108b8bSchristos * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
241b108b8bSchristos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
251b108b8bSchristos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
261b108b8bSchristos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
271b108b8bSchristos * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
281b108b8bSchristos * SUCH DAMAGE.
291b108b8bSchristos */
301b108b8bSchristos /*
311b108b8bSchristos * is_tar() -- figure out whether file is a tar archive.
321b108b8bSchristos *
33*e15daa8bSchristos * Stolen (by the author!) from the public domain tar program:
341b108b8bSchristos * Public Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
351b108b8bSchristos *
361b108b8bSchristos * @(#)list.c 1.18 9/23/86 Public Domain - gnu
371b108b8bSchristos *
381b108b8bSchristos * Comments changed and some code/comments reformatted
391b108b8bSchristos * for file command by Ian Darwin.
401b108b8bSchristos */
411b108b8bSchristos
421b108b8bSchristos #include "file.h"
431b108b8bSchristos
441b108b8bSchristos #ifndef lint
451b108b8bSchristos #if 0
46*e15daa8bSchristos FILE_RCSID("@(#)$File: is_tar.c,v 1.50 2022/12/26 17:31:14 christos Exp $")
471b108b8bSchristos #else
48*e15daa8bSchristos __RCSID("$NetBSD: is_tar.c,v 1.1.1.11 2023/08/18 18:36:49 christos Exp $");
491b108b8bSchristos #endif
501b108b8bSchristos #endif
511b108b8bSchristos
521b108b8bSchristos #include "magic.h"
531b108b8bSchristos #include <string.h>
541b108b8bSchristos #include <ctype.h>
551b108b8bSchristos #include "tar.h"
561b108b8bSchristos
/* Non-zero when c is one of '0'..'7'.  Note: evaluates c twice. */
#define	isodigit(c)	((c) >= '0' && (c) <= '7')
581b108b8bSchristos
59*e15daa8bSchristos file_private int is_tar(const unsigned char *, size_t);
60*e15daa8bSchristos file_private int from_oct(const char *, size_t); /* Decode octal number */
611b108b8bSchristos
/*
 * Descriptions printed by file_is_tar(), indexed by (is_tar() result - 1).
 * These should be equal to the messages found in ../magic/Magdir/archive.
 */
static const char tartype[][32] = {
	"tar archive",
	"POSIX tar archive",
	"POSIX tar archive (GNU)",
};
671b108b8bSchristos
68*e15daa8bSchristos file_protected int
file_is_tar(struct magic_set * ms,const struct buffer * b)6952d7030aSchristos file_is_tar(struct magic_set *ms, const struct buffer *b)
701b108b8bSchristos {
71006f8008Schristos const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
7252d7030aSchristos size_t nbytes = b->flen;
731b108b8bSchristos /*
741b108b8bSchristos * Do the tar test first, because if the first file in the tar
751b108b8bSchristos * archive starts with a dot, we can confuse it with an nroff file.
761b108b8bSchristos */
771b108b8bSchristos int tar;
781b108b8bSchristos int mime = ms->flags & MAGIC_MIME;
791b108b8bSchristos
80ae9cfef6Schristos if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
811b108b8bSchristos return 0;
821b108b8bSchristos
831b108b8bSchristos tar = is_tar(buf, nbytes);
841b108b8bSchristos if (tar < 1 || tar > 3)
851b108b8bSchristos return 0;
861b108b8bSchristos
87006f8008Schristos if (mime == MAGIC_MIME_ENCODING)
88006f8008Schristos return 1;
89006f8008Schristos
901b108b8bSchristos if (file_printf(ms, "%s", mime ? "application/x-tar" :
911b108b8bSchristos tartype[tar - 1]) == -1)
921b108b8bSchristos return -1;
93006f8008Schristos
941b108b8bSchristos return 1;
951b108b8bSchristos }
961b108b8bSchristos
971b108b8bSchristos /*
981b108b8bSchristos * Return
991b108b8bSchristos * 0 if the checksum is bad (i.e., probably not a tar archive),
1001b108b8bSchristos * 1 for old UNIX tar file,
1011b108b8bSchristos * 2 for Unix Std (POSIX) tar file,
1021b108b8bSchristos * 3 for GNU tar file.
1031b108b8bSchristos */
104*e15daa8bSchristos file_private int
is_tar(const unsigned char * buf,size_t nbytes)1051b108b8bSchristos is_tar(const unsigned char *buf, size_t nbytes)
1061b108b8bSchristos {
107a77ebd86Schristos static const char gpkg_match[] = "/gpkg-1";
108a77ebd86Schristos
109dc958920Schristos const union record *header = RCAST(const union record *,
110dc958920Schristos RCAST(const void *, buf));
111008b33e8Schristos size_t i;
1121b108b8bSchristos int sum, recsum;
113008b33e8Schristos const unsigned char *p, *ep;
114a77ebd86Schristos const char *nulp;
1151b108b8bSchristos
116008b33e8Schristos if (nbytes < sizeof(*header))
1171b108b8bSchristos return 0;
1181b108b8bSchristos
119a77ebd86Schristos /* If the file looks like Gentoo GLEP 78 binary package (GPKG),
120a77ebd86Schristos * don't waste time on further checks and fall back to magic rules.
121a77ebd86Schristos */
122a77ebd86Schristos nulp = CAST(const char *,
123a77ebd86Schristos memchr(header->header.name, 0, sizeof(header->header.name)));
124a77ebd86Schristos if (nulp != NULL && nulp >= header->header.name + sizeof(gpkg_match) &&
125a77ebd86Schristos memcmp(nulp - sizeof(gpkg_match) + 1, gpkg_match,
126a77ebd86Schristos sizeof(gpkg_match)) == 0)
127a77ebd86Schristos return 0;
128a77ebd86Schristos
129008b33e8Schristos recsum = from_oct(header->header.chksum, sizeof(header->header.chksum));
1301b108b8bSchristos
1311b108b8bSchristos sum = 0;
1321b108b8bSchristos p = header->charptr;
133008b33e8Schristos ep = header->charptr + sizeof(*header);
134008b33e8Schristos while (p < ep)
1355ccaa8c0Schristos sum += *p++;
1361b108b8bSchristos
1371b108b8bSchristos /* Adjust checksum to count the "chksum" field as blanks. */
138008b33e8Schristos for (i = 0; i < sizeof(header->header.chksum); i++)
1395ccaa8c0Schristos sum -= header->header.chksum[i];
140008b33e8Schristos sum += ' ' * sizeof(header->header.chksum);
1411b108b8bSchristos
1421b108b8bSchristos if (sum != recsum)
1431b108b8bSchristos return 0; /* Not a tar archive */
1441b108b8bSchristos
145008b33e8Schristos if (strncmp(header->header.magic, GNUTMAGIC,
146008b33e8Schristos sizeof(header->header.magic)) == 0)
1471b108b8bSchristos return 3; /* GNU Unix Standard tar archive */
148008b33e8Schristos
149008b33e8Schristos if (strncmp(header->header.magic, TMAGIC,
150008b33e8Schristos sizeof(header->header.magic)) == 0)
1511b108b8bSchristos return 2; /* Unix Standard tar archive */
1521b108b8bSchristos
1531b108b8bSchristos return 1; /* Old fashioned tar archive */
1541b108b8bSchristos }
1551b108b8bSchristos
1561b108b8bSchristos
1571b108b8bSchristos /*
1581b108b8bSchristos * Quick and dirty octal conversion.
1591b108b8bSchristos *
1605ccaa8c0Schristos * Result is -1 if the field is invalid (all blank, or non-octal).
1611b108b8bSchristos */
162*e15daa8bSchristos file_private int
from_oct(const char * where,size_t digs)163008b33e8Schristos from_oct(const char *where, size_t digs)
1641b108b8bSchristos {
1651b108b8bSchristos int value;
1661b108b8bSchristos
167008b33e8Schristos if (digs == 0)
168008b33e8Schristos return -1;
169008b33e8Schristos
170dc958920Schristos while (isspace(CAST(unsigned char, *where))) { /* Skip spaces */
1711b108b8bSchristos where++;
172008b33e8Schristos if (digs-- == 0)
1731b108b8bSchristos return -1; /* All blank field */
1741b108b8bSchristos }
1751b108b8bSchristos value = 0;
1765ccaa8c0Schristos while (digs > 0 && isodigit(*where)) { /* Scan til non-octal */
1771b108b8bSchristos value = (value << 3) | (*where++ - '0');
178008b33e8Schristos digs--;
1791b108b8bSchristos }
1801b108b8bSchristos
181dc958920Schristos if (digs > 0 && *where && !isspace(CAST(unsigned char, *where)))
1825ccaa8c0Schristos return -1; /* Ended on non-(space/NUL) */
1831b108b8bSchristos
1841b108b8bSchristos return value;
1851b108b8bSchristos }
186