xref: /netbsd-src/external/bsd/file/dist/src/is_tar.c (revision e15daa8be9575f7ad2ca804c7c7c2d7f8e182d98)
1*e15daa8bSchristos /*	$NetBSD: is_tar.c,v 1.1.1.11 2023/08/18 18:36:49 christos Exp $	*/
23c9d2f35Schristos 
31b108b8bSchristos /*
41b108b8bSchristos  * Copyright (c) Ian F. Darwin 1986-1995.
51b108b8bSchristos  * Software written by Ian F. Darwin and others;
61b108b8bSchristos  * maintained 1995-present by Christos Zoulas and others.
71b108b8bSchristos  *
81b108b8bSchristos  * Redistribution and use in source and binary forms, with or without
91b108b8bSchristos  * modification, are permitted provided that the following conditions
101b108b8bSchristos  * are met:
111b108b8bSchristos  * 1. Redistributions of source code must retain the above copyright
121b108b8bSchristos  *    notice immediately at the beginning of the file, without modification,
131b108b8bSchristos  *    this list of conditions, and the following disclaimer.
141b108b8bSchristos  * 2. Redistributions in binary form must reproduce the above copyright
151b108b8bSchristos  *    notice, this list of conditions and the following disclaimer in the
161b108b8bSchristos  *    documentation and/or other materials provided with the distribution.
171b108b8bSchristos  *
181b108b8bSchristos  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
191b108b8bSchristos  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
201b108b8bSchristos  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
211b108b8bSchristos  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
221b108b8bSchristos  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
231b108b8bSchristos  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
241b108b8bSchristos  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
251b108b8bSchristos  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
261b108b8bSchristos  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
271b108b8bSchristos  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
281b108b8bSchristos  * SUCH DAMAGE.
291b108b8bSchristos  */
301b108b8bSchristos /*
311b108b8bSchristos  * is_tar() -- figure out whether file is a tar archive.
321b108b8bSchristos  *
 * Stolen (by the author!) from the public domain tar program:
341b108b8bSchristos  * Public Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
351b108b8bSchristos  *
361b108b8bSchristos  * @(#)list.c 1.18 9/23/86 Public Domain - gnu
371b108b8bSchristos  *
381b108b8bSchristos  * Comments changed and some code/comments reformatted
391b108b8bSchristos  * for file command by Ian Darwin.
401b108b8bSchristos  */
411b108b8bSchristos 
421b108b8bSchristos #include "file.h"
431b108b8bSchristos 
441b108b8bSchristos #ifndef lint
451b108b8bSchristos #if 0
46*e15daa8bSchristos FILE_RCSID("@(#)$File: is_tar.c,v 1.50 2022/12/26 17:31:14 christos Exp $")
471b108b8bSchristos #else
48*e15daa8bSchristos __RCSID("$NetBSD: is_tar.c,v 1.1.1.11 2023/08/18 18:36:49 christos Exp $");
491b108b8bSchristos #endif
501b108b8bSchristos #endif
511b108b8bSchristos 
521b108b8bSchristos #include "magic.h"
531b108b8bSchristos #include <string.h>
541b108b8bSchristos #include <ctype.h>
551b108b8bSchristos #include "tar.h"
561b108b8bSchristos 
/* True when c is an octal digit, '0' through '7'; c is evaluated twice. */
#define	isodigit(c)	((c) >= '0' && (c) <= '7')
581b108b8bSchristos 
59*e15daa8bSchristos file_private int is_tar(const unsigned char *, size_t);
60*e15daa8bSchristos file_private int from_oct(const char *, size_t);	/* Decode octal number */
611b108b8bSchristos 
/*
 * Descriptions printed for each flavour of tar archive, indexed by the
 * is_tar() return code minus one.  These should be equal to the messages
 * found in ../magic/Magdir/archive.
 */
static const char tartype[][32] = {
	"tar archive",			/* is_tar() == 1 */
	"POSIX tar archive",		/* is_tar() == 2 */
	"POSIX tar archive (GNU)",	/* is_tar() == 3 */
};
671b108b8bSchristos 
68*e15daa8bSchristos file_protected int
file_is_tar(struct magic_set * ms,const struct buffer * b)6952d7030aSchristos file_is_tar(struct magic_set *ms, const struct buffer *b)
701b108b8bSchristos {
71006f8008Schristos 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
7252d7030aSchristos 	size_t nbytes = b->flen;
731b108b8bSchristos 	/*
741b108b8bSchristos 	 * Do the tar test first, because if the first file in the tar
751b108b8bSchristos 	 * archive starts with a dot, we can confuse it with an nroff file.
761b108b8bSchristos 	 */
771b108b8bSchristos 	int tar;
781b108b8bSchristos 	int mime = ms->flags & MAGIC_MIME;
791b108b8bSchristos 
80ae9cfef6Schristos 	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
811b108b8bSchristos 		return 0;
821b108b8bSchristos 
831b108b8bSchristos 	tar = is_tar(buf, nbytes);
841b108b8bSchristos 	if (tar < 1 || tar > 3)
851b108b8bSchristos 		return 0;
861b108b8bSchristos 
87006f8008Schristos 	if (mime == MAGIC_MIME_ENCODING)
88006f8008Schristos 		return 1;
89006f8008Schristos 
901b108b8bSchristos 	if (file_printf(ms, "%s", mime ? "application/x-tar" :
911b108b8bSchristos 	    tartype[tar - 1]) == -1)
921b108b8bSchristos 		return -1;
93006f8008Schristos 
941b108b8bSchristos 	return 1;
951b108b8bSchristos }
961b108b8bSchristos 
971b108b8bSchristos /*
981b108b8bSchristos  * Return
991b108b8bSchristos  *	0 if the checksum is bad (i.e., probably not a tar archive),
1001b108b8bSchristos  *	1 for old UNIX tar file,
1011b108b8bSchristos  *	2 for Unix Std (POSIX) tar file,
1021b108b8bSchristos  *	3 for GNU tar file.
1031b108b8bSchristos  */
104*e15daa8bSchristos file_private int
is_tar(const unsigned char * buf,size_t nbytes)1051b108b8bSchristos is_tar(const unsigned char *buf, size_t nbytes)
1061b108b8bSchristos {
107a77ebd86Schristos 	static const char gpkg_match[] = "/gpkg-1";
108a77ebd86Schristos 
109dc958920Schristos 	const union record *header = RCAST(const union record *,
110dc958920Schristos 	    RCAST(const void *, buf));
111008b33e8Schristos 	size_t i;
1121b108b8bSchristos 	int sum, recsum;
113008b33e8Schristos 	const unsigned char *p, *ep;
114a77ebd86Schristos 	const char *nulp;
1151b108b8bSchristos 
116008b33e8Schristos 	if (nbytes < sizeof(*header))
1171b108b8bSchristos 		return 0;
1181b108b8bSchristos 
119a77ebd86Schristos 	/* If the file looks like Gentoo GLEP 78 binary package (GPKG),
120a77ebd86Schristos 	 * don't waste time on further checks and fall back to magic rules.
121a77ebd86Schristos 	 */
122a77ebd86Schristos 	nulp = CAST(const char *,
123a77ebd86Schristos 	    memchr(header->header.name, 0, sizeof(header->header.name)));
124a77ebd86Schristos 	if (nulp != NULL && nulp >= header->header.name + sizeof(gpkg_match) &&
125a77ebd86Schristos 	    memcmp(nulp - sizeof(gpkg_match) + 1, gpkg_match,
126a77ebd86Schristos 	    sizeof(gpkg_match)) == 0)
127a77ebd86Schristos 	    return 0;
128a77ebd86Schristos 
129008b33e8Schristos 	recsum = from_oct(header->header.chksum, sizeof(header->header.chksum));
1301b108b8bSchristos 
1311b108b8bSchristos 	sum = 0;
1321b108b8bSchristos 	p = header->charptr;
133008b33e8Schristos 	ep = header->charptr + sizeof(*header);
134008b33e8Schristos 	while (p < ep)
1355ccaa8c0Schristos 		sum += *p++;
1361b108b8bSchristos 
1371b108b8bSchristos 	/* Adjust checksum to count the "chksum" field as blanks. */
138008b33e8Schristos 	for (i = 0; i < sizeof(header->header.chksum); i++)
1395ccaa8c0Schristos 		sum -= header->header.chksum[i];
140008b33e8Schristos 	sum += ' ' * sizeof(header->header.chksum);
1411b108b8bSchristos 
1421b108b8bSchristos 	if (sum != recsum)
1431b108b8bSchristos 		return 0;	/* Not a tar archive */
1441b108b8bSchristos 
145008b33e8Schristos 	if (strncmp(header->header.magic, GNUTMAGIC,
146008b33e8Schristos 	    sizeof(header->header.magic)) == 0)
1471b108b8bSchristos 		return 3;		/* GNU Unix Standard tar archive */
148008b33e8Schristos 
149008b33e8Schristos 	if (strncmp(header->header.magic, TMAGIC,
150008b33e8Schristos 	    sizeof(header->header.magic)) == 0)
1511b108b8bSchristos 		return 2;		/* Unix Standard tar archive */
1521b108b8bSchristos 
1531b108b8bSchristos 	return 1;			/* Old fashioned tar archive */
1541b108b8bSchristos }
1551b108b8bSchristos 
1561b108b8bSchristos 
1571b108b8bSchristos /*
1581b108b8bSchristos  * Quick and dirty octal conversion.
1591b108b8bSchristos  *
1605ccaa8c0Schristos  * Result is -1 if the field is invalid (all blank, or non-octal).
1611b108b8bSchristos  */
162*e15daa8bSchristos file_private int
from_oct(const char * where,size_t digs)163008b33e8Schristos from_oct(const char *where, size_t digs)
1641b108b8bSchristos {
1651b108b8bSchristos 	int	value;
1661b108b8bSchristos 
167008b33e8Schristos 	if (digs == 0)
168008b33e8Schristos 		return -1;
169008b33e8Schristos 
170dc958920Schristos 	while (isspace(CAST(unsigned char, *where))) {	/* Skip spaces */
1711b108b8bSchristos 		where++;
172008b33e8Schristos 		if (digs-- == 0)
1731b108b8bSchristos 			return -1;		/* All blank field */
1741b108b8bSchristos 	}
1751b108b8bSchristos 	value = 0;
1765ccaa8c0Schristos 	while (digs > 0 && isodigit(*where)) {	/* Scan til non-octal */
1771b108b8bSchristos 		value = (value << 3) | (*where++ - '0');
178008b33e8Schristos 		digs--;
1791b108b8bSchristos 	}
1801b108b8bSchristos 
181dc958920Schristos 	if (digs > 0 && *where && !isspace(CAST(unsigned char, *where)))
1825ccaa8c0Schristos 		return -1;			/* Ended on non-(space/NUL) */
1831b108b8bSchristos 
1841b108b8bSchristos 	return value;
1851b108b8bSchristos }
186