xref: /netbsd-src/sys/fs/cd9660/cd9660_util.c (revision 3ba972454a03bbdb9d7f17984b1dc89b05d88df7)
1*3ba97245Stsutsui /*	$NetBSD: cd9660_util.c,v 1.16 2024/05/25 06:27:57 tsutsui Exp $	*/
23a8872deSjdolecek 
33a8872deSjdolecek /*-
43a8872deSjdolecek  * Copyright (c) 1994
53a8872deSjdolecek  *	The Regents of the University of California.  All rights reserved.
63a8872deSjdolecek  *
73a8872deSjdolecek  * This code is derived from software contributed to Berkeley
83a8872deSjdolecek  * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
93a8872deSjdolecek  * Support code is derived from software contributed to Berkeley
103a8872deSjdolecek  * by Atsushi Murai (amurai@spec.co.jp).
113a8872deSjdolecek  *
123a8872deSjdolecek  * Redistribution and use in source and binary forms, with or without
133a8872deSjdolecek  * modification, are permitted provided that the following conditions
143a8872deSjdolecek  * are met:
153a8872deSjdolecek  * 1. Redistributions of source code must retain the above copyright
163a8872deSjdolecek  *    notice, this list of conditions and the following disclaimer.
173a8872deSjdolecek  * 2. Redistributions in binary form must reproduce the above copyright
183a8872deSjdolecek  *    notice, this list of conditions and the following disclaimer in the
193a8872deSjdolecek  *    documentation and/or other materials provided with the distribution.
20aad01611Sagc  * 3. Neither the name of the University nor the names of its contributors
213a8872deSjdolecek  *    may be used to endorse or promote products derived from this software
223a8872deSjdolecek  *    without specific prior written permission.
233a8872deSjdolecek  *
243a8872deSjdolecek  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
253a8872deSjdolecek  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
263a8872deSjdolecek  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
273a8872deSjdolecek  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
283a8872deSjdolecek  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
293a8872deSjdolecek  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
303a8872deSjdolecek  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
313a8872deSjdolecek  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
323a8872deSjdolecek  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
333a8872deSjdolecek  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
343a8872deSjdolecek  * SUCH DAMAGE.
353a8872deSjdolecek  *
363a8872deSjdolecek  *	@(#)cd9660_util.c	8.3 (Berkeley) 12/5/94
373a8872deSjdolecek  */
383a8872deSjdolecek 
393a8872deSjdolecek #include <sys/cdefs.h>
40635b4dc0Schristos #ifdef _KERNEL
41*3ba97245Stsutsui __KERNEL_RCSID(0, "$NetBSD: cd9660_util.c,v 1.16 2024/05/25 06:27:57 tsutsui Exp $");
42635b4dc0Schristos #else
43635b4dc0Schristos /* used by macppc_installboot */
44635b4dc0Schristos #if HAVE_NBTOOL_CONFIG_H
45635b4dc0Schristos #include "nbtool_config.h"
46635b4dc0Schristos #endif
47635b4dc0Schristos #endif
483a8872deSjdolecek 
493a8872deSjdolecek #include <sys/param.h>
50635b4dc0Schristos #ifdef _KERNEL
513a8872deSjdolecek #include <sys/systm.h>
523a8872deSjdolecek #include <sys/namei.h>
533a8872deSjdolecek #include <sys/resourcevar.h>
543a8872deSjdolecek #include <sys/kernel.h>
553a8872deSjdolecek #include <sys/file.h>
563a8872deSjdolecek #include <sys/stat.h>
573a8872deSjdolecek #include <sys/buf.h>
583a8872deSjdolecek #include <sys/proc.h>
593a8872deSjdolecek #include <sys/mount.h>
603a8872deSjdolecek #include <sys/vnode.h>
613a8872deSjdolecek #include <sys/dirent.h>
62635b4dc0Schristos #else
63635b4dc0Schristos #include <assert.h>
64635b4dc0Schristos #include <dirent.h>
65635b4dc0Schristos #define KASSERT(x)	assert(x)	/* XXX for <fs/unicode.h> */
66*3ba97245Stsutsui 
67*3ba97245Stsutsui #if !HAVE_NBTOOL_CONFIG_H || HAVE_SYS_ENDIAN_H
68*3ba97245Stsutsui #include <sys/endian.h>		/* for le16dec(9) etc. in iso.h */
69*3ba97245Stsutsui #endif
70635b4dc0Schristos #endif
713a8872deSjdolecek 
723a8872deSjdolecek #include <fs/cd9660/iso.h>
733a8872deSjdolecek #include <fs/cd9660/cd9660_extern.h>
743a8872deSjdolecek 
753b4a395dSjdolecek #include <fs/unicode.h>
763b4a395dSjdolecek 
77635b4dc0Schristos static uint16_t wget(const u_char **, size_t *, int);
78635b4dc0Schristos static int wput(u_char *, size_t, uint16_t, int);
793b4a395dSjdolecek 
803b4a395dSjdolecek int cd9660_utf8_joliet = 1;
813b4a395dSjdolecek 
823a8872deSjdolecek /*
833a8872deSjdolecek  * Get one character out of an iso filename
843a8872deSjdolecek  * Return number of bytes consumed
853a8872deSjdolecek  */
863a8872deSjdolecek int
isochar(const u_char * isofn,const u_char * isoend,int joliet_level,uint16_t * c)878014191aSmatt isochar(const u_char *isofn, const u_char *isoend, int joliet_level,
88635b4dc0Schristos     uint16_t *c)
893a8872deSjdolecek {
90635b4dc0Schristos 
913b4a395dSjdolecek 	*c = isofn[0];
923b4a395dSjdolecek 	if (joliet_level == 0 || isofn + 1 == isoend) {
933a8872deSjdolecek 		/* (00) and (01) are one byte in Joliet, too */
943a8872deSjdolecek 		return 1;
953a8872deSjdolecek 	}
963b4a395dSjdolecek 
973b4a395dSjdolecek 	if (cd9660_utf8_joliet) {
983b4a395dSjdolecek 		*c = (*c << 8) + isofn[1];
993b4a395dSjdolecek 	} else {
1003b4a395dSjdolecek 		/* characters outside ISO-8859-1 subset replaced with '?' */
1013b4a395dSjdolecek 		if (*c != 0)
1023b4a395dSjdolecek 			*c = '?';
1033b4a395dSjdolecek 		else
1043b4a395dSjdolecek 			*c = isofn[1];
1053b4a395dSjdolecek 	}
1063b4a395dSjdolecek 
1073a8872deSjdolecek 	return 2;
1083a8872deSjdolecek }
1093a8872deSjdolecek 
1103a8872deSjdolecek /*
1113a8872deSjdolecek  * translate and compare a filename
1123a8872deSjdolecek  * Note: Version number plus ';' may be omitted.
1133a8872deSjdolecek  */
1143a8872deSjdolecek int
isofncmp(const u_char * fn,size_t fnlen,const u_char * isofn,size_t isolen,int joliet_level)1158014191aSmatt isofncmp(const u_char *fn, size_t fnlen, const u_char *isofn, size_t isolen,
1168014191aSmatt     int joliet_level)
1173a8872deSjdolecek {
1183a8872deSjdolecek 	int i, j;
119635b4dc0Schristos 	uint16_t fc, ic;
1203a8872deSjdolecek 	const u_char *isoend = isofn + isolen;
1213a8872deSjdolecek 
122635b4dc0Schristos #ifdef ISOFNCMPDEBUG
123635b4dc0Schristos 	printf("fn = %s, fnlen = %zu, isofn = %s, isolen = %zu\n",
124635b4dc0Schristos 	    fn, fnlen, isofn, isolen);
125635b4dc0Schristos #endif
126635b4dc0Schristos 
12752c7db38Sjdolecek 	while (fnlen > 0) {
12852c7db38Sjdolecek 		fc = wget(&fn, &fnlen, joliet_level);
1293b4a395dSjdolecek 
1303a8872deSjdolecek 		if (isofn == isoend)
1313b4a395dSjdolecek 			return fc;
1323b4a395dSjdolecek 		isofn += isochar(isofn, isoend, joliet_level, &ic);
1333b4a395dSjdolecek 		if (ic == ';') {
1343b4a395dSjdolecek 			switch (fc) {
1353a8872deSjdolecek 			default:
1363b4a395dSjdolecek 				return fc;
1373a8872deSjdolecek 			case 0:
1383a8872deSjdolecek 				return 0;
1393a8872deSjdolecek 			case ';':
1403a8872deSjdolecek 				break;
1413a8872deSjdolecek 			}
14242866523Schristos 			for (i = 0; fnlen-- != 0; i = i * 10 + *fn++ - '0') {
1433a8872deSjdolecek 				if (*fn < '0' || *fn > '9') {
1443a8872deSjdolecek 					return -1;
1453a8872deSjdolecek 				}
1463a8872deSjdolecek 			}
1473b4a395dSjdolecek 			for (j = 0; isofn != isoend; j = j * 10 + ic - '0')
1483a8872deSjdolecek 				isofn += isochar(isofn, isoend,
1493b4a395dSjdolecek 						 joliet_level, &ic);
1503a8872deSjdolecek 			return i - j;
1513a8872deSjdolecek 		}
1523b4a395dSjdolecek 		if (ic != fc) {
1533b4a395dSjdolecek 			if (ic >= 'A' && ic <= 'Z') {
1543b4a395dSjdolecek 				if (ic + ('a' - 'A') != fc) {
1553b4a395dSjdolecek 					if (fc >= 'a' && fc <= 'z')
1563b4a395dSjdolecek 						fc -= 'a' - 'A';
1573b4a395dSjdolecek 
1583b4a395dSjdolecek 					return (int)fc - (int)ic;
1593a8872deSjdolecek 				}
1603a8872deSjdolecek 			} else
1613b4a395dSjdolecek 				return (int)fc - (int)ic;
1623a8872deSjdolecek 		}
1633a8872deSjdolecek 	}
1643a8872deSjdolecek 	if (isofn != isoend) {
1653b4a395dSjdolecek 		isofn += isochar(isofn, isoend, joliet_level, &ic);
1663b4a395dSjdolecek 		switch (ic) {
1673a8872deSjdolecek 		default:
1683a8872deSjdolecek 			return -1;
1693a8872deSjdolecek 		case '.':
1703a8872deSjdolecek 			if (isofn != isoend) {
1713b4a395dSjdolecek 				isochar(isofn, isoend, joliet_level, &ic);
1723b4a395dSjdolecek 				if (ic == ';')
1733a8872deSjdolecek 					return 0;
1743a8872deSjdolecek 			}
1753a8872deSjdolecek 			return -1;
1763a8872deSjdolecek 		case ';':
1773a8872deSjdolecek 			return 0;
1783a8872deSjdolecek 		}
1793a8872deSjdolecek 	}
1803a8872deSjdolecek 	return 0;
1813a8872deSjdolecek }
1823a8872deSjdolecek 
1833a8872deSjdolecek /*
1843a8872deSjdolecek  * translate a filename
1853a8872deSjdolecek  */
1863a8872deSjdolecek void
isofntrans(const u_char * infn,int infnlen,u_char * outfn,u_short * outfnlen,int original,int casetrans,int assoc,int joliet_level)1878014191aSmatt isofntrans(const u_char *infn, int infnlen, u_char *outfn, u_short *outfnlen,
1888014191aSmatt     int original, int casetrans, int assoc, int joliet_level)
1893a8872deSjdolecek {
1903a8872deSjdolecek 	int fnidx = 0;
1918014191aSmatt 	const u_char *infnend = infn + infnlen;
192635b4dc0Schristos 	uint16_t c;
1933b4a395dSjdolecek 	int sz;
1943a8872deSjdolecek 
1953a8872deSjdolecek 	if (assoc) {
1963a8872deSjdolecek 		*outfn++ = ASSOCCHAR;
1973a8872deSjdolecek 		fnidx++;
1983a8872deSjdolecek 	}
1993a8872deSjdolecek 
2003b4a395dSjdolecek 	for(; infn != infnend; fnidx += sz) {
2013a8872deSjdolecek 		infn += isochar(infn, infnend, joliet_level, &c);
2023a8872deSjdolecek 
2033a8872deSjdolecek 		if (casetrans && joliet_level == 0 && c >= 'A' && c <= 'Z')
2043b4a395dSjdolecek 			c = c + ('a' - 'A');
2053a8872deSjdolecek 		else if (!original && c == ';') {
2063a8872deSjdolecek 			if (fnidx > 0 && outfn[-1] == '.')
2073a8872deSjdolecek 				fnidx--;
2083a8872deSjdolecek 			break;
2093b4a395dSjdolecek 		}
2103b4a395dSjdolecek 
2114fa49ab8Schristos 		sz = wput(outfn, ISO_MAXNAMLEN - fnidx, c, joliet_level);
2123b4a395dSjdolecek 		if (sz == 0) {
2133b4a395dSjdolecek 			/* not enough space to write the character */
2144fa49ab8Schristos 			if (fnidx < ISO_MAXNAMLEN) {
2153b4a395dSjdolecek 				*outfn = '?';
2163b4a395dSjdolecek 				fnidx++;
2173b4a395dSjdolecek 			}
2183b4a395dSjdolecek 			break;
2193b4a395dSjdolecek 		}
2203b4a395dSjdolecek 		outfn += sz;
2213a8872deSjdolecek 	}
2223a8872deSjdolecek 	*outfnlen = fnidx;
2233a8872deSjdolecek }
2243b4a395dSjdolecek 
225635b4dc0Schristos static uint16_t
wget(const u_char ** str,size_t * sz,int joliet_level)22652c7db38Sjdolecek wget(const u_char **str, size_t *sz, int joliet_level)
2273b4a395dSjdolecek {
2283b4a395dSjdolecek 	if (joliet_level > 0 && cd9660_utf8_joliet) {
2293b4a395dSjdolecek 		/* decode UTF-8 sequence */
23052c7db38Sjdolecek 		return wget_utf8((const char **) str, sz);
2313b4a395dSjdolecek 	} else {
2323b4a395dSjdolecek 		/*
2333b4a395dSjdolecek 		 * Raw 8-bit characters without any conversion. For Joliet,
2343b4a395dSjdolecek 		 * this effectively assumes provided file name is using
2353b4a395dSjdolecek 		 * ISO-8859-1 subset.
2363b4a395dSjdolecek 		 */
237635b4dc0Schristos 		uint16_t c = *str[0];
2383b4a395dSjdolecek 		(*str)++;
2397edaf879Senami 		(*sz)--;
2403b4a395dSjdolecek 
2413b4a395dSjdolecek 		return c;
2423b4a395dSjdolecek 	}
2433b4a395dSjdolecek }
2443b4a395dSjdolecek 
2453b4a395dSjdolecek static int
wput(u_char * s,size_t n,uint16_t c,int joliet_level)246635b4dc0Schristos wput(u_char *s, size_t n, uint16_t c, int joliet_level)
2473b4a395dSjdolecek {
2483b4a395dSjdolecek 	if (joliet_level > 0 && cd9660_utf8_joliet) {
2493b4a395dSjdolecek 		/* Store Joliet file name encoded into UTF-8 */
2503b4a395dSjdolecek 		return wput_utf8((char *)s, n, c);
2513b4a395dSjdolecek 	} else {
2523b4a395dSjdolecek 		/*
2533b4a395dSjdolecek 		 * Store raw 8-bit characters without any conversion.
2543b4a395dSjdolecek 		 * For Joliet case, this filters the Unicode characters
2553b4a395dSjdolecek 		 * to ISO-8859-1 subset.
2563b4a395dSjdolecek 		 */
2573b4a395dSjdolecek 		*s = (u_char)c;
2583b4a395dSjdolecek 		return 1;
2593b4a395dSjdolecek 	}
2603b4a395dSjdolecek }
261