xref: /netbsd-src/sys/fs/cd9660/cd9660_util.c (revision 3ba972454a03bbdb9d7f17984b1dc89b05d88df7)
1 /*	$NetBSD: cd9660_util.c,v 1.16 2024/05/25 06:27:57 tsutsui Exp $	*/
2 
3 /*-
4  * Copyright (c) 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley
8  * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
9  * Support code is derived from software contributed to Berkeley
10  * by Atsushi Murai (amurai@spec.co.jp).
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)cd9660_util.c	8.3 (Berkeley) 12/5/94
37  */
38 
39 #include <sys/cdefs.h>
40 #ifdef _KERNEL
41 __KERNEL_RCSID(0, "$NetBSD: cd9660_util.c,v 1.16 2024/05/25 06:27:57 tsutsui Exp $");
42 #else
43 /* used by macppc_installboot */
44 #if HAVE_NBTOOL_CONFIG_H
45 #include "nbtool_config.h"
46 #endif
47 #endif
48 
49 #include <sys/param.h>
50 #ifdef _KERNEL
51 #include <sys/systm.h>
52 #include <sys/namei.h>
53 #include <sys/resourcevar.h>
54 #include <sys/kernel.h>
55 #include <sys/file.h>
56 #include <sys/stat.h>
57 #include <sys/buf.h>
58 #include <sys/proc.h>
59 #include <sys/mount.h>
60 #include <sys/vnode.h>
61 #include <sys/dirent.h>
62 #else
63 #include <assert.h>
64 #include <dirent.h>
65 #define KASSERT(x)	assert(x)	/* XXX for <fs/unicode.h> */
66 
67 #if !HAVE_NBTOOL_CONFIG_H || HAVE_SYS_ENDIAN_H
68 #include <sys/endian.h>		/* for le16dec(9) etc. in iso.h */
69 #endif
70 #endif
71 
72 #include <fs/cd9660/iso.h>
73 #include <fs/cd9660/cd9660_extern.h>
74 
75 #include <fs/unicode.h>
76 
/*
 * File-local helpers defined later in this file: wget() fetches one
 * character from a caller-supplied name, wput() stores one character
 * into an output name buffer.
 */
77 static uint16_t wget(const u_char **, size_t *, int);
78 static int wput(u_char *, size_t, uint16_t, int);
79 
/*
 * Joliet name handling switch, initially 1.  When nonzero and the
 * Joliet level is greater than 0, isochar() returns full 16-bit
 * characters and wget()/wput() go through the UTF-8 helpers.  When
 * zero, isochar() replaces any character whose first byte is nonzero
 * with '?' and wget()/wput() move raw 8-bit characters.
 */
80 int cd9660_utf8_joliet = 1;
81 
82 /*
83  * Get one character out of an iso filename
84  * Return number of bytes consumed
85  */
86 int
isochar(const u_char * isofn,const u_char * isoend,int joliet_level,uint16_t * c)87 isochar(const u_char *isofn, const u_char *isoend, int joliet_level,
88     uint16_t *c)
89 {
90 
91 	*c = isofn[0];
92 	if (joliet_level == 0 || isofn + 1 == isoend) {
93 		/* (00) and (01) are one byte in Joliet, too */
94 		return 1;
95 	}
96 
97 	if (cd9660_utf8_joliet) {
98 		*c = (*c << 8) + isofn[1];
99 	} else {
100 		/* characters outside ISO-8859-1 subset replaced with '?' */
101 		if (*c != 0)
102 			*c = '?';
103 		else
104 			*c = isofn[1];
105 	}
106 
107 	return 2;
108 }
109 
110 /*
111  * translate and compare a filename
112  * Note: Version number plus ';' may be omitted.
113  */
114 int
isofncmp(const u_char * fn,size_t fnlen,const u_char * isofn,size_t isolen,int joliet_level)115 isofncmp(const u_char *fn, size_t fnlen, const u_char *isofn, size_t isolen,
116     int joliet_level)
117 {
118 	int i, j;
119 	uint16_t fc, ic;
120 	const u_char *isoend = isofn + isolen;
121 
122 #ifdef ISOFNCMPDEBUG
123 	printf("fn = %s, fnlen = %zu, isofn = %s, isolen = %zu\n",
124 	    fn, fnlen, isofn, isolen);
125 #endif
126 
127 	while (fnlen > 0) {
128 		fc = wget(&fn, &fnlen, joliet_level);
129 
130 		if (isofn == isoend)
131 			return fc;
132 		isofn += isochar(isofn, isoend, joliet_level, &ic);
133 		if (ic == ';') {
134 			switch (fc) {
135 			default:
136 				return fc;
137 			case 0:
138 				return 0;
139 			case ';':
140 				break;
141 			}
142 			for (i = 0; fnlen-- != 0; i = i * 10 + *fn++ - '0') {
143 				if (*fn < '0' || *fn > '9') {
144 					return -1;
145 				}
146 			}
147 			for (j = 0; isofn != isoend; j = j * 10 + ic - '0')
148 				isofn += isochar(isofn, isoend,
149 						 joliet_level, &ic);
150 			return i - j;
151 		}
152 		if (ic != fc) {
153 			if (ic >= 'A' && ic <= 'Z') {
154 				if (ic + ('a' - 'A') != fc) {
155 					if (fc >= 'a' && fc <= 'z')
156 						fc -= 'a' - 'A';
157 
158 					return (int)fc - (int)ic;
159 				}
160 			} else
161 				return (int)fc - (int)ic;
162 		}
163 	}
164 	if (isofn != isoend) {
165 		isofn += isochar(isofn, isoend, joliet_level, &ic);
166 		switch (ic) {
167 		default:
168 			return -1;
169 		case '.':
170 			if (isofn != isoend) {
171 				isochar(isofn, isoend, joliet_level, &ic);
172 				if (ic == ';')
173 					return 0;
174 			}
175 			return -1;
176 		case ';':
177 			return 0;
178 		}
179 	}
180 	return 0;
181 }
182 
183 /*
184  * translate a filename
185  */
186 void
isofntrans(const u_char * infn,int infnlen,u_char * outfn,u_short * outfnlen,int original,int casetrans,int assoc,int joliet_level)187 isofntrans(const u_char *infn, int infnlen, u_char *outfn, u_short *outfnlen,
188     int original, int casetrans, int assoc, int joliet_level)
189 {
190 	int fnidx = 0;
191 	const u_char *infnend = infn + infnlen;
192 	uint16_t c;
193 	int sz;
194 
195 	if (assoc) {
196 		*outfn++ = ASSOCCHAR;
197 		fnidx++;
198 	}
199 
200 	for(; infn != infnend; fnidx += sz) {
201 		infn += isochar(infn, infnend, joliet_level, &c);
202 
203 		if (casetrans && joliet_level == 0 && c >= 'A' && c <= 'Z')
204 			c = c + ('a' - 'A');
205 		else if (!original && c == ';') {
206 			if (fnidx > 0 && outfn[-1] == '.')
207 				fnidx--;
208 			break;
209 		}
210 
211 		sz = wput(outfn, ISO_MAXNAMLEN - fnidx, c, joliet_level);
212 		if (sz == 0) {
213 			/* not enough space to write the character */
214 			if (fnidx < ISO_MAXNAMLEN) {
215 				*outfn = '?';
216 				fnidx++;
217 			}
218 			break;
219 		}
220 		outfn += sz;
221 	}
222 	*outfnlen = fnidx;
223 }
224 
225 static uint16_t
wget(const u_char ** str,size_t * sz,int joliet_level)226 wget(const u_char **str, size_t *sz, int joliet_level)
227 {
228 	if (joliet_level > 0 && cd9660_utf8_joliet) {
229 		/* decode UTF-8 sequence */
230 		return wget_utf8((const char **) str, sz);
231 	} else {
232 		/*
233 		 * Raw 8-bit characters without any conversion. For Joliet,
234 		 * this effectively assumes provided file name is using
235 		 * ISO-8859-1 subset.
236 		 */
237 		uint16_t c = *str[0];
238 		(*str)++;
239 		(*sz)--;
240 
241 		return c;
242 	}
243 }
244 
245 static int
wput(u_char * s,size_t n,uint16_t c,int joliet_level)246 wput(u_char *s, size_t n, uint16_t c, int joliet_level)
247 {
248 	if (joliet_level > 0 && cd9660_utf8_joliet) {
249 		/* Store Joliet file name encoded into UTF-8 */
250 		return wput_utf8((char *)s, n, c);
251 	} else {
252 		/*
253 		 * Store raw 8-bit characters without any conversion.
254 		 * For Joliet case, this filters the Unicode characters
255 		 * to ISO-8859-1 subset.
256 		 */
257 		*s = (u_char)c;
258 		return 1;
259 	}
260 }
261