/*	$OpenBSD: ntfs_conv.c,v 1.9 2013/11/24 16:02:30 jsing Exp $	*/
/*	$NetBSD: ntfs_conv.c,v 1.1 2002/12/23 17:38:32 jdolecek Exp $	*/

/*-
 * Copyright (c) 2001 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * File name recode stuff.
 *
 * The utf-8 routines were derived from src/lib/libc/locale/utf2.c.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/mount.h>

/* #define NTFS_DEBUG 1 */
#include <ntfs/ntfs.h>
#include <ntfs/ntfs_inode.h>
#include <ntfs/ntfs_subr.h>

/* UTF-8 encoding stuff */

/*
 * Length in bytes of a UTF-8 sequence, indexed by the high nibble of its
 * first byte.  A value of 0 marks a byte that cannot start a sequence
 * decoded by this file: nibbles 0x8-0xB are continuation bytes and nibble
 * 0xF would introduce a sequence longer than three bytes.
 */
static const int _utf_count[16] = {
	1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 2, 2, 3, 0,
};

5326f3deacStedu /*
5426f3deacStedu  * Read one wide character off the string, shift the string pointer
5526f3deacStedu  * and return the character.
5626f3deacStedu  */
5726f3deacStedu wchar
ntfs_utf8_wget(const char ** str)5826f3deacStedu ntfs_utf8_wget(const char **str)
5926f3deacStedu {
6026f3deacStedu 	int c;
6126f3deacStedu 	wchar rune = 0;
6226f3deacStedu 	const char *s = *str;
6326f3deacStedu 
6426f3deacStedu 	c = _utf_count[(s[0] >> 4) & 0xf];
6526f3deacStedu 	if (c == 0) {
6626f3deacStedu 		c = 1;
6726f3deacStedu 		goto encoding_error;
6826f3deacStedu 	}
6926f3deacStedu 
7026f3deacStedu 	switch (c) {
7126f3deacStedu 	case 1:
7226f3deacStedu 		rune = s[0] & 0xff;
7326f3deacStedu 		break;
7426f3deacStedu 	case 2:
7526f3deacStedu 		if ((s[1] & 0xc0) != 0x80)
7626f3deacStedu 			goto encoding_error;
7726f3deacStedu 		rune = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
7826f3deacStedu 		break;
7926f3deacStedu 	case 3:
8026f3deacStedu 		if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
8126f3deacStedu 			goto encoding_error;
82fba144ceStedu 		rune = ((s[0] & 0x1F) << 12) | ((s[1] & 0x3F) << 6) |
83fba144ceStedu 		    (s[2] & 0x3F);
8426f3deacStedu 		break;
8526f3deacStedu 	}
8626f3deacStedu 
8726f3deacStedu encoding_error:
8826f3deacStedu 	*str = *str + c;
8926f3deacStedu 	return rune;
9026f3deacStedu }
9126f3deacStedu 
9226f3deacStedu /*
9326f3deacStedu  * Encode wide character and write it to the string. 'n' specifies
9426f3deacStedu  * how much space there is in the string. Returns number of bytes written
9526f3deacStedu  * to the target string.
9626f3deacStedu  */
9726f3deacStedu int
ntfs_utf8_wput(char * s,size_t n,wchar wc)9826f3deacStedu ntfs_utf8_wput(char *s, size_t n, wchar wc)
9926f3deacStedu {
10026f3deacStedu         if (wc & 0xf800) {
10126f3deacStedu                 if (n < 3) {
10226f3deacStedu                         /* bound check failure */
103*a1ea65c6Sjsing 			DDPRINTF("ntfs_utf8_wput: need 3 bytes\n");
10426f3deacStedu                         return 0;
10526f3deacStedu                 }
10626f3deacStedu 
10726f3deacStedu                 s[0] = 0xE0 | ((wc >> 12) & 0x0F);
10826f3deacStedu                 s[1] = 0x80 | ((wc >> 6) & 0x3F);
10926f3deacStedu                 s[2] = 0x80 | ((wc) & 0x3F);
11026f3deacStedu                 return 3;
11126f3deacStedu         } else {
11226f3deacStedu                 if (wc & 0x0780) {
11326f3deacStedu                         if (n < 2) {
11426f3deacStedu                                 /* bound check failure */
115*a1ea65c6Sjsing 				DDPRINTF("ntfs_utf8_wput: need 2 bytes\n");
11626f3deacStedu                                 return 0;
11726f3deacStedu                         }
11826f3deacStedu 
11926f3deacStedu                         s[0] = 0xC0 | ((wc >> 6) & 0x1F);
12026f3deacStedu                         s[1] = 0x80 | ((wc) & 0x3F);
12126f3deacStedu                         return 2;
12226f3deacStedu                 } else {
12326f3deacStedu                         if (n < 1) {
12426f3deacStedu                                 /* bound check failure */
125*a1ea65c6Sjsing 				DDPRINTF("ntfs_utf8_wput: need 1 byte\n");
12626f3deacStedu                                 return 0;
12726f3deacStedu                         }
12826f3deacStedu 
12926f3deacStedu                         s[0] = wc;
13026f3deacStedu                         return 1;
13126f3deacStedu                 }
13226f3deacStedu         }
13326f3deacStedu }
13426f3deacStedu 
13526f3deacStedu /*
13626f3deacStedu  * Compare two wide characters, returning 1, 0, -1 if the first is
13726f3deacStedu  * bigger, equal or lower than the second.
13826f3deacStedu  */
13926f3deacStedu int
ntfs_utf8_wcmp(wchar wc1,wchar wc2)14026f3deacStedu ntfs_utf8_wcmp(wchar wc1, wchar wc2)
14126f3deacStedu {
14226f3deacStedu 	/* no conversions needed for utf8 */
14326f3deacStedu 
14426f3deacStedu 	if (wc1 == wc2)
14526f3deacStedu 		return 0;
14626f3deacStedu 	else
14726f3deacStedu 		return (int) wc1 - (int) wc2;
14826f3deacStedu }
149