1*a1ea65c6Sjsing /* $OpenBSD: ntfs_conv.c,v 1.9 2013/11/24 16:02:30 jsing Exp $ */
226f3deacStedu /* $NetBSD: ntfs_conv.c,v 1.1 2002/12/23 17:38:32 jdolecek Exp $ */
326f3deacStedu
426f3deacStedu /*-
526f3deacStedu * Copyright (c) 2001 The NetBSD Foundation, Inc.
626f3deacStedu * All rights reserved.
726f3deacStedu *
826f3deacStedu * Redistribution and use in source and binary forms, with or without
926f3deacStedu * modification, are permitted provided that the following conditions
1026f3deacStedu * are met:
1126f3deacStedu * 1. Redistributions of source code must retain the above copyright
1226f3deacStedu * notice, this list of conditions and the following disclaimer.
1326f3deacStedu * 2. Redistributions in binary form must reproduce the above copyright
1426f3deacStedu * notice, this list of conditions and the following disclaimer in the
1526f3deacStedu * documentation and/or other materials provided with the distribution.
1626f3deacStedu *
1726f3deacStedu * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
1826f3deacStedu * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
1926f3deacStedu * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
2026f3deacStedu * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
2126f3deacStedu * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2226f3deacStedu * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2326f3deacStedu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2426f3deacStedu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2526f3deacStedu * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2626f3deacStedu * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2726f3deacStedu * POSSIBILITY OF SUCH DAMAGE.
2826f3deacStedu */
2926f3deacStedu
/*
 * File name recoding routines.
 *
 * The utf-8 routines were derived from src/lib/libc/locale/utf2.c.
 */
3526f3deacStedu
3626f3deacStedu #include <sys/param.h>
3726f3deacStedu #include <sys/systm.h>
3826f3deacStedu #include <sys/namei.h>
3926f3deacStedu #include <sys/mount.h>
4026f3deacStedu
4126f3deacStedu /* #define NTFS_DEBUG 1 */
4226f3deacStedu #include <ntfs/ntfs.h>
4326f3deacStedu #include <ntfs/ntfs_inode.h>
4426f3deacStedu #include <ntfs/ntfs_subr.h>
4526f3deacStedu
/* UTF-8 encoding and decoding routines */
4726f3deacStedu
/*
 * Length in bytes of a UTF-8 sequence, indexed by the high nibble of its
 * first byte.  A value of 0 marks a byte that cannot start a sequence the
 * decoder in this file handles.
 */
static const int _utf_count[16] = {
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0x0-0x7: single byte */
	0, 0, 0, 0,		/* 0x8-0xB: continuation byte, not a lead */
	2, 2,			/* 0xC-0xD: two-byte sequence */
	3,			/* 0xE:     three-byte sequence */
	0,			/* 0xF:     longer sequence, not handled */
};
5226f3deacStedu
5326f3deacStedu /*
5426f3deacStedu * Read one wide character off the string, shift the string pointer
5526f3deacStedu * and return the character.
5626f3deacStedu */
5726f3deacStedu wchar
ntfs_utf8_wget(const char ** str)5826f3deacStedu ntfs_utf8_wget(const char **str)
5926f3deacStedu {
6026f3deacStedu int c;
6126f3deacStedu wchar rune = 0;
6226f3deacStedu const char *s = *str;
6326f3deacStedu
6426f3deacStedu c = _utf_count[(s[0] >> 4) & 0xf];
6526f3deacStedu if (c == 0) {
6626f3deacStedu c = 1;
6726f3deacStedu goto encoding_error;
6826f3deacStedu }
6926f3deacStedu
7026f3deacStedu switch (c) {
7126f3deacStedu case 1:
7226f3deacStedu rune = s[0] & 0xff;
7326f3deacStedu break;
7426f3deacStedu case 2:
7526f3deacStedu if ((s[1] & 0xc0) != 0x80)
7626f3deacStedu goto encoding_error;
7726f3deacStedu rune = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
7826f3deacStedu break;
7926f3deacStedu case 3:
8026f3deacStedu if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
8126f3deacStedu goto encoding_error;
82fba144ceStedu rune = ((s[0] & 0x1F) << 12) | ((s[1] & 0x3F) << 6) |
83fba144ceStedu (s[2] & 0x3F);
8426f3deacStedu break;
8526f3deacStedu }
8626f3deacStedu
8726f3deacStedu encoding_error:
8826f3deacStedu *str = *str + c;
8926f3deacStedu return rune;
9026f3deacStedu }
9126f3deacStedu
9226f3deacStedu /*
9326f3deacStedu * Encode wide character and write it to the string. 'n' specifies
9426f3deacStedu * how much space there is in the string. Returns number of bytes written
9526f3deacStedu * to the target string.
9626f3deacStedu */
9726f3deacStedu int
ntfs_utf8_wput(char * s,size_t n,wchar wc)9826f3deacStedu ntfs_utf8_wput(char *s, size_t n, wchar wc)
9926f3deacStedu {
10026f3deacStedu if (wc & 0xf800) {
10126f3deacStedu if (n < 3) {
10226f3deacStedu /* bound check failure */
103*a1ea65c6Sjsing DDPRINTF("ntfs_utf8_wput: need 3 bytes\n");
10426f3deacStedu return 0;
10526f3deacStedu }
10626f3deacStedu
10726f3deacStedu s[0] = 0xE0 | ((wc >> 12) & 0x0F);
10826f3deacStedu s[1] = 0x80 | ((wc >> 6) & 0x3F);
10926f3deacStedu s[2] = 0x80 | ((wc) & 0x3F);
11026f3deacStedu return 3;
11126f3deacStedu } else {
11226f3deacStedu if (wc & 0x0780) {
11326f3deacStedu if (n < 2) {
11426f3deacStedu /* bound check failure */
115*a1ea65c6Sjsing DDPRINTF("ntfs_utf8_wput: need 2 bytes\n");
11626f3deacStedu return 0;
11726f3deacStedu }
11826f3deacStedu
11926f3deacStedu s[0] = 0xC0 | ((wc >> 6) & 0x1F);
12026f3deacStedu s[1] = 0x80 | ((wc) & 0x3F);
12126f3deacStedu return 2;
12226f3deacStedu } else {
12326f3deacStedu if (n < 1) {
12426f3deacStedu /* bound check failure */
125*a1ea65c6Sjsing DDPRINTF("ntfs_utf8_wput: need 1 byte\n");
12626f3deacStedu return 0;
12726f3deacStedu }
12826f3deacStedu
12926f3deacStedu s[0] = wc;
13026f3deacStedu return 1;
13126f3deacStedu }
13226f3deacStedu }
13326f3deacStedu }
13426f3deacStedu
13526f3deacStedu /*
13626f3deacStedu * Compare two wide characters, returning 1, 0, -1 if the first is
13726f3deacStedu * bigger, equal or lower than the second.
13826f3deacStedu */
13926f3deacStedu int
ntfs_utf8_wcmp(wchar wc1,wchar wc2)14026f3deacStedu ntfs_utf8_wcmp(wchar wc1, wchar wc2)
14126f3deacStedu {
14226f3deacStedu /* no conversions needed for utf8 */
14326f3deacStedu
14426f3deacStedu if (wc1 == wc2)
14526f3deacStedu return 0;
14626f3deacStedu else
14726f3deacStedu return (int) wc1 - (int) wc2;
14826f3deacStedu }
149