1 /* $OpenBSD: ntfs_conv.c,v 1.7 2009/08/13 16:00:53 jasper Exp $ */ 2 /* $NetBSD: ntfs_conv.c,v 1.1 2002/12/23 17:38:32 jdolecek Exp $ */ 3 4 /*- 5 * Copyright (c) 2001 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 * POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /* 31 * File name recode stuff. 32 * 33 * The utf-8 routines were derived from src/lib/libc/locale/utf2.c. 34 */ 35 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/namei.h> 39 #include <sys/proc.h> 40 #include <sys/kernel.h> 41 #include <sys/vnode.h> 42 #include <sys/mount.h> 43 #include <sys/buf.h> 44 #include <sys/file.h> 45 #include <sys/malloc.h> 46 47 #include <miscfs/specfs/specdev.h> 48 49 /* #define NTFS_DEBUG 1 */ 50 #include <ntfs/ntfs.h> 51 #include <ntfs/ntfsmount.h> 52 #include <ntfs/ntfs_inode.h> 53 #include <ntfs/ntfs_vfsops.h> 54 #include <ntfs/ntfs_subr.h> 55 #include <ntfs/ntfs_compr.h> 56 #include <ntfs/ntfs_ihash.h> 57 58 /* UTF-8 encoding stuff */ 59 60 static const int _utf_count[16] = { 61 1, 1, 1, 1, 1, 1, 1, 1, 62 0, 0, 0, 0, 2, 2, 3, 0, 63 }; 64 65 /* 66 * Read one wide character off the string, shift the string pointer 67 * and return the character. 68 */ 69 wchar 70 ntfs_utf8_wget(const char **str) 71 { 72 int c; 73 wchar rune = 0; 74 const char *s = *str; 75 76 c = _utf_count[(s[0] >> 4) & 0xf]; 77 if (c == 0) { 78 c = 1; 79 goto encoding_error; 80 } 81 82 switch (c) { 83 case 1: 84 rune = s[0] & 0xff; 85 break; 86 case 2: 87 if ((s[1] & 0xc0) != 0x80) 88 goto encoding_error; 89 rune = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F); 90 break; 91 case 3: 92 if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80) 93 goto encoding_error; 94 rune = ((s[0] & 0x1F) << 12) | ((s[1] & 0x3F) << 6) 95 | (s[2] & 0x3F); 96 break; 97 } 98 99 encoding_error: 100 *str = *str + c; 101 return rune; 102 } 103 104 /* 105 * Encode wide character and write it to the string. 'n' specifies 106 * how much space there is in the string. Returns number of bytes written 107 * to the target string. 108 */ 109 int 110 ntfs_utf8_wput(char *s, size_t n, wchar wc) 111 { 112 if (wc & 0xf800) { 113 if (n < 3) { 114 /* bound check failure */ 115 ddprintf(("ntfs_utf8_wput: need 3 bytes\n")); 116 return 0; 117 } 118 119 s[0] = 0xE0 | ((wc >> 12) & 0x0F); 120 s[1] = 0x80 | ((wc >> 6) & 0x3F); 121 s[2] = 0x80 | ((wc) & 0x3F); 122 return 3; 123 } else { 124 if (wc & 0x0780) { 125 if (n < 2) { 126 /* bound check failure */ 127 ddprintf(("ntfs_utf8_wput: need 2 bytes\n")); 128 return 0; 129 } 130 131 s[0] = 0xC0 | ((wc >> 6) & 0x1F); 132 s[1] = 0x80 | ((wc) & 0x3F); 133 return 2; 134 } else { 135 if (n < 1) { 136 /* bound check failure */ 137 ddprintf(("ntfs_utf8_wput: need 1 byte\n")); 138 return 0; 139 } 140 141 s[0] = wc; 142 return 1; 143 } 144 } 145 } 146 147 /* 148 * Compare two wide characters, returning 1, 0, -1 if the first is 149 * bigger, equal or lower than the second. 150 */ 151 int 152 ntfs_utf8_wcmp(wchar wc1, wchar wc2) 153 { 154 /* no conversions needed for utf8 */ 155 156 if (wc1 == wc2) 157 return 0; 158 else 159 return (int) wc1 - (int) wc2; 160 } 161