1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #ifdef _KERNEL
27 #include <sys/types.h>
28 #include <sys/sunddi.h>
29 #else
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <strings.h>
34 #endif
35 #include <sys/u8_textprep.h>
36 #include <smbsrv/alloc.h>
37 #include <sys/errno.h>
38 #include <smbsrv/string.h>
39 #include <smbsrv/cp_usascii.h>
40 #include <smbsrv/cp_unicode.h>
41
42 #define UNICODE_N_ENTRIES (sizeof (a_unicode) / sizeof (a_unicode[0]))
43
44 /*
45 * Global pointer to the current codepage: defaults to ASCII,
46 * and a flag indicating whether the codepage is Unicode or ASCII.
47 */
48 static smb_codepage_t *current_codepage = usascii_codepage;
49 static boolean_t is_unicode = B_FALSE;
50
51 static smb_codepage_t *smb_unicode_init(void);
52
/*
 * strsubst
 *
 * Replace, in place, every occurrence of orgchar in the null-terminated
 * string s with newchar.
 *
 * Returns s, or NULL if s is NULL.
 */
char *
strsubst(char *s, char orgchar, char newchar)
{
	char *cur;

	if (s == NULL)
		return (NULL);

	for (cur = s; *cur != '\0'; cur++) {
		if (*cur == orgchar)
			*cur = newchar;
	}

	return (s);
}
75
/*
 * strcanon
 *
 * Collapse each run of a repeated character into a single character,
 * for every character that appears in class.  Characters that are not
 * in class are copied unchanged, repeated or not.  For example:
 *
 *	char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt");
 *	strcanon(buf, "/\\");
 *
 * leaves buf containing:
 *
 *	/d1/d2/d3\d4\f1.txt
 *
 * Only runs of the same character are collapsed; adjacent but different
 * members of class (e.g. "/\") are both kept.
 *
 * The string is rewritten in place and buf is returned.
 */
char *
strcanon(char *buf, const char *class)
{
	char *src = buf;
	char *dst = buf;
	char ch;

	while (*src != '\0') {
		ch = *src;
		*dst++ = ch;
		++src;

		/* For a class member, skip the remainder of its run. */
		if (strchr(class, ch) != NULL) {
			while (*src == ch)
				++src;
		}
	}

	*dst = '\0';
	return (buf);
}
112
113 void
smb_codepage_init(void)114 smb_codepage_init(void)
115 {
116 smb_codepage_t *cp;
117
118 if (is_unicode)
119 return;
120
121 if ((cp = smb_unicode_init()) != NULL) {
122 current_codepage = cp;
123 is_unicode = B_TRUE;
124 } else {
125 current_codepage = usascii_codepage;
126 is_unicode = B_FALSE;
127 }
128 }
129
130 /*
131 * Determine whether or not a character is an uppercase character.
132 * This function operates on the current codepage table. Returns
133 * non-zero if the character is uppercase. Otherwise returns zero.
134 */
135 int
smb_isupper(int c)136 smb_isupper(int c)
137 {
138 uint16_t mask = is_unicode ? 0xffff : 0xff;
139
140 return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
141 }
142
143 /*
144 * Determine whether or not a character is an lowercase character.
145 * This function operates on the current codepage table. Returns
146 * non-zero if the character is lowercase. Otherwise returns zero.
147 */
148 int
smb_islower(int c)149 smb_islower(int c)
150 {
151 uint16_t mask = is_unicode ? 0xffff : 0xff;
152
153 return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
154 }
155
156 /*
157 * Convert individual characters to their uppercase equivalent value.
158 * If the specified character is lowercase, the uppercase value will
159 * be returned. Otherwise the original value will be returned.
160 */
161 int
smb_toupper(int c)162 smb_toupper(int c)
163 {
164 uint16_t mask = is_unicode ? 0xffff : 0xff;
165
166 return (current_codepage[c & mask].upper);
167 }
168
169 /*
170 * Convert individual characters to their lowercase equivalent value.
171 * If the specified character is uppercase, the lowercase value will
172 * be returned. Otherwise the original value will be returned.
173 */
174 int
smb_tolower(int c)175 smb_tolower(int c)
176 {
177 uint16_t mask = is_unicode ? 0xffff : 0xff;
178
179 return (current_codepage[c & mask].lower);
180 }
181
182 /*
183 * Convert a string to uppercase using the appropriate codepage. The
184 * string is converted in place. A pointer to the string is returned.
185 * There is an assumption here that uppercase and lowercase values
186 * always result encode to the same length.
187 */
188 char *
smb_strupr(char * s)189 smb_strupr(char *s)
190 {
191 smb_wchar_t c;
192 char *p = s;
193
194 while (*p) {
195 if (smb_isascii(*p)) {
196 *p = smb_toupper(*p);
197 p++;
198 } else {
199 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
200 return (0);
201
202 if (c == 0)
203 break;
204
205 c = smb_toupper(c);
206 p += smb_wctomb(p, c);
207 }
208 }
209
210 return (s);
211 }
212
213 /*
214 * Convert a string to lowercase using the appropriate codepage. The
215 * string is converted in place. A pointer to the string is returned.
216 * There is an assumption here that uppercase and lowercase values
217 * always result encode to the same length.
218 */
219 char *
smb_strlwr(char * s)220 smb_strlwr(char *s)
221 {
222 smb_wchar_t c;
223 char *p = s;
224
225 while (*p) {
226 if (smb_isascii(*p)) {
227 *p = smb_tolower(*p);
228 p++;
229 } else {
230 if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
231 return (0);
232
233 if (c == 0)
234 break;
235
236 c = smb_tolower(c);
237 p += smb_wctomb(p, c);
238 }
239 }
240
241 return (s);
242 }
243
244 /*
245 * Returns 1 if string contains NO uppercase chars 0 otherwise. However,
246 * -1 is returned if "s" is not a valid multi-byte string.
247 */
248 int
smb_isstrlwr(const char * s)249 smb_isstrlwr(const char *s)
250 {
251 smb_wchar_t c;
252 int n;
253 const char *p = s;
254
255 while (*p) {
256 if (smb_isascii(*p) && smb_isupper(*p))
257 return (0);
258 else {
259 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
260 return (-1);
261
262 if (c == 0)
263 break;
264
265 if (smb_isupper(c))
266 return (0);
267
268 p += n;
269 }
270 }
271
272 return (1);
273 }
274
275 /*
276 * Returns 1 if string contains NO lowercase chars 0 otherwise. However,
277 * -1 is returned if "s" is not a valid multi-byte string.
278 */
279 int
smb_isstrupr(const char * s)280 smb_isstrupr(const char *s)
281 {
282 smb_wchar_t c;
283 int n;
284 const char *p = s;
285
286 while (*p) {
287 if (smb_isascii(*p) && smb_islower(*p))
288 return (0);
289 else {
290 if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
291 return (-1);
292
293 if (c == 0)
294 break;
295
296 if (smb_islower(c))
297 return (0);
298
299 p += n;
300 }
301 }
302
303 return (1);
304 }
305
306 /*
307 * Compare the null-terminated strings s1 and s2 and return an integer
308 * greater than, equal to or less than 0 dependent on whether s1 is
309 * lexicographically greater than, equal to or less than s2 after
310 * translation of each character to lowercase. The original strings
311 * are not modified.
312 *
313 * If n is non-zero, at most n bytes are compared. Otherwise, the strings
314 * are compared until a null terminator is encountered.
315 *
316 * Out: 0 if strings are equal
317 * < 0 if first string < second string
318 * > 0 if first string > second string
319 */
320 int
smb_strcasecmp(const char * s1,const char * s2,size_t n)321 smb_strcasecmp(const char *s1, const char *s2, size_t n)
322 {
323 int err = 0;
324 int rc;
325
326 rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err);
327 if (err != 0)
328 return (-1);
329 return (rc);
330 }
331
332 /*
333 * First build a codepage based on cp_unicode.h. Then build the unicode
334 * codepage from this interim codepage by copying the entries over while
335 * fixing them and filling in the gaps.
336 */
337 static smb_codepage_t *
smb_unicode_init(void)338 smb_unicode_init(void)
339 {
340 smb_codepage_t *unicode;
341 uint32_t a = 0;
342 uint32_t b = 0;
343
344 unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16);
345 if (unicode == NULL)
346 return (NULL);
347
348 while (b != 0xffff) {
349 /*
350 * If there is a gap in the standard,
351 * fill in the gap with no-case entries.
352 */
353 if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) {
354 unicode[b].ctype = CODEPAGE_ISNONE;
355 unicode[b].upper = (smb_wchar_t)b;
356 unicode[b].lower = (smb_wchar_t)b;
357 b++;
358 continue;
359 }
360
361 /*
362 * Copy the entry and fixup as required.
363 */
364 switch (a_unicode[a].ctype) {
365 case CODEPAGE_ISNONE:
366 /*
367 * Replace 0xffff in upper/lower fields with its val.
368 */
369 unicode[b].ctype = CODEPAGE_ISNONE;
370 unicode[b].upper = (smb_wchar_t)b;
371 unicode[b].lower = (smb_wchar_t)b;
372 break;
373 case CODEPAGE_ISUPPER:
374 /*
375 * Some characters may have case yet not have
376 * case conversion. Treat them as no-case.
377 */
378 if (a_unicode[a].lower == 0xffff) {
379 unicode[b].ctype = CODEPAGE_ISNONE;
380 unicode[b].upper = (smb_wchar_t)b;
381 unicode[b].lower = (smb_wchar_t)b;
382 } else {
383 unicode[b].ctype = CODEPAGE_ISUPPER;
384 unicode[b].upper = (smb_wchar_t)b;
385 unicode[b].lower = a_unicode[a].lower;
386 }
387 break;
388 case CODEPAGE_ISLOWER:
389 /*
390 * Some characters may have case yet not have
391 * case conversion. Treat them as no-case.
392 */
393 if (a_unicode[a].upper == 0xffff) {
394 unicode[b].ctype = CODEPAGE_ISNONE;
395 unicode[b].upper = (smb_wchar_t)b;
396 unicode[b].lower = (smb_wchar_t)b;
397 } else {
398 unicode[b].ctype = CODEPAGE_ISLOWER;
399 unicode[b].upper = a_unicode[a].upper;
400 unicode[b].lower = (smb_wchar_t)b;
401 }
402 break;
403 default:
404 MEM_FREE("unicode", unicode);
405 return (NULL);
406 }
407
408 a++;
409 b++;
410 };
411
412 return (unicode);
413 }
414
415 /*
416 * Parse a UNC path (\\server\share\path) into its components.
417 * Although a standard UNC path starts with two '\', in DFS
418 * all UNC paths start with one '\'. So, this function only
419 * checks for one.
420 *
421 * A valid UNC must at least contain two components i.e. server
422 * and share. The path is parsed to:
423 *
424 * unc_server server or domain name with no leading/trailing '\'
425 * unc_share share name with no leading/trailing '\'
426 * unc_path relative path to the share with no leading/trailing '\'
427 * it is valid for unc_path to be NULL.
428 *
429 * Upon successful return of this function, smb_unc_free()
430 * MUST be called when returned 'unc' is no longer needed.
431 *
432 * Returns 0 on success, otherwise returns an errno code.
433 */
434 int
smb_unc_init(const char * path,smb_unc_t * unc)435 smb_unc_init(const char *path, smb_unc_t *unc)
436 {
437 char *p;
438
439 if (path == NULL || unc == NULL || (*path != '\\' && *path != '/'))
440 return (EINVAL);
441
442 bzero(unc, sizeof (smb_unc_t));
443
444 #ifdef _KERNEL
445 unc->unc_buf = smb_mem_strdup(path);
446 #else
447 if ((unc->unc_buf = strdup(path)) == NULL)
448 return (ENOMEM);
449 #endif
450
451 (void) strsubst(unc->unc_buf, '\\', '/');
452 (void) strcanon(unc->unc_buf, "/");
453
454 unc->unc_server = unc->unc_buf + 1;
455 if (*unc->unc_server == '\0') {
456 smb_unc_free(unc);
457 return (EINVAL);
458 }
459
460 if ((p = strchr(unc->unc_server, '/')) == NULL) {
461 smb_unc_free(unc);
462 return (EINVAL);
463 }
464
465 *p++ = '\0';
466 unc->unc_share = p;
467
468 if (*unc->unc_share == '\0') {
469 smb_unc_free(unc);
470 return (EINVAL);
471 }
472
473 unc->unc_path = strchr(unc->unc_share, '/');
474 if ((p = unc->unc_path) == NULL)
475 return (0);
476
477 unc->unc_path++;
478 *p = '\0';
479
480 /* remove the last '/' if any */
481 if ((p = strchr(unc->unc_path, '\0')) != NULL) {
482 if (*(--p) == '/')
483 *p = '\0';
484 }
485
486 return (0);
487 }
488
489 void
smb_unc_free(smb_unc_t * unc)490 smb_unc_free(smb_unc_t *unc)
491 {
492 if (unc == NULL)
493 return;
494
495 #ifdef _KERNEL
496 smb_mem_free(unc->unc_buf);
497 #else
498 free(unc->unc_buf);
499 #endif
500 unc->unc_buf = NULL;
501 }
502