xref: /netbsd-src/crypto/external/bsd/heimdal/dist/lib/wind/test-utf8.c (revision d3273b5b76f5afaafe308cead5511dbb8df8c5e9)
1*d3273b5bSchristos /*	$NetBSD: test-utf8.c,v 1.2 2017/01/28 21:31:50 christos Exp $	*/
2ca1c9b0cSelric 
3ca1c9b0cSelric /*
4ca1c9b0cSelric  * Copyright (c) 2004 Kungliga Tekniska Högskolan
5ca1c9b0cSelric  * (Royal Institute of Technology, Stockholm, Sweden).
6ca1c9b0cSelric  * All rights reserved.
7ca1c9b0cSelric  *
8ca1c9b0cSelric  * Redistribution and use in source and binary forms, with or without
9ca1c9b0cSelric  * modification, are permitted provided that the following conditions
10ca1c9b0cSelric  * are met:
11ca1c9b0cSelric  *
12ca1c9b0cSelric  * 1. Redistributions of source code must retain the above copyright
13ca1c9b0cSelric  *    notice, this list of conditions and the following disclaimer.
14ca1c9b0cSelric  *
15ca1c9b0cSelric  * 2. Redistributions in binary form must reproduce the above copyright
16ca1c9b0cSelric  *    notice, this list of conditions and the following disclaimer in the
17ca1c9b0cSelric  *    documentation and/or other materials provided with the distribution.
18ca1c9b0cSelric  *
19ca1c9b0cSelric  * 3. Neither the name of the Institute nor the names of its contributors
20ca1c9b0cSelric  *    may be used to endorse or promote products derived from this software
21ca1c9b0cSelric  *    without specific prior written permission.
22ca1c9b0cSelric  *
23ca1c9b0cSelric  * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
24ca1c9b0cSelric  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25ca1c9b0cSelric  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26ca1c9b0cSelric  * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
27ca1c9b0cSelric  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28ca1c9b0cSelric  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29ca1c9b0cSelric  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30ca1c9b0cSelric  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31ca1c9b0cSelric  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32ca1c9b0cSelric  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33ca1c9b0cSelric  * SUCH DAMAGE.
34ca1c9b0cSelric  */
35ca1c9b0cSelric 
36ca1c9b0cSelric #ifdef HAVE_CONFIG_H
37ca1c9b0cSelric #include <config.h>
38ca1c9b0cSelric #endif
39ca1c9b0cSelric #include <stdio.h>
40ca1c9b0cSelric #include <string.h>
41ca1c9b0cSelric #include <err.h>
42ca1c9b0cSelric #include "windlocl.h"
43ca1c9b0cSelric 
/*
 * Byte strings that are not well-formed UTF-8.  main() feeds each one to
 * wind_utf8ucs4() and counts a failure if the call returns 0.
 * The list is terminated by NULL.
 */
static const char *failing_testcases[] = {
    /* a bare continuation byte, and a byte that is never a valid lead */
    "\x80", "\xFF",

    /* lead bytes for 2-, 3- and 4-byte sequences with nothing after them */
    "\xC0", "\xDF",
    "\xE0", "\xEF",
    "\xF0", "\xF7",

    /* 2-byte lead followed by a byte that is not 10xxxxxx */
    "\xC0\x01", "\xC0\x7F", "\xC0\xFF",
    /* complete 2-byte sequence followed by a stray continuation byte */
    "\xC0\x80\x80",

    /* 3-byte lead: bad or missing first continuation byte */
    "\xE0\x01", "\xE0\x7F", "\xE0\x80", "\xE0\xFF",
    /* 3-byte lead: bad second continuation byte */
    "\xE0\x80\x20", "\xE0\x80\xFF",
    /* complete 3-byte sequence followed by a stray continuation byte */
    "\xE0\x80\x80\x80",

    /* 4-byte lead: sequence cut short or broken at each position */
    "\xF0\x01", "\xF0\x80",
    "\xF0\x80\x01", "\xF0\x80\x80",
    "\xF0\x80\x80\x01", "\xF0\x80\x80\xFF",

    NULL
};
72ca1c9b0cSelric 
/* Capacity, in code points, of the UCS-4 buffers used by the tests. */
#define MAX_LENGTH 10

/*
 * One input that the decoder is expected to accept, together with the
 * result main() compares against.
 */
struct testcase {
    const char *utf8_str;	/* NUL-terminated input handed to the decoder */
    size_t len;			/* expected number of decoded code points */
    uint32_t u[MAX_LENGTH];	/* expected code points; first len entries are compared */
    int invalid_ucs2;		/* non-zero: main() skips the wind_ucs4utf8() step */
};
81ca1c9b0cSelric 
82ca1c9b0cSelric static const struct testcase testcases[] = {
83b9d004c6Schristos     {"", 0, {0}, 0},
84b9d004c6Schristos     {"\x01", 1, {1}, 0},
85b9d004c6Schristos     {"\x7F", 1, {0x7F}, 0},
86b9d004c6Schristos     {"\x01\x7F", 2, {0x01, 0x7F}, 0},
87b9d004c6Schristos     {"\xC0\x80", 1, {0}, 0},
88b9d004c6Schristos     {"\xC0\x81", 1, {1}, 0},
89b9d004c6Schristos     {"\xC1\x80", 1, {0x40}, 0},
90b9d004c6Schristos     {"\xDF\xBF", 1, {0x7FF}, 0},
91b9d004c6Schristos     {"\xE0\x80\x80", 1, {0}, 0},
92b9d004c6Schristos     {"\xE0\x80\x81", 1, {1}, 0},
93b9d004c6Schristos     {"\xE0\x81\x80", 1, {0x40}, 0},
94b9d004c6Schristos     {"\xE1\x80\x80", 1, {0x1000}, 0},
95b9d004c6Schristos     {"\xEF\xBF\xBF", 1, {0xFFFF}, 0},
96b9d004c6Schristos     {"\xF0\x80\x80\x80", 1, {0}, 0},
97b9d004c6Schristos     {"\xF0\x80\x80\x81", 1, {1}, 0},
98b9d004c6Schristos     {"\xF0\x80\x81\x80", 1, {0x40}, 0},
99b9d004c6Schristos     {"\xF0\x81\x80\x80", 1, {0x1000}, 0},
100b9d004c6Schristos     {"\xF1\x80\x80\x80", 1, {0x40000}, 0},
101ca1c9b0cSelric     {"\xF7\xBF\xBF\xBF", 1, {0X1FFFFF}, 1},
102ca1c9b0cSelric };
103ca1c9b0cSelric 
104ca1c9b0cSelric int
main(void)105ca1c9b0cSelric main(void)
106ca1c9b0cSelric {
107ca1c9b0cSelric     unsigned failures = 0;
108ca1c9b0cSelric     unsigned i;
109ca1c9b0cSelric     const char **s;
110ca1c9b0cSelric     int ret;
111ca1c9b0cSelric     size_t len, len2;
112ca1c9b0cSelric     uint32_t u[MAX_LENGTH];
113ca1c9b0cSelric     char str[MAX_LENGTH * 4];
114ca1c9b0cSelric 
115ca1c9b0cSelric     for (s = failing_testcases; *s != NULL; ++s) {
116ca1c9b0cSelric 	len = MAX_LENGTH;
117ca1c9b0cSelric 	ret = wind_utf8ucs4(*s, u, &len);
118ca1c9b0cSelric 	if (ret == 0) {
119ca1c9b0cSelric 	    printf("utf8 decode of \"%s\" should have failed\n", *s);
120ca1c9b0cSelric 	    ++failures;
121ca1c9b0cSelric 	}
122ca1c9b0cSelric     }
123ca1c9b0cSelric 
124ca1c9b0cSelric     for (i = 0; i < sizeof(testcases)/sizeof(testcases[0]); ++i) {
125ca1c9b0cSelric 	const struct testcase *t = &testcases[i];
126ca1c9b0cSelric 
127ca1c9b0cSelric 	ret = wind_utf8ucs4_length(t->utf8_str, &len);
128ca1c9b0cSelric 	if (ret) {
129ca1c9b0cSelric 	    printf("utf8ucs4 length of \"%s\" should have succeeded\n",
130ca1c9b0cSelric 		   t->utf8_str);
131ca1c9b0cSelric 	    ++failures;
132ca1c9b0cSelric 	    continue;
133ca1c9b0cSelric 	}
134ca1c9b0cSelric 	if (len != t->len) {
135ca1c9b0cSelric 	    printf("utf8ucs4_length of \"%s\" has wrong length: "
136ca1c9b0cSelric 		   "expected: %u, actual: %u\n",
137ca1c9b0cSelric 		   t->utf8_str, (unsigned int)t->len, (unsigned int)len);
138ca1c9b0cSelric 	    ++failures;
139ca1c9b0cSelric 	    continue;
140ca1c9b0cSelric 	}
141ca1c9b0cSelric 
142ca1c9b0cSelric 	len = MAX_LENGTH;
143ca1c9b0cSelric 	ret = wind_utf8ucs4(t->utf8_str, u, &len);
144ca1c9b0cSelric 	if (ret) {
145ca1c9b0cSelric 	    printf("utf8 decode of \"%s\" should have succeeded\n",
146ca1c9b0cSelric 		   t->utf8_str);
147ca1c9b0cSelric 	    ++failures;
148ca1c9b0cSelric 	    continue;
149ca1c9b0cSelric 	}
150ca1c9b0cSelric 	if (len != t->len) {
151ca1c9b0cSelric 	    printf("utf8 decode of \"%s\" has wrong length: "
152ca1c9b0cSelric 		   "expected: %u, actual: %u\n",
153ca1c9b0cSelric 		   t->utf8_str, (unsigned int)t->len, (unsigned int)len);
154ca1c9b0cSelric 	    ++failures;
155ca1c9b0cSelric 	    continue;
156ca1c9b0cSelric 	}
157ca1c9b0cSelric 	if (memcmp(t->u, u, len * sizeof(uint32_t)) != 0) {
158ca1c9b0cSelric 	    printf("utf8 decode of \"%s\" has wrong data\n",
159ca1c9b0cSelric 		   t->utf8_str);
160ca1c9b0cSelric 	    ++failures;
161ca1c9b0cSelric 	    continue;
162ca1c9b0cSelric 	}
163ca1c9b0cSelric 	if (t->invalid_ucs2 == 0) {
164ca1c9b0cSelric 	    len2 = sizeof(str);
165ca1c9b0cSelric 	    ret = wind_ucs4utf8(u, len, str, &len2);
166ca1c9b0cSelric 	    if (ret) {
167ca1c9b0cSelric 		printf("ucs4 decode of \"%s\" should have succeeded\n",
168ca1c9b0cSelric 		       t->utf8_str);
169ca1c9b0cSelric 		++failures;
170ca1c9b0cSelric 		continue;
171ca1c9b0cSelric 	    }
172ca1c9b0cSelric 	}
173ca1c9b0cSelric     }
174ca1c9b0cSelric 
175ca1c9b0cSelric     return failures != 0;
176ca1c9b0cSelric }
177