/* $NetBSD: test-utf8.c,v 1.2 2017/01/28 21:31:50 christos Exp $ */

/*
 * Copyright (c) 2004 Kungliga Tekniska Högskolan
 * (Royal Institute of Technology, Stockholm, Sweden).
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Institute nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
35ca1c9b0cSelric
36ca1c9b0cSelric #ifdef HAVE_CONFIG_H
37ca1c9b0cSelric #include <config.h>
38ca1c9b0cSelric #endif
39ca1c9b0cSelric #include <stdio.h>
40ca1c9b0cSelric #include <string.h>
41ca1c9b0cSelric #include <err.h>
42ca1c9b0cSelric #include "windlocl.h"
43ca1c9b0cSelric
/*
 * Byte strings that wind_utf8ucs4() is expected to reject.
 *
 * The table is terminated by a NULL entry so it can be walked without a
 * separate element count.
 */
static const char *failing_testcases[] = {
    /* a lone byte that is neither ASCII nor followed by anything */
    "\x80",
    "\xFF",
    "\xC0",
    "\xDF",
    "\xE0",
    "\xEF",
    "\xF0",
    "\xF7",

    /* strings starting with 0xC0 */
    "\xC0\x01",
    "\xC0\x7F",
    "\xC0\xFF",
    "\xC0\x80\x80",

    /* strings starting with 0xE0 */
    "\xE0\x01",
    "\xE0\x7F",
    "\xE0\x80",
    "\xE0\xFF",
    "\xE0\x80\x20",
    "\xE0\x80\xFF",
    "\xE0\x80\x80\x80",

    /* strings starting with 0xF0 */
    "\xF0\x01",
    "\xF0\x80",
    "\xF0\x80\x01",
    "\xF0\x80\x80",
    "\xF0\x80\x80\x01",
    "\xF0\x80\x80\xFF",

    NULL
};
72ca1c9b0cSelric
/*
 * Number of uint32_t slots in every code-point buffer used by this test
 * (the expected values in struct testcase and the decode buffer in main()).
 */
#define MAX_LENGTH 10

/*
 * One UTF-8 input that is expected to decode successfully, together with
 * the result the decoder should produce for it.
 */
struct testcase {
    const char *utf8_str;   /* NUL-terminated input string */
    size_t len;             /* expected number of decoded code points */
    uint32_t u[MAX_LENGTH]; /* expected code points; the first len are compared */
    int invalid_ucs2;       /* non-zero: main() skips the wind_ucs4utf8() step */
};
81ca1c9b0cSelric
82ca1c9b0cSelric static const struct testcase testcases[] = {
83b9d004c6Schristos {"", 0, {0}, 0},
84b9d004c6Schristos {"\x01", 1, {1}, 0},
85b9d004c6Schristos {"\x7F", 1, {0x7F}, 0},
86b9d004c6Schristos {"\x01\x7F", 2, {0x01, 0x7F}, 0},
87b9d004c6Schristos {"\xC0\x80", 1, {0}, 0},
88b9d004c6Schristos {"\xC0\x81", 1, {1}, 0},
89b9d004c6Schristos {"\xC1\x80", 1, {0x40}, 0},
90b9d004c6Schristos {"\xDF\xBF", 1, {0x7FF}, 0},
91b9d004c6Schristos {"\xE0\x80\x80", 1, {0}, 0},
92b9d004c6Schristos {"\xE0\x80\x81", 1, {1}, 0},
93b9d004c6Schristos {"\xE0\x81\x80", 1, {0x40}, 0},
94b9d004c6Schristos {"\xE1\x80\x80", 1, {0x1000}, 0},
95b9d004c6Schristos {"\xEF\xBF\xBF", 1, {0xFFFF}, 0},
96b9d004c6Schristos {"\xF0\x80\x80\x80", 1, {0}, 0},
97b9d004c6Schristos {"\xF0\x80\x80\x81", 1, {1}, 0},
98b9d004c6Schristos {"\xF0\x80\x81\x80", 1, {0x40}, 0},
99b9d004c6Schristos {"\xF0\x81\x80\x80", 1, {0x1000}, 0},
100b9d004c6Schristos {"\xF1\x80\x80\x80", 1, {0x40000}, 0},
101ca1c9b0cSelric {"\xF7\xBF\xBF\xBF", 1, {0X1FFFFF}, 1},
102ca1c9b0cSelric };
103ca1c9b0cSelric
104ca1c9b0cSelric int
main(void)105ca1c9b0cSelric main(void)
106ca1c9b0cSelric {
107ca1c9b0cSelric unsigned failures = 0;
108ca1c9b0cSelric unsigned i;
109ca1c9b0cSelric const char **s;
110ca1c9b0cSelric int ret;
111ca1c9b0cSelric size_t len, len2;
112ca1c9b0cSelric uint32_t u[MAX_LENGTH];
113ca1c9b0cSelric char str[MAX_LENGTH * 4];
114ca1c9b0cSelric
115ca1c9b0cSelric for (s = failing_testcases; *s != NULL; ++s) {
116ca1c9b0cSelric len = MAX_LENGTH;
117ca1c9b0cSelric ret = wind_utf8ucs4(*s, u, &len);
118ca1c9b0cSelric if (ret == 0) {
119ca1c9b0cSelric printf("utf8 decode of \"%s\" should have failed\n", *s);
120ca1c9b0cSelric ++failures;
121ca1c9b0cSelric }
122ca1c9b0cSelric }
123ca1c9b0cSelric
124ca1c9b0cSelric for (i = 0; i < sizeof(testcases)/sizeof(testcases[0]); ++i) {
125ca1c9b0cSelric const struct testcase *t = &testcases[i];
126ca1c9b0cSelric
127ca1c9b0cSelric ret = wind_utf8ucs4_length(t->utf8_str, &len);
128ca1c9b0cSelric if (ret) {
129ca1c9b0cSelric printf("utf8ucs4 length of \"%s\" should have succeeded\n",
130ca1c9b0cSelric t->utf8_str);
131ca1c9b0cSelric ++failures;
132ca1c9b0cSelric continue;
133ca1c9b0cSelric }
134ca1c9b0cSelric if (len != t->len) {
135ca1c9b0cSelric printf("utf8ucs4_length of \"%s\" has wrong length: "
136ca1c9b0cSelric "expected: %u, actual: %u\n",
137ca1c9b0cSelric t->utf8_str, (unsigned int)t->len, (unsigned int)len);
138ca1c9b0cSelric ++failures;
139ca1c9b0cSelric continue;
140ca1c9b0cSelric }
141ca1c9b0cSelric
142ca1c9b0cSelric len = MAX_LENGTH;
143ca1c9b0cSelric ret = wind_utf8ucs4(t->utf8_str, u, &len);
144ca1c9b0cSelric if (ret) {
145ca1c9b0cSelric printf("utf8 decode of \"%s\" should have succeeded\n",
146ca1c9b0cSelric t->utf8_str);
147ca1c9b0cSelric ++failures;
148ca1c9b0cSelric continue;
149ca1c9b0cSelric }
150ca1c9b0cSelric if (len != t->len) {
151ca1c9b0cSelric printf("utf8 decode of \"%s\" has wrong length: "
152ca1c9b0cSelric "expected: %u, actual: %u\n",
153ca1c9b0cSelric t->utf8_str, (unsigned int)t->len, (unsigned int)len);
154ca1c9b0cSelric ++failures;
155ca1c9b0cSelric continue;
156ca1c9b0cSelric }
157ca1c9b0cSelric if (memcmp(t->u, u, len * sizeof(uint32_t)) != 0) {
158ca1c9b0cSelric printf("utf8 decode of \"%s\" has wrong data\n",
159ca1c9b0cSelric t->utf8_str);
160ca1c9b0cSelric ++failures;
161ca1c9b0cSelric continue;
162ca1c9b0cSelric }
163ca1c9b0cSelric if (t->invalid_ucs2 == 0) {
164ca1c9b0cSelric len2 = sizeof(str);
165ca1c9b0cSelric ret = wind_ucs4utf8(u, len, str, &len2);
166ca1c9b0cSelric if (ret) {
167ca1c9b0cSelric printf("ucs4 decode of \"%s\" should have succeeded\n",
168ca1c9b0cSelric t->utf8_str);
169ca1c9b0cSelric ++failures;
170ca1c9b0cSelric continue;
171ca1c9b0cSelric }
172ca1c9b0cSelric }
173ca1c9b0cSelric }
174ca1c9b0cSelric
175ca1c9b0cSelric return failures != 0;
176ca1c9b0cSelric }
177