/*-
 * Copyright (c) 2016 Baptiste Daroussin <bapt@FreeBSD.org>
 * Copyright 2016 Tom Lane <tgl@sss.pgh.pa.us>
 * Copyright 2017 Nexenta Systems, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <errno.h>
#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <wchar.h>

#include <atf-c.h>

/*
 * qsort(3) comparator for arrays of wchar_t.
 *
 * Each element is wrapped in a one-character, NUL-terminated wide string so
 * that the pair can be ordered with wcscoll(), i.e. according to the
 * collation rules of the current locale rather than by code point.
 *
 * Returns the wcscoll() result: negative, zero or positive when *a collates
 * before, equal to, or after *b.
 */
static int
cmp(const void *a, const void *b)
{
	const wchar_t wa[2] = { *(const wchar_t *)a, L'\0' };
	const wchar_t wb[2] = { *(const wchar_t *)b, L'\0' };

	return (wcscoll(wa, wb));
}

4673a33477SBaptiste Daroussin ATF_TC_WITHOUT_HEAD(russian_collation);
ATF_TC_BODY(russian_collation,tc)4773a33477SBaptiste Daroussin ATF_TC_BODY(russian_collation, tc)
4873a33477SBaptiste Daroussin {
4973a33477SBaptiste Daroussin wchar_t c[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё";
5073a33477SBaptiste Daroussin wchar_t res[] = L"aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZаАбБвВгГдДеЕёЁжЖзЗиИйЙкКлЛмМнНоОпПрРсСтТуУфФхХцЦчЧшШщЩъЪыЫьЬэЭюЮяЯ";
5173a33477SBaptiste Daroussin
5273a33477SBaptiste Daroussin ATF_CHECK_MSG(setlocale(LC_ALL, "ru_RU.UTF-8") != NULL,
5373a33477SBaptiste Daroussin "Fail to set locale to \"ru_RU.UTF-8\"");
5473a33477SBaptiste Daroussin qsort(c, wcslen(c), sizeof(wchar_t), cmp);
5573a33477SBaptiste Daroussin ATF_CHECK_MSG(wcscmp(c, res) == 0,
5673a33477SBaptiste Daroussin "Bad collation, expected: '%ls' got '%ls'", res, c);
5773a33477SBaptiste Daroussin }
5873a33477SBaptiste Daroussin
/* Number of random strings generated by the strcoll_vs_strxfrm test. */
#define	NSTRINGS	2000
/* Size in bytes of each source string buffer, terminating NUL included. */
#define	MAXSTRLEN	20
/* Size in bytes of the buffer receiving the strxfrm() output. */
#define	MAXXFRMLEN	(MAXSTRLEN * 20)

/* A source string together with its strxfrm()-transformed form. */
typedef struct {
	char sval[MAXSTRLEN];	/* original string */
	char xval[MAXXFRMLEN];	/* strxfrm() output for sval */
} cstr;

68*c48dc2a1SBaptiste Daroussin ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm);
ATF_TC_BODY(strcoll_vs_strxfrm,tc)69*c48dc2a1SBaptiste Daroussin ATF_TC_BODY(strcoll_vs_strxfrm, tc)
70*c48dc2a1SBaptiste Daroussin {
71*c48dc2a1SBaptiste Daroussin cstr data[NSTRINGS];
72*c48dc2a1SBaptiste Daroussin char *curloc;
73*c48dc2a1SBaptiste Daroussin int i, j;
74*c48dc2a1SBaptiste Daroussin
75*c48dc2a1SBaptiste Daroussin curloc = setlocale(LC_ALL, "en_US.UTF-8");
76*c48dc2a1SBaptiste Daroussin ATF_CHECK_MSG(curloc != NULL, "Fail to set locale");
77*c48dc2a1SBaptiste Daroussin
78*c48dc2a1SBaptiste Daroussin /* Ensure new random() values on every run */
79*c48dc2a1SBaptiste Daroussin srandom((unsigned int) time(NULL));
80*c48dc2a1SBaptiste Daroussin
81*c48dc2a1SBaptiste Daroussin /* Generate random UTF8 strings of length less than MAXSTRLEN bytes */
82*c48dc2a1SBaptiste Daroussin for (i = 0; i < NSTRINGS; i++) {
83*c48dc2a1SBaptiste Daroussin char *p;
84*c48dc2a1SBaptiste Daroussin int len;
85*c48dc2a1SBaptiste Daroussin
86*c48dc2a1SBaptiste Daroussin again:
87*c48dc2a1SBaptiste Daroussin p = data[i].sval;
88*c48dc2a1SBaptiste Daroussin len = 1 + (random() % (MAXSTRLEN - 1));
89*c48dc2a1SBaptiste Daroussin while (len > 0) {
90*c48dc2a1SBaptiste Daroussin int c;
91*c48dc2a1SBaptiste Daroussin /*
92*c48dc2a1SBaptiste Daroussin * Generate random printable char in ISO8859-1 range.
93*c48dc2a1SBaptiste Daroussin * Bias towards producing a lot of spaces.
94*c48dc2a1SBaptiste Daroussin */
95*c48dc2a1SBaptiste Daroussin
96*c48dc2a1SBaptiste Daroussin if ((random() % 16) < 3) {
97*c48dc2a1SBaptiste Daroussin c = ' ';
98*c48dc2a1SBaptiste Daroussin } else {
99*c48dc2a1SBaptiste Daroussin do {
100*c48dc2a1SBaptiste Daroussin c = random() & 0xFF;
101*c48dc2a1SBaptiste Daroussin } while (!((c >= ' ' && c <= 127) ||
102*c48dc2a1SBaptiste Daroussin (c >= 0xA0 && c <= 0xFF)));
103*c48dc2a1SBaptiste Daroussin }
104*c48dc2a1SBaptiste Daroussin
105*c48dc2a1SBaptiste Daroussin if (c <= 127) {
106*c48dc2a1SBaptiste Daroussin *p++ = c;
107*c48dc2a1SBaptiste Daroussin len--;
108*c48dc2a1SBaptiste Daroussin } else {
109*c48dc2a1SBaptiste Daroussin if (len < 2)
110*c48dc2a1SBaptiste Daroussin break;
111*c48dc2a1SBaptiste Daroussin /* Poor man's utf8-ification */
112*c48dc2a1SBaptiste Daroussin *p++ = 0xC0 + (c >> 6);
113*c48dc2a1SBaptiste Daroussin len--;
114*c48dc2a1SBaptiste Daroussin *p++ = 0x80 + (c & 0x3F);
115*c48dc2a1SBaptiste Daroussin len--;
116*c48dc2a1SBaptiste Daroussin }
117*c48dc2a1SBaptiste Daroussin }
118*c48dc2a1SBaptiste Daroussin *p = '\0';
119*c48dc2a1SBaptiste Daroussin /* strxfrm() each string as we produce it */
120*c48dc2a1SBaptiste Daroussin errno = 0;
121*c48dc2a1SBaptiste Daroussin ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval,
122*c48dc2a1SBaptiste Daroussin MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length "
123*c48dc2a1SBaptiste Daroussin " string exceeded %d bytes", (int)strlen(data[i].sval),
124*c48dc2a1SBaptiste Daroussin MAXXFRMLEN);
125*c48dc2a1SBaptiste Daroussin
126*c48dc2a1SBaptiste Daroussin /*
127*c48dc2a1SBaptiste Daroussin * Amend strxfrm() failing on certain characters to be fixed and
128*c48dc2a1SBaptiste Daroussin * test later
129*c48dc2a1SBaptiste Daroussin */
130*c48dc2a1SBaptiste Daroussin if (errno != 0)
131*c48dc2a1SBaptiste Daroussin goto again;
132*c48dc2a1SBaptiste Daroussin }
133*c48dc2a1SBaptiste Daroussin
134*c48dc2a1SBaptiste Daroussin for (i = 0; i < NSTRINGS; i++) {
135*c48dc2a1SBaptiste Daroussin for (j = 0; j < NSTRINGS; j++) {
136*c48dc2a1SBaptiste Daroussin int sr = strcoll(data[i].sval, data[j].sval);
137*c48dc2a1SBaptiste Daroussin int sx = strcmp(data[i].xval, data[j].xval);
138*c48dc2a1SBaptiste Daroussin
139*c48dc2a1SBaptiste Daroussin ATF_CHECK_MSG(!((sr * sx < 0) ||
140*c48dc2a1SBaptiste Daroussin (sr * sx == 0 && sr + sx != 0)),
141*c48dc2a1SBaptiste Daroussin "%s: diff for \"%s\" and \"%s\"",
142*c48dc2a1SBaptiste Daroussin curloc, data[i].sval, data[j].sval);
143*c48dc2a1SBaptiste Daroussin }
144*c48dc2a1SBaptiste Daroussin }
145*c48dc2a1SBaptiste Daroussin }
146*c48dc2a1SBaptiste Daroussin
ATF_TP_ADD_TCS(tp)14773a33477SBaptiste Daroussin ATF_TP_ADD_TCS(tp)
14873a33477SBaptiste Daroussin {
14973a33477SBaptiste Daroussin ATF_TP_ADD_TC(tp, russian_collation);
150*c48dc2a1SBaptiste Daroussin ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm);
15173a33477SBaptiste Daroussin
15273a33477SBaptiste Daroussin return (atf_no_error());
15373a33477SBaptiste Daroussin }
154