1 /* $NetBSD: strcasecmp_utf8.c,v 1.2 2017/02/14 01:16:49 christos Exp $ */
2
3 /*++
4 /* NAME
5 /* strcasecmp_utf8 3
6 /* SUMMARY
7 /* caseless string comparison
8 /* SYNOPSIS
9 /* #include <stringops.h>
10 /*
11 /* int strcasecmp_utf8(
12 /* const char *s1,
13 /* const char *s2)
14 /*
15 /* int strncasecmp_utf8(
16 /* const char *s1,
17 /* const char *s2,
18 /* ssize_t len)
19 /* AUXILIARY FUNCTIONS
20 /* int strcasecmp_utf8x(
21 /* int flags,
22 /* const char *s1,
23 /* const char *s2)
24 /*
25 /* int strncasecmp_utf8x(
26 /* int flags,
27 /* const char *s1,
28 /* const char *s2,
29 /* ssize_t len)
30 /* DESCRIPTION
31 /* strcasecmp_utf8() implements caseless string comparison for
32 /* UTF-8 text, with an API similar to strcasecmp(). Only ASCII
33 /* characters are casefolded when the code is compiled without
34 /* EAI support or when util_utf8_enable is zero.
35 /*
36 /* strncasecmp_utf8() implements caseless string comparison
37 /* for UTF-8 text, with an API similar to strncasecmp(). Only
38 /* ASCII characters are casefolded when the code is compiled
39 /* without EAI support or when util_utf8_enable is zero.
40 /*
41 /* strcasecmp_utf8x() and strncasecmp_utf8x() implement a more
42 /* complex API that provides the above functionality and more.
43 /*
44 /* Arguments:
45 /* .IP "s1, s2"
46 /* Null-terminated strings to be compared.
47 /* .IP len
48 /* String length before casefolding.
49 /* .IP flags
50 /* Zero or CASEF_FLAG_UTF8. The latter flag enables UTF-8 case
51 /* folding instead of folding only ASCII characters. This flag
52 /* is ignored when compiled without EAI support.
53 /* SEE ALSO
54 /* casefold(), casefold text for caseless comparison.
55 /* LICENSE
56 /* .ad
57 /* .fi
58 /* The Secure Mailer license must be distributed with this software.
59 /* AUTHOR(S)
60 /* Wietse Venema
61 /* IBM T.J. Watson Research
62 /* P.O. Box 704
63 /* Yorktown Heights, NY 10598, USA
64 /*
65 /* Wietse Venema
66 /* Google, Inc.
67 /* 111 8th Avenue
68 /* New York, NY 10011, USA
69 /*--*/
70
71 /*
72 * System library.
73 */
74 #include <sys_defs.h>
75 #include <string.h>
76
77 #ifdef STRCASECMP_IN_STRINGS_H
78 #include <strings.h>
79 #endif
80
81 /*
82 * Utility library.
83 */
84 #include <stringops.h>
85
86 #define STR(x) vstring_str(x)
87
88 static VSTRING *f1; /* casefold result for s1 */
89 static VSTRING *f2; /* casefold result for s2 */
90
91 /* strcasecmp_utf8_init - initialize */
92
strcasecmp_utf8_init(void)93 static void strcasecmp_utf8_init(void)
94 {
95 f1 = vstring_alloc(100);
96 f2 = vstring_alloc(100);
97 }
98
99 /* strcasecmp_utf8x - caseless string comparison */
100
strcasecmp_utf8x(int flags,const char * s1,const char * s2)101 int strcasecmp_utf8x(int flags, const char *s1, const char *s2)
102 {
103
104 /*
105 * Short-circuit optimization for ASCII-only text. This may be slower
106 * than using a cache for all results. We must not expose strcasecmp(3)
107 * to non-ASCII text.
108 */
109 if (allascii(s1) && allascii(s2))
110 return (strcasecmp(s1, s2));
111
112 if (f1 == 0)
113 strcasecmp_utf8_init();
114
115 /*
116 * Cross our fingers and hope that strcmp() remains agnostic of
117 * charactersets and locales.
118 */
119 flags &= CASEF_FLAG_UTF8;
120 casefoldx(flags, f1, s1, -1);
121 casefoldx(flags, f2, s2, -1);
122 return (strcmp(STR(f1), STR(f2)));
123 }
124
125 /* strncasecmp_utf8x - caseless string comparison */
126
strncasecmp_utf8x(int flags,const char * s1,const char * s2,ssize_t len)127 int strncasecmp_utf8x(int flags, const char *s1, const char *s2,
128 ssize_t len)
129 {
130
131 /*
132 * Consider using a cache for all results.
133 */
134 if (f1 == 0)
135 strcasecmp_utf8_init();
136
137 /*
138 * Short-circuit optimization for ASCII-only text. This may be slower
139 * than using a cache for all results. See comments above for limitations
140 * of strcasecmp().
141 */
142 if (allascii_len(s1, len) && allascii_len(s2, len))
143 return (strncasecmp(s1, s2, len));
144
145 /*
146 * Caution: casefolding may change the number of bytes. See comments
147 * above for concerns about strcmp().
148 */
149 flags &= CASEF_FLAG_UTF8;
150 casefoldx(flags, f1, s1, len);
151 casefoldx(flags, f2, s2, len);
152 return (strcmp(STR(f1), STR(f2)));
153 }
154
155 #ifdef TEST
156 #include <stdio.h>
157 #include <stdlib.h>
158 #include <vstream.h>
159 #include <vstring_vstream.h>
160 #include <msg_vstream.h>
161 #include <argv.h>
162
main(int argc,char ** argv)163 int main(int argc, char **argv)
164 {
165 VSTRING *buffer = vstring_alloc(1);
166 ARGV *cmd;
167 char **args;
168 int len;
169 int flags;
170 int res;
171
172 msg_vstream_init(argv[0], VSTREAM_ERR);
173 flags = CASEF_FLAG_UTF8;
174 util_utf8_enable = 1;
175 while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
176 vstream_printf("> %s\n", STR(buffer));
177 cmd = argv_split(STR(buffer), CHARS_SPACE);
178 if (cmd->argc == 0 || cmd->argv[0][0] == '#')
179 continue;
180 args = cmd->argv;
181
182 /*
183 * Compare two strings.
184 */
185 if (strcmp(args[0], "compare") == 0 && cmd->argc == 3) {
186 res = strcasecmp_utf8x(flags, args[1], args[2]);
187 vstream_printf("\"%s\" %s \"%s\"\n",
188 args[1],
189 res < 0 ? "<" : res == 0 ? "==" : ">",
190 args[2]);
191 }
192
193 /*
194 * Compare two substrings.
195 */
196 else if (strcmp(args[0], "compare-len") == 0 && cmd->argc == 4
197 && sscanf(args[3], "%d", &len) == 1 && len >= 0) {
198 res = strncasecmp_utf8x(flags, args[1], args[2], len);
199 vstream_printf("\"%.*s\" %s \"%.*s\"\n",
200 len, args[1],
201 res < 0 ? "<" : res == 0 ? "==" : ">",
202 len, args[2]);
203 }
204
205 /*
206 * Usage.
207 */
208 else {
209 vstream_printf("Usage: %s compare <s1> <s2> | compare-len <s1> <s2> <len>\n",
210 argv[0]);
211 }
212 vstream_fflush(VSTREAM_OUT);
213 argv_free(cmd);
214 }
215 exit(0);
216 }
217
218 #endif /* TEST */
219