1 /* $NetBSD: strcasecmp_utf8.c,v 1.2 2017/02/14 01:16:49 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* strcasecmp_utf8 3 6 /* SUMMARY 7 /* caseless string comparison 8 /* SYNOPSIS 9 /* #include <stringops.h> 10 /* 11 /* int strcasecmp_utf8( 12 /* const char *s1, 13 /* const char *s2) 14 /* 15 /* int strncasecmp_utf8( 16 /* const char *s1, 17 /* const char *s2, 18 /* ssize_t len) 19 /* AUXILIARY FUNCTIONS 20 /* int strcasecmp_utf8x( 21 /* int flags, 22 /* const char *s1, 23 /* const char *s2) 24 /* 25 /* int strncasecmp_utf8x( 26 /* int flags, 27 /* const char *s1, 28 /* const char *s2, 29 /* ssize_t len) 30 /* DESCRIPTION 31 /* strcasecmp_utf8() implements caseless string comparison for 32 /* UTF-8 text, with an API similar to strcasecmp(). Only ASCII 33 /* characters are casefolded when the code is compiled without 34 /* EAI support or when util_utf8_enable is zero. 35 /* 36 /* strncasecmp_utf8() implements caseless string comparison 37 /* for UTF-8 text, with an API similar to strncasecmp(). Only 38 /* ASCII characters are casefolded when the code is compiled 39 /* without EAI support or when util_utf8_enable is zero. 40 /* 41 /* strcasecmp_utf8x() and strncasecmp_utf8x() implement a more 42 /* complex API that provides the above functionality and more. 43 /* 44 /* Arguments: 45 /* .IP "s1, s2" 46 /* Null-terminated strings to be compared. 47 /* .IP len 48 /* String length before casefolding. 49 /* .IP flags 50 /* Zero or CASEF_FLAG_UTF8. The latter flag enables UTF-8 case 51 /* folding instead of folding only ASCII characters. This flag 52 /* is ignored when compiled without EAI support. 53 /* SEE ALSO 54 /* casefold(), casefold text for caseless comparison. 55 /* LICENSE 56 /* .ad 57 /* .fi 58 /* The Secure Mailer license must be distributed with this software. 59 /* AUTHOR(S) 60 /* Wietse Venema 61 /* IBM T.J. Watson Research 62 /* P.O. Box 704 63 /* Yorktown Heights, NY 10598, USA 64 /* 65 /* Wietse Venema 66 /* Google, Inc. 67 /* 111 8th Avenue 68 /* New York, NY 10011, USA 69 /*--*/ 70 71 /* 72 * System library. 73 */ 74 #include <sys_defs.h> 75 #include <string.h> 76 77 #ifdef STRCASECMP_IN_STRINGS_H 78 #include <strings.h> 79 #endif 80 81 /* 82 * Utility library. 83 */ 84 #include <stringops.h> 85 86 #define STR(x) vstring_str(x) 87 88 static VSTRING *f1; /* casefold result for s1 */ 89 static VSTRING *f2; /* casefold result for s2 */ 90 91 /* strcasecmp_utf8_init - initialize */ 92 93 static void strcasecmp_utf8_init(void) 94 { 95 f1 = vstring_alloc(100); 96 f2 = vstring_alloc(100); 97 } 98 99 /* strcasecmp_utf8x - caseless string comparison */ 100 101 int strcasecmp_utf8x(int flags, const char *s1, const char *s2) 102 { 103 104 /* 105 * Short-circuit optimization for ASCII-only text. This may be slower 106 * than using a cache for all results. We must not expose strcasecmp(3) 107 * to non-ASCII text. 108 */ 109 if (allascii(s1) && allascii(s2)) 110 return (strcasecmp(s1, s2)); 111 112 if (f1 == 0) 113 strcasecmp_utf8_init(); 114 115 /* 116 * Cross our fingers and hope that strcmp() remains agnostic of 117 * charactersets and locales. 118 */ 119 flags &= CASEF_FLAG_UTF8; 120 casefoldx(flags, f1, s1, -1); 121 casefoldx(flags, f2, s2, -1); 122 return (strcmp(STR(f1), STR(f2))); 123 } 124 125 /* strncasecmp_utf8x - caseless string comparison */ 126 127 int strncasecmp_utf8x(int flags, const char *s1, const char *s2, 128 ssize_t len) 129 { 130 131 /* 132 * Consider using a cache for all results. 133 */ 134 if (f1 == 0) 135 strcasecmp_utf8_init(); 136 137 /* 138 * Short-circuit optimization for ASCII-only text. This may be slower 139 * than using a cache for all results. See comments above for limitations 140 * of strcasecmp(). 141 */ 142 if (allascii_len(s1, len) && allascii_len(s2, len)) 143 return (strncasecmp(s1, s2, len)); 144 145 /* 146 * Caution: casefolding may change the number of bytes. See comments 147 * above for concerns about strcmp(). 148 */ 149 flags &= CASEF_FLAG_UTF8; 150 casefoldx(flags, f1, s1, len); 151 casefoldx(flags, f2, s2, len); 152 return (strcmp(STR(f1), STR(f2))); 153 } 154 155 #ifdef TEST 156 #include <stdio.h> 157 #include <stdlib.h> 158 #include <vstream.h> 159 #include <vstring_vstream.h> 160 #include <msg_vstream.h> 161 #include <argv.h> 162 163 int main(int argc, char **argv) 164 { 165 VSTRING *buffer = vstring_alloc(1); 166 ARGV *cmd; 167 char **args; 168 int len; 169 int flags; 170 int res; 171 172 msg_vstream_init(argv[0], VSTREAM_ERR); 173 flags = CASEF_FLAG_UTF8; 174 util_utf8_enable = 1; 175 while (vstring_fgets_nonl(buffer, VSTREAM_IN)) { 176 vstream_printf("> %s\n", STR(buffer)); 177 cmd = argv_split(STR(buffer), CHARS_SPACE); 178 if (cmd->argc == 0 || cmd->argv[0][0] == '#') 179 continue; 180 args = cmd->argv; 181 182 /* 183 * Compare two strings. 184 */ 185 if (strcmp(args[0], "compare") == 0 && cmd->argc == 3) { 186 res = strcasecmp_utf8x(flags, args[1], args[2]); 187 vstream_printf("\"%s\" %s \"%s\"\n", 188 args[1], 189 res < 0 ? "<" : res == 0 ? "==" : ">", 190 args[2]); 191 } 192 193 /* 194 * Compare two substrings. 195 */ 196 else if (strcmp(args[0], "compare-len") == 0 && cmd->argc == 4 197 && sscanf(args[3], "%d", &len) == 1 && len >= 0) { 198 res = strncasecmp_utf8x(flags, args[1], args[2], len); 199 vstream_printf("\"%.*s\" %s \"%.*s\"\n", 200 len, args[1], 201 res < 0 ? "<" : res == 0 ? "==" : ">", 202 len, args[2]); 203 } 204 205 /* 206 * Usage. 207 */ 208 else { 209 vstream_printf("Usage: %s compare <s1> <s2> | compare-len <s1> <s2> <len>\n", 210 argv[0]); 211 } 212 vstream_fflush(VSTREAM_OUT); 213 argv_free(cmd); 214 } 215 exit(0); 216 } 217 218 #endif /* TEST */ 219