1*479c151dSjsg /* $OpenBSD: bwstring.c,v 1.10 2024/09/20 02:00:46 jsg Exp $ */ 279428148Smillert 379428148Smillert /*- 479428148Smillert * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 579428148Smillert * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 679428148Smillert * All rights reserved. 779428148Smillert * 879428148Smillert * Redistribution and use in source and binary forms, with or without 979428148Smillert * modification, are permitted provided that the following conditions 1079428148Smillert * are met: 1179428148Smillert * 1. Redistributions of source code must retain the above copyright 1279428148Smillert * notice, this list of conditions and the following disclaimer. 1379428148Smillert * 2. Redistributions in binary form must reproduce the above copyright 1479428148Smillert * notice, this list of conditions and the following disclaimer in the 1579428148Smillert * documentation and/or other materials provided with the distribution. 1679428148Smillert * 1779428148Smillert * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1879428148Smillert * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1979428148Smillert * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2079428148Smillert * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2179428148Smillert * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2279428148Smillert * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2379428148Smillert * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2479428148Smillert * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2579428148Smillert * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2679428148Smillert * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2779428148Smillert * SUCH DAMAGE. 2879428148Smillert */ 2979428148Smillert 3079428148Smillert #include <ctype.h> 3179428148Smillert #include <errno.h> 3279428148Smillert #include <err.h> 3379428148Smillert #include <langinfo.h> 3479428148Smillert #include <math.h> 3579428148Smillert #include <stdlib.h> 3679428148Smillert #include <string.h> 3779428148Smillert #include <wchar.h> 3879428148Smillert #include <wctype.h> 3979428148Smillert 4079428148Smillert #include "bwstring.h" 4179428148Smillert #include "sort.h" 4279428148Smillert 4379428148Smillert static wchar_t **wmonths; 4479428148Smillert static char **cmonths; 4579428148Smillert 4679428148Smillert /* initialise months */ 4779428148Smillert 4879428148Smillert void 4979428148Smillert initialise_months(void) 5079428148Smillert { 5179428148Smillert const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4, 5279428148Smillert ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10, 5379428148Smillert ABMON_11, ABMON_12 }; 5479428148Smillert char *tmp; 5579428148Smillert size_t len; 5679428148Smillert 5779428148Smillert if (sort_mb_cur_max == 1) { 5879428148Smillert if (cmonths == NULL) { 5979428148Smillert char *m; 6079428148Smillert unsigned int j; 6179428148Smillert int i; 6279428148Smillert 6379428148Smillert cmonths = sort_malloc(sizeof(char *) * 12); 6479428148Smillert for (i = 0; i < 12; i++) { 6579428148Smillert cmonths[i] = NULL; 6679428148Smillert tmp = nl_langinfo(item[i]); 6779428148Smillert if (debug_sort) 6879428148Smillert printf("month[%d]=%s\n", i, tmp); 6976dd1399Smillert if (*tmp == '\0') 7079428148Smillert continue; 7176dd1399Smillert m = sort_strdup(tmp); 72b7326289Smillert len = strlen(tmp); 7379428148Smillert for (j = 0; j < len; j++) 7479428148Smillert m[j] = toupper(m[j]); 7579428148Smillert cmonths[i] = m; 7679428148Smillert } 7779428148Smillert } 7879428148Smillert } else { 7979428148Smillert if (wmonths == NULL) { 8079428148Smillert unsigned int j; 8179428148Smillert wchar_t *m; 8279428148Smillert int i; 8379428148Smillert 8479428148Smillert wmonths = sort_malloc(sizeof(wchar_t *) * 12); 8579428148Smillert for (i = 0; i < 12; i++) { 8679428148Smillert wmonths[i] = NULL; 8779428148Smillert tmp = nl_langinfo(item[i]); 8879428148Smillert if (debug_sort) 8979428148Smillert printf("month[%d]=%s\n", i, tmp); 9076dd1399Smillert if (*tmp == '\0') 9176dd1399Smillert continue; 9279428148Smillert len = strlen(tmp); 939c2d5b19Smillert m = sort_reallocarray(NULL, len + 1, 949c2d5b19Smillert sizeof(wchar_t)); 9576dd1399Smillert if (mbstowcs(m, tmp, len) == (size_t)-1) { 9676dd1399Smillert sort_free(m); 9779428148Smillert continue; 9876dd1399Smillert } 9979428148Smillert m[len] = L'\0'; 10079428148Smillert for (j = 0; j < len; j++) 10179428148Smillert m[j] = towupper(m[j]); 10279428148Smillert wmonths[i] = m; 10379428148Smillert } 10479428148Smillert } 10579428148Smillert } 10679428148Smillert } 10779428148Smillert 10879428148Smillert /* 10979428148Smillert * Compare two wide-character strings 11079428148Smillert */ 11179428148Smillert static int 11279428148Smillert wide_str_coll(const wchar_t *s1, const wchar_t *s2) 11379428148Smillert { 11479428148Smillert int ret = 0; 11579428148Smillert 11679428148Smillert errno = 0; 11779428148Smillert ret = wcscoll(s1, s2); 11879428148Smillert if (errno == EILSEQ) { 11979428148Smillert errno = 0; 12079428148Smillert ret = wcscmp(s1, s2); 12179428148Smillert if (errno != 0) { 12279428148Smillert size_t i; 12379428148Smillert for (i = 0; ; ++i) { 12479428148Smillert wchar_t c1 = s1[i]; 12579428148Smillert wchar_t c2 = s2[i]; 12679428148Smillert if (c1 == L'\0') 12779428148Smillert return (c2 == L'\0') ? 0 : -1; 12879428148Smillert if (c2 == L'\0') 12979428148Smillert return 1; 13079428148Smillert if (c1 == c2) 13179428148Smillert continue; 13279428148Smillert return (int)c1 - (int)c2; 13379428148Smillert } 13479428148Smillert } 13579428148Smillert } 13679428148Smillert return ret; 13779428148Smillert } 13879428148Smillert 13979428148Smillert /* counterparts of wcs functions */ 14079428148Smillert 14179428148Smillert void 14279428148Smillert bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix) 14379428148Smillert { 14479428148Smillert if (sort_mb_cur_max == 1) 14579428148Smillert fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix); 14679428148Smillert else 14779428148Smillert fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix); 14879428148Smillert } 14979428148Smillert 15079428148Smillert const void * 15179428148Smillert bwsrawdata(const struct bwstring *bws) 15279428148Smillert { 15379428148Smillert return &(bws->data); 15479428148Smillert } 15579428148Smillert 15679428148Smillert size_t 15779428148Smillert bwsrawlen(const struct bwstring *bws) 15879428148Smillert { 15979428148Smillert return (sort_mb_cur_max == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len); 16079428148Smillert } 16179428148Smillert 16279428148Smillert size_t 16379428148Smillert bws_memsize(const struct bwstring *bws) 16479428148Smillert { 16579428148Smillert return (sort_mb_cur_max == 1) ? (bws->len + 2 + sizeof(struct bwstring)) : 16679428148Smillert (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring)); 16779428148Smillert } 16879428148Smillert 16979428148Smillert void 17079428148Smillert bws_setlen(struct bwstring *bws, size_t newlen) 17179428148Smillert { 17279428148Smillert if (bws && newlen != bws->len && newlen <= bws->len) { 17379428148Smillert bws->len = newlen; 17479428148Smillert if (sort_mb_cur_max == 1) 17579428148Smillert bws->data.cstr[newlen] = '\0'; 17679428148Smillert else 17779428148Smillert bws->data.wstr[newlen] = L'\0'; 17879428148Smillert } 17979428148Smillert } 18079428148Smillert 18179428148Smillert /* 18279428148Smillert * Allocate a new binary string of specified size 18379428148Smillert */ 18479428148Smillert struct bwstring * 18579428148Smillert bwsalloc(size_t sz) 18679428148Smillert { 18779428148Smillert struct bwstring *ret; 18879428148Smillert 18979428148Smillert if (sort_mb_cur_max == 1) { 19079428148Smillert ret = sort_malloc(sizeof(struct bwstring) + 1 + sz); 19179428148Smillert ret->data.cstr[sz] = '\0'; 19279428148Smillert } else { 19379428148Smillert ret = sort_malloc(sizeof(struct bwstring) + 19479428148Smillert SIZEOF_WCHAR_STRING(sz + 1)); 19579428148Smillert ret->data.wstr[sz] = L'\0'; 19679428148Smillert } 19779428148Smillert ret->len = sz; 19879428148Smillert 19979428148Smillert return ret; 20079428148Smillert } 20179428148Smillert 20279428148Smillert /* 20379428148Smillert * Create a copy of binary string. 20479428148Smillert * New string size equals the length of the old string. 20579428148Smillert */ 20679428148Smillert struct bwstring * 20779428148Smillert bwsdup(const struct bwstring *s) 20879428148Smillert { 20979428148Smillert struct bwstring *ret; 21079428148Smillert 21179428148Smillert if (s == NULL) 21279428148Smillert return NULL; 21379428148Smillert 21479428148Smillert ret = bwsalloc(s->len); 21579428148Smillert 21679428148Smillert if (sort_mb_cur_max == 1) 21779428148Smillert memcpy(ret->data.cstr, s->data.cstr, s->len); 21879428148Smillert else 21979428148Smillert memcpy(ret->data.wstr, s->data.wstr, 22079428148Smillert SIZEOF_WCHAR_STRING(s->len)); 22179428148Smillert 22279428148Smillert return ret; 22379428148Smillert } 22479428148Smillert 22579428148Smillert /* 226fc5583eeSmillert * Create a new binary string from a wide character buffer. 22779428148Smillert */ 22879428148Smillert struct bwstring * 22979428148Smillert bwssbdup(const wchar_t *str, size_t len) 23079428148Smillert { 23179428148Smillert if (str == NULL) 23279428148Smillert return (len == 0) ? bwsalloc(0) : NULL; 23379428148Smillert else { 23479428148Smillert struct bwstring *ret; 23579428148Smillert size_t i; 23679428148Smillert 23779428148Smillert ret = bwsalloc(len); 23879428148Smillert 23979428148Smillert if (sort_mb_cur_max == 1) 24079428148Smillert for (i = 0; i < len; ++i) 24179428148Smillert ret->data.cstr[i] = (unsigned char) str[i]; 24279428148Smillert else 24379428148Smillert memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len)); 24479428148Smillert 24579428148Smillert return ret; 24679428148Smillert } 24779428148Smillert } 24879428148Smillert 24979428148Smillert /* 25079428148Smillert * Create a new binary string from a raw binary buffer. 25179428148Smillert */ 25279428148Smillert struct bwstring * 25379428148Smillert bwscsbdup(const unsigned char *str, size_t len) 25479428148Smillert { 25579428148Smillert struct bwstring *ret; 25679428148Smillert 25779428148Smillert ret = bwsalloc(len); 25879428148Smillert 25979428148Smillert if (str) { 26079428148Smillert if (sort_mb_cur_max == 1) 26179428148Smillert memcpy(ret->data.cstr, str, len); 26279428148Smillert else { 26379428148Smillert mbstate_t mbs; 26479428148Smillert const char *s; 26579428148Smillert size_t charlen, chars, cptr; 26679428148Smillert 26779428148Smillert chars = 0; 26879428148Smillert cptr = 0; 26979428148Smillert s = (const char *) str; 27079428148Smillert 27179428148Smillert memset(&mbs, 0, sizeof(mbs)); 27279428148Smillert 27379428148Smillert while (cptr < len) { 27479428148Smillert size_t n = sort_mb_cur_max; 27579428148Smillert 27679428148Smillert if (n > len - cptr) 27779428148Smillert n = len - cptr; 27879428148Smillert charlen = mbrlen(s + cptr, n, &mbs); 27979428148Smillert switch (charlen) { 28079428148Smillert case 0: 28179428148Smillert /* FALLTHROUGH */ 28279428148Smillert case (size_t) -1: 28379428148Smillert /* FALLTHROUGH */ 28479428148Smillert case (size_t) -2: 28579428148Smillert ret->data.wstr[chars++] = 28679428148Smillert (unsigned char) s[cptr]; 28779428148Smillert ++cptr; 28879428148Smillert break; 28979428148Smillert default: 29079428148Smillert n = mbrtowc(ret->data.wstr + (chars++), 29179428148Smillert s + cptr, charlen, &mbs); 29279428148Smillert if ((n == (size_t)-1) || (n == (size_t)-2)) 29379428148Smillert /* NOTREACHED */ 29479428148Smillert err(2, "mbrtowc error"); 29579428148Smillert cptr += charlen; 296*479c151dSjsg } 29779428148Smillert } 29879428148Smillert 29979428148Smillert ret->len = chars; 30079428148Smillert ret->data.wstr[ret->len] = L'\0'; 30179428148Smillert } 30279428148Smillert } 30379428148Smillert return ret; 30479428148Smillert } 30579428148Smillert 30679428148Smillert /* 30779428148Smillert * De-allocate object memory 30879428148Smillert */ 30979428148Smillert void 31079428148Smillert bwsfree(struct bwstring *s) 31179428148Smillert { 31279428148Smillert sort_free(s); 31379428148Smillert } 31479428148Smillert 31579428148Smillert /* 31679428148Smillert * Copy content of src binary string to dst. 31779428148Smillert * If the capacity of the dst string is not sufficient, 31879428148Smillert * then the data is truncated. 31979428148Smillert */ 32079428148Smillert size_t 32179428148Smillert bwscpy(struct bwstring *dst, const struct bwstring *src) 32279428148Smillert { 32379428148Smillert size_t nums = src->len; 32479428148Smillert 32579428148Smillert if (nums > dst->len) 32679428148Smillert nums = dst->len; 32779428148Smillert dst->len = nums; 32879428148Smillert 32979428148Smillert if (sort_mb_cur_max == 1) { 33079428148Smillert memcpy(dst->data.cstr, src->data.cstr, nums); 33179428148Smillert dst->data.cstr[dst->len] = '\0'; 33279428148Smillert } else { 33379428148Smillert memcpy(dst->data.wstr, src->data.wstr, 33479428148Smillert SIZEOF_WCHAR_STRING(nums + 1)); 33579428148Smillert dst->data.wstr[dst->len] = L'\0'; 33679428148Smillert } 33779428148Smillert 33879428148Smillert return nums; 33979428148Smillert } 34079428148Smillert 34179428148Smillert /* 34279428148Smillert * Copy content of src binary string to dst, 34379428148Smillert * with specified number of symbols to be copied. 34479428148Smillert * If the capacity of the dst string is not sufficient, 34579428148Smillert * then the data is truncated. 34679428148Smillert */ 34779428148Smillert struct bwstring * 34879428148Smillert bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size) 34979428148Smillert { 35079428148Smillert size_t nums = src->len; 35179428148Smillert 35279428148Smillert if (nums > dst->len) 35379428148Smillert nums = dst->len; 35479428148Smillert if (nums > size) 35579428148Smillert nums = size; 35679428148Smillert dst->len = nums; 35779428148Smillert 35879428148Smillert if (sort_mb_cur_max == 1) { 35979428148Smillert memcpy(dst->data.cstr, src->data.cstr, nums); 36079428148Smillert dst->data.cstr[dst->len] = '\0'; 36179428148Smillert } else { 36279428148Smillert memcpy(dst->data.wstr, src->data.wstr, 36379428148Smillert SIZEOF_WCHAR_STRING(nums + 1)); 36479428148Smillert dst->data.wstr[dst->len] = L'\0'; 36579428148Smillert } 36679428148Smillert 36779428148Smillert return dst; 36879428148Smillert } 36979428148Smillert 37079428148Smillert /* 37179428148Smillert * Copy content of src binary string to dst, 37279428148Smillert * with specified number of symbols to be copied. 37379428148Smillert * An offset value can be specified, from the start of src string. 37479428148Smillert * If the capacity of the dst string is not sufficient, 37579428148Smillert * then the data is truncated. 37679428148Smillert */ 37779428148Smillert struct bwstring * 37879428148Smillert bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset, 37979428148Smillert size_t size) 38079428148Smillert { 38179428148Smillert if (offset >= src->len) { 38279428148Smillert dst->data.wstr[0] = 0; 38379428148Smillert dst->len = 0; 38479428148Smillert } else { 38579428148Smillert size_t nums = src->len - offset; 38679428148Smillert 38779428148Smillert if (nums > dst->len) 38879428148Smillert nums = dst->len; 38979428148Smillert if (nums > size) 39079428148Smillert nums = size; 39179428148Smillert dst->len = nums; 39279428148Smillert if (sort_mb_cur_max == 1) { 39379428148Smillert memcpy(dst->data.cstr, src->data.cstr + offset, 39479428148Smillert (nums)); 39579428148Smillert dst->data.cstr[dst->len] = '\0'; 39679428148Smillert } else { 39779428148Smillert memcpy(dst->data.wstr, src->data.wstr + offset, 39879428148Smillert SIZEOF_WCHAR_STRING(nums)); 39979428148Smillert dst->data.wstr[dst->len] = L'\0'; 40079428148Smillert } 40179428148Smillert } 40279428148Smillert return dst; 40379428148Smillert } 40479428148Smillert 40579428148Smillert /* 40679428148Smillert * Write binary string to the file. 40779428148Smillert * The output is ended either with '\n' (nl == true) 40879428148Smillert * or '\0' (nl == false). 40979428148Smillert */ 41079428148Smillert size_t 41179428148Smillert bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended) 41279428148Smillert { 41379428148Smillert if (sort_mb_cur_max == 1) { 41479428148Smillert size_t len = bws->len; 41579428148Smillert 41679428148Smillert if (!zero_ended) { 41779428148Smillert bws->data.cstr[len] = '\n'; 41879428148Smillert 41979428148Smillert if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) 42079428148Smillert err(2, NULL); 42179428148Smillert 42279428148Smillert bws->data.cstr[len] = '\0'; 42379428148Smillert } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1) 42479428148Smillert err(2, NULL); 42579428148Smillert 42679428148Smillert return len + 1; 42779428148Smillert 42879428148Smillert } else { 42979428148Smillert wchar_t eols; 43079428148Smillert size_t printed = 0; 43179428148Smillert 43279428148Smillert eols = zero_ended ? btowc('\0') : btowc('\n'); 43379428148Smillert 43479428148Smillert while (printed < BWSLEN(bws)) { 43579428148Smillert const wchar_t *s = bws->data.wstr + printed; 43679428148Smillert 43779428148Smillert if (*s == L'\0') { 43879428148Smillert int nums; 43979428148Smillert 44079428148Smillert nums = fwprintf(f, L"%lc", *s); 44179428148Smillert 44279428148Smillert if (nums != 1) 44379428148Smillert err(2, NULL); 44479428148Smillert ++printed; 44579428148Smillert } else { 44679428148Smillert int nums; 44779428148Smillert 44879428148Smillert nums = fwprintf(f, L"%ls", s); 44979428148Smillert 45079428148Smillert if (nums < 1) 45179428148Smillert err(2, NULL); 45279428148Smillert printed += nums; 45379428148Smillert } 45479428148Smillert } 45579428148Smillert fwprintf(f, L"%lc", eols); 45679428148Smillert return printed + 1; 45779428148Smillert } 45879428148Smillert } 45979428148Smillert 46079428148Smillert /* 46179428148Smillert * Allocate and read a binary string from file. 46279428148Smillert * The strings are nl-ended or zero-ended, depending on the sort setting. 46379428148Smillert */ 46479428148Smillert struct bwstring * 46579428148Smillert bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb) 46679428148Smillert { 46779428148Smillert wint_t eols; 46879428148Smillert 46979428148Smillert eols = zero_ended ? btowc('\0') : btowc('\n'); 47079428148Smillert 47179428148Smillert if (!zero_ended && (sort_mb_cur_max > 1)) { 47279428148Smillert wchar_t *ret; 47379428148Smillert 47479428148Smillert ret = fgetwln(f, len); 47579428148Smillert 47679428148Smillert if (ret == NULL) { 47779428148Smillert if (!feof(f)) 47879428148Smillert err(2, NULL); 47979428148Smillert return NULL; 48079428148Smillert } 48179428148Smillert if (*len > 0) { 48279428148Smillert if (ret[*len - 1] == (wchar_t)eols) 48379428148Smillert --(*len); 48479428148Smillert } 48579428148Smillert return bwssbdup(ret, *len); 48679428148Smillert 48779428148Smillert } else if (!zero_ended && (sort_mb_cur_max == 1)) { 48879428148Smillert char *ret; 48979428148Smillert 49079428148Smillert ret = fgetln(f, len); 49179428148Smillert 49279428148Smillert if (ret == NULL) { 49379428148Smillert if (!feof(f)) 49479428148Smillert err(2, NULL); 49579428148Smillert return NULL; 49679428148Smillert } 49779428148Smillert if (*len > 0) { 49879428148Smillert if (ret[*len - 1] == '\n') 49979428148Smillert --(*len); 50079428148Smillert } 50179428148Smillert return bwscsbdup((unsigned char *)ret, *len); 50279428148Smillert 50379428148Smillert } else { 50479428148Smillert *len = 0; 50579428148Smillert 50679428148Smillert if (feof(f)) 50779428148Smillert return NULL; 50879428148Smillert 50979428148Smillert if (2 >= rb->fgetwln_z_buffer_size) { 51079428148Smillert rb->fgetwln_z_buffer_size += 256; 51179428148Smillert rb->fgetwln_z_buffer = 51279428148Smillert sort_reallocarray(rb->fgetwln_z_buffer, 51379428148Smillert rb->fgetwln_z_buffer_size, sizeof(wchar_t)); 51479428148Smillert } 51579428148Smillert rb->fgetwln_z_buffer[*len] = 0; 51679428148Smillert 517c9bc8d3fSmillert if (sort_mb_cur_max == 1) { 51879428148Smillert while (!feof(f)) { 51979428148Smillert int c; 52079428148Smillert 52179428148Smillert c = fgetc(f); 52279428148Smillert 52379428148Smillert if (c == EOF) { 52479428148Smillert if (*len == 0) 52579428148Smillert return NULL; 52679428148Smillert goto line_read_done; 52779428148Smillert } 52879428148Smillert if (c == eols) 52979428148Smillert goto line_read_done; 53079428148Smillert 53179428148Smillert if (*len + 1 >= rb->fgetwln_z_buffer_size) { 53279428148Smillert rb->fgetwln_z_buffer_size += 256; 53379428148Smillert rb->fgetwln_z_buffer = 53479428148Smillert sort_reallocarray(rb->fgetwln_z_buffer, 53579428148Smillert rb->fgetwln_z_buffer_size, sizeof(wchar_t)); 53679428148Smillert } 53779428148Smillert 53879428148Smillert rb->fgetwln_z_buffer[*len] = c; 53979428148Smillert rb->fgetwln_z_buffer[++(*len)] = 0; 54079428148Smillert } 541c9bc8d3fSmillert } else { 54279428148Smillert while (!feof(f)) { 54379428148Smillert wint_t c = 0; 54479428148Smillert 54579428148Smillert c = fgetwc(f); 54679428148Smillert 54779428148Smillert if (c == WEOF) { 54879428148Smillert if (*len == 0) 54979428148Smillert return NULL; 55079428148Smillert goto line_read_done; 55179428148Smillert } 55279428148Smillert if (c == eols) 55379428148Smillert goto line_read_done; 55479428148Smillert 55579428148Smillert if (*len + 1 >= rb->fgetwln_z_buffer_size) { 55679428148Smillert rb->fgetwln_z_buffer_size += 256; 55779428148Smillert rb->fgetwln_z_buffer = 55879428148Smillert sort_reallocarray(rb->fgetwln_z_buffer, 55979428148Smillert rb->fgetwln_z_buffer_size, sizeof(wchar_t)); 56079428148Smillert } 56179428148Smillert 56279428148Smillert rb->fgetwln_z_buffer[*len] = c; 56379428148Smillert rb->fgetwln_z_buffer[++(*len)] = 0; 56479428148Smillert } 565c9bc8d3fSmillert } 56679428148Smillert 56779428148Smillert line_read_done: 56879428148Smillert /* we do not count the last 0 */ 56979428148Smillert return bwssbdup(rb->fgetwln_z_buffer, *len); 57079428148Smillert } 57179428148Smillert } 57279428148Smillert 57379428148Smillert int 57479428148Smillert bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2, 57579428148Smillert size_t offset, size_t len) 57679428148Smillert { 57779428148Smillert size_t cmp_len, len1, len2; 57879428148Smillert int res = 0; 57979428148Smillert 58079428148Smillert len1 = bws1->len; 58179428148Smillert len2 = bws2->len; 58279428148Smillert 58379428148Smillert if (len1 <= offset) { 58479428148Smillert return (len2 <= offset) ? 0 : -1; 58579428148Smillert } else { 58679428148Smillert if (len2 <= offset) 58779428148Smillert return 1; 58879428148Smillert else { 58979428148Smillert len1 -= offset; 59079428148Smillert len2 -= offset; 59179428148Smillert 59279428148Smillert cmp_len = len1; 59379428148Smillert 59479428148Smillert if (len2 < cmp_len) 59579428148Smillert cmp_len = len2; 59679428148Smillert 59779428148Smillert if (len < cmp_len) 59879428148Smillert cmp_len = len; 59979428148Smillert 60079428148Smillert if (sort_mb_cur_max == 1) { 60179428148Smillert const unsigned char *s1, *s2; 60279428148Smillert 60379428148Smillert s1 = bws1->data.cstr + offset; 60479428148Smillert s2 = bws2->data.cstr + offset; 60579428148Smillert 60679428148Smillert res = memcmp(s1, s2, cmp_len); 60779428148Smillert 60879428148Smillert } else { 60979428148Smillert const wchar_t *s1, *s2; 61079428148Smillert 61179428148Smillert s1 = bws1->data.wstr + offset; 61279428148Smillert s2 = bws2->data.wstr + offset; 61379428148Smillert 61479428148Smillert res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len)); 61579428148Smillert } 61679428148Smillert } 61779428148Smillert } 61879428148Smillert 61979428148Smillert if (res == 0) { 62079428148Smillert if (len1 < cmp_len && len1 < len2) 62179428148Smillert res = -1; 62279428148Smillert else if (len2 < cmp_len && len2 < len1) 62379428148Smillert res = +1; 62479428148Smillert } 62579428148Smillert 62679428148Smillert return res; 62779428148Smillert } 62879428148Smillert 62979428148Smillert int 63079428148Smillert bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 63179428148Smillert { 63279428148Smillert size_t len1, len2, cmp_len; 63379428148Smillert int res; 63479428148Smillert 63579428148Smillert len1 = bws1->len; 63679428148Smillert len2 = bws2->len; 63779428148Smillert 63879428148Smillert len1 -= offset; 63979428148Smillert len2 -= offset; 64079428148Smillert 64179428148Smillert cmp_len = len1; 64279428148Smillert 64379428148Smillert if (len2 < cmp_len) 64479428148Smillert cmp_len = len2; 64579428148Smillert 64679428148Smillert res = bwsncmp(bws1, bws2, offset, cmp_len); 64779428148Smillert 64879428148Smillert if (res == 0) { 64979428148Smillert if (len1 < len2) 65079428148Smillert res = -1; 65179428148Smillert else if (len2 < len1) 65279428148Smillert res = +1; 65379428148Smillert } 65479428148Smillert 65579428148Smillert return res; 65679428148Smillert } 65779428148Smillert 65879428148Smillert int 65979428148Smillert bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len) 66079428148Smillert { 66179428148Smillert wchar_t c1, c2; 66279428148Smillert size_t i = 0; 66379428148Smillert 66479428148Smillert for (i = 0; i < len; ++i) { 66579428148Smillert c1 = bws_get_iter_value(iter1); 66679428148Smillert c2 = bws_get_iter_value(iter2); 66779428148Smillert if (c1 != c2) 66879428148Smillert return c1 - c2; 66979428148Smillert iter1 = bws_iterator_inc(iter1, 1); 67079428148Smillert iter2 = bws_iterator_inc(iter2, 1); 67179428148Smillert } 67279428148Smillert 67379428148Smillert return 0; 67479428148Smillert } 67579428148Smillert 67679428148Smillert int 67779428148Smillert bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset) 67879428148Smillert { 67979428148Smillert size_t len1, len2; 68079428148Smillert 68179428148Smillert len1 = bws1->len; 68279428148Smillert len2 = bws2->len; 68379428148Smillert 68479428148Smillert if (len1 <= offset) 68579428148Smillert return (len2 <= offset) ? 0 : -1; 68652e4174eSschwarze 68779428148Smillert if (len2 <= offset) 68879428148Smillert return 1; 68952e4174eSschwarze 69079428148Smillert len1 -= offset; 69179428148Smillert len2 -= offset; 69279428148Smillert 69379428148Smillert if (sort_mb_cur_max == 1) { 69479428148Smillert const unsigned char *s1, *s2; 69552e4174eSschwarze int res; 69679428148Smillert 69779428148Smillert s1 = bws1->data.cstr + offset; 69879428148Smillert s2 = bws2->data.cstr + offset; 69979428148Smillert 70079428148Smillert if (len1 > len2) { 70179428148Smillert res = memcmp(s1, s2, len2); 70279428148Smillert if (!res) 70379428148Smillert res = +1; 70479428148Smillert } else if (len1 < len2) { 70579428148Smillert res = memcmp(s1, s2, len1); 70679428148Smillert if (!res) 70779428148Smillert res = -1; 70879428148Smillert } else 70979428148Smillert res = memcmp(s1, s2, len1); 71079428148Smillert 71179428148Smillert return res; 71279428148Smillert } else { 71379428148Smillert const wchar_t *s1, *s2; 71479428148Smillert size_t i, maxlen; 71579428148Smillert int res = 0; 71679428148Smillert 71779428148Smillert s1 = bws1->data.wstr + offset; 71879428148Smillert s2 = bws2->data.wstr + offset; 71979428148Smillert 72079428148Smillert i = 0; 72179428148Smillert maxlen = len1; 72279428148Smillert 72379428148Smillert if (maxlen > len2) 72479428148Smillert maxlen = len2; 72579428148Smillert 72679428148Smillert while (i < maxlen) { 72779428148Smillert 72879428148Smillert /* goto next non-zero part: */ 729eedbfd81Sschwarze while (i < maxlen && 730eedbfd81Sschwarze s1[i] == L'\0' && s2[i] == L'\0') 73179428148Smillert ++i; 73279428148Smillert 73379428148Smillert if (i >= maxlen) 73479428148Smillert break; 73579428148Smillert 736eedbfd81Sschwarze if (s1[i] == L'\0') { 737eedbfd81Sschwarze if (s2[i] == L'\0') 73879428148Smillert /* NOTREACHED */ 73979428148Smillert err(2, "bwscoll error 1"); 74079428148Smillert else 74179428148Smillert return -1; 742eedbfd81Sschwarze } else if (s2[i] == L'\0') 74379428148Smillert return 1; 74479428148Smillert 74579428148Smillert res = wide_str_coll(s1 + i, s2 + i); 74679428148Smillert if (res) 74779428148Smillert return res; 74879428148Smillert 749eedbfd81Sschwarze while (i < maxlen && s1[i] != L'\0' && s2[i] != L'\0') 75079428148Smillert ++i; 75179428148Smillert 75279428148Smillert if (i >= maxlen) 75379428148Smillert break; 75479428148Smillert 755eedbfd81Sschwarze if (s1[i] == L'\0') { 756eedbfd81Sschwarze if (s2[i] == L'\0') { 75779428148Smillert ++i; 75879428148Smillert continue; 75979428148Smillert } else 76079428148Smillert return -1; 761eedbfd81Sschwarze } else if (s2[i] == L'\0') 76279428148Smillert return 1; 76379428148Smillert else 76479428148Smillert /* NOTREACHED */ 76579428148Smillert err(2, "bwscoll error 2"); 76679428148Smillert } 76779428148Smillert 76879428148Smillert if (len1 == len2) 76979428148Smillert return 0; 77079428148Smillert return len1 < len2 ? -1 : 1; 77179428148Smillert } 77279428148Smillert } 77379428148Smillert 77479428148Smillert /* 77579428148Smillert * Correction of the system API 77679428148Smillert */ 77779428148Smillert double 77879428148Smillert bwstod(struct bwstring *s0, bool *empty) 77979428148Smillert { 78079428148Smillert double ret = 0; 78179428148Smillert 78279428148Smillert if (sort_mb_cur_max == 1) { 78379428148Smillert char *ep, *end, *s; 78479428148Smillert 78579428148Smillert s = (char *)s0->data.cstr; 78679428148Smillert end = s + s0->len; 78779428148Smillert ep = NULL; 78879428148Smillert 78979428148Smillert while (isblank((unsigned char)*s) && s < end) 79079428148Smillert ++s; 79179428148Smillert 79279428148Smillert if (!isprint((unsigned char)*s)) { 79379428148Smillert *empty = true; 79479428148Smillert return 0; 79579428148Smillert } 79679428148Smillert 79779428148Smillert ret = strtod(s, &ep); 79879428148Smillert if (ep == s) { 79979428148Smillert *empty = true; 80079428148Smillert return 0; 80179428148Smillert } 80279428148Smillert } else { 80379428148Smillert wchar_t *end, *ep, *s; 80479428148Smillert 80579428148Smillert s = s0->data.wstr; 80679428148Smillert end = s + s0->len; 80779428148Smillert ep = NULL; 80879428148Smillert 80979428148Smillert while (iswblank(*s) && s < end) 81079428148Smillert ++s; 81179428148Smillert 81279428148Smillert if (!iswprint(*s)) { 81379428148Smillert *empty = true; 81479428148Smillert return 0; 81579428148Smillert } 81679428148Smillert 81779428148Smillert ret = wcstod(s, &ep); 81879428148Smillert if (ep == s) { 81979428148Smillert *empty = true; 82079428148Smillert return 0; 82179428148Smillert } 82279428148Smillert } 82379428148Smillert 82479428148Smillert *empty = false; 82579428148Smillert return ret; 82679428148Smillert } 82779428148Smillert 82879428148Smillert /* 82979428148Smillert * A helper function for monthcoll. If a line matches 83079428148Smillert * a month name, it returns (number of the month - 1), 83179428148Smillert * while if there is no match, it just return -1. 83279428148Smillert */ 83379428148Smillert int 83479428148Smillert bws_month_score(const struct bwstring *s0) 83579428148Smillert { 83679428148Smillert if (sort_mb_cur_max == 1) { 83779428148Smillert const char *end, *s; 83879428148Smillert int i; 83979428148Smillert 84079428148Smillert s = (char *)s0->data.cstr; 84179428148Smillert end = s + s0->len; 84279428148Smillert 84379428148Smillert while (isblank((unsigned char)*s) && s < end) 84479428148Smillert ++s; 84579428148Smillert 84679428148Smillert for (i = 11; i >= 0; --i) { 84779428148Smillert if (cmonths[i] && 84879428148Smillert (s == strstr(s, cmonths[i]))) 84979428148Smillert return i; 85079428148Smillert } 85179428148Smillert } else { 85279428148Smillert const wchar_t *end, *s; 85379428148Smillert int i; 85479428148Smillert 85579428148Smillert s = s0->data.wstr; 85679428148Smillert end = s + s0->len; 85779428148Smillert 85879428148Smillert while (iswblank(*s) && s < end) 85979428148Smillert ++s; 86079428148Smillert 86179428148Smillert for (i = 11; i >= 0; --i) { 86279428148Smillert if (wmonths[i] && (s == wcsstr(s, wmonths[i]))) 86379428148Smillert return i; 86479428148Smillert } 86579428148Smillert } 86679428148Smillert 86779428148Smillert return -1; 86879428148Smillert } 86979428148Smillert 87079428148Smillert /* 87179428148Smillert * Rips out leading blanks (-b). 87279428148Smillert */ 87379428148Smillert struct bwstring * 87479428148Smillert ignore_leading_blanks(struct bwstring *str) 87579428148Smillert { 87679428148Smillert if (sort_mb_cur_max == 1) { 87779428148Smillert unsigned char *dst, *end, *src; 87879428148Smillert 87979428148Smillert src = str->data.cstr; 88079428148Smillert dst = src; 88179428148Smillert end = src + str->len; 88279428148Smillert 88379428148Smillert while (src < end && isblank(*src)) 88479428148Smillert ++src; 88579428148Smillert 88679428148Smillert if (src != dst) { 88779428148Smillert size_t newlen; 88879428148Smillert 88979428148Smillert newlen = BWSLEN(str) - (src - dst); 89079428148Smillert 89179428148Smillert while (src < end) { 89279428148Smillert *dst = *src; 89379428148Smillert ++dst; 89479428148Smillert ++src; 89579428148Smillert } 89679428148Smillert bws_setlen(str, newlen); 89779428148Smillert } 89879428148Smillert } else { 89979428148Smillert wchar_t *dst, *end, *src; 90079428148Smillert 90179428148Smillert src = str->data.wstr; 90279428148Smillert dst = src; 90379428148Smillert end = src + str->len; 90479428148Smillert 90579428148Smillert while (src < end && iswblank(*src)) 90679428148Smillert ++src; 90779428148Smillert 90879428148Smillert if (src != dst) { 90979428148Smillert 91079428148Smillert size_t newlen = BWSLEN(str) - (src - dst); 91179428148Smillert 91279428148Smillert while (src < end) { 91379428148Smillert *dst = *src; 91479428148Smillert ++dst; 91579428148Smillert ++src; 91679428148Smillert } 91779428148Smillert bws_setlen(str, newlen); 91879428148Smillert 91979428148Smillert } 92079428148Smillert } 92179428148Smillert return str; 92279428148Smillert } 92379428148Smillert 92479428148Smillert /* 92579428148Smillert * Rips out nonprinting characters (-i). 92679428148Smillert */ 92779428148Smillert struct bwstring * 92879428148Smillert ignore_nonprinting(struct bwstring *str) 92979428148Smillert { 93079428148Smillert size_t newlen = str->len; 93179428148Smillert 93279428148Smillert if (sort_mb_cur_max == 1) { 93379428148Smillert unsigned char *dst, *end, *src; 93479428148Smillert unsigned char c; 93579428148Smillert 93679428148Smillert src = str->data.cstr; 93779428148Smillert dst = src; 93879428148Smillert end = src + str->len; 93979428148Smillert 94079428148Smillert while (src < end) { 94179428148Smillert c = *src; 94279428148Smillert if (isprint(c)) { 94379428148Smillert *dst = c; 94479428148Smillert ++dst; 94579428148Smillert ++src; 94679428148Smillert } else { 94779428148Smillert ++src; 94879428148Smillert --newlen; 94979428148Smillert } 95079428148Smillert } 95179428148Smillert } else { 95279428148Smillert wchar_t *dst, *end, *src; 95379428148Smillert wchar_t c; 95479428148Smillert 95579428148Smillert src = str->data.wstr; 95679428148Smillert dst = src; 95779428148Smillert end = src + str->len; 95879428148Smillert 95979428148Smillert while (src < end) { 96079428148Smillert c = *src; 96179428148Smillert if (iswprint(c)) { 96279428148Smillert *dst = c; 96379428148Smillert ++dst; 96479428148Smillert ++src; 96579428148Smillert } else { 96679428148Smillert ++src; 96779428148Smillert --newlen; 96879428148Smillert } 96979428148Smillert } 97079428148Smillert } 97179428148Smillert bws_setlen(str, newlen); 97279428148Smillert 97379428148Smillert return str; 97479428148Smillert } 97579428148Smillert 97679428148Smillert /* 97779428148Smillert * Rips out any characters that are not alphanumeric characters 97879428148Smillert * nor blanks (-d). 97979428148Smillert */ 98079428148Smillert struct bwstring * 98179428148Smillert dictionary_order(struct bwstring *str) 98279428148Smillert { 98379428148Smillert size_t newlen = str->len; 98479428148Smillert 98579428148Smillert if (sort_mb_cur_max == 1) { 98679428148Smillert unsigned char *dst, *end, *src; 98779428148Smillert unsigned char c; 98879428148Smillert 98979428148Smillert src = str->data.cstr; 99079428148Smillert dst = src; 99179428148Smillert end = src + str->len; 99279428148Smillert 99379428148Smillert while (src < end) { 99479428148Smillert c = *src; 99579428148Smillert if (isalnum(c) || isblank(c)) { 99679428148Smillert *dst = c; 99779428148Smillert ++dst; 99879428148Smillert ++src; 99979428148Smillert } else { 100079428148Smillert ++src; 100179428148Smillert --newlen; 100279428148Smillert } 100379428148Smillert } 100479428148Smillert } else { 100579428148Smillert wchar_t *dst, *end, *src; 100679428148Smillert wchar_t c; 100779428148Smillert 100879428148Smillert src = str->data.wstr; 100979428148Smillert dst = src; 101079428148Smillert end = src + str->len; 101179428148Smillert 101279428148Smillert while (src < end) { 101379428148Smillert c = *src; 101479428148Smillert if (iswalnum(c) || iswblank(c)) { 101579428148Smillert *dst = c; 101679428148Smillert ++dst; 101779428148Smillert ++src; 101879428148Smillert } else { 101979428148Smillert ++src; 102079428148Smillert --newlen; 102179428148Smillert } 102279428148Smillert } 102379428148Smillert } 102479428148Smillert bws_setlen(str, newlen); 102579428148Smillert 102679428148Smillert return str; 102779428148Smillert } 102879428148Smillert 102979428148Smillert /* 103079428148Smillert * Converts string to lower case(-f). 103179428148Smillert */ 103279428148Smillert struct bwstring * 103379428148Smillert ignore_case(struct bwstring *str) 103479428148Smillert { 103579428148Smillert if (sort_mb_cur_max == 1) { 103679428148Smillert unsigned char *end, *s; 103779428148Smillert 103879428148Smillert s = str->data.cstr; 103979428148Smillert end = s + str->len; 104079428148Smillert 104179428148Smillert while (s < end) { 104279428148Smillert *s = toupper(*s); 104379428148Smillert ++s; 104479428148Smillert } 104579428148Smillert } else { 104679428148Smillert wchar_t *end, *s; 104779428148Smillert 104879428148Smillert s = str->data.wstr; 104979428148Smillert end = s + str->len; 105079428148Smillert 105179428148Smillert while (s < end) { 105279428148Smillert *s = towupper(*s); 105379428148Smillert ++s; 105479428148Smillert } 105579428148Smillert } 105679428148Smillert return str; 105779428148Smillert } 105879428148Smillert 105979428148Smillert void 106079428148Smillert bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos) 106179428148Smillert { 106279428148Smillert if (sort_mb_cur_max == 1) 106379428148Smillert warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr); 106479428148Smillert else 106579428148Smillert warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr); 106679428148Smillert } 1067