1 /* Convert multibyte character to wide character. 2 Copyright (C) 1999-2002, 2005-2022 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2008. 4 5 This file is free software: you can redistribute it and/or modify 6 it under the terms of the GNU Lesser General Public License as 7 published by the Free Software Foundation; either version 2.1 of the 8 License, or (at your option) any later version. 9 10 This file is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public License 16 along with this program. If not, see <https://www.gnu.org/licenses/>. */ 17 18 #include <config.h> 19 20 /* Specification. */ 21 #include <wchar.h> 22 23 #if GNULIB_defined_mbstate_t 24 /* Implement mbrtowc() on top of mbtowc() for the non-UTF-8 locales 25 and directly for the UTF-8 locales. */ 26 27 # include <errno.h> 28 # include <stdint.h> 29 # include <stdlib.h> 30 31 # if defined _WIN32 && !defined __CYGWIN__ 32 33 # define WIN32_LEAN_AND_MEAN /* avoid including junk */ 34 # include <windows.h> 35 36 # elif HAVE_PTHREAD_API 37 38 # include <pthread.h> 39 # if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS 40 # include <threads.h> 41 # pragma weak thrd_exit 42 # define c11_threads_in_use() (thrd_exit != NULL) 43 # else 44 # define c11_threads_in_use() 0 45 # endif 46 47 # elif HAVE_THREADS_H 48 49 # include <threads.h> 50 51 # endif 52 53 # include "attribute.h" 54 # include "verify.h" 55 # include "lc-charset-dispatch.h" 56 # include "mbtowc-lock.h" 57 58 verify (sizeof (mbstate_t) >= 4); 59 static char internal_state[4]; 60 61 size_t 62 mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) 63 { 64 # define FITS_IN_CHAR_TYPE(wc) ((wc) <= WCHAR_MAX) 65 # include "mbrtowc-impl.h" 66 } 67 68 #else 69 /* Override the system's mbrtowc() function. */ 70 71 # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ 72 # include "hard-locale.h" 73 # include <locale.h> 74 # endif 75 76 # undef mbrtowc 77 78 size_t 79 rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) 80 { 81 size_t ret; 82 wchar_t wc; 83 84 # if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG 85 if (s == NULL) 86 { 87 pwc = NULL; 88 s = ""; 89 n = 1; 90 } 91 # endif 92 93 # if MBRTOWC_EMPTY_INPUT_BUG 94 if (n == 0) 95 return (size_t) -2; 96 # endif 97 98 if (! pwc) 99 pwc = &wc; 100 101 # if MBRTOWC_RETVAL_BUG 102 { 103 static mbstate_t internal_state; 104 105 /* Override mbrtowc's internal state. We cannot call mbsinit() on the 106 hidden internal state, but we can call it on our variable. */ 107 if (ps == NULL) 108 ps = &internal_state; 109 110 if (!mbsinit (ps)) 111 { 112 /* Parse the rest of the multibyte character byte for byte. */ 113 size_t count = 0; 114 for (; n > 0; s++, n--) 115 { 116 ret = mbrtowc (&wc, s, 1, ps); 117 118 if (ret == (size_t)(-1)) 119 return (size_t)(-1); 120 count++; 121 if (ret != (size_t)(-2)) 122 { 123 /* The multibyte character has been completed. */ 124 *pwc = wc; 125 return (wc == 0 ? 0 : count); 126 } 127 } 128 return (size_t)(-2); 129 } 130 } 131 # endif 132 133 # if MBRTOWC_STORES_INCOMPLETE_BUG 134 ret = mbrtowc (&wc, s, n, ps); 135 if (ret < (size_t) -2 && pwc != NULL) 136 *pwc = wc; 137 # else 138 ret = mbrtowc (pwc, s, n, ps); 139 # endif 140 141 # if MBRTOWC_NUL_RETVAL_BUG 142 if (ret < (size_t) -2 && !*pwc) 143 return 0; 144 # endif 145 146 # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ 147 if ((size_t) -2 <= ret && n != 0 && ! hard_locale (LC_CTYPE)) 148 { 149 unsigned char uc = *s; 150 *pwc = uc; 151 return 1; 152 } 153 # endif 154 155 return ret; 156 } 157 158 #endif 159