18dffb485Schristos /* Convert multibyte character to wide character. 2*4b169a6bSchristos Copyright (C) 1999-2002, 2005-2022 Free Software Foundation, Inc. 38dffb485Schristos 4*4b169a6bSchristos This file is free software: you can redistribute it and/or modify 5*4b169a6bSchristos it under the terms of the GNU Lesser General Public License as 6*4b169a6bSchristos published by the Free Software Foundation; either version 2.1 of the 7*4b169a6bSchristos License, or (at your option) any later version. 88dffb485Schristos 9*4b169a6bSchristos This file is distributed in the hope that it will be useful, 108dffb485Schristos but WITHOUT ANY WARRANTY; without even the implied warranty of 118dffb485Schristos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12*4b169a6bSchristos GNU Lesser General Public License for more details. 138dffb485Schristos 14*4b169a6bSchristos You should have received a copy of the GNU Lesser General Public License 158dffb485Schristos along with this program. If not, see <https://www.gnu.org/licenses/>. */ 168dffb485Schristos 178dffb485Schristos /* Written by Bruno Haible <bruno@clisp.org>, 2008. */ 188dffb485Schristos 198dffb485Schristos /* This file contains the part of the body of the mbrtowc and mbrtoc32 functions 208dffb485Schristos that handles the special case of the UTF-8 encoding. */ 218dffb485Schristos 228dffb485Schristos /* Cf. unistr/u8-mbtouc.c. */ 238dffb485Schristos unsigned char c = (unsigned char) p[0]; 248dffb485Schristos 258dffb485Schristos if (c < 0x80) 268dffb485Schristos { 278dffb485Schristos if (pwc != NULL) 288dffb485Schristos *pwc = c; 298dffb485Schristos res = (c == 0 ? 0 : 1); 308dffb485Schristos goto success; 318dffb485Schristos } 328dffb485Schristos if (c >= 0xc2) 338dffb485Schristos { 348dffb485Schristos if (c < 0xe0) 358dffb485Schristos { 368dffb485Schristos if (m == 1) 378dffb485Schristos goto incomplete; 388dffb485Schristos else /* m >= 2 */ 398dffb485Schristos { 408dffb485Schristos unsigned char c2 = (unsigned char) p[1]; 418dffb485Schristos 428dffb485Schristos if ((c2 ^ 0x80) < 0x40) 438dffb485Schristos { 448dffb485Schristos if (pwc != NULL) 458dffb485Schristos *pwc = ((unsigned int) (c & 0x1f) << 6) 468dffb485Schristos | (unsigned int) (c2 ^ 0x80); 478dffb485Schristos res = 2; 488dffb485Schristos goto success; 498dffb485Schristos } 508dffb485Schristos } 518dffb485Schristos } 528dffb485Schristos else if (c < 0xf0) 538dffb485Schristos { 548dffb485Schristos if (m == 1) 558dffb485Schristos goto incomplete; 568dffb485Schristos else 578dffb485Schristos { 588dffb485Schristos unsigned char c2 = (unsigned char) p[1]; 598dffb485Schristos 608dffb485Schristos if ((c2 ^ 0x80) < 0x40 618dffb485Schristos && (c >= 0xe1 || c2 >= 0xa0) 628dffb485Schristos && (c != 0xed || c2 < 0xa0)) 638dffb485Schristos { 648dffb485Schristos if (m == 2) 658dffb485Schristos goto incomplete; 668dffb485Schristos else /* m >= 3 */ 678dffb485Schristos { 688dffb485Schristos unsigned char c3 = (unsigned char) p[2]; 698dffb485Schristos 708dffb485Schristos if ((c3 ^ 0x80) < 0x40) 718dffb485Schristos { 728dffb485Schristos unsigned int wc = 738dffb485Schristos (((unsigned int) (c & 0x0f) << 12) 748dffb485Schristos | ((unsigned int) (c2 ^ 0x80) << 6) 758dffb485Schristos | (unsigned int) (c3 ^ 0x80)); 768dffb485Schristos 778dffb485Schristos if (FITS_IN_CHAR_TYPE (wc)) 788dffb485Schristos { 798dffb485Schristos if (pwc != NULL) 808dffb485Schristos *pwc = wc; 818dffb485Schristos res = 3; 828dffb485Schristos goto success; 838dffb485Schristos } 848dffb485Schristos } 858dffb485Schristos } 868dffb485Schristos } 878dffb485Schristos } 888dffb485Schristos } 898dffb485Schristos else if (c <= 0xf4) 908dffb485Schristos { 918dffb485Schristos if (m == 1) 928dffb485Schristos goto incomplete; 938dffb485Schristos else 948dffb485Schristos { 958dffb485Schristos unsigned char c2 = (unsigned char) p[1]; 968dffb485Schristos 978dffb485Schristos if ((c2 ^ 0x80) < 0x40 988dffb485Schristos && (c >= 0xf1 || c2 >= 0x90) 99*4b169a6bSchristos && (c < 0xf4 || (/* c == 0xf4 && */ c2 < 0x90))) 1008dffb485Schristos { 1018dffb485Schristos if (m == 2) 1028dffb485Schristos goto incomplete; 1038dffb485Schristos else 1048dffb485Schristos { 1058dffb485Schristos unsigned char c3 = (unsigned char) p[2]; 1068dffb485Schristos 1078dffb485Schristos if ((c3 ^ 0x80) < 0x40) 1088dffb485Schristos { 1098dffb485Schristos if (m == 3) 1108dffb485Schristos goto incomplete; 1118dffb485Schristos else /* m >= 4 */ 1128dffb485Schristos { 1138dffb485Schristos unsigned char c4 = (unsigned char) p[3]; 1148dffb485Schristos 1158dffb485Schristos if ((c4 ^ 0x80) < 0x40) 1168dffb485Schristos { 1178dffb485Schristos unsigned int wc = 1188dffb485Schristos (((unsigned int) (c & 0x07) << 18) 1198dffb485Schristos | ((unsigned int) (c2 ^ 0x80) << 12) 1208dffb485Schristos | ((unsigned int) (c3 ^ 0x80) << 6) 1218dffb485Schristos | (unsigned int) (c4 ^ 0x80)); 1228dffb485Schristos 1238dffb485Schristos if (FITS_IN_CHAR_TYPE (wc)) 1248dffb485Schristos { 1258dffb485Schristos if (pwc != NULL) 1268dffb485Schristos *pwc = wc; 1278dffb485Schristos res = 4; 1288dffb485Schristos goto success; 1298dffb485Schristos } 1308dffb485Schristos } 1318dffb485Schristos } 1328dffb485Schristos } 1338dffb485Schristos } 1348dffb485Schristos } 1358dffb485Schristos } 1368dffb485Schristos } 1378dffb485Schristos } 1388dffb485Schristos goto invalid; 139