xref: /dflybsd-src/gnu/usr.bin/diff/libdiffutils/unistr.h (revision d37f73b6391aefe5c2d10f0664242f4ee7f1c7bd)
1 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
2 /* Elementary Unicode string functions.
3    Copyright (C) 2001-2002, 2005-2013 Free Software Foundation, Inc.
4 
5    This program is free software: you can redistribute it and/or modify it
6    under the terms of the GNU General Public License as published
7    by the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17 
18 #ifndef _UNISTR_H
19 #define _UNISTR_H
20 
21 #include "unitypes.h"
22 
23 /* Get common macros for C.  */
24 #include "unused-parameter.h"
25 
26 /* Get bool.  */
27 #include <stdbool.h>
28 
29 /* Get size_t.  */
30 #include <stddef.h>
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 
36 
37 /* Conventions:
38 
39    All functions prefixed with u8_ operate on UTF-8 encoded strings.
40    Their unit is a uint8_t (1 byte).
41 
42    All functions prefixed with u16_ operate on UTF-16 encoded strings.
43    Their unit is a uint16_t (a 2-byte word).
44 
45    All functions prefixed with u32_ operate on UCS-4 encoded strings.
46    Their unit is a uint32_t (a 4-byte word).
47 
48    All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
49    n units.
50 
51    All arguments starting with "str" and the arguments of functions starting
52    with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string
53    which terminates at the first NUL unit.  This termination unit is
54    considered part of the string for all memory allocation purposes, but
55    is not considered part of the string for all other logical purposes.
56 
57    Functions returning a string result take a (resultbuf, lengthp) argument
58    pair.  If resultbuf is not NULL and the result fits into *lengthp units,
59    it is put in resultbuf, and resultbuf is returned.  Otherwise, a freshly
60    allocated string is returned.  In both cases, *lengthp is set to the
61    length (number of units) of the returned string.  In case of error,
62    NULL is returned and errno is set.  */
63 
64 
65 /* Elementary string checks.  */
66 
67 /* Check whether a UTF-8 string is well-formed.
68    Return NULL if valid, or a pointer to the first invalid unit otherwise.  */
69 extern const uint8_t *
70        u8_check (const uint8_t *s, size_t n)
71        _UC_ATTRIBUTE_PURE;
72 
73 /* Check whether a UTF-16 string is well-formed.
74    Return NULL if valid, or a pointer to the first invalid unit otherwise.  */
75 extern const uint16_t *
76        u16_check (const uint16_t *s, size_t n)
77        _UC_ATTRIBUTE_PURE;
78 
79 /* Check whether a UCS-4 string is well-formed.
80    Return NULL if valid, or a pointer to the first invalid unit otherwise.  */
81 extern const uint32_t *
82        u32_check (const uint32_t *s, size_t n)
83        _UC_ATTRIBUTE_PURE;
84 
85 
86 /* Elementary string conversions.  */
87 
88 /* Convert a UTF-8 string to a UTF-16 string.  */
89 extern uint16_t *
90        u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf,
91                   size_t *lengthp);
92 
93 /* Convert a UTF-8 string to a UCS-4 string.  */
94 extern uint32_t *
95        u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf,
96                   size_t *lengthp);
97 
98 /* Convert a UTF-16 string to a UTF-8 string.  */
99 extern uint8_t *
100        u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf,
101                   size_t *lengthp);
102 
103 /* Convert a UTF-16 string to a UCS-4 string.  */
104 extern uint32_t *
105        u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf,
106                    size_t *lengthp);
107 
108 /* Convert a UCS-4 string to a UTF-8 string.  */
109 extern uint8_t *
110        u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf,
111                   size_t *lengthp);
112 
113 /* Convert a UCS-4 string to a UTF-16 string.  */
114 extern uint16_t *
115        u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf,
116                    size_t *lengthp);
117 
118 
119 /* Elementary string functions.  */
120 
121 /* Return the length (number of units) of the first character in S, which is
122    no longer than N.  Return 0 if it is the NUL character.  Return -1 upon
123    failure.  */
124 /* Similar to mblen(), except that s must not be NULL.  */
125 extern int
126        u8_mblen (const uint8_t *s, size_t n)
127        _UC_ATTRIBUTE_PURE;
128 extern int
129        u16_mblen (const uint16_t *s, size_t n)
130        _UC_ATTRIBUTE_PURE;
131 extern int
132        u32_mblen (const uint32_t *s, size_t n)
133        _UC_ATTRIBUTE_PURE;
134 
135 /* Return the length (number of units) of the first character in S, putting
136    its 'ucs4_t' representation in *PUC.  Upon failure, *PUC is set to 0xfffd,
137    and an appropriate number of units is returned.
138    The number of available units, N, must be > 0.  */
139 /* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0,
140    and the NUL character is not treated specially.  */
141 /* The variants with _safe suffix are safe, even if the library is compiled
142    without --enable-safety.  */
143 
144 #if GNULIB_UNISTR_U8_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
145 # if !HAVE_INLINE
146 extern int
147        u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n);
148 # else
149 extern int
150        u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n);
151 static inline int
152 u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
153 {
154   uint8_t c = *s;
155 
156   if (c < 0x80)
157     {
158       *puc = c;
159       return 1;
160     }
161   else
162     return u8_mbtouc_unsafe_aux (puc, s, n);
163 }
164 # endif
165 #endif
166 
167 #if GNULIB_UNISTR_U16_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
168 # if !HAVE_INLINE
169 extern int
170        u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n);
171 # else
172 extern int
173        u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n);
174 static inline int
175 u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n)
176 {
177   uint16_t c = *s;
178 
179   if (c < 0xd800 || c >= 0xe000)
180     {
181       *puc = c;
182       return 1;
183     }
184   else
185     return u16_mbtouc_unsafe_aux (puc, s, n);
186 }
187 # endif
188 #endif
189 
190 #if GNULIB_UNISTR_U32_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
191 # if !HAVE_INLINE
192 extern int
193        u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n);
194 # else
195 static inline int
196 u32_mbtouc_unsafe (ucs4_t *puc,
197                    const uint32_t *s, size_t n _GL_UNUSED_PARAMETER)
198 {
199   uint32_t c = *s;
200 
201 #  if CONFIG_UNICODE_SAFETY
202   if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
203 #  endif
204     *puc = c;
205 #  if CONFIG_UNICODE_SAFETY
206   else
207     /* invalid multibyte character */
208     *puc = 0xfffd;
209 #  endif
210   return 1;
211 }
212 # endif
213 #endif
214 
215 #if GNULIB_UNISTR_U8_MBTOUC || HAVE_LIBUNISTRING
216 # if !HAVE_INLINE
217 extern int
218        u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n);
219 # else
220 extern int
221        u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n);
222 static inline int
223 u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
224 {
225   uint8_t c = *s;
226 
227   if (c < 0x80)
228     {
229       *puc = c;
230       return 1;
231     }
232   else
233     return u8_mbtouc_aux (puc, s, n);
234 }
235 # endif
236 #endif
237 
238 #if GNULIB_UNISTR_U16_MBTOUC || HAVE_LIBUNISTRING
239 # if !HAVE_INLINE
240 extern int
241        u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n);
242 # else
243 extern int
244        u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n);
245 static inline int
246 u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
247 {
248   uint16_t c = *s;
249 
250   if (c < 0xd800 || c >= 0xe000)
251     {
252       *puc = c;
253       return 1;
254     }
255   else
256     return u16_mbtouc_aux (puc, s, n);
257 }
258 # endif
259 #endif
260 
261 #if GNULIB_UNISTR_U32_MBTOUC || HAVE_LIBUNISTRING
262 # if !HAVE_INLINE
263 extern int
264        u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n);
265 # else
266 static inline int
267 u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _GL_UNUSED_PARAMETER)
268 {
269   uint32_t c = *s;
270 
271   if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
272     *puc = c;
273   else
274     /* invalid multibyte character */
275     *puc = 0xfffd;
276   return 1;
277 }
278 # endif
279 #endif
280 
281 /* Return the length (number of units) of the first character in S, putting
282    its 'ucs4_t' representation in *PUC.  Upon failure, *PUC is set to 0xfffd,
283    and -1 is returned for an invalid sequence of units, -2 is returned for an
284    incomplete sequence of units.
285    The number of available units, N, must be > 0.  */
286 /* Similar to u*_mbtouc(), except that the return value gives more details
287    about the failure, similar to mbrtowc().  */
288 
289 #if GNULIB_UNISTR_U8_MBTOUCR || HAVE_LIBUNISTRING
290 extern int
291        u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n);
292 #endif
293 
294 #if GNULIB_UNISTR_U16_MBTOUCR || HAVE_LIBUNISTRING
295 extern int
296        u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n);
297 #endif
298 
299 #if GNULIB_UNISTR_U32_MBTOUCR || HAVE_LIBUNISTRING
300 extern int
301        u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n);
302 #endif
303 
304 /* Put the multibyte character represented by UC in S, returning its
305    length.  Return -1 upon failure, -2 if the number of available units, N,
306    is too small.  The latter case cannot occur if N >= 6/2/1, respectively.  */
307 /* Similar to wctomb(), except that s must not be NULL, and the argument n
308    must be specified.  */
309 
310 #if GNULIB_UNISTR_U8_UCTOMB || HAVE_LIBUNISTRING
311 /* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr.  */
312 extern int
313        u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n);
314 # if !HAVE_INLINE
315 extern int
316        u8_uctomb (uint8_t *s, ucs4_t uc, int n);
317 # else
318 static inline int
319 u8_uctomb (uint8_t *s, ucs4_t uc, int n)
320 {
321   if (uc < 0x80 && n > 0)
322     {
323       s[0] = uc;
324       return 1;
325     }
326   else
327     return u8_uctomb_aux (s, uc, n);
328 }
329 # endif
330 #endif
331 
332 #if GNULIB_UNISTR_U16_UCTOMB || HAVE_LIBUNISTRING
333 /* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr.  */
334 extern int
335        u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n);
336 # if !HAVE_INLINE
337 extern int
338        u16_uctomb (uint16_t *s, ucs4_t uc, int n);
339 # else
340 static inline int
341 u16_uctomb (uint16_t *s, ucs4_t uc, int n)
342 {
343   if (uc < 0xd800 && n > 0)
344     {
345       s[0] = uc;
346       return 1;
347     }
348   else
349     return u16_uctomb_aux (s, uc, n);
350 }
351 # endif
352 #endif
353 
354 #if GNULIB_UNISTR_U32_UCTOMB || HAVE_LIBUNISTRING
355 # if !HAVE_INLINE
356 extern int
357        u32_uctomb (uint32_t *s, ucs4_t uc, int n);
358 # else
359 static inline int
360 u32_uctomb (uint32_t *s, ucs4_t uc, int n)
361 {
362   if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000))
363     {
364       if (n > 0)
365         {
366           *s = uc;
367           return 1;
368         }
369       else
370         return -2;
371     }
372   else
373     return -1;
374 }
375 # endif
376 #endif
377 
378 /* Copy N units from SRC to DEST.  */
379 /* Similar to memcpy().  */
380 extern uint8_t *
381        u8_cpy (uint8_t *dest, const uint8_t *src, size_t n);
382 extern uint16_t *
383        u16_cpy (uint16_t *dest, const uint16_t *src, size_t n);
384 extern uint32_t *
385        u32_cpy (uint32_t *dest, const uint32_t *src, size_t n);
386 
387 /* Copy N units from SRC to DEST, guaranteeing correct behavior for
388    overlapping memory areas.  */
389 /* Similar to memmove().  */
390 extern uint8_t *
391        u8_move (uint8_t *dest, const uint8_t *src, size_t n);
392 extern uint16_t *
393        u16_move (uint16_t *dest, const uint16_t *src, size_t n);
394 extern uint32_t *
395        u32_move (uint32_t *dest, const uint32_t *src, size_t n);
396 
397 /* Set the first N characters of S to UC.  UC should be a character that
398    occupies only 1 unit.  */
399 /* Similar to memset().  */
400 extern uint8_t *
401        u8_set (uint8_t *s, ucs4_t uc, size_t n);
402 extern uint16_t *
403        u16_set (uint16_t *s, ucs4_t uc, size_t n);
404 extern uint32_t *
405        u32_set (uint32_t *s, ucs4_t uc, size_t n);
406 
407 /* Compare S1 and S2, each of length N.  */
408 /* Similar to memcmp().  */
409 extern int
410        u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n)
411        _UC_ATTRIBUTE_PURE;
412 extern int
413        u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n)
414        _UC_ATTRIBUTE_PURE;
415 extern int
416        u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n)
417        _UC_ATTRIBUTE_PURE;
418 
419 /* Compare S1 and S2.  */
420 /* Similar to the gnulib function memcmp2().  */
421 extern int
422        u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2)
423        _UC_ATTRIBUTE_PURE;
424 extern int
425        u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2)
426        _UC_ATTRIBUTE_PURE;
427 extern int
428        u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2)
429        _UC_ATTRIBUTE_PURE;
430 
431 /* Search the string at S for UC.  */
432 /* Similar to memchr().  */
433 extern uint8_t *
434        u8_chr (const uint8_t *s, size_t n, ucs4_t uc)
435        _UC_ATTRIBUTE_PURE;
436 extern uint16_t *
437        u16_chr (const uint16_t *s, size_t n, ucs4_t uc)
438        _UC_ATTRIBUTE_PURE;
439 extern uint32_t *
440        u32_chr (const uint32_t *s, size_t n, ucs4_t uc)
441        _UC_ATTRIBUTE_PURE;
442 
443 /* Count the number of Unicode characters in the N units from S.  */
444 /* Similar to mbsnlen().  */
445 extern size_t
446        u8_mbsnlen (const uint8_t *s, size_t n)
447        _UC_ATTRIBUTE_PURE;
448 extern size_t
449        u16_mbsnlen (const uint16_t *s, size_t n)
450        _UC_ATTRIBUTE_PURE;
451 extern size_t
452        u32_mbsnlen (const uint32_t *s, size_t n)
453        _UC_ATTRIBUTE_PURE;
454 
455 /* Elementary string functions with memory allocation.  */
456 
457 /* Make a freshly allocated copy of S, of length N.  */
458 extern uint8_t *
459        u8_cpy_alloc (const uint8_t *s, size_t n);
460 extern uint16_t *
461        u16_cpy_alloc (const uint16_t *s, size_t n);
462 extern uint32_t *
463        u32_cpy_alloc (const uint32_t *s, size_t n);
464 
465 /* Elementary string functions on NUL terminated strings.  */
466 
467 /* Return the length (number of units) of the first character in S.
468    Return 0 if it is the NUL character.  Return -1 upon failure.  */
469 extern int
470        u8_strmblen (const uint8_t *s)
471        _UC_ATTRIBUTE_PURE;
472 extern int
473        u16_strmblen (const uint16_t *s)
474        _UC_ATTRIBUTE_PURE;
475 extern int
476        u32_strmblen (const uint32_t *s)
477        _UC_ATTRIBUTE_PURE;
478 
479 /* Return the length (number of units) of the first character in S, putting
480    its 'ucs4_t' representation in *PUC.  Return 0 if it is the NUL
481    character.  Return -1 upon failure.  */
482 extern int
483        u8_strmbtouc (ucs4_t *puc, const uint8_t *s);
484 extern int
485        u16_strmbtouc (ucs4_t *puc, const uint16_t *s);
486 extern int
487        u32_strmbtouc (ucs4_t *puc, const uint32_t *s);
488 
489 /* Forward iteration step.  Advances the pointer past the next character,
490    or returns NULL if the end of the string has been reached.  Puts the
491    character's 'ucs4_t' representation in *PUC.  */
492 extern const uint8_t *
493        u8_next (ucs4_t *puc, const uint8_t *s);
494 extern const uint16_t *
495        u16_next (ucs4_t *puc, const uint16_t *s);
496 extern const uint32_t *
497        u32_next (ucs4_t *puc, const uint32_t *s);
498 
499 /* Backward iteration step.  Advances the pointer to point to the previous
500    character, or returns NULL if the beginning of the string had been reached.
501    Puts the character's 'ucs4_t' representation in *PUC.  */
502 extern const uint8_t *
503        u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start);
504 extern const uint16_t *
505        u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start);
506 extern const uint32_t *
507        u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start);
508 
509 /* Return the number of units in S.  */
510 /* Similar to strlen(), wcslen().  */
511 extern size_t
512        u8_strlen (const uint8_t *s)
513        _UC_ATTRIBUTE_PURE;
514 extern size_t
515        u16_strlen (const uint16_t *s)
516        _UC_ATTRIBUTE_PURE;
517 extern size_t
518        u32_strlen (const uint32_t *s)
519        _UC_ATTRIBUTE_PURE;
520 
521 /* Return the number of units in S, but at most MAXLEN.  */
522 /* Similar to strnlen(), wcsnlen().  */
523 extern size_t
524        u8_strnlen (const uint8_t *s, size_t maxlen)
525        _UC_ATTRIBUTE_PURE;
526 extern size_t
527        u16_strnlen (const uint16_t *s, size_t maxlen)
528        _UC_ATTRIBUTE_PURE;
529 extern size_t
530        u32_strnlen (const uint32_t *s, size_t maxlen)
531        _UC_ATTRIBUTE_PURE;
532 
533 /* Copy SRC to DEST.  */
534 /* Similar to strcpy(), wcscpy().  */
535 extern uint8_t *
536        u8_strcpy (uint8_t *dest, const uint8_t *src);
537 extern uint16_t *
538        u16_strcpy (uint16_t *dest, const uint16_t *src);
539 extern uint32_t *
540        u32_strcpy (uint32_t *dest, const uint32_t *src);
541 
542 /* Copy SRC to DEST, returning the address of the terminating NUL in DEST.  */
543 /* Similar to stpcpy().  */
544 extern uint8_t *
545        u8_stpcpy (uint8_t *dest, const uint8_t *src);
546 extern uint16_t *
547        u16_stpcpy (uint16_t *dest, const uint16_t *src);
548 extern uint32_t *
549        u32_stpcpy (uint32_t *dest, const uint32_t *src);
550 
551 /* Copy no more than N units of SRC to DEST.  */
552 /* Similar to strncpy(), wcsncpy().  */
553 extern uint8_t *
554        u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n);
555 extern uint16_t *
556        u16_strncpy (uint16_t *dest, const uint16_t *src, size_t n);
557 extern uint32_t *
558        u32_strncpy (uint32_t *dest, const uint32_t *src, size_t n);
559 
560 /* Copy no more than N units of SRC to DEST.  Return a pointer past the last
561    non-NUL unit written into DEST.  */
562 /* Similar to stpncpy().  */
563 extern uint8_t *
564        u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n);
565 extern uint16_t *
566        u16_stpncpy (uint16_t *dest, const uint16_t *src, size_t n);
567 extern uint32_t *
568        u32_stpncpy (uint32_t *dest, const uint32_t *src, size_t n);
569 
570 /* Append SRC onto DEST.  */
571 /* Similar to strcat(), wcscat().  */
572 extern uint8_t *
573        u8_strcat (uint8_t *dest, const uint8_t *src);
574 extern uint16_t *
575        u16_strcat (uint16_t *dest, const uint16_t *src);
576 extern uint32_t *
577        u32_strcat (uint32_t *dest, const uint32_t *src);
578 
579 /* Append no more than N units of SRC onto DEST.  */
580 /* Similar to strncat(), wcsncat().  */
581 extern uint8_t *
582        u8_strncat (uint8_t *dest, const uint8_t *src, size_t n);
583 extern uint16_t *
584        u16_strncat (uint16_t *dest, const uint16_t *src, size_t n);
585 extern uint32_t *
586        u32_strncat (uint32_t *dest, const uint32_t *src, size_t n);
587 
588 /* Compare S1 and S2.  */
589 /* Similar to strcmp(), wcscmp().  */
590 #ifdef __sun
591 /* Avoid a collision with the u8_strcmp() function in Solaris 11 libc.  */
592 extern int
593        u8_strcmp_gnu (const uint8_t *s1, const uint8_t *s2)
594        _UC_ATTRIBUTE_PURE;
595 # define u8_strcmp u8_strcmp_gnu
596 #else
597 extern int
598        u8_strcmp (const uint8_t *s1, const uint8_t *s2)
599        _UC_ATTRIBUTE_PURE;
600 #endif
601 extern int
602        u16_strcmp (const uint16_t *s1, const uint16_t *s2)
603        _UC_ATTRIBUTE_PURE;
604 extern int
605        u32_strcmp (const uint32_t *s1, const uint32_t *s2)
606        _UC_ATTRIBUTE_PURE;
607 
608 /* Compare S1 and S2 using the collation rules of the current locale.
609    Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2.
610    Upon failure, set errno and return any value.  */
611 /* Similar to strcoll(), wcscoll().  */
612 extern int
613        u8_strcoll (const uint8_t *s1, const uint8_t *s2);
614 extern int
615        u16_strcoll (const uint16_t *s1, const uint16_t *s2);
616 extern int
617        u32_strcoll (const uint32_t *s1, const uint32_t *s2);
618 
619 /* Compare no more than N units of S1 and S2.  */
620 /* Similar to strncmp(), wcsncmp().  */
621 extern int
622        u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n)
623        _UC_ATTRIBUTE_PURE;
624 extern int
625        u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n)
626        _UC_ATTRIBUTE_PURE;
627 extern int
628        u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n)
629        _UC_ATTRIBUTE_PURE;
630 
631 /* Duplicate S, returning an identical malloc'd string.  */
632 /* Similar to strdup(), wcsdup().  */
633 extern uint8_t *
634        u8_strdup (const uint8_t *s);
635 extern uint16_t *
636        u16_strdup (const uint16_t *s);
637 extern uint32_t *
638        u32_strdup (const uint32_t *s);
639 
640 /* Find the first occurrence of UC in STR.  */
641 /* Similar to strchr(), wcschr().  */
642 extern uint8_t *
643        u8_strchr (const uint8_t *str, ucs4_t uc)
644        _UC_ATTRIBUTE_PURE;
645 extern uint16_t *
646        u16_strchr (const uint16_t *str, ucs4_t uc)
647        _UC_ATTRIBUTE_PURE;
648 extern uint32_t *
649        u32_strchr (const uint32_t *str, ucs4_t uc)
650        _UC_ATTRIBUTE_PURE;
651 
652 /* Find the last occurrence of UC in STR.  */
653 /* Similar to strrchr(), wcsrchr().  */
654 extern uint8_t *
655        u8_strrchr (const uint8_t *str, ucs4_t uc)
656        _UC_ATTRIBUTE_PURE;
657 extern uint16_t *
658        u16_strrchr (const uint16_t *str, ucs4_t uc)
659        _UC_ATTRIBUTE_PURE;
660 extern uint32_t *
661        u32_strrchr (const uint32_t *str, ucs4_t uc)
662        _UC_ATTRIBUTE_PURE;
663 
664 /* Return the length of the initial segment of STR which consists entirely
665    of Unicode characters not in REJECT.  */
666 /* Similar to strcspn(), wcscspn().  */
667 extern size_t
668        u8_strcspn (const uint8_t *str, const uint8_t *reject)
669        _UC_ATTRIBUTE_PURE;
670 extern size_t
671        u16_strcspn (const uint16_t *str, const uint16_t *reject)
672        _UC_ATTRIBUTE_PURE;
673 extern size_t
674        u32_strcspn (const uint32_t *str, const uint32_t *reject)
675        _UC_ATTRIBUTE_PURE;
676 
677 /* Return the length of the initial segment of STR which consists entirely
678    of Unicode characters in ACCEPT.  */
679 /* Similar to strspn(), wcsspn().  */
680 extern size_t
681        u8_strspn (const uint8_t *str, const uint8_t *accept)
682        _UC_ATTRIBUTE_PURE;
683 extern size_t
684        u16_strspn (const uint16_t *str, const uint16_t *accept)
685        _UC_ATTRIBUTE_PURE;
686 extern size_t
687        u32_strspn (const uint32_t *str, const uint32_t *accept)
688        _UC_ATTRIBUTE_PURE;
689 
690 /* Find the first occurrence in STR of any character in ACCEPT.  */
691 /* Similar to strpbrk(), wcspbrk().  */
692 extern uint8_t *
693        u8_strpbrk (const uint8_t *str, const uint8_t *accept)
694        _UC_ATTRIBUTE_PURE;
695 extern uint16_t *
696        u16_strpbrk (const uint16_t *str, const uint16_t *accept)
697        _UC_ATTRIBUTE_PURE;
698 extern uint32_t *
699        u32_strpbrk (const uint32_t *str, const uint32_t *accept)
700        _UC_ATTRIBUTE_PURE;
701 
702 /* Find the first occurrence of NEEDLE in HAYSTACK.  */
703 /* Similar to strstr(), wcsstr().  */
704 extern uint8_t *
705        u8_strstr (const uint8_t *haystack, const uint8_t *needle)
706        _UC_ATTRIBUTE_PURE;
707 extern uint16_t *
708        u16_strstr (const uint16_t *haystack, const uint16_t *needle)
709        _UC_ATTRIBUTE_PURE;
710 extern uint32_t *
711        u32_strstr (const uint32_t *haystack, const uint32_t *needle)
712        _UC_ATTRIBUTE_PURE;
713 
714 /* Test whether STR starts with PREFIX.  */
715 extern bool
716        u8_startswith (const uint8_t *str, const uint8_t *prefix)
717        _UC_ATTRIBUTE_PURE;
718 extern bool
719        u16_startswith (const uint16_t *str, const uint16_t *prefix)
720        _UC_ATTRIBUTE_PURE;
721 extern bool
722        u32_startswith (const uint32_t *str, const uint32_t *prefix)
723        _UC_ATTRIBUTE_PURE;
724 
725 /* Test whether STR ends with SUFFIX.  */
726 extern bool
727        u8_endswith (const uint8_t *str, const uint8_t *suffix)
728        _UC_ATTRIBUTE_PURE;
729 extern bool
730        u16_endswith (const uint16_t *str, const uint16_t *suffix)
731        _UC_ATTRIBUTE_PURE;
732 extern bool
733        u32_endswith (const uint32_t *str, const uint32_t *suffix)
734        _UC_ATTRIBUTE_PURE;
735 
736 /* Divide STR into tokens separated by characters in DELIM.
737    This interface is actually more similar to wcstok than to strtok.  */
738 /* Similar to strtok_r(), wcstok().  */
739 extern uint8_t *
740        u8_strtok (uint8_t *str, const uint8_t *delim, uint8_t **ptr);
741 extern uint16_t *
742        u16_strtok (uint16_t *str, const uint16_t *delim, uint16_t **ptr);
743 extern uint32_t *
744        u32_strtok (uint32_t *str, const uint32_t *delim, uint32_t **ptr);
745 
746 
747 #ifdef __cplusplus
748 }
749 #endif
750 
751 #endif /* _UNISTR_H */
752