xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/po-charset.h (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Charset handling while reading PO files.
2    Copyright (C) 2001-2003, 2006 Free Software Foundation, Inc.
3    Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifndef _PO_CHARSET_H
20 #define _PO_CHARSET_H
21 
22 #include <stdbool.h>
23 #include <stddef.h>
24 
25 #if HAVE_ICONV
26 #include <iconv.h>
27 #endif
28 
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33 
34 
35 /* Canonicalize the encoding name CHARSET.
36    The returned string is statically allocated, so it must not be freed,
37    and two results of this function can be compared using ==.  */
38 extern const char *po_charset_canonicalize (const char *charset);
39 
40 /* The canonicalized name of the ASCII encoding (a read-only string).  */
41 extern DLL_VARIABLE const char *po_charset_ascii;
42 
43 /* The canonicalized name of the UTF-8 encoding (a read-only string).  */
44 extern DLL_VARIABLE const char *po_charset_utf8;
45 
46 /* Test whether the encoding named CANON_CHARSET is ASCII compatible.  */
47 extern bool po_charset_ascii_compatible (const char *canon_charset);
48 
49 /* Test whether CANON_CHARSET is a weird encoding, i.e. an encoding which
50    has double-byte characters ending in 0x5C (the ASCII backslash).  */
51 extern bool po_is_charset_weird (const char *canon_charset);
52 
53 /* Test whether CANON_CHARSET is a weird CJK encoding, i.e. a weird encoding
54    with CJK structure.  An encoding has CJK structure if every valid character
55    stream is composed of single bytes in the range 0x{00..7F} and of byte pairs
56    whose first byte is in 0x{80..FF} and whose second byte is in 0x{30..FF}.  */
57 extern bool po_is_charset_weird_cjk (const char *canon_charset);
58 
59 /* Return a character iterator for the encoding CANON_CHARSET.
60    Given a pointer S into a string, the iterator returns the number of bytes
61    occupied by the next single character.  If the piece of string at S is not
62    valid, or if *S == '\0', it returns 1.  */
63 typedef size_t (*character_iterator_t) (const char *s);
64 extern character_iterator_t po_charset_character_iterator (const char *canon_charset);
65 
66 
67 /* The encoding of the PO file being read, as given by its header entry.  */
68 extern DLL_VARIABLE const char *po_lex_charset;
69 
70 #if HAVE_ICONV
71 /* iconv converter from the PO file's encoding to UTF-8 (HAVE_ICONV only).  */
72 extern DLL_VARIABLE iconv_t po_lex_iconv;
73 #endif
74 /* Some information about the structure of the PO file's encoding, meant to
75    be consulted if no converter is available.  */
76 extern DLL_VARIABLE bool po_lex_weird_cjk;
77 
78 /* Initialize the handling of the PO file's encoding.  Takes no arguments.  */
79 extern void po_lex_charset_init (void);
80 
81 /* Set the PO file's encoding from the header entry HEADER_ENTRY.
      FILENAME is presumably the name of the PO file -- TODO confirm its use.  */
82 extern void po_lex_charset_set (const char *header_entry,
83 				const char *filename);
84 
85 /* Finish up with the PO file's encoding (presumably pairs with
   po_lex_charset_init -- TODO confirm).  */
86 extern void po_lex_charset_close (void);
87 
88 
89 #ifdef __cplusplus
90 }
91 #endif
92 
93 
94 #endif /* _PO_CHARSET_H */
95