1 /* Charset handling while reading PO files. 2 Copyright (C) 2001-2003, 2006 Free Software Foundation, Inc. 3 Written by Bruno Haible <haible@clisp.cons.org>, 2001. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software Foundation, 17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19 #ifndef _PO_CHARSET_H 20 #define _PO_CHARSET_H 21 22 #include <stdbool.h> 23 #include <stddef.h> 24 25 #if HAVE_ICONV 26 #include <iconv.h> 27 #endif 28 29 30 #ifdef __cplusplus 31 extern "C" { 32 #endif 33 34 35 /* Canonicalize an encoding name. 36 The results of this function are statically allocated and can be 37 compared using ==. */ 38 extern const char *po_charset_canonicalize (const char *charset); 39 40 /* The canonicalized encoding name for ASCII. */ 41 extern DLL_VARIABLE const char *po_charset_ascii; 42 43 /* The canonicalized encoding name for UTF-8. */ 44 extern DLL_VARIABLE const char *po_charset_utf8; 45 46 /* Test for ASCII compatibility. */ 47 extern bool po_charset_ascii_compatible (const char *canon_charset); 48 49 /* Test for a weird encoding, i.e. an encoding which has double-byte 50 characters ending in 0x5C. */ 51 extern bool po_is_charset_weird (const char *canon_charset); 52 53 /* Test for a weird CJK encoding, i.e. a weird encoding with CJK structure. 54 An encoding has CJK structure if every valid character stream is composed 55 of single bytes in the range 0x{00..7F} and of byte pairs in the range 56 0x{80..FF}{30..FF}. */ 57 extern bool po_is_charset_weird_cjk (const char *canon_charset); 58 59 /* Returns a character iterator for a given encoding. 60 Given a pointer into a string, it returns the number occupied by the next 61 single character. If the piece of string is not valid or if the *s == '\0', 62 it returns 1. */ 63 typedef size_t (*character_iterator_t) (const char *s); 64 extern character_iterator_t po_charset_character_iterator (const char *canon_charset); 65 66 67 /* The PO file's encoding, as specified in the header entry. */ 68 extern DLL_VARIABLE const char *po_lex_charset; 69 70 #if HAVE_ICONV 71 /* Converter from the PO file's encoding to UTF-8. */ 72 extern DLL_VARIABLE iconv_t po_lex_iconv; 73 #endif 74 /* If no converter is available, some information about the structure of the 75 PO file's encoding. */ 76 extern DLL_VARIABLE bool po_lex_weird_cjk; 77 78 /* Initialize the PO file's encoding. */ 79 extern void po_lex_charset_init (void); 80 81 /* Set the PO file's encoding from the header entry. */ 82 extern void po_lex_charset_set (const char *header_entry, 83 const char *filename); 84 85 /* Finish up with the PO file's encoding. */ 86 extern void po_lex_charset_close (void); 87 88 89 #ifdef __cplusplus 90 } 91 #endif 92 93 94 #endif /* _PO_CHARSET_H */ 95