/* Recode Serbian text from Cyrillic to Latin script.
   Copyright (C) 2006 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2006.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18*946379e7Schristos
19*946379e7Schristos #ifdef HAVE_CONFIG_H
20*946379e7Schristos # include "config.h"
21*946379e7Schristos #endif
22*946379e7Schristos
23*946379e7Schristos #include <errno.h>
24*946379e7Schristos #include <getopt.h>
25*946379e7Schristos #include <stdbool.h>
26*946379e7Schristos #include <stdio.h>
27*946379e7Schristos #include <stdlib.h>
28*946379e7Schristos #include <locale.h>
29*946379e7Schristos
30*946379e7Schristos #if HAVE_ICONV
31*946379e7Schristos #include <iconv.h>
32*946379e7Schristos #endif
33*946379e7Schristos
34*946379e7Schristos #include "closeout.h"
35*946379e7Schristos #include "error.h"
36*946379e7Schristos #include "progname.h"
37*946379e7Schristos #include "relocatable.h"
38*946379e7Schristos #include "basename.h"
39*946379e7Schristos #include "xalloc.h"
40*946379e7Schristos #include "exit.h"
41*946379e7Schristos #include "localcharset.h"
42*946379e7Schristos #include "c-strcase.h"
43*946379e7Schristos #include "xstriconv.h"
44*946379e7Schristos #include "filters.h"
45*946379e7Schristos #include "propername.h"
46*946379e7Schristos #include "gettext.h"
47*946379e7Schristos
48*946379e7Schristos #define _(str) gettext (str)
49*946379e7Schristos
50*946379e7Schristos
/* Table of long options handed to getopt_long().  Both entries are plain
   flags that map onto their single-letter equivalents.  */
static const struct option long_options[] =
{
  { "help",    no_argument, NULL, 'h' },
  { "version", no_argument, NULL, 'V' },
  { NULL,      0,           NULL, 0   }   /* End-of-table marker.  */
};
58*946379e7Schristos
/* Prototypes of the file-local functions defined further down.  */

/* usage() always ends in exit(); tell GCC 2.5 and newer so.  */
static void usage (int status)
#if defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 5))
     __attribute__ ((noreturn))
#endif
;

static void process (FILE *stream);
67*946379e7Schristos int
main(int argc,char * argv[])68*946379e7Schristos main (int argc, char *argv[])
69*946379e7Schristos {
70*946379e7Schristos /* Default values for command line options. */
71*946379e7Schristos bool do_help = false;
72*946379e7Schristos bool do_version = false;
73*946379e7Schristos
74*946379e7Schristos int opt;
75*946379e7Schristos
76*946379e7Schristos /* Set program name for message texts. */
77*946379e7Schristos set_program_name (argv[0]);
78*946379e7Schristos
79*946379e7Schristos #ifdef HAVE_SETLOCALE
80*946379e7Schristos /* Set locale via LC_ALL. */
81*946379e7Schristos setlocale (LC_ALL, "");
82*946379e7Schristos #endif
83*946379e7Schristos
84*946379e7Schristos /* Set the text message domain. */
85*946379e7Schristos bindtextdomain (PACKAGE, relocate (LOCALEDIR));
86*946379e7Schristos textdomain (PACKAGE);
87*946379e7Schristos
88*946379e7Schristos /* Ensure that write errors on stdout are detected. */
89*946379e7Schristos atexit (close_stdout);
90*946379e7Schristos
91*946379e7Schristos /* Parse command line options. */
92*946379e7Schristos while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
93*946379e7Schristos switch (opt)
94*946379e7Schristos {
95*946379e7Schristos case '\0': /* Long option. */
96*946379e7Schristos break;
97*946379e7Schristos case 'h':
98*946379e7Schristos do_help = true;
99*946379e7Schristos break;
100*946379e7Schristos case 'V':
101*946379e7Schristos do_version = true;
102*946379e7Schristos break;
103*946379e7Schristos default:
104*946379e7Schristos usage (EXIT_FAILURE);
105*946379e7Schristos }
106*946379e7Schristos
107*946379e7Schristos /* Version information is requested. */
108*946379e7Schristos if (do_version)
109*946379e7Schristos {
110*946379e7Schristos printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
111*946379e7Schristos /* xgettext: no-wrap */
112*946379e7Schristos printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
113*946379e7Schristos This is free software; see the source for copying conditions. There is NO\n\
114*946379e7Schristos warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
115*946379e7Schristos "),
116*946379e7Schristos "2006");
117*946379e7Schristos printf (_("Written by %s and %s.\n"),
118*946379e7Schristos /* TRANSLATORS: This is a proper name. The last name is
119*946379e7Schristos (with Unicode escapes) "\u0160egan" or (with HTML entities)
120*946379e7Schristos "Šegan". */
121*946379e7Schristos proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
122*946379e7Schristos proper_name ("Bruno Haible"));
123*946379e7Schristos exit (EXIT_SUCCESS);
124*946379e7Schristos }
125*946379e7Schristos
126*946379e7Schristos /* Help is requested. */
127*946379e7Schristos if (do_help)
128*946379e7Schristos usage (EXIT_SUCCESS);
129*946379e7Schristos
130*946379e7Schristos if (argc - optind > 0)
131*946379e7Schristos error (EXIT_FAILURE, 0, _("too many arguments"));
132*946379e7Schristos
133*946379e7Schristos process (stdin);
134*946379e7Schristos
135*946379e7Schristos exit (EXIT_SUCCESS);
136*946379e7Schristos }
137*946379e7Schristos
138*946379e7Schristos
139*946379e7Schristos /* Display usage information and exit. */
140*946379e7Schristos static void
usage(int status)141*946379e7Schristos usage (int status)
142*946379e7Schristos {
143*946379e7Schristos if (status != EXIT_SUCCESS)
144*946379e7Schristos fprintf (stderr, _("Try `%s --help' for more information.\n"),
145*946379e7Schristos program_name);
146*946379e7Schristos else
147*946379e7Schristos {
148*946379e7Schristos /* xgettext: no-wrap */
149*946379e7Schristos printf (_("\
150*946379e7Schristos Usage: %s [OPTION]\n\
151*946379e7Schristos "), program_name);
152*946379e7Schristos printf ("\n");
153*946379e7Schristos /* xgettext: no-wrap */
154*946379e7Schristos printf (_("\
155*946379e7Schristos Recode Serbian text from Cyrillic to Latin script.\n"));
156*946379e7Schristos /* xgettext: no-wrap */
157*946379e7Schristos printf (_("\
158*946379e7Schristos The input text is read from standard input. The converted text is output to\n\
159*946379e7Schristos standard output.\n"));
160*946379e7Schristos printf ("\n");
161*946379e7Schristos /* xgettext: no-wrap */
162*946379e7Schristos printf (_("\
163*946379e7Schristos Informative output:\n"));
164*946379e7Schristos /* xgettext: no-wrap */
165*946379e7Schristos printf (_("\
166*946379e7Schristos -h, --help display this help and exit\n"));
167*946379e7Schristos /* xgettext: no-wrap */
168*946379e7Schristos printf (_("\
169*946379e7Schristos -V, --version output version information and exit\n"));
170*946379e7Schristos printf ("\n");
171*946379e7Schristos fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout);
172*946379e7Schristos }
173*946379e7Schristos
174*946379e7Schristos exit (status);
175*946379e7Schristos }
176*946379e7Schristos
177*946379e7Schristos
/* Routines for reading a line.
   Don't use routines that drop NUL bytes.  Don't use getline(), because it
   doesn't provide a good error message in case of memory allocation failure.
   The gnulib module 'linebuffer' is nearly the right thing, except that we
   don't want an extra newline at the end of file.  */
183*946379e7Schristos
/* Growable byte buffer that holds one line of input.  The stored bytes are
   not NUL-terminated; LENGTH says how many of them are valid.  */

struct linebuffer
{
  size_t size;      /* Number of bytes allocated for BUFFER.  */
  size_t length;    /* Number of bytes of BUFFER currently in use.  */
  char *buffer;     /* The storage itself.  */
};
192*946379e7Schristos
193*946379e7Schristos /* Initialize linebuffer LINEBUFFER for use. */
194*946379e7Schristos static inline void
init_linebuffer(struct linebuffer * lb)195*946379e7Schristos init_linebuffer (struct linebuffer *lb)
196*946379e7Schristos {
197*946379e7Schristos lb->size = 0;
198*946379e7Schristos lb->length = 0;
199*946379e7Schristos lb->buffer = NULL;
200*946379e7Schristos }
201*946379e7Schristos
202*946379e7Schristos /* Read an arbitrarily long line of text from STREAM into linebuffer LB.
203*946379e7Schristos Keep the newline. Do not NUL terminate.
204*946379e7Schristos Return LINEBUFFER, except at end of file return NULL. */
205*946379e7Schristos static struct linebuffer *
read_linebuffer(struct linebuffer * lb,FILE * stream)206*946379e7Schristos read_linebuffer (struct linebuffer *lb, FILE *stream)
207*946379e7Schristos {
208*946379e7Schristos if (feof (stream))
209*946379e7Schristos return NULL;
210*946379e7Schristos else
211*946379e7Schristos {
212*946379e7Schristos char *p = lb->buffer;
213*946379e7Schristos char *end = lb->buffer + lb->size;
214*946379e7Schristos
215*946379e7Schristos for (;;)
216*946379e7Schristos {
217*946379e7Schristos int c = getc (stream);
218*946379e7Schristos if (c == EOF)
219*946379e7Schristos {
220*946379e7Schristos if (p == lb->buffer || ferror (stream))
221*946379e7Schristos return NULL;
222*946379e7Schristos break;
223*946379e7Schristos }
224*946379e7Schristos if (p == end)
225*946379e7Schristos {
226*946379e7Schristos size_t oldsize = lb->size; /* = p - lb->buffer */
227*946379e7Schristos size_t newsize = 2 * oldsize + 40;
228*946379e7Schristos lb->buffer = (char *) xrealloc (lb->buffer, newsize);
229*946379e7Schristos lb->size = newsize;
230*946379e7Schristos p = lb->buffer + oldsize;
231*946379e7Schristos end = lb->buffer + newsize;
232*946379e7Schristos }
233*946379e7Schristos *p++ = c;
234*946379e7Schristos if (c == '\n')
235*946379e7Schristos break;
236*946379e7Schristos }
237*946379e7Schristos
238*946379e7Schristos lb->length = p - lb->buffer;
239*946379e7Schristos return lb;
240*946379e7Schristos }
241*946379e7Schristos }
242*946379e7Schristos
243*946379e7Schristos /* Free linebuffer LB and its data, all allocated with malloc. */
244*946379e7Schristos static inline void
destroy_linebuffer(struct linebuffer * lb)245*946379e7Schristos destroy_linebuffer (struct linebuffer *lb)
246*946379e7Schristos {
247*946379e7Schristos if (lb->buffer != NULL)
248*946379e7Schristos free (lb->buffer);
249*946379e7Schristos }
250*946379e7Schristos
251*946379e7Schristos
252*946379e7Schristos /* Process the input and produce the output. */
253*946379e7Schristos static void
process(FILE * stream)254*946379e7Schristos process (FILE *stream)
255*946379e7Schristos {
256*946379e7Schristos struct linebuffer lb;
257*946379e7Schristos const char *locale_code = locale_charset ();
258*946379e7Schristos bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
259*946379e7Schristos #if HAVE_ICONV
260*946379e7Schristos iconv_t conv_to_utf8 = (iconv_t)(-1);
261*946379e7Schristos iconv_t conv_from_utf8 = (iconv_t)(-1);
262*946379e7Schristos char *utf8_line;
263*946379e7Schristos size_t utf8_line_len;
264*946379e7Schristos char *backconv_line;
265*946379e7Schristos size_t backconv_line_len;
266*946379e7Schristos #endif
267*946379e7Schristos
268*946379e7Schristos init_linebuffer (&lb);
269*946379e7Schristos
270*946379e7Schristos /* Initialize the conversion descriptors. */
271*946379e7Schristos if (need_code_conversion)
272*946379e7Schristos {
273*946379e7Schristos #if HAVE_ICONV
274*946379e7Schristos /* Avoid glibc-2.1 bug with EUC-KR. */
275*946379e7Schristos # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
276*946379e7Schristos if (strcmp (locale_code, "EUC-KR") != 0)
277*946379e7Schristos # endif
278*946379e7Schristos {
279*946379e7Schristos conv_to_utf8 = iconv_open ("UTF-8", locale_code);
280*946379e7Schristos /* TODO: Maybe append //TRANSLIT here? */
281*946379e7Schristos conv_from_utf8 = iconv_open (locale_code, "UTF-8");
282*946379e7Schristos }
283*946379e7Schristos if (conv_to_utf8 == (iconv_t)(-1))
284*946379e7Schristos error (EXIT_FAILURE, 0, _("\
285*946379e7Schristos Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
286*946379e7Schristos and iconv() does not support this conversion."),
287*946379e7Schristos locale_code, "UTF-8", basename (program_name));
288*946379e7Schristos if (conv_from_utf8 == (iconv_t)(-1))
289*946379e7Schristos error (EXIT_FAILURE, 0, _("\
290*946379e7Schristos Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
291*946379e7Schristos and iconv() does not support this conversion."),
292*946379e7Schristos "UTF-8", locale_code, basename (program_name));
293*946379e7Schristos utf8_line = NULL;
294*946379e7Schristos utf8_line_len = 0;
295*946379e7Schristos backconv_line = NULL;
296*946379e7Schristos backconv_line_len = 0;
297*946379e7Schristos #else
298*946379e7Schristos error (EXIT_FAILURE, 0, _("\
299*946379e7Schristos Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
300*946379e7Schristos This version was built without iconv()."),
301*946379e7Schristos locale_code, "UTF-8", basename (program_name));
302*946379e7Schristos #endif
303*946379e7Schristos }
304*946379e7Schristos
305*946379e7Schristos /* Read the input line by line.
306*946379e7Schristos Processing it character by character is not possible, because some
307*946379e7Schristos filters need to look at adjacent characters. Processing the entire file
308*946379e7Schristos in a whole chunk would take an excessive amount of memory. */
309*946379e7Schristos for (;;)
310*946379e7Schristos {
311*946379e7Schristos char *line;
312*946379e7Schristos size_t line_len;
313*946379e7Schristos char *filtered_line;
314*946379e7Schristos size_t filtered_line_len;
315*946379e7Schristos
316*946379e7Schristos /* Read a line. */
317*946379e7Schristos if (read_linebuffer (&lb, stream) == NULL)
318*946379e7Schristos break;
319*946379e7Schristos line = lb.buffer;
320*946379e7Schristos line_len = lb.length;
321*946379e7Schristos /* read_linebuffer always returns a non-void result. */
322*946379e7Schristos if (line_len == 0)
323*946379e7Schristos abort ();
324*946379e7Schristos
325*946379e7Schristos #if HAVE_ICONV
326*946379e7Schristos /* Convert it to UTF-8. */
327*946379e7Schristos if (need_code_conversion)
328*946379e7Schristos {
329*946379e7Schristos if (xmem_cd_iconv (line, line_len, conv_to_utf8,
330*946379e7Schristos &utf8_line, &utf8_line_len) != 0)
331*946379e7Schristos error (EXIT_FAILURE, errno,
332*946379e7Schristos _("input is not valid in \"%s\" encoding"),
333*946379e7Schristos locale_code);
334*946379e7Schristos line = utf8_line;
335*946379e7Schristos line_len = utf8_line_len;
336*946379e7Schristos }
337*946379e7Schristos #endif
338*946379e7Schristos
339*946379e7Schristos /* Apply the filter. */
340*946379e7Schristos serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);
341*946379e7Schristos
342*946379e7Schristos #if HAVE_ICONV
343*946379e7Schristos /* Convert it back to the original encoding. */
344*946379e7Schristos if (need_code_conversion)
345*946379e7Schristos {
346*946379e7Schristos if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
347*946379e7Schristos &backconv_line, &backconv_line_len) != 0)
348*946379e7Schristos error (EXIT_FAILURE, errno,
349*946379e7Schristos _("error while converting from \"%s\" encoding to \"%s\" encoding"),
350*946379e7Schristos "UTF-8", locale_code);
351*946379e7Schristos fwrite (backconv_line, 1, backconv_line_len, stdout);
352*946379e7Schristos }
353*946379e7Schristos else
354*946379e7Schristos #endif
355*946379e7Schristos fwrite (filtered_line, 1, filtered_line_len, stdout);
356*946379e7Schristos
357*946379e7Schristos free (filtered_line);
358*946379e7Schristos }
359*946379e7Schristos
360*946379e7Schristos #if HAVE_ICONV
361*946379e7Schristos if (need_code_conversion)
362*946379e7Schristos {
363*946379e7Schristos iconv_close (conv_from_utf8);
364*946379e7Schristos iconv_close (conv_to_utf8);
365*946379e7Schristos }
366*946379e7Schristos #endif
367*946379e7Schristos
368*946379e7Schristos destroy_linebuffer (&lb);
369*946379e7Schristos }
370