xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/recode-sr-latin.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1*946379e7Schristos /* Recode Serbian text from Cyrillic to Latin script.
2*946379e7Schristos    Copyright (C) 2006 Free Software Foundation, Inc.
3*946379e7Schristos    Written by Bruno Haible <bruno@clisp.org>, 2006.
4*946379e7Schristos 
5*946379e7Schristos    This program is free software; you can redistribute it and/or modify
6*946379e7Schristos    it under the terms of the GNU General Public License as published by
7*946379e7Schristos    the Free Software Foundation; either version 2, or (at your option)
8*946379e7Schristos    any later version.
9*946379e7Schristos 
10*946379e7Schristos    This program is distributed in the hope that it will be useful,
11*946379e7Schristos    but WITHOUT ANY WARRANTY; without even the implied warranty of
12*946379e7Schristos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13*946379e7Schristos    GNU General Public License for more details.
14*946379e7Schristos 
15*946379e7Schristos    You should have received a copy of the GNU General Public License
16*946379e7Schristos    along with this program; if not, write to the Free Software Foundation,
17*946379e7Schristos    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18*946379e7Schristos 
19*946379e7Schristos #ifdef HAVE_CONFIG_H
20*946379e7Schristos # include "config.h"
21*946379e7Schristos #endif
22*946379e7Schristos 
23*946379e7Schristos #include <errno.h>
24*946379e7Schristos #include <getopt.h>
25*946379e7Schristos #include <stdbool.h>
26*946379e7Schristos #include <stdio.h>
27*946379e7Schristos #include <stdlib.h>
28*946379e7Schristos #include <locale.h>
29*946379e7Schristos 
30*946379e7Schristos #if HAVE_ICONV
31*946379e7Schristos #include <iconv.h>
32*946379e7Schristos #endif
33*946379e7Schristos 
34*946379e7Schristos #include "closeout.h"
35*946379e7Schristos #include "error.h"
36*946379e7Schristos #include "progname.h"
37*946379e7Schristos #include "relocatable.h"
38*946379e7Schristos #include "basename.h"
39*946379e7Schristos #include "xalloc.h"
40*946379e7Schristos #include "exit.h"
41*946379e7Schristos #include "localcharset.h"
42*946379e7Schristos #include "c-strcase.h"
43*946379e7Schristos #include "xstriconv.h"
44*946379e7Schristos #include "filters.h"
45*946379e7Schristos #include "propername.h"
46*946379e7Schristos #include "gettext.h"
47*946379e7Schristos 
48*946379e7Schristos #define _(str) gettext (str)
49*946379e7Schristos 
50*946379e7Schristos 
/* Table of long options handed to getopt_long.  Each entry pairs a long
   option name with the short option character that the option loop in
   main dispatches on; the all-zero entry terminates the table.  */
static const struct option long_options[] =
{
  { "help",    no_argument, NULL, 'h' },
  { "version", no_argument, NULL, 'V' },
  { NULL,      0,           NULL, 0   }
};
58*946379e7Schristos 
/* Prototypes of the file-local functions that are defined after main.  */
static void process (FILE *stream);
/* usage ends by calling exit, so it is marked noreturn for compilers that
   understand the attribute (GCC 2.5 and newer).  */
static void usage (int status)
#if defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 5))
     __attribute__ ((noreturn))
#endif
;
66*946379e7Schristos 
67*946379e7Schristos int
main(int argc,char * argv[])68*946379e7Schristos main (int argc, char *argv[])
69*946379e7Schristos {
70*946379e7Schristos   /* Default values for command line options.  */
71*946379e7Schristos   bool do_help = false;
72*946379e7Schristos   bool do_version = false;
73*946379e7Schristos 
74*946379e7Schristos   int opt;
75*946379e7Schristos 
76*946379e7Schristos   /* Set program name for message texts.  */
77*946379e7Schristos   set_program_name (argv[0]);
78*946379e7Schristos 
79*946379e7Schristos #ifdef HAVE_SETLOCALE
80*946379e7Schristos   /* Set locale via LC_ALL.  */
81*946379e7Schristos   setlocale (LC_ALL, "");
82*946379e7Schristos #endif
83*946379e7Schristos 
84*946379e7Schristos   /* Set the text message domain.  */
85*946379e7Schristos   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
86*946379e7Schristos   textdomain (PACKAGE);
87*946379e7Schristos 
88*946379e7Schristos   /* Ensure that write errors on stdout are detected.  */
89*946379e7Schristos   atexit (close_stdout);
90*946379e7Schristos 
91*946379e7Schristos   /* Parse command line options.  */
92*946379e7Schristos   while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
93*946379e7Schristos     switch (opt)
94*946379e7Schristos     {
95*946379e7Schristos     case '\0':		/* Long option.  */
96*946379e7Schristos       break;
97*946379e7Schristos     case 'h':
98*946379e7Schristos       do_help = true;
99*946379e7Schristos       break;
100*946379e7Schristos     case 'V':
101*946379e7Schristos       do_version = true;
102*946379e7Schristos       break;
103*946379e7Schristos     default:
104*946379e7Schristos       usage (EXIT_FAILURE);
105*946379e7Schristos     }
106*946379e7Schristos 
107*946379e7Schristos   /* Version information is requested.  */
108*946379e7Schristos   if (do_version)
109*946379e7Schristos     {
110*946379e7Schristos       printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
111*946379e7Schristos       /* xgettext: no-wrap */
112*946379e7Schristos       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
113*946379e7Schristos This is free software; see the source for copying conditions.  There is NO\n\
114*946379e7Schristos warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
115*946379e7Schristos "),
116*946379e7Schristos 	      "2006");
117*946379e7Schristos       printf (_("Written by %s and %s.\n"),
118*946379e7Schristos 	      /* TRANSLATORS: This is a proper name. The last name is
119*946379e7Schristos 		 (with Unicode escapes) "\u0160egan" or (with HTML entities)
120*946379e7Schristos 		 "&Scaron;egan".  */
121*946379e7Schristos 	      proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
122*946379e7Schristos 	      proper_name ("Bruno Haible"));
123*946379e7Schristos       exit (EXIT_SUCCESS);
124*946379e7Schristos     }
125*946379e7Schristos 
126*946379e7Schristos   /* Help is requested.  */
127*946379e7Schristos   if (do_help)
128*946379e7Schristos     usage (EXIT_SUCCESS);
129*946379e7Schristos 
130*946379e7Schristos   if (argc - optind > 0)
131*946379e7Schristos     error (EXIT_FAILURE, 0, _("too many arguments"));
132*946379e7Schristos 
133*946379e7Schristos   process (stdin);
134*946379e7Schristos 
135*946379e7Schristos   exit (EXIT_SUCCESS);
136*946379e7Schristos }
137*946379e7Schristos 
138*946379e7Schristos 
139*946379e7Schristos /* Display usage information and exit.  */
140*946379e7Schristos static void
usage(int status)141*946379e7Schristos usage (int status)
142*946379e7Schristos {
143*946379e7Schristos   if (status != EXIT_SUCCESS)
144*946379e7Schristos     fprintf (stderr, _("Try `%s --help' for more information.\n"),
145*946379e7Schristos 	     program_name);
146*946379e7Schristos   else
147*946379e7Schristos     {
148*946379e7Schristos       /* xgettext: no-wrap */
149*946379e7Schristos       printf (_("\
150*946379e7Schristos Usage: %s [OPTION]\n\
151*946379e7Schristos "), program_name);
152*946379e7Schristos       printf ("\n");
153*946379e7Schristos       /* xgettext: no-wrap */
154*946379e7Schristos       printf (_("\
155*946379e7Schristos Recode Serbian text from Cyrillic to Latin script.\n"));
156*946379e7Schristos       /* xgettext: no-wrap */
157*946379e7Schristos       printf (_("\
158*946379e7Schristos The input text is read from standard input.  The converted text is output to\n\
159*946379e7Schristos standard output.\n"));
160*946379e7Schristos       printf ("\n");
161*946379e7Schristos       /* xgettext: no-wrap */
162*946379e7Schristos       printf (_("\
163*946379e7Schristos Informative output:\n"));
164*946379e7Schristos       /* xgettext: no-wrap */
165*946379e7Schristos       printf (_("\
166*946379e7Schristos   -h, --help                  display this help and exit\n"));
167*946379e7Schristos       /* xgettext: no-wrap */
168*946379e7Schristos       printf (_("\
169*946379e7Schristos   -V, --version               output version information and exit\n"));
170*946379e7Schristos       printf ("\n");
171*946379e7Schristos       fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout);
172*946379e7Schristos     }
173*946379e7Schristos 
174*946379e7Schristos   exit (status);
175*946379e7Schristos }
176*946379e7Schristos 
177*946379e7Schristos 
178*946379e7Schristos /* Routines for reading a line.
179*946379e7Schristos    Don't use routines that drop NUL bytes.  Don't use getline(), because it
180*946379e7Schristos    doesn't provide a good error message in case of memory allocation failure.
181*946379e7Schristos    The gnulib module 'linebuffer' is nearly the right thing, except that we
182*946379e7Schristos    don't want an extra newline at the end of file.  */
183*946379e7Schristos 
/* A 'struct linebuffer' is a growable buffer for one line of text.
   Only the first LENGTH bytes of BUFFER are meaningful; read_linebuffer
   does not NUL-terminate them.  */

struct linebuffer
{
  size_t size;			/* Bytes allocated for BUFFER. */
  size_t length;		/* Bytes of BUFFER currently in use. */
  char *buffer;			/* The line data; NULL until first allocated. */
};
192*946379e7Schristos 
193*946379e7Schristos /* Initialize linebuffer LINEBUFFER for use. */
194*946379e7Schristos static inline void
init_linebuffer(struct linebuffer * lb)195*946379e7Schristos init_linebuffer (struct linebuffer *lb)
196*946379e7Schristos {
197*946379e7Schristos   lb->size = 0;
198*946379e7Schristos   lb->length = 0;
199*946379e7Schristos   lb->buffer = NULL;
200*946379e7Schristos }
201*946379e7Schristos 
202*946379e7Schristos /* Read an arbitrarily long line of text from STREAM into linebuffer LB.
203*946379e7Schristos    Keep the newline.  Do not NUL terminate.
204*946379e7Schristos    Return LINEBUFFER, except at end of file return NULL.  */
205*946379e7Schristos static struct linebuffer *
read_linebuffer(struct linebuffer * lb,FILE * stream)206*946379e7Schristos read_linebuffer (struct linebuffer *lb, FILE *stream)
207*946379e7Schristos {
208*946379e7Schristos   if (feof (stream))
209*946379e7Schristos     return NULL;
210*946379e7Schristos   else
211*946379e7Schristos     {
212*946379e7Schristos       char *p = lb->buffer;
213*946379e7Schristos       char *end = lb->buffer + lb->size;
214*946379e7Schristos 
215*946379e7Schristos       for (;;)
216*946379e7Schristos 	{
217*946379e7Schristos 	  int c = getc (stream);
218*946379e7Schristos 	  if (c == EOF)
219*946379e7Schristos 	    {
220*946379e7Schristos 	      if (p == lb->buffer || ferror (stream))
221*946379e7Schristos 		return NULL;
222*946379e7Schristos 	      break;
223*946379e7Schristos 	    }
224*946379e7Schristos 	  if (p == end)
225*946379e7Schristos 	    {
226*946379e7Schristos 	      size_t oldsize = lb->size; /* = p - lb->buffer */
227*946379e7Schristos 	      size_t newsize = 2 * oldsize + 40;
228*946379e7Schristos 	      lb->buffer = (char *) xrealloc (lb->buffer, newsize);
229*946379e7Schristos 	      lb->size = newsize;
230*946379e7Schristos 	      p = lb->buffer + oldsize;
231*946379e7Schristos 	      end = lb->buffer + newsize;
232*946379e7Schristos 	    }
233*946379e7Schristos 	  *p++ = c;
234*946379e7Schristos 	  if (c == '\n')
235*946379e7Schristos 	    break;
236*946379e7Schristos 	}
237*946379e7Schristos 
238*946379e7Schristos       lb->length = p - lb->buffer;
239*946379e7Schristos       return lb;
240*946379e7Schristos     }
241*946379e7Schristos }
242*946379e7Schristos 
243*946379e7Schristos /* Free linebuffer LB and its data, all allocated with malloc. */
244*946379e7Schristos static inline void
destroy_linebuffer(struct linebuffer * lb)245*946379e7Schristos destroy_linebuffer (struct linebuffer *lb)
246*946379e7Schristos {
247*946379e7Schristos   if (lb->buffer != NULL)
248*946379e7Schristos     free (lb->buffer);
249*946379e7Schristos }
250*946379e7Schristos 
251*946379e7Schristos 
252*946379e7Schristos /* Process the input and produce the output.  */
253*946379e7Schristos static void
process(FILE * stream)254*946379e7Schristos process (FILE *stream)
255*946379e7Schristos {
256*946379e7Schristos   struct linebuffer lb;
257*946379e7Schristos   const char *locale_code = locale_charset ();
258*946379e7Schristos   bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
259*946379e7Schristos #if HAVE_ICONV
260*946379e7Schristos   iconv_t conv_to_utf8 = (iconv_t)(-1);
261*946379e7Schristos   iconv_t conv_from_utf8 = (iconv_t)(-1);
262*946379e7Schristos   char *utf8_line;
263*946379e7Schristos   size_t utf8_line_len;
264*946379e7Schristos   char *backconv_line;
265*946379e7Schristos   size_t backconv_line_len;
266*946379e7Schristos #endif
267*946379e7Schristos 
268*946379e7Schristos   init_linebuffer (&lb);
269*946379e7Schristos 
270*946379e7Schristos   /* Initialize the conversion descriptors.  */
271*946379e7Schristos   if (need_code_conversion)
272*946379e7Schristos     {
273*946379e7Schristos #if HAVE_ICONV
274*946379e7Schristos       /* Avoid glibc-2.1 bug with EUC-KR.  */
275*946379e7Schristos # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
276*946379e7Schristos       if (strcmp (locale_code, "EUC-KR") != 0)
277*946379e7Schristos # endif
278*946379e7Schristos 	{
279*946379e7Schristos 	  conv_to_utf8 = iconv_open ("UTF-8", locale_code);
280*946379e7Schristos 	  /* TODO:  Maybe append //TRANSLIT here?  */
281*946379e7Schristos 	  conv_from_utf8 = iconv_open (locale_code, "UTF-8");
282*946379e7Schristos 	}
283*946379e7Schristos       if (conv_to_utf8 == (iconv_t)(-1))
284*946379e7Schristos 	error (EXIT_FAILURE, 0, _("\
285*946379e7Schristos Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
286*946379e7Schristos and iconv() does not support this conversion."),
287*946379e7Schristos 	       locale_code, "UTF-8", basename (program_name));
288*946379e7Schristos       if (conv_from_utf8 == (iconv_t)(-1))
289*946379e7Schristos 	error (EXIT_FAILURE, 0, _("\
290*946379e7Schristos Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
291*946379e7Schristos and iconv() does not support this conversion."),
292*946379e7Schristos 	       "UTF-8", locale_code, basename (program_name));
293*946379e7Schristos       utf8_line = NULL;
294*946379e7Schristos       utf8_line_len = 0;
295*946379e7Schristos       backconv_line = NULL;
296*946379e7Schristos       backconv_line_len = 0;
297*946379e7Schristos #else
298*946379e7Schristos       error (EXIT_FAILURE, 0, _("\
299*946379e7Schristos Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
300*946379e7Schristos This version was built without iconv()."),
301*946379e7Schristos 	     locale_code, "UTF-8", basename (program_name));
302*946379e7Schristos #endif
303*946379e7Schristos     }
304*946379e7Schristos 
305*946379e7Schristos   /* Read the input line by line.
306*946379e7Schristos      Processing it character by character is not possible, because some
307*946379e7Schristos      filters need to look at adjacent characters.  Processing the entire file
308*946379e7Schristos      in a whole chunk would take an excessive amount of memory.  */
309*946379e7Schristos   for (;;)
310*946379e7Schristos     {
311*946379e7Schristos       char *line;
312*946379e7Schristos       size_t line_len;
313*946379e7Schristos       char *filtered_line;
314*946379e7Schristos       size_t filtered_line_len;
315*946379e7Schristos 
316*946379e7Schristos       /* Read a line.  */
317*946379e7Schristos       if (read_linebuffer (&lb, stream) == NULL)
318*946379e7Schristos 	break;
319*946379e7Schristos       line = lb.buffer;
320*946379e7Schristos       line_len = lb.length;
321*946379e7Schristos       /* read_linebuffer always returns a non-void result.  */
322*946379e7Schristos       if (line_len == 0)
323*946379e7Schristos 	abort ();
324*946379e7Schristos 
325*946379e7Schristos #if HAVE_ICONV
326*946379e7Schristos       /* Convert it to UTF-8.  */
327*946379e7Schristos       if (need_code_conversion)
328*946379e7Schristos 	{
329*946379e7Schristos 	  if (xmem_cd_iconv (line, line_len, conv_to_utf8,
330*946379e7Schristos 			     &utf8_line, &utf8_line_len) != 0)
331*946379e7Schristos 	    error (EXIT_FAILURE, errno,
332*946379e7Schristos 		   _("input is not valid in \"%s\" encoding"),
333*946379e7Schristos 		   locale_code);
334*946379e7Schristos 	  line = utf8_line;
335*946379e7Schristos 	  line_len = utf8_line_len;
336*946379e7Schristos 	}
337*946379e7Schristos #endif
338*946379e7Schristos 
339*946379e7Schristos       /* Apply the filter.  */
340*946379e7Schristos       serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);
341*946379e7Schristos 
342*946379e7Schristos #if HAVE_ICONV
343*946379e7Schristos       /* Convert it back to the original encoding.  */
344*946379e7Schristos       if (need_code_conversion)
345*946379e7Schristos 	{
346*946379e7Schristos 	  if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
347*946379e7Schristos 			     &backconv_line, &backconv_line_len) != 0)
348*946379e7Schristos 	    error (EXIT_FAILURE, errno,
349*946379e7Schristos 		   _("error while converting from \"%s\" encoding to \"%s\" encoding"),
350*946379e7Schristos 		   "UTF-8", locale_code);
351*946379e7Schristos 	  fwrite (backconv_line, 1, backconv_line_len, stdout);
352*946379e7Schristos 	}
353*946379e7Schristos       else
354*946379e7Schristos #endif
355*946379e7Schristos 	fwrite (filtered_line, 1, filtered_line_len, stdout);
356*946379e7Schristos 
357*946379e7Schristos       free (filtered_line);
358*946379e7Schristos     }
359*946379e7Schristos 
360*946379e7Schristos #if HAVE_ICONV
361*946379e7Schristos   if (need_code_conversion)
362*946379e7Schristos     {
363*946379e7Schristos       iconv_close (conv_from_utf8);
364*946379e7Schristos       iconv_close (conv_to_utf8);
365*946379e7Schristos     }
366*946379e7Schristos #endif
367*946379e7Schristos 
368*946379e7Schristos   destroy_linebuffer (&lb);
369*946379e7Schristos }
370