xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/recode-sr-latin.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Recode Serbian text from Cyrillic to Latin script.
2    Copyright (C) 2006 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2006.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22 
23 #include <errno.h>
24 #include <getopt.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <locale.h>
29 
30 #if HAVE_ICONV
31 #include <iconv.h>
32 #endif
33 
34 #include "closeout.h"
35 #include "error.h"
36 #include "progname.h"
37 #include "relocatable.h"
38 #include "basename.h"
39 #include "xalloc.h"
40 #include "exit.h"
41 #include "localcharset.h"
42 #include "c-strcase.h"
43 #include "xstriconv.h"
44 #include "filters.h"
45 #include "propername.h"
46 #include "gettext.h"
47 
48 #define _(str) gettext (str)
49 
50 
/* Long options accepted by getopt_long.  Each entry yields the same option
   character as its short form; the all-zero entry terminates the table.  */
static const struct option long_options[] =
{
  { .name = "help",    .has_arg = no_argument, .flag = NULL, .val = 'h' },
  { .name = "version", .has_arg = no_argument, .flag = NULL, .val = 'V' },
  { .name = NULL,      .has_arg = 0,           .flag = NULL, .val = 0 }
};
58 
/* Prototypes of the file-local functions defined further down.  */

/* usage ends by calling exit, so it never returns; GCC 2.5 and newer can be
   told so.  */
#if defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 5))
static void usage (int status) __attribute__ ((noreturn));
#else
static void usage (int status);
#endif
static void process (FILE *stream);
66 
67 int
main(int argc,char * argv[])68 main (int argc, char *argv[])
69 {
70   /* Default values for command line options.  */
71   bool do_help = false;
72   bool do_version = false;
73 
74   int opt;
75 
76   /* Set program name for message texts.  */
77   set_program_name (argv[0]);
78 
79 #ifdef HAVE_SETLOCALE
80   /* Set locale via LC_ALL.  */
81   setlocale (LC_ALL, "");
82 #endif
83 
84   /* Set the text message domain.  */
85   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
86   textdomain (PACKAGE);
87 
88   /* Ensure that write errors on stdout are detected.  */
89   atexit (close_stdout);
90 
91   /* Parse command line options.  */
92   while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
93     switch (opt)
94     {
95     case '\0':		/* Long option.  */
96       break;
97     case 'h':
98       do_help = true;
99       break;
100     case 'V':
101       do_version = true;
102       break;
103     default:
104       usage (EXIT_FAILURE);
105     }
106 
107   /* Version information is requested.  */
108   if (do_version)
109     {
110       printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
111       /* xgettext: no-wrap */
112       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
113 This is free software; see the source for copying conditions.  There is NO\n\
114 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
115 "),
116 	      "2006");
117       printf (_("Written by %s and %s.\n"),
118 	      /* TRANSLATORS: This is a proper name. The last name is
119 		 (with Unicode escapes) "\u0160egan" or (with HTML entities)
120 		 "&Scaron;egan".  */
121 	      proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
122 	      proper_name ("Bruno Haible"));
123       exit (EXIT_SUCCESS);
124     }
125 
126   /* Help is requested.  */
127   if (do_help)
128     usage (EXIT_SUCCESS);
129 
130   if (argc - optind > 0)
131     error (EXIT_FAILURE, 0, _("too many arguments"));
132 
133   process (stdin);
134 
135   exit (EXIT_SUCCESS);
136 }
137 
138 
139 /* Display usage information and exit.  */
140 static void
usage(int status)141 usage (int status)
142 {
143   if (status != EXIT_SUCCESS)
144     fprintf (stderr, _("Try `%s --help' for more information.\n"),
145 	     program_name);
146   else
147     {
148       /* xgettext: no-wrap */
149       printf (_("\
150 Usage: %s [OPTION]\n\
151 "), program_name);
152       printf ("\n");
153       /* xgettext: no-wrap */
154       printf (_("\
155 Recode Serbian text from Cyrillic to Latin script.\n"));
156       /* xgettext: no-wrap */
157       printf (_("\
158 The input text is read from standard input.  The converted text is output to\n\
159 standard output.\n"));
160       printf ("\n");
161       /* xgettext: no-wrap */
162       printf (_("\
163 Informative output:\n"));
164       /* xgettext: no-wrap */
165       printf (_("\
166   -h, --help                  display this help and exit\n"));
167       /* xgettext: no-wrap */
168       printf (_("\
169   -V, --version               output version information and exit\n"));
170       printf ("\n");
171       fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout);
172     }
173 
174   exit (status);
175 }
176 
177 
178 /* Routines for reading a line.
179    Don't use routines that drop NUL bytes.  Don't use getline(), because it
180    doesn't provide a good error message in case of memory allocation failure.
181    The gnulib module 'linebuffer' is nearly the right thing, except that we
182    don't want an extra newline at the end of file.  */
183 
/* Growable buffer that holds one line of input.  The contents are raw
   bytes and are not NUL terminated.  */

struct linebuffer
{
  size_t size;			/* Bytes allocated for BUFFER. */
  size_t length;		/* Bytes of BUFFER currently in use. */
  char *buffer;			/* Storage; NULL while nothing is allocated. */
};
192 
193 /* Initialize linebuffer LINEBUFFER for use. */
194 static inline void
init_linebuffer(struct linebuffer * lb)195 init_linebuffer (struct linebuffer *lb)
196 {
197   lb->size = 0;
198   lb->length = 0;
199   lb->buffer = NULL;
200 }
201 
202 /* Read an arbitrarily long line of text from STREAM into linebuffer LB.
203    Keep the newline.  Do not NUL terminate.
204    Return LINEBUFFER, except at end of file return NULL.  */
205 static struct linebuffer *
read_linebuffer(struct linebuffer * lb,FILE * stream)206 read_linebuffer (struct linebuffer *lb, FILE *stream)
207 {
208   if (feof (stream))
209     return NULL;
210   else
211     {
212       char *p = lb->buffer;
213       char *end = lb->buffer + lb->size;
214 
215       for (;;)
216 	{
217 	  int c = getc (stream);
218 	  if (c == EOF)
219 	    {
220 	      if (p == lb->buffer || ferror (stream))
221 		return NULL;
222 	      break;
223 	    }
224 	  if (p == end)
225 	    {
226 	      size_t oldsize = lb->size; /* = p - lb->buffer */
227 	      size_t newsize = 2 * oldsize + 40;
228 	      lb->buffer = (char *) xrealloc (lb->buffer, newsize);
229 	      lb->size = newsize;
230 	      p = lb->buffer + oldsize;
231 	      end = lb->buffer + newsize;
232 	    }
233 	  *p++ = c;
234 	  if (c == '\n')
235 	    break;
236 	}
237 
238       lb->length = p - lb->buffer;
239       return lb;
240     }
241 }
242 
243 /* Free linebuffer LB and its data, all allocated with malloc. */
244 static inline void
destroy_linebuffer(struct linebuffer * lb)245 destroy_linebuffer (struct linebuffer *lb)
246 {
247   if (lb->buffer != NULL)
248     free (lb->buffer);
249 }
250 
251 
252 /* Process the input and produce the output.  */
253 static void
process(FILE * stream)254 process (FILE *stream)
255 {
256   struct linebuffer lb;
257   const char *locale_code = locale_charset ();
258   bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
259 #if HAVE_ICONV
260   iconv_t conv_to_utf8 = (iconv_t)(-1);
261   iconv_t conv_from_utf8 = (iconv_t)(-1);
262   char *utf8_line;
263   size_t utf8_line_len;
264   char *backconv_line;
265   size_t backconv_line_len;
266 #endif
267 
268   init_linebuffer (&lb);
269 
270   /* Initialize the conversion descriptors.  */
271   if (need_code_conversion)
272     {
273 #if HAVE_ICONV
274       /* Avoid glibc-2.1 bug with EUC-KR.  */
275 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
276       if (strcmp (locale_code, "EUC-KR") != 0)
277 # endif
278 	{
279 	  conv_to_utf8 = iconv_open ("UTF-8", locale_code);
280 	  /* TODO:  Maybe append //TRANSLIT here?  */
281 	  conv_from_utf8 = iconv_open (locale_code, "UTF-8");
282 	}
283       if (conv_to_utf8 == (iconv_t)(-1))
284 	error (EXIT_FAILURE, 0, _("\
285 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
286 and iconv() does not support this conversion."),
287 	       locale_code, "UTF-8", basename (program_name));
288       if (conv_from_utf8 == (iconv_t)(-1))
289 	error (EXIT_FAILURE, 0, _("\
290 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
291 and iconv() does not support this conversion."),
292 	       "UTF-8", locale_code, basename (program_name));
293       utf8_line = NULL;
294       utf8_line_len = 0;
295       backconv_line = NULL;
296       backconv_line_len = 0;
297 #else
298       error (EXIT_FAILURE, 0, _("\
299 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
300 This version was built without iconv()."),
301 	     locale_code, "UTF-8", basename (program_name));
302 #endif
303     }
304 
305   /* Read the input line by line.
306      Processing it character by character is not possible, because some
307      filters need to look at adjacent characters.  Processing the entire file
308      in a whole chunk would take an excessive amount of memory.  */
309   for (;;)
310     {
311       char *line;
312       size_t line_len;
313       char *filtered_line;
314       size_t filtered_line_len;
315 
316       /* Read a line.  */
317       if (read_linebuffer (&lb, stream) == NULL)
318 	break;
319       line = lb.buffer;
320       line_len = lb.length;
321       /* read_linebuffer always returns a non-void result.  */
322       if (line_len == 0)
323 	abort ();
324 
325 #if HAVE_ICONV
326       /* Convert it to UTF-8.  */
327       if (need_code_conversion)
328 	{
329 	  if (xmem_cd_iconv (line, line_len, conv_to_utf8,
330 			     &utf8_line, &utf8_line_len) != 0)
331 	    error (EXIT_FAILURE, errno,
332 		   _("input is not valid in \"%s\" encoding"),
333 		   locale_code);
334 	  line = utf8_line;
335 	  line_len = utf8_line_len;
336 	}
337 #endif
338 
339       /* Apply the filter.  */
340       serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);
341 
342 #if HAVE_ICONV
343       /* Convert it back to the original encoding.  */
344       if (need_code_conversion)
345 	{
346 	  if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
347 			     &backconv_line, &backconv_line_len) != 0)
348 	    error (EXIT_FAILURE, errno,
349 		   _("error while converting from \"%s\" encoding to \"%s\" encoding"),
350 		   "UTF-8", locale_code);
351 	  fwrite (backconv_line, 1, backconv_line_len, stdout);
352 	}
353       else
354 #endif
355 	fwrite (filtered_line, 1, filtered_line_len, stdout);
356 
357       free (filtered_line);
358     }
359 
360 #if HAVE_ICONV
361   if (need_code_conversion)
362     {
363       iconv_close (conv_from_utf8);
364       iconv_close (conv_to_utf8);
365     }
366 #endif
367 
368   destroy_linebuffer (&lb);
369 }
370