1 /* Recode Serbian text from Cyrillic to Latin script.
2 Copyright (C) 2006 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2006.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22
23 #include <errno.h>
24 #include <getopt.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <locale.h>
29
30 #if HAVE_ICONV
31 #include <iconv.h>
32 #endif
33
34 #include "closeout.h"
35 #include "error.h"
36 #include "progname.h"
37 #include "relocatable.h"
38 #include "basename.h"
39 #include "xalloc.h"
40 #include "exit.h"
41 #include "localcharset.h"
42 #include "c-strcase.h"
43 #include "xstriconv.h"
44 #include "filters.h"
45 #include "propername.h"
46 #include "gettext.h"
47
48 #define _(str) gettext (str)
49
50
/* Table of long command line options handed to getopt_long.
   Each entry maps a long option to the short option character that the
   option loop in main() switches on.  The all-zero entry ends the table.  */
static const struct option long_options[] =
{
  { "help",    no_argument, NULL, 'h' },
  { "version", no_argument, NULL, 'V' },
  { NULL,      0,           NULL, 0   }
};
58
/* Prototypes of the functions defined further down in this file.  */

/* Transliterate everything readable from STREAM to standard output.  */
static void process (FILE *stream);

/* Print a usage hint or the full help text, then exit with STATUS.
   Marked as non-returning for GCC 2.5 and newer.  */
static void usage (int status)
#if defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 5))
     __attribute__ ((noreturn))
#endif
;
66
67 int
main(int argc,char * argv[])68 main (int argc, char *argv[])
69 {
70 /* Default values for command line options. */
71 bool do_help = false;
72 bool do_version = false;
73
74 int opt;
75
76 /* Set program name for message texts. */
77 set_program_name (argv[0]);
78
79 #ifdef HAVE_SETLOCALE
80 /* Set locale via LC_ALL. */
81 setlocale (LC_ALL, "");
82 #endif
83
84 /* Set the text message domain. */
85 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
86 textdomain (PACKAGE);
87
88 /* Ensure that write errors on stdout are detected. */
89 atexit (close_stdout);
90
91 /* Parse command line options. */
92 while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
93 switch (opt)
94 {
95 case '\0': /* Long option. */
96 break;
97 case 'h':
98 do_help = true;
99 break;
100 case 'V':
101 do_version = true;
102 break;
103 default:
104 usage (EXIT_FAILURE);
105 }
106
107 /* Version information is requested. */
108 if (do_version)
109 {
110 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
111 /* xgettext: no-wrap */
112 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
113 This is free software; see the source for copying conditions. There is NO\n\
114 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
115 "),
116 "2006");
117 printf (_("Written by %s and %s.\n"),
118 /* TRANSLATORS: This is a proper name. The last name is
119 (with Unicode escapes) "\u0160egan" or (with HTML entities)
120 "Šegan". */
121 proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
122 proper_name ("Bruno Haible"));
123 exit (EXIT_SUCCESS);
124 }
125
126 /* Help is requested. */
127 if (do_help)
128 usage (EXIT_SUCCESS);
129
130 if (argc - optind > 0)
131 error (EXIT_FAILURE, 0, _("too many arguments"));
132
133 process (stdin);
134
135 exit (EXIT_SUCCESS);
136 }
137
138
139 /* Display usage information and exit. */
140 static void
usage(int status)141 usage (int status)
142 {
143 if (status != EXIT_SUCCESS)
144 fprintf (stderr, _("Try `%s --help' for more information.\n"),
145 program_name);
146 else
147 {
148 /* xgettext: no-wrap */
149 printf (_("\
150 Usage: %s [OPTION]\n\
151 "), program_name);
152 printf ("\n");
153 /* xgettext: no-wrap */
154 printf (_("\
155 Recode Serbian text from Cyrillic to Latin script.\n"));
156 /* xgettext: no-wrap */
157 printf (_("\
158 The input text is read from standard input. The converted text is output to\n\
159 standard output.\n"));
160 printf ("\n");
161 /* xgettext: no-wrap */
162 printf (_("\
163 Informative output:\n"));
164 /* xgettext: no-wrap */
165 printf (_("\
166 -h, --help display this help and exit\n"));
167 /* xgettext: no-wrap */
168 printf (_("\
169 -V, --version output version information and exit\n"));
170 printf ("\n");
171 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout);
172 }
173
174 exit (status);
175 }
176
177
178 /* Routines for reading a line.
179 Don't use routines that drop NUL bytes. Don't use getline(), because it
180 doesn't provide a good error message in case of memory allocation failure.
181 The gnulib module 'linebuffer' is nearly the right thing, except that we
182 don't want an extra newline at the end of file. */
183
/* A 'struct linebuffer' is a growable byte buffer that holds one line of
   text.  The line is not NUL terminated; LENGTH tells how many bytes of
   BUFFER are in use.  */

struct linebuffer
{
  size_t size;    /* Number of bytes allocated for BUFFER.  */
  size_t length;  /* Number of bytes of BUFFER currently used.  */
  char *buffer;   /* The bytes of the line.  */
};
192
193 /* Initialize linebuffer LINEBUFFER for use. */
194 static inline void
init_linebuffer(struct linebuffer * lb)195 init_linebuffer (struct linebuffer *lb)
196 {
197 lb->size = 0;
198 lb->length = 0;
199 lb->buffer = NULL;
200 }
201
202 /* Read an arbitrarily long line of text from STREAM into linebuffer LB.
203 Keep the newline. Do not NUL terminate.
204 Return LINEBUFFER, except at end of file return NULL. */
205 static struct linebuffer *
read_linebuffer(struct linebuffer * lb,FILE * stream)206 read_linebuffer (struct linebuffer *lb, FILE *stream)
207 {
208 if (feof (stream))
209 return NULL;
210 else
211 {
212 char *p = lb->buffer;
213 char *end = lb->buffer + lb->size;
214
215 for (;;)
216 {
217 int c = getc (stream);
218 if (c == EOF)
219 {
220 if (p == lb->buffer || ferror (stream))
221 return NULL;
222 break;
223 }
224 if (p == end)
225 {
226 size_t oldsize = lb->size; /* = p - lb->buffer */
227 size_t newsize = 2 * oldsize + 40;
228 lb->buffer = (char *) xrealloc (lb->buffer, newsize);
229 lb->size = newsize;
230 p = lb->buffer + oldsize;
231 end = lb->buffer + newsize;
232 }
233 *p++ = c;
234 if (c == '\n')
235 break;
236 }
237
238 lb->length = p - lb->buffer;
239 return lb;
240 }
241 }
242
243 /* Free linebuffer LB and its data, all allocated with malloc. */
244 static inline void
destroy_linebuffer(struct linebuffer * lb)245 destroy_linebuffer (struct linebuffer *lb)
246 {
247 if (lb->buffer != NULL)
248 free (lb->buffer);
249 }
250
251
252 /* Process the input and produce the output. */
253 static void
process(FILE * stream)254 process (FILE *stream)
255 {
256 struct linebuffer lb;
257 const char *locale_code = locale_charset ();
258 bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
259 #if HAVE_ICONV
260 iconv_t conv_to_utf8 = (iconv_t)(-1);
261 iconv_t conv_from_utf8 = (iconv_t)(-1);
262 char *utf8_line;
263 size_t utf8_line_len;
264 char *backconv_line;
265 size_t backconv_line_len;
266 #endif
267
268 init_linebuffer (&lb);
269
270 /* Initialize the conversion descriptors. */
271 if (need_code_conversion)
272 {
273 #if HAVE_ICONV
274 /* Avoid glibc-2.1 bug with EUC-KR. */
275 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
276 if (strcmp (locale_code, "EUC-KR") != 0)
277 # endif
278 {
279 conv_to_utf8 = iconv_open ("UTF-8", locale_code);
280 /* TODO: Maybe append //TRANSLIT here? */
281 conv_from_utf8 = iconv_open (locale_code, "UTF-8");
282 }
283 if (conv_to_utf8 == (iconv_t)(-1))
284 error (EXIT_FAILURE, 0, _("\
285 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
286 and iconv() does not support this conversion."),
287 locale_code, "UTF-8", basename (program_name));
288 if (conv_from_utf8 == (iconv_t)(-1))
289 error (EXIT_FAILURE, 0, _("\
290 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
291 and iconv() does not support this conversion."),
292 "UTF-8", locale_code, basename (program_name));
293 utf8_line = NULL;
294 utf8_line_len = 0;
295 backconv_line = NULL;
296 backconv_line_len = 0;
297 #else
298 error (EXIT_FAILURE, 0, _("\
299 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
300 This version was built without iconv()."),
301 locale_code, "UTF-8", basename (program_name));
302 #endif
303 }
304
305 /* Read the input line by line.
306 Processing it character by character is not possible, because some
307 filters need to look at adjacent characters. Processing the entire file
308 in a whole chunk would take an excessive amount of memory. */
309 for (;;)
310 {
311 char *line;
312 size_t line_len;
313 char *filtered_line;
314 size_t filtered_line_len;
315
316 /* Read a line. */
317 if (read_linebuffer (&lb, stream) == NULL)
318 break;
319 line = lb.buffer;
320 line_len = lb.length;
321 /* read_linebuffer always returns a non-void result. */
322 if (line_len == 0)
323 abort ();
324
325 #if HAVE_ICONV
326 /* Convert it to UTF-8. */
327 if (need_code_conversion)
328 {
329 if (xmem_cd_iconv (line, line_len, conv_to_utf8,
330 &utf8_line, &utf8_line_len) != 0)
331 error (EXIT_FAILURE, errno,
332 _("input is not valid in \"%s\" encoding"),
333 locale_code);
334 line = utf8_line;
335 line_len = utf8_line_len;
336 }
337 #endif
338
339 /* Apply the filter. */
340 serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);
341
342 #if HAVE_ICONV
343 /* Convert it back to the original encoding. */
344 if (need_code_conversion)
345 {
346 if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
347 &backconv_line, &backconv_line_len) != 0)
348 error (EXIT_FAILURE, errno,
349 _("error while converting from \"%s\" encoding to \"%s\" encoding"),
350 "UTF-8", locale_code);
351 fwrite (backconv_line, 1, backconv_line_len, stdout);
352 }
353 else
354 #endif
355 fwrite (filtered_line, 1, filtered_line_len, stdout);
356
357 free (filtered_line);
358 }
359
360 #if HAVE_ICONV
361 if (need_code_conversion)
362 {
363 iconv_close (conv_from_utf8);
364 iconv_close (conv_to_utf8);
365 }
366 #endif
367
368 destroy_linebuffer (&lb);
369 }
370