1 /* Reading binary .mo files.
2 Copyright (C) 1995-1998, 2000-2006 Free Software Foundation, Inc.
3 Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 /* Specification. */
24 #include "read-mo.h"
25
26 #include <errno.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 /* This include file describes the main part of binary .mo format. */
34 #include "gmo.h"
35
36 #include "error.h"
37 #include "xalloc.h"
38 #include "binary-io.h"
39 #include "exit.h"
40 #include "message.h"
41 #include "format.h"
42 #include "gettext.h"
43
44 #define _(str) gettext (str)
45
46
/* In-memory image of a .mo file.  The whole file is loaded into a single
   buffer up front, which is more efficient than lots of lseek() calls.  */
struct binary_mo_file
{
  /* Name of the file; used when reporting errors.  */
  const char *filename;
  /* The bytes read from the file.  */
  char *data;
  /* Number of bytes stored in DATA.  */
  size_t size;
  /* Byte order used to decode the 32-bit numbers in DATA.  */
  enum
  {
    MO_LITTLE_ENDIAN,
    MO_BIG_ENDIAN
  } endian;
};
56
57
58 /* Read the contents of the given input stream. */
59 static void
read_binary_mo_file(struct binary_mo_file * bfp,FILE * fp,const char * filename)60 read_binary_mo_file (struct binary_mo_file *bfp,
61 FILE *fp, const char *filename)
62 {
63 char *buf = NULL;
64 size_t alloc = 0;
65 size_t size = 0;
66 size_t count;
67
68 while (!feof (fp))
69 {
70 const size_t increment = 4096;
71 if (size + increment > alloc)
72 {
73 alloc = alloc + alloc / 2;
74 if (alloc < size + increment)
75 alloc = size + increment;
76 buf = (char *) xrealloc (buf, alloc);
77 }
78 count = fread (buf + size, 1, increment, fp);
79 if (count == 0)
80 {
81 if (ferror (fp))
82 error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
83 filename);
84 }
85 else
86 size += count;
87 }
88 buf = (char *) xrealloc (buf, size);
89 bfp->filename = filename;
90 bfp->data = buf;
91 bfp->size = size;
92 }
93
94 /* Get a 32-bit number from the file, at the given file position. */
95 static nls_uint32
get_uint32(const struct binary_mo_file * bfp,size_t offset)96 get_uint32 (const struct binary_mo_file *bfp, size_t offset)
97 {
98 nls_uint32 b0, b1, b2, b3;
99
100 if (offset + 4 > bfp->size)
101 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
102
103 b0 = *(unsigned char *) (bfp->data + offset + 0);
104 b1 = *(unsigned char *) (bfp->data + offset + 1);
105 b2 = *(unsigned char *) (bfp->data + offset + 2);
106 b3 = *(unsigned char *) (bfp->data + offset + 3);
107 if (bfp->endian == MO_LITTLE_ENDIAN)
108 return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
109 else
110 return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
111 }
112
113 /* Get a static string from the file, at the given file position. */
114 static char *
get_string(const struct binary_mo_file * bfp,size_t offset,size_t * lengthp)115 get_string (const struct binary_mo_file *bfp, size_t offset, size_t *lengthp)
116 {
117 /* See 'struct string_desc'. */
118 nls_uint32 s_length = get_uint32 (bfp, offset);
119 nls_uint32 s_offset = get_uint32 (bfp, offset + 4);
120
121 if (s_offset + s_length + 1 > bfp->size)
122 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
123 if (bfp->data[s_offset + s_length] != '\0')
124 error (EXIT_FAILURE, 0,
125 _("file \"%s\" contains a not NUL terminated string"),
126 bfp->filename);
127
128 *lengthp = s_length + 1;
129 return bfp->data + s_offset;
130 }
131
132 /* Get a system dependent string from the file, at the given file position. */
133 static char *
get_sysdep_string(const struct binary_mo_file * bfp,size_t offset,const struct mo_file_header * header,size_t * lengthp)134 get_sysdep_string (const struct binary_mo_file *bfp, size_t offset,
135 const struct mo_file_header *header, size_t *lengthp)
136 {
137 /* See 'struct sysdep_string'. */
138 size_t length;
139 char *string;
140 size_t i;
141 char *p;
142 nls_uint32 s_offset;
143
144 /* Compute the length. */
145 length = 0;
146 for (i = 4; ; i += 8)
147 {
148 nls_uint32 segsize = get_uint32 (bfp, offset + i);
149 nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4);
150 nls_uint32 sysdep_segment_offset;
151 nls_uint32 ss_length;
152 nls_uint32 ss_offset;
153 size_t n;
154
155 length += segsize;
156
157 if (sysdepref == SEGMENTS_END)
158 break;
159 if (sysdepref >= header->n_sysdep_segments)
160 /* Invalid. */
161 error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"),
162 bfp->filename);
163 /* See 'struct sysdep_segment'. */
164 sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8;
165 ss_length = get_uint32 (bfp, sysdep_segment_offset);
166 ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4);
167 if (ss_offset + ss_length > bfp->size)
168 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
169 if (!(ss_length > 0 && bfp->data[ss_offset + ss_length - 1] == '\0'))
170 {
171 char location[30];
172 sprintf (location, "sysdep_segment[%u]", (unsigned int) sysdepref);
173 error (EXIT_FAILURE, 0,
174 _("file \"%s\" contains a not NUL terminated string, at %s"),
175 bfp->filename, location);
176 }
177 n = strlen (bfp->data + ss_offset);
178 length += (n > 1 ? 1 + n + 1 : n);
179 }
180
181 /* Allocate and fill the string. */
182 string = (char *) xmalloc (length);
183 p = string;
184 s_offset = get_uint32 (bfp, offset);
185 for (i = 4; ; i += 8)
186 {
187 nls_uint32 segsize = get_uint32 (bfp, offset + i);
188 nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4);
189 nls_uint32 sysdep_segment_offset;
190 nls_uint32 ss_length;
191 nls_uint32 ss_offset;
192 size_t n;
193
194 if (s_offset + segsize > bfp->size)
195 error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
196 memcpy (p, bfp->data + s_offset, segsize);
197 p += segsize;
198 s_offset += segsize;
199
200 if (sysdepref == SEGMENTS_END)
201 break;
202 if (sysdepref >= header->n_sysdep_segments)
203 abort ();
204 /* See 'struct sysdep_segment'. */
205 sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8;
206 ss_length = get_uint32 (bfp, sysdep_segment_offset);
207 ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4);
208 if (ss_offset + ss_length > bfp->size)
209 abort ();
210 if (!(ss_length > 0 && bfp->data[ss_offset + ss_length - 1] == '\0'))
211 abort ();
212 n = strlen (bfp->data + ss_offset);
213 if (n > 1)
214 *p++ = '<';
215 memcpy (p, bfp->data + ss_offset, n);
216 p += n;
217 if (n > 1)
218 *p++ = '>';
219 }
220
221 if (p != string + length)
222 abort ();
223
224 *lengthp = length;
225 return string;
226 }
227
228 /* Reads an existing .mo file and adds the messages to mlp. */
229 void
read_mo_file(message_list_ty * mlp,const char * filename)230 read_mo_file (message_list_ty *mlp, const char *filename)
231 {
232 FILE *fp;
233 struct binary_mo_file bf;
234 struct mo_file_header header;
235 unsigned int i;
236 static lex_pos_ty pos = { __FILE__, __LINE__ };
237
238 if (strcmp (filename, "-") == 0 || strcmp (filename, "/dev/stdin") == 0)
239 {
240 fp = stdin;
241 SET_BINARY (fileno (fp));
242 }
243 else
244 {
245 fp = fopen (filename, "rb");
246 if (fp == NULL)
247 error (EXIT_FAILURE, errno,
248 _("error while opening \"%s\" for reading"), filename);
249 }
250
251 /* Read the file contents into memory. */
252 read_binary_mo_file (&bf, fp, filename);
253
254 /* Get a 32-bit number from the file header. */
255 # define GET_HEADER_FIELD(field) \
256 get_uint32 (&bf, offsetof (struct mo_file_header, field))
257
258 /* We must grope the file to determine which endian it is.
259 Perversity of the universe tends towards maximum, so it will
260 probably not match the currently executing architecture. */
261 bf.endian = MO_BIG_ENDIAN;
262 header.magic = GET_HEADER_FIELD (magic);
263 if (header.magic != _MAGIC)
264 {
265 bf.endian = MO_LITTLE_ENDIAN;
266 header.magic = GET_HEADER_FIELD (magic);
267 if (header.magic != _MAGIC)
268 {
269 unrecognised:
270 error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"),
271 filename);
272 }
273 }
274
275 header.revision = GET_HEADER_FIELD (revision);
276
277 /* We support only the major revisions 0 and 1. */
278 switch (header.revision >> 16)
279 {
280 case 0:
281 case 1:
282 /* Fill the header parts that apply to major revisions 0 and 1. */
283 header.nstrings = GET_HEADER_FIELD (nstrings);
284 header.orig_tab_offset = GET_HEADER_FIELD (orig_tab_offset);
285 header.trans_tab_offset = GET_HEADER_FIELD (trans_tab_offset);
286 header.hash_tab_size = GET_HEADER_FIELD (hash_tab_size);
287 header.hash_tab_offset = GET_HEADER_FIELD (hash_tab_offset);
288
289 for (i = 0; i < header.nstrings; i++)
290 {
291 message_ty *mp;
292 char *msgctxt;
293 char *msgid;
294 size_t msgid_len;
295 char *separator;
296 char *msgstr;
297 size_t msgstr_len;
298
299 /* Read the msgctxt and msgid. */
300 msgid = get_string (&bf, header.orig_tab_offset + i * 8,
301 &msgid_len);
302 /* Split into msgctxt and msgid. */
303 separator = strchr (msgid, MSGCTXT_SEPARATOR);
304 if (separator != NULL)
305 {
306 /* The part before the MSGCTXT_SEPARATOR is the msgctxt. */
307 *separator = '\0';
308 msgctxt = msgid;
309 msgid = separator + 1;
310 msgid_len -= msgid - msgctxt;
311 }
312 else
313 msgctxt = NULL;
314
315 /* Read the msgstr. */
316 msgstr = get_string (&bf, header.trans_tab_offset + i * 8,
317 &msgstr_len);
318
319 mp = message_alloc (msgctxt,
320 msgid,
321 (strlen (msgid) + 1 < msgid_len
322 ? msgid + strlen (msgid) + 1
323 : NULL),
324 msgstr, msgstr_len,
325 &pos);
326 message_list_append (mlp, mp);
327 }
328
329 switch (header.revision & 0xffff)
330 {
331 case 0:
332 break;
333 case 1:
334 default:
335 /* Fill the header parts that apply to minor revision >= 1. */
336 header.n_sysdep_segments = GET_HEADER_FIELD (n_sysdep_segments);
337 header.sysdep_segments_offset =
338 GET_HEADER_FIELD (sysdep_segments_offset);
339 header.n_sysdep_strings = GET_HEADER_FIELD (n_sysdep_strings);
340 header.orig_sysdep_tab_offset =
341 GET_HEADER_FIELD (orig_sysdep_tab_offset);
342 header.trans_sysdep_tab_offset =
343 GET_HEADER_FIELD (trans_sysdep_tab_offset);
344
345 for (i = 0; i < header.n_sysdep_strings; i++)
346 {
347 message_ty *mp;
348 char *msgctxt;
349 char *msgid;
350 size_t msgid_len;
351 char *separator;
352 char *msgstr;
353 size_t msgstr_len;
354 nls_uint32 offset;
355 size_t f;
356
357 /* Read the msgctxt and msgid. */
358 offset = get_uint32 (&bf, header.orig_sysdep_tab_offset + i * 4);
359 msgid = get_sysdep_string (&bf, offset, &header, &msgid_len);
360 /* Split into msgctxt and msgid. */
361 separator = strchr (msgid, MSGCTXT_SEPARATOR);
362 if (separator != NULL)
363 {
364 /* The part before the MSGCTXT_SEPARATOR is the msgctxt. */
365 *separator = '\0';
366 msgctxt = msgid;
367 msgid = separator + 1;
368 msgid_len -= msgid - msgctxt;
369 }
370 else
371 msgctxt = NULL;
372
373 /* Read the msgstr. */
374 offset = get_uint32 (&bf, header.trans_sysdep_tab_offset + i * 4);
375 msgstr = get_sysdep_string (&bf, offset, &header, &msgstr_len);
376
377 mp = message_alloc (msgctxt,
378 msgid,
379 (strlen (msgid) + 1 < msgid_len
380 ? msgid + strlen (msgid) + 1
381 : NULL),
382 msgstr, msgstr_len,
383 &pos);
384
385 /* Only messages with c-format or objc-format annotation are
386 recognized as having system-dependent strings by msgfmt.
387 Which one of the two, we don't know. We have to guess,
388 assuming that c-format is more probable than objc-format and
389 that the .mo was likely produced by "msgfmt -c". */
390 for (f = format_c; ; f = format_objc)
391 {
392 bool valid = true;
393 struct formatstring_parser *parser = formatstring_parsers[f];
394 const char *str_end;
395 const char *str;
396
397 str_end = msgid + msgid_len;
398 for (str = msgid; str < str_end; str += strlen (str) + 1)
399 {
400 char *invalid_reason = NULL;
401 void *descr = parser->parse (str, false, &invalid_reason);
402
403 if (descr != NULL)
404 parser->free (descr);
405 else
406 {
407 free (invalid_reason);
408 valid = false;
409 break;
410 }
411 }
412 if (valid)
413 {
414 str_end = msgstr + msgstr_len;
415 for (str = msgstr; str < str_end; str += strlen (str) + 1)
416 {
417 char *invalid_reason = NULL;
418 void *descr =
419 parser->parse (str, true, &invalid_reason);
420
421 if (descr != NULL)
422 parser->free (descr);
423 else
424 {
425 free (invalid_reason);
426 valid = false;
427 break;
428 }
429 }
430 }
431
432 if (valid)
433 {
434 /* Found the most likely among c-format, objc-format. */
435 mp->is_format[f] = yes;
436 break;
437 }
438
439 /* Try next f. */
440 if (f == format_objc)
441 break;
442 }
443
444 message_list_append (mlp, mp);
445 }
446 break;
447 }
448 break;
449
450 default:
451 goto unrecognised;
452 }
453
454 if (fp != stdin)
455 fclose (fp);
456 }
457