xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/src/write-properties.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Writing Java .properties files.
2    Copyright (C) 2003, 2005-2006 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2003.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 /* Specification.  */
24 #include "write-properties.h"
25 
26 #include <errno.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 #include "error.h"
33 #include "message.h"
34 #include "msgl-ascii.h"
35 #include "msgl-iconv.h"
36 #include "po-charset.h"
37 #include "utf8-ucs4.h"
38 #include "write-po.h"
39 #include "xalloc.h"
40 
41 /* The format of the Java .properties files is documented in the JDK
42    documentation for class java.util.Properties.  In the case of .properties
43    files for PropertyResourceBundle, for each message, the msgid becomes the
44    key (left-hand side) and the msgstr becomes the value (right-hand side)
45    of a "key=value" line.  Messages with plurals are not supported in this
46    format.  */
47 
48 /* Handling of comments: We copy all comments from the PO file to the
49    .properties file. This is not really needed; it's a service for translators
50    who don't like PO files and prefer to maintain the .properties file.  */
51 
/* Returns STRING with every non-ASCII character replaced by a Java
   \uxxxx escape.  Characters below U+10000 become one escape; all others
   become two escapes (a UTF-16 surrogate pair).  STRING is decoded with
   u8_mbtouc.
   If STRING is entirely ASCII it is returned as is; otherwise the return
   value is a newly xmalloc'ed string.  */
static const char *
conv_to_java (const char *string)
{
  /* iconv to "JAVA" is not an option here, because not every iconv()
     implementation knows about the "JAVA" encoding.  */
  static const char hexdigit[] = "0123456789abcdef";
  const char *in;
  const char *in_end;
  size_t needed;
  char *result;
  char *out;

  if (is_ascii_string (string))
    return string;

  in_end = string + strlen (string);

  /* First pass: determine how many bytes the escaped form occupies.  */
  needed = 0;
  for (in = string; in < in_end; )
    {
      unsigned int uc;

      in += u8_mbtouc (&uc, (const unsigned char *) in, in_end - in);
      if (uc <= 0x007f)
        needed += 1;
      else if (uc < 0x10000)
        needed += 6;
      else
        needed += 12;
    }

  result = (char *) xmalloc (needed + 1);

  /* Second pass: emit the characters.  */
  out = result;
  for (in = string; in < in_end; )
    {
      unsigned int uc;
      unsigned int unit[2];
      size_t n_units;
      size_t k;

      in += u8_mbtouc (&uc, (const unsigned char *) in, in_end - in);

      if (uc <= 0x007f)
        {
          /* ASCII is copied literally.  The ISO-8859-1 range
             0x0080..0x00FF could be copied too, but there is no point;
             Sun's nativetoascii doesn't do it either.  */
          *out++ = uc;
          continue;
        }

      if (uc < 0x10000)
        {
          /* Fits in a single UCS-2 'char'.  */
          unit[0] = uc;
          n_units = 1;
        }
      else
        {
          /* Needs a UTF-16 surrogate pair: two 'char's.  */
          unit[0] = 0xd800 + ((uc - 0x10000) >> 10);
          unit[1] = 0xdc00 + ((uc - 0x10000) & 0x3ff);
          n_units = 2;
        }

      /* Each unit takes exactly six bytes: backslash, 'u', four hex
         digits.  */
      for (k = 0; k < n_units; k++)
        {
          *out++ = '\\';
          *out++ = 'u';
          *out++ = hexdigit[(unit[k] >> 12) & 0x0f];
          *out++ = hexdigit[(unit[k] >> 8) & 0x0f];
          *out++ = hexdigit[(unit[k] >> 4) & 0x0f];
          *out++ = hexdigit[unit[k] & 0x0f];
        }
    }
  *out = '\0';

  return result;
}
124 
/* Writes one UTF-16 code unit to FP as a \uxxxx escape with four
   lowercase hexadecimal digits.  */
static void
write_utf16_unit_escape (FILE *fp, unsigned int unit)
{
  static const char hexdigit[] = "0123456789abcdef";

  fprintf (fp, "\\u%c%c%c%c",
           hexdigit[(unit >> 12) & 0x0f], hexdigit[(unit >> 8) & 0x0f],
           hexdigit[(unit >> 4) & 0x0f], hexdigit[unit & 0x0f]);
}

/* Writes STR to FP as a key (IN_KEY true) or a value (IN_KEY false),
   without a trailing newline.  STR is decoded with u8_mbtouc.
   - A space is backslash-escaped when it is the first character, or
     anywhere in a key.
   - Tab, newline, carriage return and form feed become \t, \n, \r, \f.
   - Backslash, '#', '!', '=' and ':' get a backslash in front.
   - Other characters in 0x20..0x7e are written literally.
   - Everything else is written as one or two \uxxxx escapes.  */
static void
write_escaped_string (FILE *fp, const char *str, bool in_key)
{
  const char *end = str + strlen (str);
  bool at_start = true;

  while (str < end)
    {
      unsigned int uc;

      str += u8_mbtouc (&uc, (const unsigned char *) str, end - str);

      switch (uc)
        {
        case 0x0020:
          /* Whitespace must be escaped at the start and inside keys;
             elsewhere a space is an ordinary printable character.  */
          if (at_start || in_key)
            putc ('\\', fp);
          putc (' ', fp);
          break;

        case 0x0009:
          putc ('\\', fp);
          putc ('t', fp);
          break;

        case 0x000a:
          putc ('\\', fp);
          putc ('n', fp);
          break;

        case 0x000d:
          putc ('\\', fp);
          putc ('r', fp);
          break;

        case 0x000c:
          putc ('\\', fp);
          putc ('f', fp);
          break;

        case '\\':      /* Backslash must be escaped.  */
        case '#':       /* Possible comment introducers must be escaped.  */
        case '!':
        case '=':       /* Key terminators must be escaped.  */
        case ':':
          putc ('\\', fp);
          putc (uc, fp);
          break;

        default:
          if (uc >= 0x0020 && uc <= 0x007e)
            {
              /* Printable ASCII goes out literally.  The ISO-8859-1
                 range 0x0080..0x00FF could be treated the same way, but
                 there is no point; Sun's nativetoascii doesn't do it
                 either.  */
              putc (uc, fp);
            }
          else if (uc < 0x10000)
            {
              /* Single UCS-2 'char'.  */
              write_utf16_unit_escape (fp, uc);
            }
          else
            {
              /* UTF-16 surrogate pair: two 'char's.  */
              write_utf16_unit_escape (fp, 0xd800 + ((uc - 0x10000) >> 10));
              write_utf16_unit_escape (fp,
                                       0xdc00 + ((uc - 0x10000) & 0x3ff));
            }
          break;
        }

      at_start = false;
    }
}
203 
204 /* Writes a message to the file.  */
205 static void
write_message(FILE * fp,const message_ty * mp,size_t page_width,bool debug)206 write_message (FILE *fp, const message_ty *mp, size_t page_width, bool debug)
207 {
208   /* Print translator comment if available.  */
209   message_print_comment (mp, fp);
210 
211   /* Print xgettext extracted comments.  */
212   message_print_comment_dot (mp, fp);
213 
214   /* Print the file position comments.  */
215   message_print_comment_filepos (mp, fp, false, page_width);
216 
217   /* Print flag information in special comment.  */
218   message_print_comment_flags (mp, fp, debug);
219 
220   /* Put a comment mark if the message is the header or untranslated or
221      fuzzy.  */
222   if (is_header (mp)
223       || mp->msgstr[0] == '\0'
224       || (mp->is_fuzzy && !is_header (mp)))
225     putc ('!', fp);
226 
227   /* Now write the untranslated string and the translated string.  */
228   write_escaped_string (fp, mp->msgid, true);
229   putc ('=', fp);
230   write_escaped_string (fp, mp->msgstr, false);
231 
232   putc ('\n', fp);
233 }
234 
235 /* Writes an entire message list to the file.  */
236 static void
write_properties(FILE * fp,message_list_ty * mlp,const char * canon_encoding,size_t page_width,bool debug)237 write_properties (FILE *fp, message_list_ty *mlp, const char *canon_encoding,
238 		  size_t page_width, bool debug)
239 {
240   bool blank_line;
241   size_t j, i;
242 
243   /* Convert the messages to Unicode.  */
244   iconv_message_list (mlp, canon_encoding, po_charset_utf8, NULL);
245   for (j = 0; j < mlp->nitems; ++j)
246     {
247       message_ty *mp = mlp->item[j];
248 
249       if (mp->comment != NULL)
250 	for (i = 0; i < mp->comment->nitems; ++i)
251 	  mp->comment->item[i] = conv_to_java (mp->comment->item[i]);
252       if (mp->comment_dot != NULL)
253 	for (i = 0; i < mp->comment_dot->nitems; ++i)
254 	  mp->comment_dot->item[i] = conv_to_java (mp->comment_dot->item[i]);
255     }
256 
257   /* Loop through the messages.  */
258   blank_line = false;
259   for (j = 0; j < mlp->nitems; ++j)
260     {
261       const message_ty *mp = mlp->item[j];
262 
263       if (mp->msgid_plural == NULL && !mp->obsolete)
264 	{
265 	  if (blank_line)
266 	    putc ('\n', fp);
267 
268 	  write_message (fp, mp, page_width, debug);
269 
270 	  blank_line = true;
271 	}
272     }
273 }
274 
275 /* Output the contents of a PO file in Java .properties syntax.  */
276 static void
msgdomain_list_print_properties(msgdomain_list_ty * mdlp,FILE * fp,size_t page_width,bool debug)277 msgdomain_list_print_properties (msgdomain_list_ty *mdlp, FILE *fp,
278 				 size_t page_width, bool debug)
279 {
280   message_list_ty *mlp;
281 
282   if (mdlp->nitems == 1)
283     mlp = mdlp->item[0]->messages;
284   else
285     mlp = message_list_alloc (false);
286   write_properties (fp, mlp, mdlp->encoding, page_width, debug);
287 }
288 
289 /* Describes a PO file in Java .properties syntax.  */
290 const struct catalog_output_format output_format_properties =
291 {
292   msgdomain_list_print_properties,	/* print */
293   true,					/* requires_utf8 */
294   false,				/* supports_multiple_domains */
295   false,				/* supports_contexts */
296   false,				/* supports_plurals */
297   true,					/* alternative_is_po */
298   true					/* alternative_is_java_class */
299 };
300