1c4074784SAlexander Polakov /*-
2c4074784SAlexander Polakov * Copyright (c) 2003 Ryuichiro Imura
3c4074784SAlexander Polakov * All rights reserved.
4c4074784SAlexander Polakov *
5c4074784SAlexander Polakov * Redistribution and use in source and binary forms, with or without
6c4074784SAlexander Polakov * modification, are permitted provided that the following conditions
7c4074784SAlexander Polakov * are met:
8c4074784SAlexander Polakov * 1. Redistributions of source code must retain the above copyright
9c4074784SAlexander Polakov * notice, this list of conditions and the following disclaimer.
10c4074784SAlexander Polakov * 2. Redistributions in binary form must reproduce the above copyright
11c4074784SAlexander Polakov * notice, this list of conditions and the following disclaimer in the
12c4074784SAlexander Polakov * documentation and/or other materials provided with the distribution.
13c4074784SAlexander Polakov *
14c4074784SAlexander Polakov * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15c4074784SAlexander Polakov * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16c4074784SAlexander Polakov * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17c4074784SAlexander Polakov * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18c4074784SAlexander Polakov * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19c4074784SAlexander Polakov * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20c4074784SAlexander Polakov * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21c4074784SAlexander Polakov * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22c4074784SAlexander Polakov * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23c4074784SAlexander Polakov * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24c4074784SAlexander Polakov * SUCH DAMAGE.
25c4074784SAlexander Polakov *
26*8527a700SSascha Wildner * $FreeBSD: head/lib/libkiconv/quirks.c 298896 2016-05-01 19:37:33Z pfg $
27c4074784SAlexander Polakov */
28c4074784SAlexander Polakov
/*
 * kiconv(3) requires shared linking, so the quirk tables are built only
 * for PIC objects; a statically linked build gets a stub instead, which
 * reduces module size.
 */
330d5acd74SJohn Marino
340d5acd74SJohn Marino #ifdef PIC
350d5acd74SJohn Marino
/*
 * Why do we need quirks?
 * Since each vendor has its own Unicode mapping rules,
 * we need some quirks until iconv(3) supports them.
 * We can define Microsoft mappings here.
 *
 * For example, the eucJP and Unicode mapping rule is based on
 * the JIS standard. Since Microsoft uses cp932 for Unicode mapping
 * which is not truly based on the JIS standard, reading a file
 * system created by the Microsoft Windows family using the eucJP/Unicode
 * mapping rule will cause a problem. That's why we define eucJP-ms here.
 * The eucJP-ms has been defined by The Open Group Japan Vendor Council.
 *
 * Well, Apple Mac OS also has its own Unicode mappings,
 * but we won't require these quirks here, because HFS doesn't have
 * Unicode and HFS+ has decomposed Unicode which cannot be
 * handled by this xlat16 converter.
 */
54c4074784SAlexander Polakov
55c4074784SAlexander Polakov #include <sys/types.h>
56c4074784SAlexander Polakov #include <sys/iconv.h>
57c4074784SAlexander Polakov
58c4074784SAlexander Polakov #include <stdio.h>
59c4074784SAlexander Polakov #include <string.h>
60c4074784SAlexander Polakov
61c4074784SAlexander Polakov #include "quirks.h"
62c4074784SAlexander Polakov
63c4074784SAlexander Polakov /*
64c4074784SAlexander Polakov * All lists of quirk character set
65c4074784SAlexander Polakov */
66c4074784SAlexander Polakov static struct {
67c4074784SAlexander Polakov int vendor; /* reserved for non MS mapping */
68c4074784SAlexander Polakov const char *base_codeset, *quirk_codeset;
69c4074784SAlexander Polakov } quirk_list[] = {
70c4074784SAlexander Polakov { KICONV_VENDOR_MICSFT, "eucJP", "eucJP-ms" },
71c4074784SAlexander Polakov { KICONV_VENDOR_MICSFT, "EUC-JP", "eucJP-ms" },
72c4074784SAlexander Polakov { KICONV_VENDOR_MICSFT, "SJIS", "SJIS-ms" },
73c4074784SAlexander Polakov { KICONV_VENDOR_MICSFT, "Shift_JIS", "SJIS-ms" },
74c4074784SAlexander Polakov { KICONV_VENDOR_MICSFT, "Big5", "Big5-ms" }
75c4074784SAlexander Polakov };
76c4074784SAlexander Polakov
77c4074784SAlexander Polakov /*
78c4074784SAlexander Polakov * The character list to replace for Japanese MS-Windows.
79c4074784SAlexander Polakov */
80c4074784SAlexander Polakov static struct quirk_replace_list quirk_jis_cp932[] = {
81c4074784SAlexander Polakov { 0x00a2, 0xffe0 }, /* Cent Sign, Fullwidth Cent Sign */
82c4074784SAlexander Polakov { 0x00a3, 0xffe1 }, /* Pound Sign, Fullwidth Pound Sign */
83c4074784SAlexander Polakov { 0x00ac, 0xffe2 }, /* Not Sign, Fullwidth Not Sign */
84c4074784SAlexander Polakov { 0x2016, 0x2225 }, /* Double Vertical Line, Parallel To */
85c4074784SAlexander Polakov { 0x203e, 0x007e }, /* Overline, Tilde */
86c4074784SAlexander Polakov { 0x2212, 0xff0d }, /* Minus Sign, Fullwidth Hyphenminus */
87c4074784SAlexander Polakov { 0x301c, 0xff5e } /* Wave Dash, Fullwidth Tilde */
88c4074784SAlexander Polakov };
89c4074784SAlexander Polakov
90c4074784SAlexander Polakov /*
91c4074784SAlexander Polakov * All entries of quirks
92c4074784SAlexander Polakov */
93c4074784SAlexander Polakov #define NumOf(n) (sizeof((n)) / sizeof((n)[0]))
94c4074784SAlexander Polakov static struct {
95c4074784SAlexander Polakov const char *quirk_codeset, *iconv_codeset, *pair_codeset;
96c4074784SAlexander Polakov struct quirk_replace_list (*replace_list)[];
97c4074784SAlexander Polakov size_t num_of_replaces;
98c4074784SAlexander Polakov } quirk_table[] = {
99c4074784SAlexander Polakov {
100c4074784SAlexander Polakov "eucJP-ms", "eucJP", ENCODING_UNICODE,
101c4074784SAlexander Polakov (struct quirk_replace_list (*)[])&quirk_jis_cp932,
102c4074784SAlexander Polakov NumOf(quirk_jis_cp932)
103c4074784SAlexander Polakov },
104c4074784SAlexander Polakov {
105c4074784SAlexander Polakov "SJIS-ms", "CP932", ENCODING_UNICODE,
106c4074784SAlexander Polakov /* XXX - quirk_replace_list should be NULL */
107c4074784SAlexander Polakov (struct quirk_replace_list (*)[])&quirk_jis_cp932,
108c4074784SAlexander Polakov NumOf(quirk_jis_cp932)
109c4074784SAlexander Polakov },
110c4074784SAlexander Polakov {
111c4074784SAlexander Polakov "Big5-ms", "CP950", ENCODING_UNICODE,
112c4074784SAlexander Polakov NULL, 0
113c4074784SAlexander Polakov }
114c4074784SAlexander Polakov };
115c4074784SAlexander Polakov
116c4074784SAlexander Polakov
117c4074784SAlexander Polakov const char *
kiconv_quirkcs(const char * base,int vendor)118c4074784SAlexander Polakov kiconv_quirkcs(const char* base, int vendor)
119c4074784SAlexander Polakov {
120c4074784SAlexander Polakov size_t i;
121c4074784SAlexander Polakov
122c4074784SAlexander Polakov /*
123c4074784SAlexander Polakov * We should compare codeset names ignoring case here,
124c4074784SAlexander Polakov * so that quirk could be used for all of the user input
125c4074784SAlexander Polakov * patterns.
126c4074784SAlexander Polakov */
127c4074784SAlexander Polakov for (i = 0; i < NumOf(quirk_list); i++)
128c4074784SAlexander Polakov if (quirk_list[i].vendor == vendor &&
129c4074784SAlexander Polakov strcasecmp(quirk_list[i].base_codeset, base) == 0)
130c4074784SAlexander Polakov return (quirk_list[i].quirk_codeset);
131c4074784SAlexander Polakov
132c4074784SAlexander Polakov return (base);
133c4074784SAlexander Polakov }
134c4074784SAlexander Polakov
135c4074784SAlexander Polakov /*
136c4074784SAlexander Polakov * Internal Functions
137c4074784SAlexander Polakov */
138c4074784SAlexander Polakov const char *
search_quirk(const char * given_codeset,const char * pair_codeset,struct quirk_replace_list ** replace_list,size_t * num_of_replaces)139c4074784SAlexander Polakov search_quirk(const char *given_codeset,
140c4074784SAlexander Polakov const char *pair_codeset,
141c4074784SAlexander Polakov struct quirk_replace_list **replace_list,
142c4074784SAlexander Polakov size_t *num_of_replaces)
143c4074784SAlexander Polakov {
144c4074784SAlexander Polakov size_t i;
145c4074784SAlexander Polakov
146c4074784SAlexander Polakov *replace_list = NULL;
147c4074784SAlexander Polakov *num_of_replaces = 0;
148c4074784SAlexander Polakov for (i = 0; i < NumOf(quirk_table); i++)
149c4074784SAlexander Polakov if (strcmp(quirk_table[i].quirk_codeset, given_codeset) == 0) {
150c4074784SAlexander Polakov if (strcmp(quirk_table[i].pair_codeset, pair_codeset) == 0) {
151c4074784SAlexander Polakov *replace_list = *quirk_table[i].replace_list;
152c4074784SAlexander Polakov *num_of_replaces = quirk_table[i].num_of_replaces;
153c4074784SAlexander Polakov }
154c4074784SAlexander Polakov return (quirk_table[i].iconv_codeset);
155c4074784SAlexander Polakov }
156c4074784SAlexander Polakov
157c4074784SAlexander Polakov return (given_codeset);
158c4074784SAlexander Polakov }
159c4074784SAlexander Polakov
160c4074784SAlexander Polakov uint16_t
quirk_vendor2unix(uint16_t c,struct quirk_replace_list * replace_list,size_t num)161c4074784SAlexander Polakov quirk_vendor2unix(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
162c4074784SAlexander Polakov {
163c4074784SAlexander Polakov size_t i;
164c4074784SAlexander Polakov
165c4074784SAlexander Polakov for (i = 0; i < num; i++)
166c4074784SAlexander Polakov if (replace_list[i].vendor_code == c)
167c4074784SAlexander Polakov return (replace_list[i].standard_code);
168c4074784SAlexander Polakov
169c4074784SAlexander Polakov return (c);
170c4074784SAlexander Polakov }
171c4074784SAlexander Polakov
172c4074784SAlexander Polakov uint16_t
quirk_unix2vendor(uint16_t c,struct quirk_replace_list * replace_list,size_t num)173c4074784SAlexander Polakov quirk_unix2vendor(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
174c4074784SAlexander Polakov {
175c4074784SAlexander Polakov size_t i;
176c4074784SAlexander Polakov
177c4074784SAlexander Polakov for (i = 0; i < num; i++)
178c4074784SAlexander Polakov if (replace_list[i].standard_code == c)
179c4074784SAlexander Polakov return (replace_list[i].vendor_code);
180c4074784SAlexander Polakov
181c4074784SAlexander Polakov return (c);
182c4074784SAlexander Polakov }
1830d5acd74SJohn Marino
1840d5acd74SJohn Marino #else /* statically linked */
1850d5acd74SJohn Marino
1860d5acd74SJohn Marino #include <sys/types.h>
1870d5acd74SJohn Marino #include <sys/iconv.h>
1880d5acd74SJohn Marino
/*
 * Stub used when the library is not built as PIC: no quirk table is
 * compiled in, so the base codeset is handed back unchanged and the
 * vendor argument is ignored.
 */
const char *
kiconv_quirkcs(const char *base, int vendor __unused)
{

	/* base is used here, so only vendor carries the __unused marker. */
	return (base);
}
1950d5acd74SJohn Marino
1960d5acd74SJohn Marino #endif /* PIC */
197