/* $OpenBSD: chars.c,v 1.51 2022/06/26 20:30:00 schwarze Exp $ */
/*
 * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011, 2014, 2015, 2017, 2018, 2020
 * Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mandoc.h"
#include "mandoc_aux.h"
#include "mandoc_ohash.h"
#include "libmandoc.h"

/*
 * One entry of the special-character table.
 * The name is stored inline so that its offset inside the structure
 * can be handed to the hash table as the key location.
 */
struct	ln {
	const char	  roffcode[16];	/* character name, NUL-terminated */
	const char	 *ascii;	/* ASCII rendering of the character */
	int		  unicode;	/* Unicode codepoint of the character */
};

4016536faaSschwarze /* Special break control characters. */
4116536faaSschwarze static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
4216536faaSschwarze static const char ascii_break[2] = { ASCII_BREAK, '\0' };
43d9b084f4Sschwarze
4416536faaSschwarze static struct ln lines[] = {
45d9b084f4Sschwarze
4616536faaSschwarze /* Spacing. */
4716536faaSschwarze { " ", ascii_nbrsp, 0x00a0 },
4816536faaSschwarze { "~", ascii_nbrsp, 0x00a0 },
490c1d2fccSschwarze { "0", ascii_nbrsp, 0x00a0 },
5016536faaSschwarze { ":", ascii_break, 0 },
51fa70b73eSschwarze
5216536faaSschwarze /* Lines. */
5316536faaSschwarze { "ba", "|", 0x007c },
5416536faaSschwarze { "br", "|", 0x2502 },
5516536faaSschwarze { "ul", "_", 0x005f },
566f6722cbSschwarze { "_", "_", 0x005f },
57f04548a1Sschwarze { "ru", "_", 0x005f },
5816536faaSschwarze { "rn", "-", 0x203e },
5916536faaSschwarze { "bb", "|", 0x00a6 },
6016536faaSschwarze { "sl", "/", 0x002f },
6116536faaSschwarze { "rs", "\\", 0x005c },
62d9b084f4Sschwarze
6316536faaSschwarze /* Text markers. */
6416536faaSschwarze { "ci", "O", 0x25cb },
6516536faaSschwarze { "bu", "+\bo", 0x2022 },
66c250368cSschwarze { "dd", "<**>", 0x2021 },
67c250368cSschwarze { "dg", "<*>", 0x2020 },
6816536faaSschwarze { "lz", "<>", 0x25ca },
6916536faaSschwarze { "sq", "[]", 0x25a1 },
70c250368cSschwarze { "ps", "<paragraph>", 0x00b6 },
71c250368cSschwarze { "sc", "<section>", 0x00a7 },
7216536faaSschwarze { "lh", "<=", 0x261c },
7316536faaSschwarze { "rh", "=>", 0x261e },
7416536faaSschwarze { "at", "@", 0x0040 },
7516536faaSschwarze { "sh", "#", 0x0023 },
76c250368cSschwarze { "CR", "<cr>", 0x21b5 },
7716536faaSschwarze { "OK", "\\/", 0x2713 },
78f09271baSschwarze { "CL", "C", 0x2663 },
79f09271baSschwarze { "SP", "S", 0x2660 },
80f09271baSschwarze { "HE", "H", 0x2665 },
81f09271baSschwarze { "DI", "D", 0x2666 },
8216536faaSschwarze
8316536faaSschwarze /* Legal symbols. */
8416536faaSschwarze { "co", "(C)", 0x00a9 },
8516536faaSschwarze { "rg", "(R)", 0x00ae },
8616536faaSschwarze { "tm", "tm", 0x2122 },
8716536faaSschwarze
8816536faaSschwarze /* Punctuation. */
8916536faaSschwarze { "em", "--", 0x2014 },
9016536faaSschwarze { "en", "-", 0x2013 },
9116536faaSschwarze { "hy", "-", 0x2010 },
9216536faaSschwarze { "e", "\\", 0x005c },
9316536faaSschwarze { "r!", "!", 0x00a1 },
9416536faaSschwarze { "r?", "?", 0x00bf },
9516536faaSschwarze
9616536faaSschwarze /* Quotes. */
9716536faaSschwarze { "Bq", ",,", 0x201e },
9816536faaSschwarze { "bq", ",", 0x201a },
9916536faaSschwarze { "lq", "\"", 0x201c },
10016536faaSschwarze { "rq", "\"", 0x201d },
101108086f6Sschwarze { "Lq", "\"", 0x201c },
102108086f6Sschwarze { "Rq", "\"", 0x201d },
10316536faaSschwarze { "oq", "`", 0x2018 },
10416536faaSschwarze { "cq", "\'", 0x2019 },
10516536faaSschwarze { "aq", "\'", 0x0027 },
10616536faaSschwarze { "dq", "\"", 0x0022 },
10716536faaSschwarze { "Fo", "<<", 0x00ab },
10816536faaSschwarze { "Fc", ">>", 0x00bb },
10916536faaSschwarze { "fo", "<", 0x2039 },
11016536faaSschwarze { "fc", ">", 0x203a },
11116536faaSschwarze
11216536faaSschwarze /* Brackets. */
11316536faaSschwarze { "lB", "[", 0x005b },
11416536faaSschwarze { "rB", "]", 0x005d },
11516536faaSschwarze { "lC", "{", 0x007b },
11616536faaSschwarze { "rC", "}", 0x007d },
11716536faaSschwarze { "la", "<", 0x27e8 },
11816536faaSschwarze { "ra", ">", 0x27e9 },
11916536faaSschwarze { "bv", "|", 0x23aa },
12016536faaSschwarze { "braceex", "|", 0x23aa },
12116536faaSschwarze { "bracketlefttp", "|", 0x23a1 },
12216536faaSschwarze { "bracketleftbt", "|", 0x23a3 },
12316536faaSschwarze { "bracketleftex", "|", 0x23a2 },
12416536faaSschwarze { "bracketrighttp", "|", 0x23a4 },
12516536faaSschwarze { "bracketrightbt", "|", 0x23a6 },
12616536faaSschwarze { "bracketrightex", "|", 0x23a5 },
12716536faaSschwarze { "lt", ",-", 0x23a7 },
12816536faaSschwarze { "bracelefttp", ",-", 0x23a7 },
12916536faaSschwarze { "lk", "{", 0x23a8 },
13016536faaSschwarze { "braceleftmid", "{", 0x23a8 },
13116536faaSschwarze { "lb", "`-", 0x23a9 },
13216536faaSschwarze { "braceleftbt", "`-", 0x23a9 },
13316536faaSschwarze { "braceleftex", "|", 0x23aa },
13416536faaSschwarze { "rt", "-.", 0x23ab },
13516536faaSschwarze { "bracerighttp", "-.", 0x23ab },
13616536faaSschwarze { "rk", "}", 0x23ac },
13716536faaSschwarze { "bracerightmid", "}", 0x23ac },
13816536faaSschwarze { "rb", "-\'", 0x23ad },
13916536faaSschwarze { "bracerightbt", "-\'", 0x23ad },
14016536faaSschwarze { "bracerightex", "|", 0x23aa },
14116536faaSschwarze { "parenlefttp", "/", 0x239b },
14216536faaSschwarze { "parenleftbt", "\\", 0x239d },
14316536faaSschwarze { "parenleftex", "|", 0x239c },
14416536faaSschwarze { "parenrighttp", "\\", 0x239e },
14516536faaSschwarze { "parenrightbt", "/", 0x23a0 },
14616536faaSschwarze { "parenrightex", "|", 0x239f },
14716536faaSschwarze
14816536faaSschwarze /* Arrows and lines. */
14916536faaSschwarze { "<-", "<-", 0x2190 },
15016536faaSschwarze { "->", "->", 0x2192 },
15116536faaSschwarze { "<>", "<->", 0x2194 },
15216536faaSschwarze { "da", "|\bv", 0x2193 },
15316536faaSschwarze { "ua", "|\b^", 0x2191 },
15416536faaSschwarze { "va", "^v", 0x2195 },
15516536faaSschwarze { "lA", "<=", 0x21d0 },
15616536faaSschwarze { "rA", "=>", 0x21d2 },
15716536faaSschwarze { "hA", "<=>", 0x21d4 },
15816536faaSschwarze { "uA", "=\b^", 0x21d1 },
15916536faaSschwarze { "dA", "=\bv", 0x21d3 },
16016536faaSschwarze { "vA", "^=v", 0x21d5 },
161a9e1fc4aSschwarze { "an", "-", 0x23af },
16216536faaSschwarze
16316536faaSschwarze /* Logic. */
16416536faaSschwarze { "AN", "^", 0x2227 },
16516536faaSschwarze { "OR", "v", 0x2228 },
16616536faaSschwarze { "no", "~", 0x00ac },
16716536faaSschwarze { "tno", "~", 0x00ac },
168c250368cSschwarze { "te", "<there\037exists>", 0x2203 },
169c250368cSschwarze { "fa", "<for\037all>", 0x2200 },
170c250368cSschwarze { "st", "<such\037that>", 0x220b },
171c250368cSschwarze { "tf", "<therefore>", 0x2234 },
172c250368cSschwarze { "3d", "<therefore>", 0x2234 },
17316536faaSschwarze { "or", "|", 0x007c },
17416536faaSschwarze
17516536faaSschwarze /* Mathematicals. */
17616536faaSschwarze { "pl", "+", 0x002b },
17716536faaSschwarze { "mi", "-", 0x2212 },
17816536faaSschwarze { "-", "-", 0x002d },
17916536faaSschwarze { "-+", "-+", 0x2213 },
18016536faaSschwarze { "+-", "+-", 0x00b1 },
18116536faaSschwarze { "t+-", "+-", 0x00b1 },
18216536faaSschwarze { "pc", ".", 0x00b7 },
18316536faaSschwarze { "md", ".", 0x22c5 },
18416536faaSschwarze { "mu", "x", 0x00d7 },
18516536faaSschwarze { "tmu", "x", 0x00d7 },
18616536faaSschwarze { "c*", "O\bx", 0x2297 },
18716536faaSschwarze { "c+", "O\b+", 0x2295 },
188c250368cSschwarze { "di", "/", 0x00f7 },
189c250368cSschwarze { "tdi", "/", 0x00f7 },
19016536faaSschwarze { "f/", "/", 0x2044 },
19116536faaSschwarze { "**", "*", 0x2217 },
19216536faaSschwarze { "<=", "<=", 0x2264 },
19316536faaSschwarze { ">=", ">=", 0x2265 },
19416536faaSschwarze { "<<", "<<", 0x226a },
19516536faaSschwarze { ">>", ">>", 0x226b },
19616536faaSschwarze { "eq", "=", 0x003d },
19716536faaSschwarze { "!=", "!=", 0x2260 },
19816536faaSschwarze { "==", "==", 0x2261 },
19916536faaSschwarze { "ne", "!==", 0x2262 },
20016536faaSschwarze { "ap", "~", 0x223c },
20116536faaSschwarze { "|=", "-~", 0x2243 },
20216536faaSschwarze { "=~", "=~", 0x2245 },
20316536faaSschwarze { "~~", "~~", 0x2248 },
20416536faaSschwarze { "~=", "~=", 0x2248 },
205c250368cSschwarze { "pt", "<proportional\037to>", 0x221d },
20616536faaSschwarze { "es", "{}", 0x2205 },
207c250368cSschwarze { "mo", "<element\037of>", 0x2208 },
208c250368cSschwarze { "nm", "<not\037element\037of>", 0x2209 },
209c250368cSschwarze { "sb", "<proper\037subset>", 0x2282 },
21061c0d376Sschwarze { "nb", "<not\037subset>", 0x2284 },
211c250368cSschwarze { "sp", "<proper\037superset>", 0x2283 },
21261c0d376Sschwarze { "nc", "<not\037superset>", 0x2285 },
213c250368cSschwarze { "ib", "<subset\037or\037equal>", 0x2286 },
214c250368cSschwarze { "ip", "<superset\037or\037equal>", 0x2287 },
215c250368cSschwarze { "ca", "<intersection>", 0x2229 },
216c250368cSschwarze { "cu", "<union>", 0x222a },
217c250368cSschwarze { "/_", "<angle>", 0x2220 },
218c250368cSschwarze { "pp", "<perpendicular>", 0x22a5 },
219c250368cSschwarze { "is", "<integral>", 0x222b },
22061c0d376Sschwarze { "integral", "<integral>", 0x222b },
22161c0d376Sschwarze { "sum", "<sum>", 0x2211 },
22261c0d376Sschwarze { "product", "<product>", 0x220f },
22361c0d376Sschwarze { "coproduct", "<coproduct>", 0x2210 },
224c250368cSschwarze { "gr", "<nabla>", 0x2207 },
225c250368cSschwarze { "sr", "<sqrt>", 0x221a },
22661c0d376Sschwarze { "sqrt", "<sqrt>", 0x221a },
22716536faaSschwarze { "lc", "|~", 0x2308 },
22816536faaSschwarze { "rc", "~|", 0x2309 },
22916536faaSschwarze { "lf", "|_", 0x230a },
23016536faaSschwarze { "rf", "_|", 0x230b },
231c250368cSschwarze { "if", "<infinity>", 0x221e },
232c250368cSschwarze { "Ah", "<Aleph>", 0x2135 },
233c250368cSschwarze { "Im", "<Im>", 0x2111 },
234c250368cSschwarze { "Re", "<Re>", 0x211c },
235e5837833Sschwarze { "wp", "p", 0x2118 },
236c250368cSschwarze { "pd", "<del>", 0x2202 },
23716536faaSschwarze { "-h", "/h", 0x210f },
238a9e1fc4aSschwarze { "hbar", "/h", 0x210f },
23916536faaSschwarze { "12", "1/2", 0x00bd },
24016536faaSschwarze { "14", "1/4", 0x00bc },
24116536faaSschwarze { "34", "3/4", 0x00be },
242a9e1fc4aSschwarze { "18", "1/8", 0x215B },
243a9e1fc4aSschwarze { "38", "3/8", 0x215C },
244a9e1fc4aSschwarze { "58", "5/8", 0x215D },
245a9e1fc4aSschwarze { "78", "7/8", 0x215E },
246c250368cSschwarze { "S1", "^1", 0x00B9 },
247c250368cSschwarze { "S2", "^2", 0x00B2 },
248c250368cSschwarze { "S3", "^3", 0x00B3 },
24916536faaSschwarze
25016536faaSschwarze /* Ligatures. */
25116536faaSschwarze { "ff", "ff", 0xfb00 },
25216536faaSschwarze { "fi", "fi", 0xfb01 },
25316536faaSschwarze { "fl", "fl", 0xfb02 },
25416536faaSschwarze { "Fi", "ffi", 0xfb03 },
25516536faaSschwarze { "Fl", "ffl", 0xfb04 },
25616536faaSschwarze { "AE", "AE", 0x00c6 },
25716536faaSschwarze { "ae", "ae", 0x00e6 },
25816536faaSschwarze { "OE", "OE", 0x0152 },
25916536faaSschwarze { "oe", "oe", 0x0153 },
26016536faaSschwarze { "ss", "ss", 0x00df },
26116536faaSschwarze { "IJ", "IJ", 0x0132 },
26216536faaSschwarze { "ij", "ij", 0x0133 },
26316536faaSschwarze
26416536faaSschwarze /* Accents. */
26516536faaSschwarze { "a\"", "\"", 0x02dd },
26616536faaSschwarze { "a-", "-", 0x00af },
26716536faaSschwarze { "a.", ".", 0x02d9 },
26816536faaSschwarze { "a^", "^", 0x005e },
26916536faaSschwarze { "aa", "\'", 0x00b4 },
27016536faaSschwarze { "\'", "\'", 0x00b4 },
27116536faaSschwarze { "ga", "`", 0x0060 },
27216536faaSschwarze { "`", "`", 0x0060 },
27316536faaSschwarze { "ab", "'\b`", 0x02d8 },
27416536faaSschwarze { "ac", ",", 0x00b8 },
27516536faaSschwarze { "ad", "\"", 0x00a8 },
27616536faaSschwarze { "ah", "v", 0x02c7 },
27716536faaSschwarze { "ao", "o", 0x02da },
27816536faaSschwarze { "a~", "~", 0x007e },
27916536faaSschwarze { "ho", ",", 0x02db },
28016536faaSschwarze { "ha", "^", 0x005e },
28116536faaSschwarze { "ti", "~", 0x007e },
282e5837833Sschwarze { "u02DC", "~", 0x02dc },
28316536faaSschwarze
28416536faaSschwarze /* Accented letters. */
28516536faaSschwarze { "'A", "'\bA", 0x00c1 },
28616536faaSschwarze { "'E", "'\bE", 0x00c9 },
28716536faaSschwarze { "'I", "'\bI", 0x00cd },
28816536faaSschwarze { "'O", "'\bO", 0x00d3 },
28916536faaSschwarze { "'U", "'\bU", 0x00da },
290e5837833Sschwarze { "'Y", "'\bY", 0x00dd },
29116536faaSschwarze { "'a", "'\ba", 0x00e1 },
29216536faaSschwarze { "'e", "'\be", 0x00e9 },
29316536faaSschwarze { "'i", "'\bi", 0x00ed },
29416536faaSschwarze { "'o", "'\bo", 0x00f3 },
29516536faaSschwarze { "'u", "'\bu", 0x00fa },
296e5837833Sschwarze { "'y", "'\by", 0x00fd },
29716536faaSschwarze { "`A", "`\bA", 0x00c0 },
29816536faaSschwarze { "`E", "`\bE", 0x00c8 },
29916536faaSschwarze { "`I", "`\bI", 0x00cc },
30016536faaSschwarze { "`O", "`\bO", 0x00d2 },
30116536faaSschwarze { "`U", "`\bU", 0x00d9 },
30216536faaSschwarze { "`a", "`\ba", 0x00e0 },
30316536faaSschwarze { "`e", "`\be", 0x00e8 },
30416536faaSschwarze { "`i", "`\bi", 0x00ec },
30516536faaSschwarze { "`o", "`\bo", 0x00f2 },
30616536faaSschwarze { "`u", "`\bu", 0x00f9 },
30716536faaSschwarze { "~A", "~\bA", 0x00c3 },
30816536faaSschwarze { "~N", "~\bN", 0x00d1 },
30916536faaSschwarze { "~O", "~\bO", 0x00d5 },
31016536faaSschwarze { "~a", "~\ba", 0x00e3 },
31116536faaSschwarze { "~n", "~\bn", 0x00f1 },
31216536faaSschwarze { "~o", "~\bo", 0x00f5 },
31316536faaSschwarze { ":A", "\"\bA", 0x00c4 },
31416536faaSschwarze { ":E", "\"\bE", 0x00cb },
31516536faaSschwarze { ":I", "\"\bI", 0x00cf },
31616536faaSschwarze { ":O", "\"\bO", 0x00d6 },
31716536faaSschwarze { ":U", "\"\bU", 0x00dc },
31816536faaSschwarze { ":a", "\"\ba", 0x00e4 },
31916536faaSschwarze { ":e", "\"\be", 0x00eb },
32016536faaSschwarze { ":i", "\"\bi", 0x00ef },
32116536faaSschwarze { ":o", "\"\bo", 0x00f6 },
32216536faaSschwarze { ":u", "\"\bu", 0x00fc },
32316536faaSschwarze { ":y", "\"\by", 0x00ff },
32416536faaSschwarze { "^A", "^\bA", 0x00c2 },
32516536faaSschwarze { "^E", "^\bE", 0x00ca },
32616536faaSschwarze { "^I", "^\bI", 0x00ce },
32716536faaSschwarze { "^O", "^\bO", 0x00d4 },
32816536faaSschwarze { "^U", "^\bU", 0x00db },
32916536faaSschwarze { "^a", "^\ba", 0x00e2 },
33016536faaSschwarze { "^e", "^\be", 0x00ea },
33116536faaSschwarze { "^i", "^\bi", 0x00ee },
33216536faaSschwarze { "^o", "^\bo", 0x00f4 },
33316536faaSschwarze { "^u", "^\bu", 0x00fb },
33416536faaSschwarze { ",C", ",\bC", 0x00c7 },
33516536faaSschwarze { ",c", ",\bc", 0x00e7 },
33616536faaSschwarze { "/L", "/\bL", 0x0141 },
33716536faaSschwarze { "/l", "/\bl", 0x0142 },
33816536faaSschwarze { "/O", "/\bO", 0x00d8 },
33916536faaSschwarze { "/o", "/\bo", 0x00f8 },
34016536faaSschwarze { "oA", "o\bA", 0x00c5 },
34116536faaSschwarze { "oa", "o\ba", 0x00e5 },
34216536faaSschwarze
34316536faaSschwarze /* Special letters. */
344c250368cSschwarze { "-D", "Dh", 0x00d0 },
345c250368cSschwarze { "Sd", "dh", 0x00f0 },
34616536faaSschwarze { "TP", "Th", 0x00de },
34716536faaSschwarze { "Tp", "th", 0x00fe },
34816536faaSschwarze { ".i", "i", 0x0131 },
34916536faaSschwarze { ".j", "j", 0x0237 },
35016536faaSschwarze
35116536faaSschwarze /* Currency. */
35216536faaSschwarze { "Do", "$", 0x0024 },
35316536faaSschwarze { "ct", "/\bc", 0x00a2 },
35416536faaSschwarze { "Eu", "EUR", 0x20ac },
35516536faaSschwarze { "eu", "EUR", 0x20ac },
35616536faaSschwarze { "Ye", "=\bY", 0x00a5 },
357f09271baSschwarze { "Po", "-\bL", 0x00a3 },
35816536faaSschwarze { "Cs", "o\bx", 0x00a4 },
35916536faaSschwarze { "Fn", ",\bf", 0x0192 },
36016536faaSschwarze
36116536faaSschwarze /* Units. */
362c250368cSschwarze { "de", "<degree>", 0x00b0 },
363c250368cSschwarze { "%0", "<permille>", 0x2030 },
36416536faaSschwarze { "fm", "\'", 0x2032 },
365*9ea9ce09Sschwarze { "sd", "\"", 0x2033 },
366c250368cSschwarze { "mc", "<micro>", 0x00b5 },
367a9e1fc4aSschwarze { "Of", "_\ba", 0x00aa },
368a9e1fc4aSschwarze { "Om", "_\bo", 0x00ba },
36916536faaSschwarze
37016536faaSschwarze /* Greek characters. */
37116536faaSschwarze { "*A", "A", 0x0391 },
37216536faaSschwarze { "*B", "B", 0x0392 },
373c250368cSschwarze { "*G", "<Gamma>", 0x0393 },
374c250368cSschwarze { "*D", "<Delta>", 0x0394 },
37516536faaSschwarze { "*E", "E", 0x0395 },
37616536faaSschwarze { "*Z", "Z", 0x0396 },
37716536faaSschwarze { "*Y", "H", 0x0397 },
378c250368cSschwarze { "*H", "<Theta>", 0x0398 },
37916536faaSschwarze { "*I", "I", 0x0399 },
38016536faaSschwarze { "*K", "K", 0x039a },
381c250368cSschwarze { "*L", "<Lambda>", 0x039b },
38216536faaSschwarze { "*M", "M", 0x039c },
38316536faaSschwarze { "*N", "N", 0x039d },
384c250368cSschwarze { "*C", "<Xi>", 0x039e },
38516536faaSschwarze { "*O", "O", 0x039f },
386c250368cSschwarze { "*P", "<Pi>", 0x03a0 },
38716536faaSschwarze { "*R", "P", 0x03a1 },
388c250368cSschwarze { "*S", "<Sigma>", 0x03a3 },
38916536faaSschwarze { "*T", "T", 0x03a4 },
39016536faaSschwarze { "*U", "Y", 0x03a5 },
391c250368cSschwarze { "*F", "<Phi>", 0x03a6 },
39216536faaSschwarze { "*X", "X", 0x03a7 },
393c250368cSschwarze { "*Q", "<Psi>", 0x03a8 },
394c250368cSschwarze { "*W", "<Omega>", 0x03a9 },
395c250368cSschwarze { "*a", "<alpha>", 0x03b1 },
396c250368cSschwarze { "*b", "<beta>", 0x03b2 },
397c250368cSschwarze { "*g", "<gamma>", 0x03b3 },
398c250368cSschwarze { "*d", "<delta>", 0x03b4 },
399c250368cSschwarze { "*e", "<epsilon>", 0x03b5 },
400c250368cSschwarze { "*z", "<zeta>", 0x03b6 },
401c250368cSschwarze { "*y", "<eta>", 0x03b7 },
402c250368cSschwarze { "*h", "<theta>", 0x03b8 },
403c250368cSschwarze { "*i", "<iota>", 0x03b9 },
404c250368cSschwarze { "*k", "<kappa>", 0x03ba },
405c250368cSschwarze { "*l", "<lambda>", 0x03bb },
406c250368cSschwarze { "*m", "<mu>", 0x03bc },
407c250368cSschwarze { "*n", "<nu>", 0x03bd },
408c250368cSschwarze { "*c", "<xi>", 0x03be },
40916536faaSschwarze { "*o", "o", 0x03bf },
410c250368cSschwarze { "*p", "<pi>", 0x03c0 },
411c250368cSschwarze { "*r", "<rho>", 0x03c1 },
412c250368cSschwarze { "*s", "<sigma>", 0x03c3 },
413c250368cSschwarze { "*t", "<tau>", 0x03c4 },
414c250368cSschwarze { "*u", "<upsilon>", 0x03c5 },
415c250368cSschwarze { "*f", "<phi>", 0x03d5 },
416c250368cSschwarze { "*x", "<chi>", 0x03c7 },
417c250368cSschwarze { "*q", "<psi>", 0x03c8 },
418c250368cSschwarze { "*w", "<omega>", 0x03c9 },
419c250368cSschwarze { "+h", "<theta>", 0x03d1 },
420c250368cSschwarze { "+f", "<phi>", 0x03c6 },
421c250368cSschwarze { "+p", "<pi>", 0x03d6 },
422c250368cSschwarze { "+e", "<epsilon>", 0x03f5 },
423c250368cSschwarze { "ts", "<sigma>", 0x03c2 },
424d9b084f4Sschwarze };
425d9b084f4Sschwarze
42616536faaSschwarze static struct ohash mchars;
427d9b084f4Sschwarze
42849aff9f8Sschwarze
429d9b084f4Sschwarze void
mchars_free(void)43016536faaSschwarze mchars_free(void)
431d9b084f4Sschwarze {
432d9b084f4Sschwarze
43316536faaSschwarze ohash_delete(&mchars);
434d9b084f4Sschwarze }
435d9b084f4Sschwarze
43616536faaSschwarze void
mchars_alloc(void)437a5e11edeSschwarze mchars_alloc(void)
438d9b084f4Sschwarze {
43916536faaSschwarze size_t i;
44016536faaSschwarze unsigned int slot;
441d9b084f4Sschwarze
44216536faaSschwarze mandoc_ohash_init(&mchars, 9, offsetof(struct ln, roffcode));
44316536faaSschwarze for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) {
44416536faaSschwarze slot = ohash_qlookup(&mchars, lines[i].roffcode);
44516536faaSschwarze assert(ohash_find(&mchars, slot) == NULL);
44616536faaSschwarze ohash_insert(&mchars, slot, lines + i);
447d9b084f4Sschwarze }
448d9b084f4Sschwarze }
449d9b084f4Sschwarze
450ddce0b0cSschwarze int
mchars_spec2cp(const char * p,size_t sz)45116536faaSschwarze mchars_spec2cp(const char *p, size_t sz)
452d9b084f4Sschwarze {
453ddce0b0cSschwarze const struct ln *ln;
45416536faaSschwarze const char *end;
455d9b084f4Sschwarze
45616536faaSschwarze end = p + sz;
45716536faaSschwarze ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
4586f6722cbSschwarze return ln != NULL ? ln->unicode : -1;
459d9b084f4Sschwarze }
460d9b084f4Sschwarze
/*
 * Convert a decimal character number, given as the sz bytes
 * starting at p, to a character code.
 * Returns the number if it is in the range 0 to 255 inclusive,
 * or -1 otherwise, which also covers any negative result
 * from mandoc_strntoi().
 */
int
mchars_num2char(const char *p, size_t sz)
{
	int	  i;

	i = mandoc_strntoi(p, sz, 10);
	return i >= 0 && i < 256 ? i : -1;
}

/*
 * Convert a hexadecimal Unicode codepoint, given as the sz bytes
 * starting at p, to an integer and return it.
 * The result is only checked by an assertion: the caller is expected
 * to pass input that parses to a value from 0 to 0x10FFFF inclusive.
 */
int
mchars_num2uc(const char *p, size_t sz)
{
	int	  i;

	i = mandoc_strntoi(p, sz, 16);
	assert(i >= 0 && i <= 0x10FFFF);
	return i;
}

480ddce0b0cSschwarze const char *
mchars_spec2str(const char * p,size_t sz,size_t * rsz)48116536faaSschwarze mchars_spec2str(const char *p, size_t sz, size_t *rsz)
482ddce0b0cSschwarze {
483ddce0b0cSschwarze const struct ln *ln;
48416536faaSschwarze const char *end;
485ddce0b0cSschwarze
48616536faaSschwarze end = p + sz;
48716536faaSschwarze ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
4886f6722cbSschwarze if (ln == NULL)
4896f6722cbSschwarze return NULL;
490ddce0b0cSschwarze
4918cd724fbSschwarze *rsz = strlen(ln->ascii);
492526e306bSschwarze return ln->ascii;
493ddce0b0cSschwarze }
494ddce0b0cSschwarze
495ed5ebdbaSschwarze const char *
mchars_uc2str(int uc)496ed5ebdbaSschwarze mchars_uc2str(int uc)
497ed5ebdbaSschwarze {
49816536faaSschwarze size_t i;
499ed5ebdbaSschwarze
50016536faaSschwarze for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++)
501ed5ebdbaSschwarze if (uc == lines[i].unicode)
502526e306bSschwarze return lines[i].ascii;
503526e306bSschwarze return "<?>";
504ed5ebdbaSschwarze }
505