/* $Id: chars.c,v 1.79 2020/02/13 16:18:29 schwarze Exp $ */
/*
 * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011, 2014, 2015, 2017, 2018, 2020
 *               Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
1980387638SSascha Wildner #include "config.h"
2054ba9607SSascha Wildner
2154ba9607SSascha Wildner #include <sys/types.h>
2280387638SSascha Wildner
2380387638SSascha Wildner #include <assert.h>
24a4c7eb57SSascha Wildner #include <ctype.h>
2554ba9607SSascha Wildner #include <stddef.h>
2654ba9607SSascha Wildner #include <stdint.h>
2754ba9607SSascha Wildner #include <stdio.h>
2880387638SSascha Wildner #include <stdlib.h>
2980387638SSascha Wildner #include <string.h>
3080387638SSascha Wildner
3180387638SSascha Wildner #include "mandoc.h"
32070c62a6SFranco Fichtner #include "mandoc_aux.h"
3354ba9607SSascha Wildner #include "mandoc_ohash.h"
34a4c7eb57SSascha Wildner #include "libmandoc.h"
3580387638SSascha Wildner
/*
 * One special-character table entry: the roff name of the character,
 * the string used to render it in ASCII output, and its Unicode
 * code point.
 */
struct	ln {
	const char	  roffcode[16];	/* name, stored inline; used as hash key */
	const char	 *ascii;	/* NUL-terminated ASCII rendering */
	int		  unicode;	/* Unicode code point */
};

4254ba9607SSascha Wildner /* Special break control characters. */
4354ba9607SSascha Wildner static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
4454ba9607SSascha Wildner static const char ascii_break[2] = { ASCII_BREAK, '\0' };
4580387638SSascha Wildner
4654ba9607SSascha Wildner static struct ln lines[] = {
4780387638SSascha Wildner
4854ba9607SSascha Wildner /* Spacing. */
4954ba9607SSascha Wildner { " ", ascii_nbrsp, 0x00a0 },
5054ba9607SSascha Wildner { "~", ascii_nbrsp, 0x00a0 },
51*99db7d0eSSascha Wildner { "0", ascii_nbrsp, 0x00a0 },
5254ba9607SSascha Wildner { ":", ascii_break, 0 },
5380387638SSascha Wildner
5454ba9607SSascha Wildner /* Lines. */
5554ba9607SSascha Wildner { "ba", "|", 0x007c },
5654ba9607SSascha Wildner { "br", "|", 0x2502 },
5754ba9607SSascha Wildner { "ul", "_", 0x005f },
5854ba9607SSascha Wildner { "_", "_", 0x005f },
5954ba9607SSascha Wildner { "ru", "_", 0x005f },
6054ba9607SSascha Wildner { "rn", "-", 0x203e },
6154ba9607SSascha Wildner { "bb", "|", 0x00a6 },
6254ba9607SSascha Wildner { "sl", "/", 0x002f },
6354ba9607SSascha Wildner { "rs", "\\", 0x005c },
6480387638SSascha Wildner
6554ba9607SSascha Wildner /* Text markers. */
6654ba9607SSascha Wildner { "ci", "O", 0x25cb },
6754ba9607SSascha Wildner { "bu", "+\bo", 0x2022 },
6854ba9607SSascha Wildner { "dd", "<**>", 0x2021 },
6954ba9607SSascha Wildner { "dg", "<*>", 0x2020 },
7054ba9607SSascha Wildner { "lz", "<>", 0x25ca },
7154ba9607SSascha Wildner { "sq", "[]", 0x25a1 },
7254ba9607SSascha Wildner { "ps", "<paragraph>", 0x00b6 },
7354ba9607SSascha Wildner { "sc", "<section>", 0x00a7 },
7454ba9607SSascha Wildner { "lh", "<=", 0x261c },
7554ba9607SSascha Wildner { "rh", "=>", 0x261e },
7654ba9607SSascha Wildner { "at", "@", 0x0040 },
7754ba9607SSascha Wildner { "sh", "#", 0x0023 },
7854ba9607SSascha Wildner { "CR", "<cr>", 0x21b5 },
7954ba9607SSascha Wildner { "OK", "\\/", 0x2713 },
8054ba9607SSascha Wildner { "CL", "C", 0x2663 },
8154ba9607SSascha Wildner { "SP", "S", 0x2660 },
8254ba9607SSascha Wildner { "HE", "H", 0x2665 },
8354ba9607SSascha Wildner { "DI", "D", 0x2666 },
8454ba9607SSascha Wildner
8554ba9607SSascha Wildner /* Legal symbols. */
8654ba9607SSascha Wildner { "co", "(C)", 0x00a9 },
8754ba9607SSascha Wildner { "rg", "(R)", 0x00ae },
8854ba9607SSascha Wildner { "tm", "tm", 0x2122 },
8954ba9607SSascha Wildner
9054ba9607SSascha Wildner /* Punctuation. */
9154ba9607SSascha Wildner { "em", "--", 0x2014 },
9254ba9607SSascha Wildner { "en", "-", 0x2013 },
9354ba9607SSascha Wildner { "hy", "-", 0x2010 },
9454ba9607SSascha Wildner { "e", "\\", 0x005c },
9554ba9607SSascha Wildner { ".", ".", 0x002e },
9654ba9607SSascha Wildner { "r!", "!", 0x00a1 },
9754ba9607SSascha Wildner { "r?", "?", 0x00bf },
9854ba9607SSascha Wildner
9954ba9607SSascha Wildner /* Quotes. */
10054ba9607SSascha Wildner { "Bq", ",,", 0x201e },
10154ba9607SSascha Wildner { "bq", ",", 0x201a },
10254ba9607SSascha Wildner { "lq", "\"", 0x201c },
10354ba9607SSascha Wildner { "rq", "\"", 0x201d },
10454ba9607SSascha Wildner { "Lq", "\"", 0x201c },
10554ba9607SSascha Wildner { "Rq", "\"", 0x201d },
10654ba9607SSascha Wildner { "oq", "`", 0x2018 },
10754ba9607SSascha Wildner { "cq", "\'", 0x2019 },
10854ba9607SSascha Wildner { "aq", "\'", 0x0027 },
10954ba9607SSascha Wildner { "dq", "\"", 0x0022 },
11054ba9607SSascha Wildner { "Fo", "<<", 0x00ab },
11154ba9607SSascha Wildner { "Fc", ">>", 0x00bb },
11254ba9607SSascha Wildner { "fo", "<", 0x2039 },
11354ba9607SSascha Wildner { "fc", ">", 0x203a },
11454ba9607SSascha Wildner
11554ba9607SSascha Wildner /* Brackets. */
11654ba9607SSascha Wildner { "lB", "[", 0x005b },
11754ba9607SSascha Wildner { "rB", "]", 0x005d },
11854ba9607SSascha Wildner { "lC", "{", 0x007b },
11954ba9607SSascha Wildner { "rC", "}", 0x007d },
12054ba9607SSascha Wildner { "la", "<", 0x27e8 },
12154ba9607SSascha Wildner { "ra", ">", 0x27e9 },
12254ba9607SSascha Wildner { "bv", "|", 0x23aa },
12354ba9607SSascha Wildner { "braceex", "|", 0x23aa },
12454ba9607SSascha Wildner { "bracketlefttp", "|", 0x23a1 },
12554ba9607SSascha Wildner { "bracketleftbt", "|", 0x23a3 },
12654ba9607SSascha Wildner { "bracketleftex", "|", 0x23a2 },
12754ba9607SSascha Wildner { "bracketrighttp", "|", 0x23a4 },
12854ba9607SSascha Wildner { "bracketrightbt", "|", 0x23a6 },
12954ba9607SSascha Wildner { "bracketrightex", "|", 0x23a5 },
13054ba9607SSascha Wildner { "lt", ",-", 0x23a7 },
13154ba9607SSascha Wildner { "bracelefttp", ",-", 0x23a7 },
13254ba9607SSascha Wildner { "lk", "{", 0x23a8 },
13354ba9607SSascha Wildner { "braceleftmid", "{", 0x23a8 },
13454ba9607SSascha Wildner { "lb", "`-", 0x23a9 },
13554ba9607SSascha Wildner { "braceleftbt", "`-", 0x23a9 },
13654ba9607SSascha Wildner { "braceleftex", "|", 0x23aa },
13754ba9607SSascha Wildner { "rt", "-.", 0x23ab },
13854ba9607SSascha Wildner { "bracerighttp", "-.", 0x23ab },
13954ba9607SSascha Wildner { "rk", "}", 0x23ac },
14054ba9607SSascha Wildner { "bracerightmid", "}", 0x23ac },
14154ba9607SSascha Wildner { "rb", "-\'", 0x23ad },
14254ba9607SSascha Wildner { "bracerightbt", "-\'", 0x23ad },
14354ba9607SSascha Wildner { "bracerightex", "|", 0x23aa },
14454ba9607SSascha Wildner { "parenlefttp", "/", 0x239b },
14554ba9607SSascha Wildner { "parenleftbt", "\\", 0x239d },
14654ba9607SSascha Wildner { "parenleftex", "|", 0x239c },
14754ba9607SSascha Wildner { "parenrighttp", "\\", 0x239e },
14854ba9607SSascha Wildner { "parenrightbt", "/", 0x23a0 },
14954ba9607SSascha Wildner { "parenrightex", "|", 0x239f },
15054ba9607SSascha Wildner
15154ba9607SSascha Wildner /* Arrows and lines. */
15254ba9607SSascha Wildner { "<-", "<-", 0x2190 },
15354ba9607SSascha Wildner { "->", "->", 0x2192 },
15454ba9607SSascha Wildner { "<>", "<->", 0x2194 },
15554ba9607SSascha Wildner { "da", "|\bv", 0x2193 },
15654ba9607SSascha Wildner { "ua", "|\b^", 0x2191 },
15754ba9607SSascha Wildner { "va", "^v", 0x2195 },
15854ba9607SSascha Wildner { "lA", "<=", 0x21d0 },
15954ba9607SSascha Wildner { "rA", "=>", 0x21d2 },
16054ba9607SSascha Wildner { "hA", "<=>", 0x21d4 },
16154ba9607SSascha Wildner { "uA", "=\b^", 0x21d1 },
16254ba9607SSascha Wildner { "dA", "=\bv", 0x21d3 },
16354ba9607SSascha Wildner { "vA", "^=v", 0x21d5 },
16454ba9607SSascha Wildner { "an", "-", 0x23af },
16554ba9607SSascha Wildner
16654ba9607SSascha Wildner /* Logic. */
16754ba9607SSascha Wildner { "AN", "^", 0x2227 },
16854ba9607SSascha Wildner { "OR", "v", 0x2228 },
16954ba9607SSascha Wildner { "no", "~", 0x00ac },
17054ba9607SSascha Wildner { "tno", "~", 0x00ac },
17154ba9607SSascha Wildner { "te", "<there\037exists>", 0x2203 },
17254ba9607SSascha Wildner { "fa", "<for\037all>", 0x2200 },
17354ba9607SSascha Wildner { "st", "<such\037that>", 0x220b },
17454ba9607SSascha Wildner { "tf", "<therefore>", 0x2234 },
17554ba9607SSascha Wildner { "3d", "<therefore>", 0x2234 },
17654ba9607SSascha Wildner { "or", "|", 0x007c },
17754ba9607SSascha Wildner
17854ba9607SSascha Wildner /* Mathematicals. */
17954ba9607SSascha Wildner { "pl", "+", 0x002b },
18054ba9607SSascha Wildner { "mi", "-", 0x2212 },
18154ba9607SSascha Wildner { "-", "-", 0x002d },
18254ba9607SSascha Wildner { "-+", "-+", 0x2213 },
18354ba9607SSascha Wildner { "+-", "+-", 0x00b1 },
18454ba9607SSascha Wildner { "t+-", "+-", 0x00b1 },
18554ba9607SSascha Wildner { "pc", ".", 0x00b7 },
18654ba9607SSascha Wildner { "md", ".", 0x22c5 },
18754ba9607SSascha Wildner { "mu", "x", 0x00d7 },
18854ba9607SSascha Wildner { "tmu", "x", 0x00d7 },
18954ba9607SSascha Wildner { "c*", "O\bx", 0x2297 },
19054ba9607SSascha Wildner { "c+", "O\b+", 0x2295 },
19154ba9607SSascha Wildner { "di", "/", 0x00f7 },
19254ba9607SSascha Wildner { "tdi", "/", 0x00f7 },
19354ba9607SSascha Wildner { "f/", "/", 0x2044 },
19454ba9607SSascha Wildner { "**", "*", 0x2217 },
19554ba9607SSascha Wildner { "<=", "<=", 0x2264 },
19654ba9607SSascha Wildner { ">=", ">=", 0x2265 },
19754ba9607SSascha Wildner { "<<", "<<", 0x226a },
19854ba9607SSascha Wildner { ">>", ">>", 0x226b },
19954ba9607SSascha Wildner { "eq", "=", 0x003d },
20054ba9607SSascha Wildner { "!=", "!=", 0x2260 },
20154ba9607SSascha Wildner { "==", "==", 0x2261 },
20254ba9607SSascha Wildner { "ne", "!==", 0x2262 },
20354ba9607SSascha Wildner { "ap", "~", 0x223c },
20454ba9607SSascha Wildner { "|=", "-~", 0x2243 },
20554ba9607SSascha Wildner { "=~", "=~", 0x2245 },
20654ba9607SSascha Wildner { "~~", "~~", 0x2248 },
20754ba9607SSascha Wildner { "~=", "~=", 0x2248 },
20854ba9607SSascha Wildner { "pt", "<proportional\037to>", 0x221d },
20954ba9607SSascha Wildner { "es", "{}", 0x2205 },
21054ba9607SSascha Wildner { "mo", "<element\037of>", 0x2208 },
21154ba9607SSascha Wildner { "nm", "<not\037element\037of>", 0x2209 },
21254ba9607SSascha Wildner { "sb", "<proper\037subset>", 0x2282 },
21354ba9607SSascha Wildner { "nb", "<not\037subset>", 0x2284 },
21454ba9607SSascha Wildner { "sp", "<proper\037superset>", 0x2283 },
21554ba9607SSascha Wildner { "nc", "<not\037superset>", 0x2285 },
21654ba9607SSascha Wildner { "ib", "<subset\037or\037equal>", 0x2286 },
21754ba9607SSascha Wildner { "ip", "<superset\037or\037equal>", 0x2287 },
21854ba9607SSascha Wildner { "ca", "<intersection>", 0x2229 },
21954ba9607SSascha Wildner { "cu", "<union>", 0x222a },
22054ba9607SSascha Wildner { "/_", "<angle>", 0x2220 },
22154ba9607SSascha Wildner { "pp", "<perpendicular>", 0x22a5 },
22254ba9607SSascha Wildner { "is", "<integral>", 0x222b },
22354ba9607SSascha Wildner { "integral", "<integral>", 0x222b },
22454ba9607SSascha Wildner { "sum", "<sum>", 0x2211 },
22554ba9607SSascha Wildner { "product", "<product>", 0x220f },
22654ba9607SSascha Wildner { "coproduct", "<coproduct>", 0x2210 },
22754ba9607SSascha Wildner { "gr", "<nabla>", 0x2207 },
22854ba9607SSascha Wildner { "sr", "<sqrt>", 0x221a },
22954ba9607SSascha Wildner { "sqrt", "<sqrt>", 0x221a },
23054ba9607SSascha Wildner { "lc", "|~", 0x2308 },
23154ba9607SSascha Wildner { "rc", "~|", 0x2309 },
23254ba9607SSascha Wildner { "lf", "|_", 0x230a },
23354ba9607SSascha Wildner { "rf", "_|", 0x230b },
23454ba9607SSascha Wildner { "if", "<infinity>", 0x221e },
23554ba9607SSascha Wildner { "Ah", "<Aleph>", 0x2135 },
23654ba9607SSascha Wildner { "Im", "<Im>", 0x2111 },
23754ba9607SSascha Wildner { "Re", "<Re>", 0x211c },
23854ba9607SSascha Wildner { "wp", "p", 0x2118 },
23954ba9607SSascha Wildner { "pd", "<del>", 0x2202 },
24054ba9607SSascha Wildner { "-h", "/h", 0x210f },
24154ba9607SSascha Wildner { "hbar", "/h", 0x210f },
24254ba9607SSascha Wildner { "12", "1/2", 0x00bd },
24354ba9607SSascha Wildner { "14", "1/4", 0x00bc },
24454ba9607SSascha Wildner { "34", "3/4", 0x00be },
24554ba9607SSascha Wildner { "18", "1/8", 0x215B },
24654ba9607SSascha Wildner { "38", "3/8", 0x215C },
24754ba9607SSascha Wildner { "58", "5/8", 0x215D },
24854ba9607SSascha Wildner { "78", "7/8", 0x215E },
24954ba9607SSascha Wildner { "S1", "^1", 0x00B9 },
25054ba9607SSascha Wildner { "S2", "^2", 0x00B2 },
25154ba9607SSascha Wildner { "S3", "^3", 0x00B3 },
25254ba9607SSascha Wildner
25354ba9607SSascha Wildner /* Ligatures. */
25454ba9607SSascha Wildner { "ff", "ff", 0xfb00 },
25554ba9607SSascha Wildner { "fi", "fi", 0xfb01 },
25654ba9607SSascha Wildner { "fl", "fl", 0xfb02 },
25754ba9607SSascha Wildner { "Fi", "ffi", 0xfb03 },
25854ba9607SSascha Wildner { "Fl", "ffl", 0xfb04 },
25954ba9607SSascha Wildner { "AE", "AE", 0x00c6 },
26054ba9607SSascha Wildner { "ae", "ae", 0x00e6 },
26154ba9607SSascha Wildner { "OE", "OE", 0x0152 },
26254ba9607SSascha Wildner { "oe", "oe", 0x0153 },
26354ba9607SSascha Wildner { "ss", "ss", 0x00df },
26454ba9607SSascha Wildner { "IJ", "IJ", 0x0132 },
26554ba9607SSascha Wildner { "ij", "ij", 0x0133 },
26654ba9607SSascha Wildner
26754ba9607SSascha Wildner /* Accents. */
26854ba9607SSascha Wildner { "a\"", "\"", 0x02dd },
26954ba9607SSascha Wildner { "a-", "-", 0x00af },
27054ba9607SSascha Wildner { "a.", ".", 0x02d9 },
27154ba9607SSascha Wildner { "a^", "^", 0x005e },
27254ba9607SSascha Wildner { "aa", "\'", 0x00b4 },
27354ba9607SSascha Wildner { "\'", "\'", 0x00b4 },
27454ba9607SSascha Wildner { "ga", "`", 0x0060 },
27554ba9607SSascha Wildner { "`", "`", 0x0060 },
27654ba9607SSascha Wildner { "ab", "'\b`", 0x02d8 },
27754ba9607SSascha Wildner { "ac", ",", 0x00b8 },
27854ba9607SSascha Wildner { "ad", "\"", 0x00a8 },
27954ba9607SSascha Wildner { "ah", "v", 0x02c7 },
28054ba9607SSascha Wildner { "ao", "o", 0x02da },
28154ba9607SSascha Wildner { "a~", "~", 0x007e },
28254ba9607SSascha Wildner { "ho", ",", 0x02db },
28354ba9607SSascha Wildner { "ha", "^", 0x005e },
28454ba9607SSascha Wildner { "ti", "~", 0x007e },
28554ba9607SSascha Wildner { "u02DC", "~", 0x02dc },
28654ba9607SSascha Wildner
28754ba9607SSascha Wildner /* Accented letters. */
28854ba9607SSascha Wildner { "'A", "'\bA", 0x00c1 },
28954ba9607SSascha Wildner { "'E", "'\bE", 0x00c9 },
29054ba9607SSascha Wildner { "'I", "'\bI", 0x00cd },
29154ba9607SSascha Wildner { "'O", "'\bO", 0x00d3 },
29254ba9607SSascha Wildner { "'U", "'\bU", 0x00da },
29354ba9607SSascha Wildner { "'Y", "'\bY", 0x00dd },
29454ba9607SSascha Wildner { "'a", "'\ba", 0x00e1 },
29554ba9607SSascha Wildner { "'e", "'\be", 0x00e9 },
29654ba9607SSascha Wildner { "'i", "'\bi", 0x00ed },
29754ba9607SSascha Wildner { "'o", "'\bo", 0x00f3 },
29854ba9607SSascha Wildner { "'u", "'\bu", 0x00fa },
29954ba9607SSascha Wildner { "'y", "'\by", 0x00fd },
30054ba9607SSascha Wildner { "`A", "`\bA", 0x00c0 },
30154ba9607SSascha Wildner { "`E", "`\bE", 0x00c8 },
30254ba9607SSascha Wildner { "`I", "`\bI", 0x00cc },
30354ba9607SSascha Wildner { "`O", "`\bO", 0x00d2 },
30454ba9607SSascha Wildner { "`U", "`\bU", 0x00d9 },
30554ba9607SSascha Wildner { "`a", "`\ba", 0x00e0 },
30654ba9607SSascha Wildner { "`e", "`\be", 0x00e8 },
30754ba9607SSascha Wildner { "`i", "`\bi", 0x00ec },
30854ba9607SSascha Wildner { "`o", "`\bo", 0x00f2 },
30954ba9607SSascha Wildner { "`u", "`\bu", 0x00f9 },
31054ba9607SSascha Wildner { "~A", "~\bA", 0x00c3 },
31154ba9607SSascha Wildner { "~N", "~\bN", 0x00d1 },
31254ba9607SSascha Wildner { "~O", "~\bO", 0x00d5 },
31354ba9607SSascha Wildner { "~a", "~\ba", 0x00e3 },
31454ba9607SSascha Wildner { "~n", "~\bn", 0x00f1 },
31554ba9607SSascha Wildner { "~o", "~\bo", 0x00f5 },
31654ba9607SSascha Wildner { ":A", "\"\bA", 0x00c4 },
31754ba9607SSascha Wildner { ":E", "\"\bE", 0x00cb },
31854ba9607SSascha Wildner { ":I", "\"\bI", 0x00cf },
31954ba9607SSascha Wildner { ":O", "\"\bO", 0x00d6 },
32054ba9607SSascha Wildner { ":U", "\"\bU", 0x00dc },
32154ba9607SSascha Wildner { ":a", "\"\ba", 0x00e4 },
32254ba9607SSascha Wildner { ":e", "\"\be", 0x00eb },
32354ba9607SSascha Wildner { ":i", "\"\bi", 0x00ef },
32454ba9607SSascha Wildner { ":o", "\"\bo", 0x00f6 },
32554ba9607SSascha Wildner { ":u", "\"\bu", 0x00fc },
32654ba9607SSascha Wildner { ":y", "\"\by", 0x00ff },
32754ba9607SSascha Wildner { "^A", "^\bA", 0x00c2 },
32854ba9607SSascha Wildner { "^E", "^\bE", 0x00ca },
32954ba9607SSascha Wildner { "^I", "^\bI", 0x00ce },
33054ba9607SSascha Wildner { "^O", "^\bO", 0x00d4 },
33154ba9607SSascha Wildner { "^U", "^\bU", 0x00db },
33254ba9607SSascha Wildner { "^a", "^\ba", 0x00e2 },
33354ba9607SSascha Wildner { "^e", "^\be", 0x00ea },
33454ba9607SSascha Wildner { "^i", "^\bi", 0x00ee },
33554ba9607SSascha Wildner { "^o", "^\bo", 0x00f4 },
33654ba9607SSascha Wildner { "^u", "^\bu", 0x00fb },
33754ba9607SSascha Wildner { ",C", ",\bC", 0x00c7 },
33854ba9607SSascha Wildner { ",c", ",\bc", 0x00e7 },
33954ba9607SSascha Wildner { "/L", "/\bL", 0x0141 },
34054ba9607SSascha Wildner { "/l", "/\bl", 0x0142 },
34154ba9607SSascha Wildner { "/O", "/\bO", 0x00d8 },
34254ba9607SSascha Wildner { "/o", "/\bo", 0x00f8 },
34354ba9607SSascha Wildner { "oA", "o\bA", 0x00c5 },
34454ba9607SSascha Wildner { "oa", "o\ba", 0x00e5 },
34554ba9607SSascha Wildner
34654ba9607SSascha Wildner /* Special letters. */
34754ba9607SSascha Wildner { "-D", "Dh", 0x00d0 },
34854ba9607SSascha Wildner { "Sd", "dh", 0x00f0 },
34954ba9607SSascha Wildner { "TP", "Th", 0x00de },
35054ba9607SSascha Wildner { "Tp", "th", 0x00fe },
35154ba9607SSascha Wildner { ".i", "i", 0x0131 },
35254ba9607SSascha Wildner { ".j", "j", 0x0237 },
35354ba9607SSascha Wildner
35454ba9607SSascha Wildner /* Currency. */
35554ba9607SSascha Wildner { "Do", "$", 0x0024 },
35654ba9607SSascha Wildner { "ct", "/\bc", 0x00a2 },
35754ba9607SSascha Wildner { "Eu", "EUR", 0x20ac },
35854ba9607SSascha Wildner { "eu", "EUR", 0x20ac },
35954ba9607SSascha Wildner { "Ye", "=\bY", 0x00a5 },
36054ba9607SSascha Wildner { "Po", "-\bL", 0x00a3 },
36154ba9607SSascha Wildner { "Cs", "o\bx", 0x00a4 },
36254ba9607SSascha Wildner { "Fn", ",\bf", 0x0192 },
36354ba9607SSascha Wildner
36454ba9607SSascha Wildner /* Units. */
36554ba9607SSascha Wildner { "de", "<degree>", 0x00b0 },
36654ba9607SSascha Wildner { "%0", "<permille>", 0x2030 },
36754ba9607SSascha Wildner { "fm", "\'", 0x2032 },
36854ba9607SSascha Wildner { "sd", "''", 0x2033 },
36954ba9607SSascha Wildner { "mc", "<micro>", 0x00b5 },
37054ba9607SSascha Wildner { "Of", "_\ba", 0x00aa },
37154ba9607SSascha Wildner { "Om", "_\bo", 0x00ba },
37254ba9607SSascha Wildner
37354ba9607SSascha Wildner /* Greek characters. */
37454ba9607SSascha Wildner { "*A", "A", 0x0391 },
37554ba9607SSascha Wildner { "*B", "B", 0x0392 },
37654ba9607SSascha Wildner { "*G", "<Gamma>", 0x0393 },
37754ba9607SSascha Wildner { "*D", "<Delta>", 0x0394 },
37854ba9607SSascha Wildner { "*E", "E", 0x0395 },
37954ba9607SSascha Wildner { "*Z", "Z", 0x0396 },
38054ba9607SSascha Wildner { "*Y", "H", 0x0397 },
38154ba9607SSascha Wildner { "*H", "<Theta>", 0x0398 },
38254ba9607SSascha Wildner { "*I", "I", 0x0399 },
38354ba9607SSascha Wildner { "*K", "K", 0x039a },
38454ba9607SSascha Wildner { "*L", "<Lambda>", 0x039b },
38554ba9607SSascha Wildner { "*M", "M", 0x039c },
38654ba9607SSascha Wildner { "*N", "N", 0x039d },
38754ba9607SSascha Wildner { "*C", "<Xi>", 0x039e },
38854ba9607SSascha Wildner { "*O", "O", 0x039f },
38954ba9607SSascha Wildner { "*P", "<Pi>", 0x03a0 },
39054ba9607SSascha Wildner { "*R", "P", 0x03a1 },
39154ba9607SSascha Wildner { "*S", "<Sigma>", 0x03a3 },
39254ba9607SSascha Wildner { "*T", "T", 0x03a4 },
39354ba9607SSascha Wildner { "*U", "Y", 0x03a5 },
39454ba9607SSascha Wildner { "*F", "<Phi>", 0x03a6 },
39554ba9607SSascha Wildner { "*X", "X", 0x03a7 },
39654ba9607SSascha Wildner { "*Q", "<Psi>", 0x03a8 },
39754ba9607SSascha Wildner { "*W", "<Omega>", 0x03a9 },
39854ba9607SSascha Wildner { "*a", "<alpha>", 0x03b1 },
39954ba9607SSascha Wildner { "*b", "<beta>", 0x03b2 },
40054ba9607SSascha Wildner { "*g", "<gamma>", 0x03b3 },
40154ba9607SSascha Wildner { "*d", "<delta>", 0x03b4 },
40254ba9607SSascha Wildner { "*e", "<epsilon>", 0x03b5 },
40354ba9607SSascha Wildner { "*z", "<zeta>", 0x03b6 },
40454ba9607SSascha Wildner { "*y", "<eta>", 0x03b7 },
40554ba9607SSascha Wildner { "*h", "<theta>", 0x03b8 },
40654ba9607SSascha Wildner { "*i", "<iota>", 0x03b9 },
40754ba9607SSascha Wildner { "*k", "<kappa>", 0x03ba },
40854ba9607SSascha Wildner { "*l", "<lambda>", 0x03bb },
40954ba9607SSascha Wildner { "*m", "<mu>", 0x03bc },
41054ba9607SSascha Wildner { "*n", "<nu>", 0x03bd },
41154ba9607SSascha Wildner { "*c", "<xi>", 0x03be },
41254ba9607SSascha Wildner { "*o", "o", 0x03bf },
41354ba9607SSascha Wildner { "*p", "<pi>", 0x03c0 },
41454ba9607SSascha Wildner { "*r", "<rho>", 0x03c1 },
41554ba9607SSascha Wildner { "*s", "<sigma>", 0x03c3 },
41654ba9607SSascha Wildner { "*t", "<tau>", 0x03c4 },
41754ba9607SSascha Wildner { "*u", "<upsilon>", 0x03c5 },
41854ba9607SSascha Wildner { "*f", "<phi>", 0x03d5 },
41954ba9607SSascha Wildner { "*x", "<chi>", 0x03c7 },
42054ba9607SSascha Wildner { "*q", "<psi>", 0x03c8 },
42154ba9607SSascha Wildner { "*w", "<omega>", 0x03c9 },
42254ba9607SSascha Wildner { "+h", "<theta>", 0x03d1 },
42354ba9607SSascha Wildner { "+f", "<phi>", 0x03c6 },
42454ba9607SSascha Wildner { "+p", "<pi>", 0x03d6 },
42554ba9607SSascha Wildner { "+e", "<epsilon>", 0x03f5 },
42654ba9607SSascha Wildner { "ts", "<sigma>", 0x03c2 },
42780387638SSascha Wildner };
42880387638SSascha Wildner
42954ba9607SSascha Wildner static struct ohash mchars;
43080387638SSascha Wildner
431070c62a6SFranco Fichtner
43280387638SSascha Wildner void
mchars_free(void)43354ba9607SSascha Wildner mchars_free(void)
43480387638SSascha Wildner {
43580387638SSascha Wildner
43654ba9607SSascha Wildner ohash_delete(&mchars);
43780387638SSascha Wildner }
43880387638SSascha Wildner
43954ba9607SSascha Wildner void
mchars_alloc(void)440a4c7eb57SSascha Wildner mchars_alloc(void)
44180387638SSascha Wildner {
44254ba9607SSascha Wildner size_t i;
44354ba9607SSascha Wildner unsigned int slot;
44480387638SSascha Wildner
44554ba9607SSascha Wildner mandoc_ohash_init(&mchars, 9, offsetof(struct ln, roffcode));
44654ba9607SSascha Wildner for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) {
44754ba9607SSascha Wildner slot = ohash_qlookup(&mchars, lines[i].roffcode);
44854ba9607SSascha Wildner assert(ohash_find(&mchars, slot) == NULL);
44954ba9607SSascha Wildner ohash_insert(&mchars, slot, lines + i);
45080387638SSascha Wildner }
45180387638SSascha Wildner }
45280387638SSascha Wildner
45380387638SSascha Wildner int
mchars_spec2cp(const char * p,size_t sz)45454ba9607SSascha Wildner mchars_spec2cp(const char *p, size_t sz)
45580387638SSascha Wildner {
45680387638SSascha Wildner const struct ln *ln;
45754ba9607SSascha Wildner const char *end;
45880387638SSascha Wildner
45954ba9607SSascha Wildner end = p + sz;
46054ba9607SSascha Wildner ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
46154ba9607SSascha Wildner return ln != NULL ? ln->unicode : -1;
46280387638SSascha Wildner }
46380387638SSascha Wildner
/*
 * Convert the sz bytes at p to a number with mandoc_strntoi(), base 10.
 * Return the number if it lies in the range 0 to 255, or -1 otherwise.
 * NOTE(review): mandoc_strntoi() presumably reports a parse failure
 * with a negative value, which ends up as -1 here; confirm.
 */
int
mchars_num2char(const char *p, size_t sz)
{
	int	  i;

	i = mandoc_strntoi(p, sz, 10);
	return i >= 0 && i < 256 ? i : -1;
}

/*
 * Convert the sz bytes at p to a number with mandoc_strntoi(), base 16,
 * and return it as a Unicode code point.
 * The result is asserted to lie in the range 0 to 0x10FFFF, so callers
 * have to validate the input before calling this function.
 */
int
mchars_num2uc(const char *p, size_t sz)
{
	int	  i;

	i = mandoc_strntoi(p, sz, 16);
	assert(i >= 0 && i <= 0x10FFFF);
	return i;
}

48380387638SSascha Wildner const char *
mchars_spec2str(const char * p,size_t sz,size_t * rsz)48454ba9607SSascha Wildner mchars_spec2str(const char *p, size_t sz, size_t *rsz)
48580387638SSascha Wildner {
48680387638SSascha Wildner const struct ln *ln;
48754ba9607SSascha Wildner const char *end;
48880387638SSascha Wildner
48954ba9607SSascha Wildner end = p + sz;
49054ba9607SSascha Wildner ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
49154ba9607SSascha Wildner if (ln == NULL)
49254ba9607SSascha Wildner return NULL;
49380387638SSascha Wildner
49480387638SSascha Wildner *rsz = strlen(ln->ascii);
49554ba9607SSascha Wildner return ln->ascii;
49680387638SSascha Wildner }
49780387638SSascha Wildner
49854ba9607SSascha Wildner const char *
mchars_uc2str(int uc)49954ba9607SSascha Wildner mchars_uc2str(int uc)
50080387638SSascha Wildner {
50154ba9607SSascha Wildner size_t i;
50280387638SSascha Wildner
50354ba9607SSascha Wildner for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++)
50454ba9607SSascha Wildner if (uc == lines[i].unicode)
50554ba9607SSascha Wildner return lines[i].ascii;
50654ba9607SSascha Wildner return "<?>";
50780387638SSascha Wildner }
508