1*c1c95addSBrooks Davis /* $Id: roff_escape.c,v 1.15 2024/05/16 21:23:00 schwarze Exp $ */ 2*c1c95addSBrooks Davis /* 3*c1c95addSBrooks Davis * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022 4*c1c95addSBrooks Davis * Ingo Schwarze <schwarze@openbsd.org> 5*c1c95addSBrooks Davis * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 6*c1c95addSBrooks Davis * 7*c1c95addSBrooks Davis * Permission to use, copy, modify, and distribute this software for any 8*c1c95addSBrooks Davis * purpose with or without fee is hereby granted, provided that the above 9*c1c95addSBrooks Davis * copyright notice and this permission notice appear in all copies. 10*c1c95addSBrooks Davis * 11*c1c95addSBrooks Davis * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 12*c1c95addSBrooks Davis * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13*c1c95addSBrooks Davis * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 14*c1c95addSBrooks Davis * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15*c1c95addSBrooks Davis * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16*c1c95addSBrooks Davis * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17*c1c95addSBrooks Davis * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18*c1c95addSBrooks Davis * 19*c1c95addSBrooks Davis * Parser for roff(7) escape sequences. 20*c1c95addSBrooks Davis * To be used by all mandoc(1) parsers and formatters. 21*c1c95addSBrooks Davis */ 22*c1c95addSBrooks Davis #include <assert.h> 23*c1c95addSBrooks Davis #include <ctype.h> 24*c1c95addSBrooks Davis #include <limits.h> 25*c1c95addSBrooks Davis #include <stdio.h> 26*c1c95addSBrooks Davis #include <string.h> 27*c1c95addSBrooks Davis 28*c1c95addSBrooks Davis #include "mandoc.h" 29*c1c95addSBrooks Davis #include "roff.h" 30*c1c95addSBrooks Davis #include "roff_int.h" 31*c1c95addSBrooks Davis 32*c1c95addSBrooks Davis /* 33*c1c95addSBrooks Davis * Traditional escape sequence interpreter for general use 34*c1c95addSBrooks Davis * including in high-level formatters. This function does not issue 35*c1c95addSBrooks Davis * diagnostics and is not usable for expansion in the roff(7) parser. 36*c1c95addSBrooks Davis * It is documented in the mandoc_escape(3) manual page. 37*c1c95addSBrooks Davis */ 38*c1c95addSBrooks Davis enum mandoc_esc 39*c1c95addSBrooks Davis mandoc_escape(const char **rendarg, const char **rarg, int *rargl) 40*c1c95addSBrooks Davis { 41*c1c95addSBrooks Davis int iarg, iendarg, iend; 42*c1c95addSBrooks Davis enum mandoc_esc rval; 43*c1c95addSBrooks Davis 44*c1c95addSBrooks Davis rval = roff_escape(--*rendarg, 0, 0, 45*c1c95addSBrooks Davis NULL, NULL, &iarg, &iendarg, &iend); 46*c1c95addSBrooks Davis assert(rval != ESCAPE_EXPAND); 47*c1c95addSBrooks Davis if (rarg != NULL) 48*c1c95addSBrooks Davis *rarg = *rendarg + iarg; 49*c1c95addSBrooks Davis if (rargl != NULL) 50*c1c95addSBrooks Davis *rargl = iendarg - iarg; 51*c1c95addSBrooks Davis *rendarg += iend; 52*c1c95addSBrooks Davis return rval; 53*c1c95addSBrooks Davis } 54*c1c95addSBrooks Davis 55*c1c95addSBrooks Davis /* 56*c1c95addSBrooks Davis * Full-featured escape sequence parser. 57*c1c95addSBrooks Davis * If it encounters a nested escape sequence that requires expansion 58*c1c95addSBrooks Davis * by the parser and re-parsing, the positions of that inner escape 59*c1c95addSBrooks Davis * sequence are returned in *resc ... *rend. 60*c1c95addSBrooks Davis * Otherwise, *resc is set to aesc and the positions of the escape 61*c1c95addSBrooks Davis * sequence starting at aesc are returned. 62*c1c95addSBrooks Davis * Diagnostic messages are generated if and only if ln != 0, 63*c1c95addSBrooks Davis * that is, if and only if called by roff_expand(). 64*c1c95addSBrooks Davis */ 65*c1c95addSBrooks Davis enum mandoc_esc 66*c1c95addSBrooks Davis roff_escape(const char *buf, const int ln, const int aesc, 67*c1c95addSBrooks Davis int *resc, int *rnam, int *rarg, int *rendarg, int *rend) 68*c1c95addSBrooks Davis { 69*c1c95addSBrooks Davis int iesc; /* index of leading escape char */ 70*c1c95addSBrooks Davis int inam; /* index of escape name */ 71*c1c95addSBrooks Davis int iarg; /* index beginning the argument */ 72*c1c95addSBrooks Davis int iendarg; /* index right after the argument */ 73*c1c95addSBrooks Davis int iend; /* index right after the sequence */ 74*c1c95addSBrooks Davis int sesc, snam, sarg, sendarg, send; /* for sub-escape */ 75*c1c95addSBrooks Davis int escterm; /* whether term is escaped */ 76*c1c95addSBrooks Davis int maxl; /* expected length of the argument */ 77*c1c95addSBrooks Davis int argl; /* actual length of the argument */ 78*c1c95addSBrooks Davis int c, i; /* for \[char...] parsing */ 79*c1c95addSBrooks Davis int valid_A; /* for \A parsing */ 80*c1c95addSBrooks Davis enum mandoc_esc rval; /* return value */ 81*c1c95addSBrooks Davis enum mandoc_esc stype; /* for sub-escape */ 82*c1c95addSBrooks Davis enum mandocerr err; /* diagnostic code */ 83*c1c95addSBrooks Davis char term; /* byte terminating the argument */ 84*c1c95addSBrooks Davis 85*c1c95addSBrooks Davis /* 86*c1c95addSBrooks Davis * Treat "\E" just like "\"; 87*c1c95addSBrooks Davis * it only makes a difference in copy mode. 88*c1c95addSBrooks Davis */ 89*c1c95addSBrooks Davis 90*c1c95addSBrooks Davis iesc = inam = aesc; 91*c1c95addSBrooks Davis do { 92*c1c95addSBrooks Davis inam++; 93*c1c95addSBrooks Davis } while (buf[inam] == 'E'); 94*c1c95addSBrooks Davis 95*c1c95addSBrooks Davis /* 96*c1c95addSBrooks Davis * Sort the following cases first by syntax category, 97*c1c95addSBrooks Davis * then by escape sequence type, and finally by ASCII code. 98*c1c95addSBrooks Davis */ 99*c1c95addSBrooks Davis 100*c1c95addSBrooks Davis iarg = iendarg = iend = inam + 1; 101*c1c95addSBrooks Davis maxl = INT_MAX; 102*c1c95addSBrooks Davis term = '\0'; 103*c1c95addSBrooks Davis err = MANDOCERR_OK; 104*c1c95addSBrooks Davis switch (buf[inam]) { 105*c1c95addSBrooks Davis 106*c1c95addSBrooks Davis /* Escape sequences taking no arguments at all. */ 107*c1c95addSBrooks Davis 108*c1c95addSBrooks Davis case '!': 109*c1c95addSBrooks Davis case '?': 110*c1c95addSBrooks Davis case 'r': 111*c1c95addSBrooks Davis rval = ESCAPE_UNSUPP; 112*c1c95addSBrooks Davis goto out; 113*c1c95addSBrooks Davis 114*c1c95addSBrooks Davis case '%': 115*c1c95addSBrooks Davis case '&': 116*c1c95addSBrooks Davis case ')': 117*c1c95addSBrooks Davis case ',': 118*c1c95addSBrooks Davis case '/': 119*c1c95addSBrooks Davis case '^': 120*c1c95addSBrooks Davis case 'a': 121*c1c95addSBrooks Davis case 'd': 122*c1c95addSBrooks Davis case 't': 123*c1c95addSBrooks Davis case 'u': 124*c1c95addSBrooks Davis case '{': 125*c1c95addSBrooks Davis case '|': 126*c1c95addSBrooks Davis case '}': 127*c1c95addSBrooks Davis rval = ESCAPE_IGNORE; 128*c1c95addSBrooks Davis goto out; 129*c1c95addSBrooks Davis 130*c1c95addSBrooks Davis case '\0': 131*c1c95addSBrooks Davis iendarg = --iend; 132*c1c95addSBrooks Davis /* FALLTHROUGH */ 133*c1c95addSBrooks Davis case '.': 134*c1c95addSBrooks Davis case '\\': 135*c1c95addSBrooks Davis default: 136*c1c95addSBrooks Davis iarg--; 137*c1c95addSBrooks Davis rval = ESCAPE_UNDEF; 138*c1c95addSBrooks Davis goto out; 139*c1c95addSBrooks Davis 140*c1c95addSBrooks Davis case ' ': 141*c1c95addSBrooks Davis case '\'': 142*c1c95addSBrooks Davis case '-': 143*c1c95addSBrooks Davis case '0': 144*c1c95addSBrooks Davis case ':': 145*c1c95addSBrooks Davis case '_': 146*c1c95addSBrooks Davis case '`': 147*c1c95addSBrooks Davis case 'e': 148*c1c95addSBrooks Davis case '~': 149*c1c95addSBrooks Davis iarg--; 150*c1c95addSBrooks Davis argl = 1; 151*c1c95addSBrooks Davis rval = ESCAPE_SPECIAL; 152*c1c95addSBrooks Davis goto out; 153*c1c95addSBrooks Davis case 'p': 154*c1c95addSBrooks Davis rval = ESCAPE_BREAK; 155*c1c95addSBrooks Davis goto out; 156*c1c95addSBrooks Davis case 'c': 157*c1c95addSBrooks Davis rval = ESCAPE_NOSPACE; 158*c1c95addSBrooks Davis goto out; 159*c1c95addSBrooks Davis case 'z': 160*c1c95addSBrooks Davis rval = ESCAPE_SKIPCHAR; 161*c1c95addSBrooks Davis goto out; 162*c1c95addSBrooks Davis 163*c1c95addSBrooks Davis /* Standard argument format. */ 164*c1c95addSBrooks Davis 165*c1c95addSBrooks Davis case '$': 166*c1c95addSBrooks Davis case '*': 167*c1c95addSBrooks Davis case 'V': 168*c1c95addSBrooks Davis case 'g': 169*c1c95addSBrooks Davis case 'n': 170*c1c95addSBrooks Davis rval = ESCAPE_EXPAND; 171*c1c95addSBrooks Davis break; 172*c1c95addSBrooks Davis case 'F': 173*c1c95addSBrooks Davis case 'M': 174*c1c95addSBrooks Davis case 'O': 175*c1c95addSBrooks Davis case 'Y': 176*c1c95addSBrooks Davis case 'k': 177*c1c95addSBrooks Davis case 'm': 178*c1c95addSBrooks Davis rval = ESCAPE_IGNORE; 179*c1c95addSBrooks Davis break; 180*c1c95addSBrooks Davis case '(': 181*c1c95addSBrooks Davis case '[': 182*c1c95addSBrooks Davis rval = ESCAPE_SPECIAL; 183*c1c95addSBrooks Davis iendarg = iend = --iarg; 184*c1c95addSBrooks Davis break; 185*c1c95addSBrooks Davis case 'f': 186*c1c95addSBrooks Davis rval = ESCAPE_FONT; 187*c1c95addSBrooks Davis break; 188*c1c95addSBrooks Davis 189*c1c95addSBrooks Davis /* Quoted arguments */ 190*c1c95addSBrooks Davis 191*c1c95addSBrooks Davis case 'A': 192*c1c95addSBrooks Davis case 'B': 193*c1c95addSBrooks Davis case 'w': 194*c1c95addSBrooks Davis rval = ESCAPE_EXPAND; 195*c1c95addSBrooks Davis term = '\b'; 196*c1c95addSBrooks Davis break; 197*c1c95addSBrooks Davis case 'D': 198*c1c95addSBrooks Davis case 'H': 199*c1c95addSBrooks Davis case 'L': 200*c1c95addSBrooks Davis case 'R': 201*c1c95addSBrooks Davis case 'S': 202*c1c95addSBrooks Davis case 'X': 203*c1c95addSBrooks Davis case 'Z': 204*c1c95addSBrooks Davis case 'b': 205*c1c95addSBrooks Davis case 'v': 206*c1c95addSBrooks Davis case 'x': 207*c1c95addSBrooks Davis rval = ESCAPE_IGNORE; 208*c1c95addSBrooks Davis term = '\b'; 209*c1c95addSBrooks Davis break; 210*c1c95addSBrooks Davis case 'C': 211*c1c95addSBrooks Davis rval = ESCAPE_SPECIAL; 212*c1c95addSBrooks Davis term = '\b'; 213*c1c95addSBrooks Davis break; 214*c1c95addSBrooks Davis case 'N': 215*c1c95addSBrooks Davis rval = ESCAPE_NUMBERED; 216*c1c95addSBrooks Davis term = '\b'; 217*c1c95addSBrooks Davis break; 218*c1c95addSBrooks Davis case 'h': 219*c1c95addSBrooks Davis rval = ESCAPE_HORIZ; 220*c1c95addSBrooks Davis term = '\b'; 221*c1c95addSBrooks Davis break; 222*c1c95addSBrooks Davis case 'l': 223*c1c95addSBrooks Davis rval = ESCAPE_HLINE; 224*c1c95addSBrooks Davis term = '\b'; 225*c1c95addSBrooks Davis break; 226*c1c95addSBrooks Davis case 'o': 227*c1c95addSBrooks Davis rval = ESCAPE_OVERSTRIKE; 228*c1c95addSBrooks Davis term = '\b'; 229*c1c95addSBrooks Davis break; 230*c1c95addSBrooks Davis 231*c1c95addSBrooks Davis /* Sizes support both forms, with additional peculiarities. */ 232*c1c95addSBrooks Davis 233*c1c95addSBrooks Davis case 's': 234*c1c95addSBrooks Davis rval = ESCAPE_IGNORE; 235*c1c95addSBrooks Davis if (buf[iarg] == '+' || buf[iarg] == '-'|| 236*c1c95addSBrooks Davis buf[iarg] == ASCII_HYPH) 237*c1c95addSBrooks Davis iarg++; 238*c1c95addSBrooks Davis switch (buf[iarg]) { 239*c1c95addSBrooks Davis case '(': 240*c1c95addSBrooks Davis maxl = 2; 241*c1c95addSBrooks Davis iarg++; 242*c1c95addSBrooks Davis break; 243*c1c95addSBrooks Davis case '[': 244*c1c95addSBrooks Davis term = ']'; 245*c1c95addSBrooks Davis iarg++; 246*c1c95addSBrooks Davis break; 247*c1c95addSBrooks Davis case '\'': 248*c1c95addSBrooks Davis term = '\''; 249*c1c95addSBrooks Davis iarg++; 250*c1c95addSBrooks Davis break; 251*c1c95addSBrooks Davis case '1': 252*c1c95addSBrooks Davis case '2': 253*c1c95addSBrooks Davis case '3': 254*c1c95addSBrooks Davis if (buf[iarg - 1] == 's' && 255*c1c95addSBrooks Davis isdigit((unsigned char)buf[iarg + 1])) { 256*c1c95addSBrooks Davis maxl = 2; 257*c1c95addSBrooks Davis break; 258*c1c95addSBrooks Davis } 259*c1c95addSBrooks Davis /* FALLTHROUGH */ 260*c1c95addSBrooks Davis default: 261*c1c95addSBrooks Davis maxl = 1; 262*c1c95addSBrooks Davis break; 263*c1c95addSBrooks Davis } 264*c1c95addSBrooks Davis iendarg = iend = iarg; 265*c1c95addSBrooks Davis } 266*c1c95addSBrooks Davis 267*c1c95addSBrooks Davis /* Decide how to end the argument. */ 268*c1c95addSBrooks Davis 269*c1c95addSBrooks Davis escterm = 0; 270*c1c95addSBrooks Davis stype = ESCAPE_EXPAND; 271*c1c95addSBrooks Davis if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) && 272*c1c95addSBrooks Davis buf[iarg] == buf[iesc]) { 273*c1c95addSBrooks Davis stype = roff_escape(buf, ln, iendarg, 274*c1c95addSBrooks Davis &sesc, &snam, &sarg, &sendarg, &send); 275*c1c95addSBrooks Davis if (stype == ESCAPE_EXPAND) 276*c1c95addSBrooks Davis goto out_sub; 277*c1c95addSBrooks Davis } 278*c1c95addSBrooks Davis 279*c1c95addSBrooks Davis if (term == '\b') { 280*c1c95addSBrooks Davis if (stype == ESCAPE_UNDEF) 281*c1c95addSBrooks Davis iarg++; 282*c1c95addSBrooks Davis if (stype != ESCAPE_EXPAND && stype != ESCAPE_UNDEF) { 283*c1c95addSBrooks Davis if (strchr("BHLRSNhlvx", buf[inam]) != NULL && 284*c1c95addSBrooks Davis strchr(" ,.0DLOXYZ^abdhlortuvx|~", 285*c1c95addSBrooks Davis buf[snam]) != NULL) { 286*c1c95addSBrooks Davis err = MANDOCERR_ESC_DELIM; 287*c1c95addSBrooks Davis iend = send; 288*c1c95addSBrooks Davis iarg = iendarg = sesc; 289*c1c95addSBrooks Davis goto out; 290*c1c95addSBrooks Davis } 291*c1c95addSBrooks Davis escterm = 1; 292*c1c95addSBrooks Davis iarg = send; 293*c1c95addSBrooks Davis term = buf[snam]; 294*c1c95addSBrooks Davis } else if (strchr("BDHLRSvxNhl", buf[inam]) != NULL && 295*c1c95addSBrooks Davis strchr(" %&()*+-./0123456789:<=>", buf[iarg]) != NULL) { 296*c1c95addSBrooks Davis err = MANDOCERR_ESC_DELIM; 297*c1c95addSBrooks Davis if (rval != ESCAPE_EXPAND) 298*c1c95addSBrooks Davis rval = ESCAPE_ERROR; 299*c1c95addSBrooks Davis if (buf[inam] != 'D') { 300*c1c95addSBrooks Davis iendarg = iend = iarg + 1; 301*c1c95addSBrooks Davis goto out; 302*c1c95addSBrooks Davis } 303*c1c95addSBrooks Davis } 304*c1c95addSBrooks Davis if (term == '\b') 305*c1c95addSBrooks Davis term = buf[iarg++]; 306*c1c95addSBrooks Davis } else if (term == '\0' && maxl == INT_MAX) { 307*c1c95addSBrooks Davis if (buf[inam] == 'n' && (buf[iarg] == '+' || buf[iarg] == '-')) 308*c1c95addSBrooks Davis iarg++; 309*c1c95addSBrooks Davis switch (buf[iarg]) { 310*c1c95addSBrooks Davis case '(': 311*c1c95addSBrooks Davis maxl = 2; 312*c1c95addSBrooks Davis iarg++; 313*c1c95addSBrooks Davis break; 314*c1c95addSBrooks Davis case '[': 315*c1c95addSBrooks Davis if (buf[++iarg] == ' ') { 316*c1c95addSBrooks Davis iendarg = iend = iarg + 1; 317*c1c95addSBrooks Davis err = MANDOCERR_ESC_ARG; 318*c1c95addSBrooks Davis rval = ESCAPE_ERROR; 319*c1c95addSBrooks Davis goto out; 320*c1c95addSBrooks Davis } 321*c1c95addSBrooks Davis term = ']'; 322*c1c95addSBrooks Davis break; 323*c1c95addSBrooks Davis default: 324*c1c95addSBrooks Davis maxl = 1; 325*c1c95addSBrooks Davis break; 326*c1c95addSBrooks Davis } 327*c1c95addSBrooks Davis } 328*c1c95addSBrooks Davis 329*c1c95addSBrooks Davis /* Advance to the end of the argument. */ 330*c1c95addSBrooks Davis 331*c1c95addSBrooks Davis valid_A = 1; 332*c1c95addSBrooks Davis iendarg = iarg; 333*c1c95addSBrooks Davis while (maxl > 0) { 334*c1c95addSBrooks Davis if (buf[iendarg] == '\0') { 335*c1c95addSBrooks Davis err = MANDOCERR_ESC_INCOMPLETE; 336*c1c95addSBrooks Davis if (rval != ESCAPE_EXPAND && 337*c1c95addSBrooks Davis rval != ESCAPE_OVERSTRIKE) 338*c1c95addSBrooks Davis rval = ESCAPE_ERROR; 339*c1c95addSBrooks Davis /* Usually, ignore an incomplete argument. */ 340*c1c95addSBrooks Davis if (strchr("Aow", buf[inam]) == NULL) 341*c1c95addSBrooks Davis iendarg = iarg; 342*c1c95addSBrooks Davis break; 343*c1c95addSBrooks Davis } 344*c1c95addSBrooks Davis if (escterm == 0 && buf[iendarg] == term) { 345*c1c95addSBrooks Davis iend = iendarg + 1; 346*c1c95addSBrooks Davis break; 347*c1c95addSBrooks Davis } 348*c1c95addSBrooks Davis if (buf[iendarg] == buf[iesc]) { 349*c1c95addSBrooks Davis stype = roff_escape(buf, ln, iendarg, 350*c1c95addSBrooks Davis &sesc, &snam, &sarg, &sendarg, &send); 351*c1c95addSBrooks Davis if (stype == ESCAPE_EXPAND) 352*c1c95addSBrooks Davis goto out_sub; 353*c1c95addSBrooks Davis iend = send; 354*c1c95addSBrooks Davis if (escterm == 1 && 355*c1c95addSBrooks Davis (buf[snam] == term || buf[inam] == 'N')) 356*c1c95addSBrooks Davis break; 357*c1c95addSBrooks Davis if (stype != ESCAPE_UNDEF) 358*c1c95addSBrooks Davis valid_A = 0; 359*c1c95addSBrooks Davis iendarg = send; 360*c1c95addSBrooks Davis } else if (buf[inam] == 'N' && 361*c1c95addSBrooks Davis isdigit((unsigned char)buf[iendarg]) == 0) { 362*c1c95addSBrooks Davis iend = iendarg + 1; 363*c1c95addSBrooks Davis break; 364*c1c95addSBrooks Davis } else { 365*c1c95addSBrooks Davis if (buf[iendarg] == ' ' || buf[iendarg] == '\t') 366*c1c95addSBrooks Davis valid_A = 0; 367*c1c95addSBrooks Davis if (maxl != INT_MAX) 368*c1c95addSBrooks Davis maxl--; 369*c1c95addSBrooks Davis iend = ++iendarg; 370*c1c95addSBrooks Davis } 371*c1c95addSBrooks Davis } 372*c1c95addSBrooks Davis 373*c1c95addSBrooks Davis /* Post-process depending on the content of the argument. */ 374*c1c95addSBrooks Davis 375*c1c95addSBrooks Davis argl = iendarg - iarg; 376*c1c95addSBrooks Davis switch (buf[inam]) { 377*c1c95addSBrooks Davis case '*': 378*c1c95addSBrooks Davis if (resc == NULL && argl == 2 && 379*c1c95addSBrooks Davis buf[iarg] == '.' && buf[iarg + 1] == 'T') 380*c1c95addSBrooks Davis rval = ESCAPE_DEVICE; 381*c1c95addSBrooks Davis break; 382*c1c95addSBrooks Davis case 'A': 383*c1c95addSBrooks Davis if (valid_A == 0) 384*c1c95addSBrooks Davis iendarg = iarg; 385*c1c95addSBrooks Davis break; 386*c1c95addSBrooks Davis case 'O': 387*c1c95addSBrooks Davis switch (buf[iarg]) { 388*c1c95addSBrooks Davis case '0': 389*c1c95addSBrooks Davis rval = ESCAPE_UNSUPP; 390*c1c95addSBrooks Davis break; 391*c1c95addSBrooks Davis case '1': 392*c1c95addSBrooks Davis case '2': 393*c1c95addSBrooks Davis case '3': 394*c1c95addSBrooks Davis case '4': 395*c1c95addSBrooks Davis if (argl == 1) 396*c1c95addSBrooks Davis rval = ESCAPE_IGNORE; 397*c1c95addSBrooks Davis else { 398*c1c95addSBrooks Davis err = MANDOCERR_ESC_ARG; 399*c1c95addSBrooks Davis rval = ESCAPE_ERROR; 400*c1c95addSBrooks Davis } 401*c1c95addSBrooks Davis break; 402*c1c95addSBrooks Davis case '5': 403*c1c95addSBrooks Davis if (buf[iarg - 1] == '[') 404*c1c95addSBrooks Davis rval = ESCAPE_UNSUPP; 405*c1c95addSBrooks Davis else { 406*c1c95addSBrooks Davis err = MANDOCERR_ESC_ARG; 407*c1c95addSBrooks Davis rval = ESCAPE_ERROR; 408*c1c95addSBrooks Davis } 409*c1c95addSBrooks Davis break; 410*c1c95addSBrooks Davis default: 411*c1c95addSBrooks Davis err = MANDOCERR_ESC_ARG; 412*c1c95addSBrooks Davis rval = ESCAPE_ERROR; 413*c1c95addSBrooks Davis break; 414*c1c95addSBrooks Davis } 415*c1c95addSBrooks Davis break; 416*c1c95addSBrooks Davis default: 417*c1c95addSBrooks Davis break; 418*c1c95addSBrooks Davis } 419*c1c95addSBrooks Davis 420*c1c95addSBrooks Davis switch (rval) { 421*c1c95addSBrooks Davis case ESCAPE_FONT: 422*c1c95addSBrooks Davis rval = mandoc_font(buf + iarg, argl); 423*c1c95addSBrooks Davis if (rval == ESCAPE_ERROR) 424*c1c95addSBrooks Davis err = MANDOCERR_ESC_ARG; 425*c1c95addSBrooks Davis break; 426*c1c95addSBrooks Davis 427*c1c95addSBrooks Davis case ESCAPE_SPECIAL: 428*c1c95addSBrooks Davis if (argl == 0) { 429*c1c95addSBrooks Davis err = MANDOCERR_ESC_BADCHAR; 430*c1c95addSBrooks Davis rval = ESCAPE_ERROR; 431*c1c95addSBrooks Davis break; 432*c1c95addSBrooks Davis } 433*c1c95addSBrooks Davis 434*c1c95addSBrooks Davis /* 435*c1c95addSBrooks Davis * The file chars.c only provides one common list of 436*c1c95addSBrooks Davis * character names, but \[-] == \- is the only one of 437*c1c95addSBrooks Davis * the characters with one-byte names that allows 438*c1c95addSBrooks Davis * enclosing the name in brackets. 439*c1c95addSBrooks Davis */ 440*c1c95addSBrooks Davis 441*c1c95addSBrooks Davis if (term != '\0' && argl == 1 && buf[iarg] != '-') { 442*c1c95addSBrooks Davis err = MANDOCERR_ESC_BADCHAR; 443*c1c95addSBrooks Davis rval = ESCAPE_ERROR; 444*c1c95addSBrooks Davis break; 445*c1c95addSBrooks Davis } 446*c1c95addSBrooks Davis 447*c1c95addSBrooks Davis /* Treat \[char...] as an alias for \N'...'. */ 448*c1c95addSBrooks Davis 449*c1c95addSBrooks Davis if (buf[iarg] == 'c') { 450*c1c95addSBrooks Davis if (argl < 6 || argl > 7 || 451*c1c95addSBrooks Davis strncmp(buf + iarg, "char", 4) != 0 || 452*c1c95addSBrooks Davis (int)strspn(buf + iarg + 4, "0123456789") 453*c1c95addSBrooks Davis + 4 < argl) 454*c1c95addSBrooks Davis break; 455*c1c95addSBrooks Davis c = 0; 456*c1c95addSBrooks Davis for (i = iarg; i < iendarg; i++) 457*c1c95addSBrooks Davis c = 10 * c + (buf[i] - '0'); 458*c1c95addSBrooks Davis if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) { 459*c1c95addSBrooks Davis err = MANDOCERR_ESC_BADCHAR; 460*c1c95addSBrooks Davis break; 461*c1c95addSBrooks Davis } 462*c1c95addSBrooks Davis iarg += 4; 463*c1c95addSBrooks Davis rval = ESCAPE_NUMBERED; 464*c1c95addSBrooks Davis break; 465*c1c95addSBrooks Davis } 466*c1c95addSBrooks Davis 467*c1c95addSBrooks Davis /* 468*c1c95addSBrooks Davis * Unicode escapes are defined in groff as \[u0000] 469*c1c95addSBrooks Davis * to \[u10FFFF], where the contained value must be 470*c1c95addSBrooks Davis * a valid Unicode codepoint. 471*c1c95addSBrooks Davis */ 472*c1c95addSBrooks Davis 473*c1c95addSBrooks Davis if (buf[iarg] != 'u' || argl < 5 || argl > 7) 474*c1c95addSBrooks Davis break; 475*c1c95addSBrooks Davis if (argl == 7 && /* beyond the Unicode range */ 476*c1c95addSBrooks Davis (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) { 477*c1c95addSBrooks Davis err = MANDOCERR_ESC_BADCHAR; 478*c1c95addSBrooks Davis break; 479*c1c95addSBrooks Davis } 480*c1c95addSBrooks Davis if (argl == 6 && buf[iarg + 1] == '0') { 481*c1c95addSBrooks Davis err = MANDOCERR_ESC_BADCHAR; 482*c1c95addSBrooks Davis break; 483*c1c95addSBrooks Davis } 484*c1c95addSBrooks Davis if (argl == 5 && /* UTF-16 surrogate */ 485*c1c95addSBrooks Davis toupper((unsigned char)buf[iarg + 1]) == 'D' && 486*c1c95addSBrooks Davis strchr("89ABCDEFabcdef", buf[iarg + 2]) != NULL) { 487*c1c95addSBrooks Davis err = MANDOCERR_ESC_BADCHAR; 488*c1c95addSBrooks Davis break; 489*c1c95addSBrooks Davis } 490*c1c95addSBrooks Davis if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef") 491*c1c95addSBrooks Davis + 1 == argl) 492*c1c95addSBrooks Davis rval = ESCAPE_UNICODE; 493*c1c95addSBrooks Davis break; 494*c1c95addSBrooks Davis default: 495*c1c95addSBrooks Davis break; 496*c1c95addSBrooks Davis } 497*c1c95addSBrooks Davis goto out; 498*c1c95addSBrooks Davis 499*c1c95addSBrooks Davis out_sub: 500*c1c95addSBrooks Davis iesc = sesc; 501*c1c95addSBrooks Davis inam = snam; 502*c1c95addSBrooks Davis iarg = sarg; 503*c1c95addSBrooks Davis iendarg = sendarg; 504*c1c95addSBrooks Davis iend = send; 505*c1c95addSBrooks Davis rval = ESCAPE_EXPAND; 506*c1c95addSBrooks Davis 507*c1c95addSBrooks Davis out: 508*c1c95addSBrooks Davis if (resc != NULL) 509*c1c95addSBrooks Davis *resc = iesc; 510*c1c95addSBrooks Davis if (rnam != NULL) 511*c1c95addSBrooks Davis *rnam = inam; 512*c1c95addSBrooks Davis if (rarg != NULL) 513*c1c95addSBrooks Davis *rarg = iarg; 514*c1c95addSBrooks Davis if (rendarg != NULL) 515*c1c95addSBrooks Davis *rendarg = iendarg; 516*c1c95addSBrooks Davis if (rend != NULL) 517*c1c95addSBrooks Davis *rend = iend; 518*c1c95addSBrooks Davis if (ln == 0) 519*c1c95addSBrooks Davis return rval; 520*c1c95addSBrooks Davis 521*c1c95addSBrooks Davis /* 522*c1c95addSBrooks Davis * Diagnostic messages are only issued when called 523*c1c95addSBrooks Davis * from the parser, not when called from the formatters. 524*c1c95addSBrooks Davis */ 525*c1c95addSBrooks Davis 526*c1c95addSBrooks Davis switch (rval) { 527*c1c95addSBrooks Davis case ESCAPE_UNSUPP: 528*c1c95addSBrooks Davis err = MANDOCERR_ESC_UNSUPP; 529*c1c95addSBrooks Davis break; 530*c1c95addSBrooks Davis case ESCAPE_UNDEF: 531*c1c95addSBrooks Davis if (buf[inam] != '\\' && buf[inam] != '.') 532*c1c95addSBrooks Davis err = MANDOCERR_ESC_UNDEF; 533*c1c95addSBrooks Davis break; 534*c1c95addSBrooks Davis case ESCAPE_SPECIAL: 535*c1c95addSBrooks Davis if (mchars_spec2cp(buf + iarg, argl) >= 0) 536*c1c95addSBrooks Davis err = MANDOCERR_OK; 537*c1c95addSBrooks Davis else if (err == MANDOCERR_OK) 538*c1c95addSBrooks Davis err = MANDOCERR_ESC_UNKCHAR; 539*c1c95addSBrooks Davis break; 540*c1c95addSBrooks Davis default: 541*c1c95addSBrooks Davis break; 542*c1c95addSBrooks Davis } 543*c1c95addSBrooks Davis if (err != MANDOCERR_OK) 544*c1c95addSBrooks Davis mandoc_msg(err, ln, iesc, "%.*s", iend - iesc, buf + iesc); 545*c1c95addSBrooks Davis return rval; 546*c1c95addSBrooks Davis } 547