1*c1c95addSBrooks Davis /* $Id: mandoc.c,v 1.121 2022/05/19 15:37:47 schwarze Exp $ */ 261d06d6bSBaptiste Daroussin /* 3*c1c95addSBrooks Davis * Copyright (c) 2010, 2011, 2015, 2017, 2018, 2019, 2020, 2021 4*c1c95addSBrooks Davis * Ingo Schwarze <schwarze@openbsd.org> 5*c1c95addSBrooks Davis * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 661d06d6bSBaptiste Daroussin * 761d06d6bSBaptiste Daroussin * Permission to use, copy, modify, and distribute this software for any 861d06d6bSBaptiste Daroussin * purpose with or without fee is hereby granted, provided that the above 961d06d6bSBaptiste Daroussin * copyright notice and this permission notice appear in all copies. 1061d06d6bSBaptiste Daroussin * 1161d06d6bSBaptiste Daroussin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 1261d06d6bSBaptiste Daroussin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1361d06d6bSBaptiste Daroussin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 1461d06d6bSBaptiste Daroussin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1561d06d6bSBaptiste Daroussin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1661d06d6bSBaptiste Daroussin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1761d06d6bSBaptiste Daroussin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18*c1c95addSBrooks Davis * 19*c1c95addSBrooks Davis * Utility functions to handle end of sentence punctuation 20*c1c95addSBrooks Davis * and dates and times, for use by mdoc(7) and man(7) parsers. 21*c1c95addSBrooks Davis * Utility functions to handle fonts and numbers, 22*c1c95addSBrooks Davis * for use by mandoc(1) parsers and formatters. 2361d06d6bSBaptiste Daroussin */ 2461d06d6bSBaptiste Daroussin #include "config.h" 2561d06d6bSBaptiste Daroussin 2661d06d6bSBaptiste Daroussin #include <sys/types.h> 2761d06d6bSBaptiste Daroussin 2861d06d6bSBaptiste Daroussin #include <assert.h> 2961d06d6bSBaptiste Daroussin #include <ctype.h> 3061d06d6bSBaptiste Daroussin #include <errno.h> 3161d06d6bSBaptiste Daroussin #include <limits.h> 3261d06d6bSBaptiste Daroussin #include <stdlib.h> 3361d06d6bSBaptiste Daroussin #include <stdio.h> 3461d06d6bSBaptiste Daroussin #include <string.h> 3561d06d6bSBaptiste Daroussin #include <time.h> 3661d06d6bSBaptiste Daroussin 3761d06d6bSBaptiste Daroussin #include "mandoc_aux.h" 3861d06d6bSBaptiste Daroussin #include "mandoc.h" 3961d06d6bSBaptiste Daroussin #include "roff.h" 4061d06d6bSBaptiste Daroussin #include "libmandoc.h" 417295610fSBaptiste Daroussin #include "roff_int.h" 4261d06d6bSBaptiste Daroussin 4361d06d6bSBaptiste Daroussin static int a2time(time_t *, const char *, const char *); 4461d06d6bSBaptiste Daroussin static char *time2a(time_t); 4561d06d6bSBaptiste Daroussin 4661d06d6bSBaptiste Daroussin 4761d06d6bSBaptiste Daroussin enum mandoc_esc 487295610fSBaptiste Daroussin mandoc_font(const char *cp, int sz) 497295610fSBaptiste Daroussin { 507295610fSBaptiste Daroussin switch (sz) { 517295610fSBaptiste Daroussin case 0: 527295610fSBaptiste Daroussin return ESCAPE_FONTPREV; 537295610fSBaptiste Daroussin case 1: 547295610fSBaptiste Daroussin switch (cp[0]) { 557295610fSBaptiste Daroussin case 'B': 567295610fSBaptiste Daroussin case '3': 577295610fSBaptiste Daroussin return ESCAPE_FONTBOLD; 587295610fSBaptiste Daroussin case 'I': 597295610fSBaptiste Daroussin case '2': 607295610fSBaptiste Daroussin return ESCAPE_FONTITALIC; 617295610fSBaptiste Daroussin case 'P': 627295610fSBaptiste Daroussin return ESCAPE_FONTPREV; 637295610fSBaptiste Daroussin case 'R': 647295610fSBaptiste Daroussin case '1': 657295610fSBaptiste Daroussin return ESCAPE_FONTROMAN; 667295610fSBaptiste Daroussin case '4': 677295610fSBaptiste Daroussin return ESCAPE_FONTBI; 687295610fSBaptiste Daroussin default: 697295610fSBaptiste Daroussin return ESCAPE_ERROR; 707295610fSBaptiste Daroussin } 717295610fSBaptiste Daroussin case 2: 727295610fSBaptiste Daroussin switch (cp[0]) { 737295610fSBaptiste Daroussin case 'B': 747295610fSBaptiste Daroussin switch (cp[1]) { 757295610fSBaptiste Daroussin case 'I': 767295610fSBaptiste Daroussin return ESCAPE_FONTBI; 777295610fSBaptiste Daroussin default: 787295610fSBaptiste Daroussin return ESCAPE_ERROR; 797295610fSBaptiste Daroussin } 807295610fSBaptiste Daroussin case 'C': 817295610fSBaptiste Daroussin switch (cp[1]) { 827295610fSBaptiste Daroussin case 'B': 836d38604fSBaptiste Daroussin return ESCAPE_FONTCB; 847295610fSBaptiste Daroussin case 'I': 856d38604fSBaptiste Daroussin return ESCAPE_FONTCI; 867295610fSBaptiste Daroussin case 'R': 877295610fSBaptiste Daroussin case 'W': 886d38604fSBaptiste Daroussin return ESCAPE_FONTCR; 897295610fSBaptiste Daroussin default: 907295610fSBaptiste Daroussin return ESCAPE_ERROR; 917295610fSBaptiste Daroussin } 927295610fSBaptiste Daroussin default: 937295610fSBaptiste Daroussin return ESCAPE_ERROR; 947295610fSBaptiste Daroussin } 957295610fSBaptiste Daroussin default: 967295610fSBaptiste Daroussin return ESCAPE_ERROR; 977295610fSBaptiste Daroussin } 987295610fSBaptiste Daroussin } 997295610fSBaptiste Daroussin 10061d06d6bSBaptiste Daroussin static int 10161d06d6bSBaptiste Daroussin a2time(time_t *t, const char *fmt, const char *p) 10261d06d6bSBaptiste Daroussin { 10361d06d6bSBaptiste Daroussin struct tm tm; 10461d06d6bSBaptiste Daroussin char *pp; 10561d06d6bSBaptiste Daroussin 10661d06d6bSBaptiste Daroussin memset(&tm, 0, sizeof(struct tm)); 10761d06d6bSBaptiste Daroussin 10861d06d6bSBaptiste Daroussin pp = NULL; 10961d06d6bSBaptiste Daroussin #if HAVE_STRPTIME 11061d06d6bSBaptiste Daroussin pp = strptime(p, fmt, &tm); 11161d06d6bSBaptiste Daroussin #endif 11261d06d6bSBaptiste Daroussin if (NULL != pp && '\0' == *pp) { 11361d06d6bSBaptiste Daroussin *t = mktime(&tm); 11461d06d6bSBaptiste Daroussin return 1; 11561d06d6bSBaptiste Daroussin } 11661d06d6bSBaptiste Daroussin 11761d06d6bSBaptiste Daroussin return 0; 11861d06d6bSBaptiste Daroussin } 11961d06d6bSBaptiste Daroussin 12061d06d6bSBaptiste Daroussin static char * 12161d06d6bSBaptiste Daroussin time2a(time_t t) 12261d06d6bSBaptiste Daroussin { 12361d06d6bSBaptiste Daroussin struct tm *tm; 12461d06d6bSBaptiste Daroussin char *buf, *p; 12561d06d6bSBaptiste Daroussin size_t ssz; 12661d06d6bSBaptiste Daroussin int isz; 12761d06d6bSBaptiste Daroussin 12845a5aec3SBaptiste Daroussin buf = NULL; 12961d06d6bSBaptiste Daroussin tm = localtime(&t); 13061d06d6bSBaptiste Daroussin if (tm == NULL) 13145a5aec3SBaptiste Daroussin goto fail; 13261d06d6bSBaptiste Daroussin 13361d06d6bSBaptiste Daroussin /* 13461d06d6bSBaptiste Daroussin * Reserve space: 13561d06d6bSBaptiste Daroussin * up to 9 characters for the month (September) + blank 13661d06d6bSBaptiste Daroussin * up to 2 characters for the day + comma + blank 13761d06d6bSBaptiste Daroussin * 4 characters for the year and a terminating '\0' 13861d06d6bSBaptiste Daroussin */ 13961d06d6bSBaptiste Daroussin 14061d06d6bSBaptiste Daroussin p = buf = mandoc_malloc(10 + 4 + 4 + 1); 14161d06d6bSBaptiste Daroussin 14261d06d6bSBaptiste Daroussin if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0) 14361d06d6bSBaptiste Daroussin goto fail; 14461d06d6bSBaptiste Daroussin p += (int)ssz; 14561d06d6bSBaptiste Daroussin 14661d06d6bSBaptiste Daroussin /* 14761d06d6bSBaptiste Daroussin * The output format is just "%d" here, not "%2d" or "%02d". 14861d06d6bSBaptiste Daroussin * That's also the reason why we can't just format the 14961d06d6bSBaptiste Daroussin * date as a whole with "%B %e, %Y" or "%B %d, %Y". 15061d06d6bSBaptiste Daroussin * Besides, the present approach is less prone to buffer 15161d06d6bSBaptiste Daroussin * overflows, in case anybody should ever introduce the bug 15261d06d6bSBaptiste Daroussin * of looking at LC_TIME. 15361d06d6bSBaptiste Daroussin */ 15461d06d6bSBaptiste Daroussin 15545a5aec3SBaptiste Daroussin isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday); 15645a5aec3SBaptiste Daroussin if (isz < 0 || isz > 4) 15761d06d6bSBaptiste Daroussin goto fail; 15861d06d6bSBaptiste Daroussin p += isz; 15961d06d6bSBaptiste Daroussin 16061d06d6bSBaptiste Daroussin if (strftime(p, 4 + 1, "%Y", tm) == 0) 16161d06d6bSBaptiste Daroussin goto fail; 16261d06d6bSBaptiste Daroussin return buf; 16361d06d6bSBaptiste Daroussin 16461d06d6bSBaptiste Daroussin fail: 16561d06d6bSBaptiste Daroussin free(buf); 16645a5aec3SBaptiste Daroussin return mandoc_strdup(""); 16761d06d6bSBaptiste Daroussin } 16861d06d6bSBaptiste Daroussin 16961d06d6bSBaptiste Daroussin char * 1706d38604fSBaptiste Daroussin mandoc_normdate(struct roff_node *nch, struct roff_node *nbl) 17161d06d6bSBaptiste Daroussin { 17261d06d6bSBaptiste Daroussin char *cp; 17361d06d6bSBaptiste Daroussin time_t t; 17461d06d6bSBaptiste Daroussin 1756d38604fSBaptiste Daroussin /* No date specified. */ 17645a5aec3SBaptiste Daroussin 1776d38604fSBaptiste Daroussin if (nch == NULL) { 1786d38604fSBaptiste Daroussin if (nbl == NULL) 1796d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_MISSING, 0, 0, NULL); 1806d38604fSBaptiste Daroussin else 1816d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_MISSING, nbl->line, 1826d38604fSBaptiste Daroussin nbl->pos, "%s", roff_name[nbl->tok]); 1836d38604fSBaptiste Daroussin return mandoc_strdup(""); 1846d38604fSBaptiste Daroussin } 1856d38604fSBaptiste Daroussin if (*nch->string == '\0') { 1866d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_MISSING, nch->line, 1876d38604fSBaptiste Daroussin nch->pos, "%s", roff_name[nbl->tok]); 1886d38604fSBaptiste Daroussin return mandoc_strdup(""); 1896d38604fSBaptiste Daroussin } 1906d38604fSBaptiste Daroussin if (strcmp(nch->string, "$" "Mdocdate$") == 0) 19161d06d6bSBaptiste Daroussin return time2a(time(NULL)); 19261d06d6bSBaptiste Daroussin 19361d06d6bSBaptiste Daroussin /* Valid mdoc(7) date format. */ 19461d06d6bSBaptiste Daroussin 1956d38604fSBaptiste Daroussin if (a2time(&t, "$" "Mdocdate: %b %d %Y $", nch->string) || 1966d38604fSBaptiste Daroussin a2time(&t, "%b %d, %Y", nch->string)) { 19761d06d6bSBaptiste Daroussin cp = time2a(t); 19861d06d6bSBaptiste Daroussin if (t > time(NULL) + 86400) 1996d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, 2006d38604fSBaptiste Daroussin nch->pos, "%s %s", roff_name[nbl->tok], cp); 2016d38604fSBaptiste Daroussin else if (*nch->string != '$' && 2026d38604fSBaptiste Daroussin strcmp(nch->string, cp) != 0) 2036d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_NORM, nch->line, 2046d38604fSBaptiste Daroussin nch->pos, "%s %s", roff_name[nbl->tok], cp); 20561d06d6bSBaptiste Daroussin return cp; 20661d06d6bSBaptiste Daroussin } 20761d06d6bSBaptiste Daroussin 20861d06d6bSBaptiste Daroussin /* In man(7), do not warn about the legacy format. */ 20961d06d6bSBaptiste Daroussin 2106d38604fSBaptiste Daroussin if (a2time(&t, "%Y-%m-%d", nch->string) == 0) 2116d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_BAD, nch->line, nch->pos, 2126d38604fSBaptiste Daroussin "%s %s", roff_name[nbl->tok], nch->string); 21361d06d6bSBaptiste Daroussin else if (t > time(NULL) + 86400) 2146d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, nch->pos, 2156d38604fSBaptiste Daroussin "%s %s", roff_name[nbl->tok], nch->string); 2166d38604fSBaptiste Daroussin else if (nbl->tok == MDOC_Dd) 2176d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_LEGACY, nch->line, nch->pos, 2186d38604fSBaptiste Daroussin "Dd %s", nch->string); 21961d06d6bSBaptiste Daroussin 22061d06d6bSBaptiste Daroussin /* Use any non-mdoc(7) date verbatim. */ 22161d06d6bSBaptiste Daroussin 2226d38604fSBaptiste Daroussin return mandoc_strdup(nch->string); 22361d06d6bSBaptiste Daroussin } 22461d06d6bSBaptiste Daroussin 22561d06d6bSBaptiste Daroussin int 22661d06d6bSBaptiste Daroussin mandoc_eos(const char *p, size_t sz) 22761d06d6bSBaptiste Daroussin { 22861d06d6bSBaptiste Daroussin const char *q; 22961d06d6bSBaptiste Daroussin int enclosed, found; 23061d06d6bSBaptiste Daroussin 23161d06d6bSBaptiste Daroussin if (0 == sz) 23261d06d6bSBaptiste Daroussin return 0; 23361d06d6bSBaptiste Daroussin 23461d06d6bSBaptiste Daroussin /* 23561d06d6bSBaptiste Daroussin * End-of-sentence recognition must include situations where 23661d06d6bSBaptiste Daroussin * some symbols, such as `)', allow prior EOS punctuation to 23761d06d6bSBaptiste Daroussin * propagate outward. 23861d06d6bSBaptiste Daroussin */ 23961d06d6bSBaptiste Daroussin 24061d06d6bSBaptiste Daroussin enclosed = found = 0; 24161d06d6bSBaptiste Daroussin for (q = p + (int)sz - 1; q >= p; q--) { 24261d06d6bSBaptiste Daroussin switch (*q) { 24361d06d6bSBaptiste Daroussin case '\"': 24461d06d6bSBaptiste Daroussin case '\'': 24561d06d6bSBaptiste Daroussin case ']': 24661d06d6bSBaptiste Daroussin case ')': 24761d06d6bSBaptiste Daroussin if (0 == found) 24861d06d6bSBaptiste Daroussin enclosed = 1; 24961d06d6bSBaptiste Daroussin break; 25061d06d6bSBaptiste Daroussin case '.': 25161d06d6bSBaptiste Daroussin case '!': 25261d06d6bSBaptiste Daroussin case '?': 25361d06d6bSBaptiste Daroussin found = 1; 25461d06d6bSBaptiste Daroussin break; 25561d06d6bSBaptiste Daroussin default: 25661d06d6bSBaptiste Daroussin return found && 25761d06d6bSBaptiste Daroussin (!enclosed || isalnum((unsigned char)*q)); 25861d06d6bSBaptiste Daroussin } 25961d06d6bSBaptiste Daroussin } 26061d06d6bSBaptiste Daroussin 26161d06d6bSBaptiste Daroussin return found && !enclosed; 26261d06d6bSBaptiste Daroussin } 26361d06d6bSBaptiste Daroussin 26461d06d6bSBaptiste Daroussin /* 26561d06d6bSBaptiste Daroussin * Convert a string to a long that may not be <0. 26661d06d6bSBaptiste Daroussin * If the string is invalid, or is less than 0, return -1. 26761d06d6bSBaptiste Daroussin */ 26861d06d6bSBaptiste Daroussin int 26961d06d6bSBaptiste Daroussin mandoc_strntoi(const char *p, size_t sz, int base) 27061d06d6bSBaptiste Daroussin { 27161d06d6bSBaptiste Daroussin char buf[32]; 27261d06d6bSBaptiste Daroussin char *ep; 27361d06d6bSBaptiste Daroussin long v; 27461d06d6bSBaptiste Daroussin 27561d06d6bSBaptiste Daroussin if (sz > 31) 27661d06d6bSBaptiste Daroussin return -1; 27761d06d6bSBaptiste Daroussin 27861d06d6bSBaptiste Daroussin memcpy(buf, p, sz); 27961d06d6bSBaptiste Daroussin buf[(int)sz] = '\0'; 28061d06d6bSBaptiste Daroussin 28161d06d6bSBaptiste Daroussin errno = 0; 28261d06d6bSBaptiste Daroussin v = strtol(buf, &ep, base); 28361d06d6bSBaptiste Daroussin 28461d06d6bSBaptiste Daroussin if (buf[0] == '\0' || *ep != '\0') 28561d06d6bSBaptiste Daroussin return -1; 28661d06d6bSBaptiste Daroussin 28761d06d6bSBaptiste Daroussin if (v > INT_MAX) 28861d06d6bSBaptiste Daroussin v = INT_MAX; 28961d06d6bSBaptiste Daroussin if (v < INT_MIN) 29061d06d6bSBaptiste Daroussin v = INT_MIN; 29161d06d6bSBaptiste Daroussin 29261d06d6bSBaptiste Daroussin return (int)v; 29361d06d6bSBaptiste Daroussin } 294