src/util/mystrtok.c

/*	$NetBSD: mystrtok.c,v 1.3 2022/10/08 16:12:50 christos Exp $	*/

/*++
/* NAME
/*	mystrtok 3
/* SUMMARY
/*	safe tokenizer
/* SYNOPSIS
/*	#include <stringops.h>
/*
/*	char	*mystrtok(bufp, delimiters)
/*	char	**bufp;
/*	const char *delimiters;
/*
/*	char	*mystrtokq(bufp, delimiters, parens)
/*	char	**bufp;
/*	const char *delimiters;
/*	const char *parens;
/*
/*	char	*mystrtokdq(bufp, delimiters)
/*	char	**bufp;
/*	const char *delimiters;
/* DESCRIPTION
/*	mystrtok() splits a buffer on the specified \fIdelimiters\fR.
/*	Tokens are delimited by runs of delimiters, so this routine
/*	cannot return zero-length tokens.
/*
/*	mystrtokq() is like mystrtok() but will not split text
/*	between balanced parentheses.  \fIparens\fR specifies the
/*	opening and closing parenthesis (one of each).  The set of
/*	\fIparens\fR must be distinct from the set of \fIdelimiters\fR.
/*
/*	mystrtokdq() is like mystrtok() but will not split text
/*	between double quotes. The backslash character may be used
/*	to escape characters. The double quote and backslash
/*	character must not appear in the set of \fIdelimiters\fR.
/*
/*	The \fIbufp\fR argument specifies the start of the search; it
/*	is updated with each call. The input is destroyed.
/*
/*	The result value is the next token, or a null pointer when the
/*	end of the buffer was reached.
/* LICENSE
/* .ad
/* .fi
/*	The Secure Mailer license must be distributed with this software.
/* AUTHOR(S)
/*	Wietse Venema
/*	IBM T.J. Watson Research
/*	P.O. Box 704
/*	Yorktown Heights, NY 10598, USA
/*
/*	Wietse Venema
/*	Google, Inc.
/*	111 8th Avenue
/*	New York, NY 10011, USA
/*--*/

/* System library. */

#include "sys_defs.h"
#include <string.h>

/* Utility library. */

#include "stringops.h"

/*
 * mystrtok - safe tokenizer
 *
 * Returns the next token found at or after *src, where tokens are separated
 * by runs of characters from sep. The delimiter that ends the token is
 * overwritten with a null byte, and *src is advanced so that the next call
 * continues after it. Returns a null pointer, with *src left at the buffer
 * terminator, when only delimiters (or nothing) remain.
 */

char   *mystrtok(char **src, const char *sep)
{
    char   *token;
    char   *rest;

    /*
     * Step over any delimiters in front of the token. Nothing left means
     * end of input.
     */
    token = *src + strspn(*src, sep);
    if (*token == 0) {
	*src = token;
	return (0);
    }

    /*
     * The token runs up to the next delimiter or the end of the buffer. A
     * delimiter is replaced with a terminator and stepped over; the buffer
     * terminator itself is left for the next call to find.
     */
    rest = token + strcspn(token, sep);
    if (*rest != 0) {
	*rest = 0;
	rest += 1;
    }
    *src = rest;
    return (token);
}

/*
 * mystrtokq - safe tokenizer with support for parenthesized text
 *
 * Like mystrtok(), but a delimiter from sep does not end the token while the
 * scan is inside parentheses. parens[0] is the opening and parens[1] the
 * closing parenthesis character; nesting is tracked with a depth counter. A
 * closing parenthesis that has no matching opening parenthesis is treated as
 * ordinary text.
 *
 * The delimiter that ends the token is overwritten with a null byte and *src
 * is advanced past it. Returns a null pointer, with *src left at the buffer
 * terminator, when only delimiters (or nothing) remain.
 */

char   *mystrtokq(char **src, const char *sep, const char *parens)
{
    char   *start = *src;
    char   *cp;				/* automatic: no state between calls */
    int     ch;
    int     level;

    /*
     * Skip over leading delimiters.
     */
    start += strspn(start, sep);
    if (*start == 0) {
	*src = start;
	return (0);
    }

    /*
     * Parse out the next token. The input character is read as unsigned
     * char, therefore the parenthesis characters must be converted the same
     * way; otherwise a parenthesis with the high bit set would never match
     * where plain char is signed. parens[1] is examined only after an
     * opening parenthesis was seen.
     */
    for (level = 0, cp = start; (ch = *(unsigned char *) cp) != 0; cp++) {
	if (ch == (unsigned char) parens[0]) {
	    level++;
	} else if (level > 0 && ch == (unsigned char) parens[1]) {
	    level--;
	} else if (level == 0 && strchr(sep, ch) != 0) {
	    /* Unprotected delimiter: terminate the token and step over it. */
	    *cp++ = 0;
	    break;
	}
    }
    *src = cp;
    return (start);
}

/*
 * mystrtokdq - safe tokenizer, double quote and backslash support
 *
 * Like mystrtok(), but a delimiter from sep does not end the token when it
 * appears between double quotes, or when it directly follows a backslash.
 * Quotes and backslashes are left in the returned token.
 *
 * The delimiter that ends the token is overwritten with a null byte and *src
 * is advanced past it. Returns a null pointer, with *src left at the buffer
 * terminator, when only delimiters (or nothing) remain.
 */

char   *mystrtokdq(char **src, const char *sep)
{
    char   *scan;
    char   *token = 0;
    int     quoted = 0;

    /*
     * Step over any delimiters in front of the token.
     */
    scan = *src + strspn(*src, sep);

    /*
     * Walk forward to the first delimiter that is neither quoted nor
     * escaped, or to the end of the buffer.
     */
    if (*scan != 0) {
	token = scan;
	while (*scan != 0) {
	    if (*scan == '\\') {
		/* Skip the escaped character; stop if there is none. */
		scan += 1;
		if (*scan == 0)
		    break;
	    } else if (*scan == '"') {
		quoted = !quoted;
	    } else if (quoted == 0
		       && strchr(sep, *(unsigned char *) scan) != 0) {
		*scan = 0;
		scan += 1;
		break;
	    }
	    scan += 1;
	}
    }
    *src = scan;
    return (token);
}

#ifdef TEST

 /*
  * Test program.
  */
#include "msg.h"
#include "mymalloc.h"

 /*
  * The following needs to be large enough to include a null terminator in
  * every testcase.expected field.
  */
#define EXPECT_SIZE	5

/*
 * One tokenizer test: the function to exercise, the text to split, and the
 * tokens that the function is expected to return, in order.
 */
struct testcase {
    /* Tokenizer name; main() compares it against the known function names. */
    const char *action;
    /* Text to tokenize; main() tokenizes a copy, not this string. */
    const char *input;
    /* Expected tokens; the first null entry marks the expected end of input. */
    const char *expected[EXPECT_SIZE];
};
/*
 * Test table. Each row names a tokenizer, an input string, and the expected
 * tokens. Rows without an expected list have a null pointer as their first
 * expected entry, meaning that the tokenizer must report end of input on the
 * first call. main() runs the "mystrtok" and "mystrtokdq" rows with
 * CHARS_SPACE as delimiters, and the "mystrtokq" rows with CHARS_SPACE as
 * delimiters and CHARS_BRACE as parentheses.
 */
static const struct testcase testcases[] = {
    {"mystrtok", ""},
    {"mystrtok", "  foo  ", {"foo"}},
    {"mystrtok", "  foo  bar  ", {"foo", "bar"}},
    {"mystrtokq", ""},
    {"mystrtokq", "foo bar", {"foo", "bar"}},
    {"mystrtokq", "{ bar }  ", {"{ bar }"}},
    {"mystrtokq", "foo { bar } baz", {"foo", "{ bar }", "baz"}},
    {"mystrtokq", "foo{ bar } baz", {"foo{ bar }", "baz"}},
    {"mystrtokq", "foo { bar }baz", {"foo", "{ bar }baz"}},
    {"mystrtokdq", ""},
    {"mystrtokdq", "  foo  ", {"foo"}},
    {"mystrtokdq", "  foo  bar  ", {"foo", "bar"}},
    /* A backslash protects the delimiter that follows it. */
    {"mystrtokdq", "  foo\\ bar  ", {"foo\\ bar"}},
    /* A backslash-escaped quote does not start quoted text. */
    {"mystrtokdq", "  foo \\\" bar", {"foo", "\\\"", "bar"}},
    /* Delimiters between double quotes do not split the token. */
    {"mystrtokdq", "  foo \" bar baz\"  ", {"foo", "\" bar baz\""}},
};

/*
 * main - run every row of the test table through its tokenizer
 *
 * Each row is tokenized until the tokenizer reports end of input or a token
 * differs from the expected one. A PASS or FAIL line is logged per row. The
 * exit status is 1 when at least one row failed, and 0 otherwise.
 */

int     main(void)
{
    const struct testcase *tp;
    char   *actual;
    int     pass;
    int     fail;
    int     match;
    int     n;

    /* Parenthesized, so that the expansion is safe inside any expression. */
#define NUM_TESTS       (sizeof(testcases)/sizeof(testcases[0]))
#define STR_OR_NULL(s)	((s) ? (s) : "null")

    for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {

	/*
	 * The tokenizers overwrite their input, therefore each row works on
	 * a copy of the constant test string. The copy is released at the
	 * end of the iteration.
	 */
	char   *saved_input = mystrdup(tp->input);
	char   *cp = saved_input;

	msg_info("RUN test case %ld %s >%s<",
		 (long) (tp - testcases), tp->action, tp->input);
#if 0
	/* Disabled debug output: step through the expected tokens with n. */
	msg_info("action=%s", tp->action);
	msg_info("input=%s", tp->input);
	for (n = 0; tp->expected[n]; n++)
	    msg_info("expected[%d]=%s", n, tp->expected[n]);
#endif

	for (n = 0; n < EXPECT_SIZE; n++) {
	    if (strcmp(tp->action, "mystrtok") == 0) {
		actual = mystrtok(&cp, CHARS_SPACE);
	    } else if (strcmp(tp->action, "mystrtokq") == 0) {
		actual = mystrtokq(&cp, CHARS_SPACE, CHARS_BRACE);
	    } else if (strcmp(tp->action, "mystrtokdq") == 0) {
		actual = mystrtokdq(&cp, CHARS_SPACE);
	    } else {
		msg_panic("invalid command: %s", tp->action);
	    }

	    /*
	     * Two tokens match when they have the same text; a null result
	     * matches only a null expectation. The row passes once both
	     * sides reach their null entry at the same position.
	     */
	    if ((match = (actual && tp->expected[n]) ?
		 (strcmp(actual, tp->expected[n]) == 0) :
		 (actual == tp->expected[n])) != 0) {
		if (actual == 0) {
		    msg_info("PASS test %ld", (long) (tp - testcases));
		    pass++;
		    break;
		}
	    } else {
		msg_warn("expected: >%s<, got: >%s<",
			 STR_OR_NULL(tp->expected[n]), STR_OR_NULL(actual));
		msg_info("FAIL test %ld", (long) (tp - testcases));
		fail++;
		break;
	    }
	}

	/*
	 * Getting here without a break means that all EXPECT_SIZE entries
	 * matched a token, so the row has no room for the terminating null.
	 */
	if (n >= EXPECT_SIZE)
	    msg_panic("need to increase EXPECT_SIZE");
	myfree(saved_input);
    }
    return (fail > 0);
}

#endif