xref: /openbsd-src/gnu/gcc/libstdc++-v3/docs/html/21_strings/stringtok_h.txt (revision 404b540a9034ac75a6199ad1a32d1bbc7a0d4210)
1/*
2 * stringtok.h -- Breaks a string into tokens.  This is an example for lib3.
3 *
4 * Template function looks like this:
5 *
6 *    template <typename Container>
7 *    void stringtok (Container &l,
8 *                    string const &s,
9 *                    char const * const ws = " \t\n");
10 *
11 * A nondestructive version of strtok() that handles its own memory and can
12 * be broken up by any character(s).  Does all the work at once rather than
13 * in an invocation loop like strtok() requires.
14 *
 * Container is any type that supports push_back(a_string); list<string>
 * and deque<string> are recommended because their push_back is O(1).
 * (I prefer deque<> because op[]/at() is available as well.)  The first
 * parameter references an existing Container.
19 *
20 * s is the string to be tokenized.  From the parameter declaration, it can
21 * be seen that s is not affected.  Since references-to-const may refer to
22 * temporaries, you could use stringtok(some_container, readline("")) when
23 * using the GNU readline library.
24 *
25 * The final parameter is an array of characters that serve as whitespace.
26 * Whitespace characters default to one or more of tab, space, and newline,
27 * in any combination.
28 *
29 * 'l' need not be empty on entry.  On return, 'l' will have the token
30 * strings appended.
31 *
32 *
33 * [Example:
34 *       list<string>       ls;
35 *       stringtok (ls, " this  \t is\t\n  a test  ");
36 *       for (list<string>::const_iterator i = ls.begin();
37 *            i != ls.end(); ++i)
38 *       {
39 *            cerr << ':' << (*i) << ":\n";
40 *       }
41 *
42 *  would print
43 *       :this:
44 *       :is:
45 *       :a:
46 *       :test:
47 * -end example]
48 *
49 * pedwards@jaj.com  May 1999
50*/
51
52
53#include <string>
54#include <cstring>    // for strchr
55
56
57/*****************************************************************
58 * This is the only part of the implementation that I don't like.
59 * It can probably be improved upon by the reader...
60*/
namespace {
    // Returns true when c is one of the characters listed in wstr, a
    // NUL-terminated array of delimiter characters (must not be null).
    //
    // strchr() considers the terminating NUL to be part of the string it
    // searches, so a NUL c would always "match" the terminator.  The
    // terminator is not a delimiter the caller listed, so NUL is rejected
    // up front instead of being reported as whitespace.
    inline bool
    isws (char c, char const * const wstr)
    {
        return (c != '\0') && (std::strchr(wstr, c) != NULL);
    }
}
68
69
70/*****************************************************************
71 * Simplistic and quite Standard, but a bit slow.  This should be
72 * templatized on basic_string instead, or on a more generic StringT
73 * that just happens to support ::size_type, .substr(), and so on.
74 * I had hoped that "whitespace" would be a trait, but it isn't, so
 * the user must supply it.  Still, this lets them break up strings on
 * different delimiters more easily than traits would anyhow.
77*/
/*
 * stringtok -- split s into tokens and append each one to l.
 *
 *   l   Any container supporting push_back(std::string).  Existing
 *       elements are kept; tokens are appended in the order they occur.
 *   s   The string to tokenize.  It is not modified.
 *   ws  NUL-terminated list of delimiter characters (must not be null).
 *       Any run of one or more delimiters separates two tokens; leading
 *       and trailing delimiters are skipped, so no empty tokens are
 *       produced.  Only the characters listed in ws are delimiters; a NUL
 *       embedded in s is ordinary token data.
 *
 * The parameter is spelled std::string so that the header does not depend
 * on the includer having brought namespace std into scope.
 */
template <typename Container>
void
stringtok (Container &l, std::string const &s, char const * const ws = " \t\n")
{
    // Start of the first token; npos when s is empty or all delimiters.
    std::string::size_type  first = s.find_first_not_of(ws);

    while (first != std::string::npos) {
        // s[first] is not a delimiter, so this finds the end of the token.
        const std::string::size_type  last = s.find_first_of(ws, first);

        if (last == std::string::npos) {
            // Token runs to the end of s.  Handled separately because
            // last + 1 would wrap around to 0 and restart the scan.
            l.push_back(s.substr(first));
            break;
        }

        l.push_back(s.substr(first, last - first));

        // s[last] is a delimiter; skip it and any that follow.
        first = s.find_first_not_of(ws, last + 1);
    }
}
101
102
103