/*
 * stringtok.h -- Breaks a string into tokens.  This is an example for lib3.
 *
 * Template function looks like this:
 *
 *    template <typename Container>
 *    void stringtok (Container &l,
 *                    std::string const &s,
 *                    char const * const ws = " \t\n");
 *
 * A nondestructive version of strtok() that handles its own memory and can
 * be broken up by any character(s).  Does all the work at once rather than
 * in an invocation loop like strtok() requires.
 *
 * Container is any type that supports push_back(a_string), although using
 * list<string> and deque<string> are indicated due to their O(1) push_back.
 * (I prefer deque<> because op[]/at() is available as well.)  The first
 * parameter references an existing Container.
 *
 * s is the string to be tokenized.  From the parameter declaration, it can
 * be seen that s is not affected.  Since references-to-const may refer to
 * temporaries, you could use stringtok(some_container, readline("")) when
 * using the GNU readline library.
 *
 * The final parameter is an array of characters that serve as whitespace.
 * Whitespace characters default to one or more of tab, space, and newline,
 * in any combination.
 *
 * 'l' need not be empty on entry.  On return, 'l' will have the token
 * strings appended.
 *
 *
 * [Example:
 *       std::list<std::string>  ls;
 *       stringtok (ls, " this \t is\t\n a test ");
 *       for (std::list<std::string>::const_iterator i = ls.begin();
 *            i != ls.end(); ++i)
 *       {
 *            std::cerr << ':' << (*i) << ":\n";
 *       }
 *
 *  would print
 *       :this:
 *       :is:
 *       :a:
 *       :test:
 * -end example]
 *
 * pedwards@jaj.com  May 1999
*/

#ifndef STRINGTOK_H
#define STRINGTOK_H

#include <string>
#include <cstring>    // for strchr


/*****************************************************************
 * Returns true when c is one of the characters in the NUL-terminated
 * set wstr.
 *
 * strchr() considers the terminating NUL to be part of the string it
 * searches, so a NUL argument must be rejected explicitly; otherwise
 * an embedded '\0' in the input would always count as whitespace.
 *
 * stringtok() below no longer calls this helper; it is kept so that
 * existing code which uses it continues to compile.
*/
namespace {
    inline bool
    isws (char c, char const * const wstr)
    {
        return (c != '\0') && (std::strchr(wstr, c) != NULL);
    }
}


/*****************************************************************
 * Appends every token found in s to l.
 *
 *   l   container receiving the tokens via push_back(); any existing
 *       contents are left in place.
 *   s   string to split; it is not modified.
 *   ws  NUL-terminated set of delimiter characters.  Runs of delimiters
 *       are skipped, so no empty tokens are produced.
 *
 * Simplistic and quite Standard.  This should be templatized on
 * basic_string instead, or on a more generic StringT that just happens
 * to support ::size_type, .substr(), and so on.  I had hoped that
 * "whitespace" would be a trait, but it isn't, so the user must supply
 * it.  Enh, this lets them break up strings on different things easier
 * than traits would anyhow.
*/
template <typename Container>
void
stringtok (Container &l, std::string const &s, char const * const ws = " \t\n")
{
    const std::string::size_type S = s.size();
    std::string::size_type       i = 0;

    while (i < S) {
        // eat leading whitespace
        i = s.find_first_not_of(ws, i);
        if (i == std::string::npos) return;    // nothing left but WS

        // find end of word
        const std::string::size_type j = s.find_first_of(ws, i);
        if (j == std::string::npos) {
            // last word runs to the end of the string
            l.push_back(s.substr(i));
            return;
        }

        // add word
        l.push_back(s.substr(i, j - i));

        // set up for next loop; s[j] is a delimiter, so step past it
        i = j + 1;
    }
}

#endif /* STRINGTOK_H */