Note that there are some explanatory texts on larger screens.

plurals
  1. PO
    text
    copied!<p>Two things come to mind:</p> <ol> <li>Use string views instead of strings as the split result, saves a lot of allocations.</li> <li>If you know you're only going to be working with chars (in the [0,255] range), try using a bitset to test membership instead of <code>find</code>ing into the delimiter characters.</li> </ol> <p>Here's a quick attempt at applying these ideas:</p> <pre><code>#include &lt;vector&gt; #include &lt;bitset&gt; #include &lt;iostream&gt; #include &lt;boost/algorithm/string/split.hpp&gt; #include &lt;boost/algorithm/string/classification.hpp&gt; #include &lt;boost/timer.hpp&gt; using namespace std; size_t const N = 10000000; template&lt;typename C&gt; void test_custom(string const&amp; s, char const* d, C&amp; ret) { C output; bitset&lt;255&gt; delims; while( *d ) { unsigned char code = *d++; delims[code] = true; } typedef string::const_iterator iter; iter beg; bool in_token = false; for( string::const_iterator it = s.begin(), end = s.end(); it != end; ++it ) { if( delims[*it] ) { if( in_token ) { output.push_back(typename C::value_type(beg, it)); in_token = false; } } else if( !in_token ) { beg = it; in_token = true; } } if( in_token ) output.push_back(typename C::value_type(beg, s.end())); output.swap(ret); } template&lt;typename C&gt; void test_strpbrk(string const&amp; s, char const* delims, C&amp; ret) { C output; char const* p = s.c_str(); char const* q = strpbrk(p+1, delims); for( ; q != NULL; q = strpbrk(p, delims) ) { output.push_back(typename C::value_type(p, q)); p = q + 1; } output.swap(ret); } template&lt;typename C&gt; void test_boost(string const&amp; s, char const* delims) { C output; boost::split(output, s, boost::is_any_of(delims)); } int main() { // Generate random text string text(N, ' '); for( size_t i = 0; i != N; ++i ) text[i] = (i % 2 == 0)?('a'+(i/2)%26):((i/2)%2?' 
':'\t'); char const* delims = " \t[],-'/\\!\"§$%&amp;=()&lt;&gt;?"; // Output strings boost::timer timer; test_boost&lt;vector&lt;string&gt; &gt;(text, delims); cout &lt;&lt; "Time: " &lt;&lt; timer.elapsed() &lt;&lt; endl; // Output string views typedef string::const_iterator iter; typedef boost::iterator_range&lt;iter&gt; string_view; timer.restart(); test_boost&lt;vector&lt;string_view&gt; &gt;(text, delims); cout &lt;&lt; "Time: " &lt;&lt; timer.elapsed() &lt;&lt; endl; // Custom split timer.restart(); vector&lt;string&gt; vs; test_custom(text, delims, vs); cout &lt;&lt; "Time: " &lt;&lt; timer.elapsed() &lt;&lt; endl; // Custom split timer.restart(); vector&lt;string_view&gt; vsv; test_custom(text, delims, vsv); cout &lt;&lt; "Time: " &lt;&lt; timer.elapsed() &lt;&lt; endl; // Custom split timer.restart(); vector&lt;string&gt; vsp; test_strpbrk(text, delims, vsp); cout &lt;&lt; "Time: " &lt;&lt; timer.elapsed() &lt;&lt; endl; // Custom split timer.restart(); vector&lt;string_view&gt; vsvp; test_strpbrk(text, delims, vsvp); cout &lt;&lt; "Time: " &lt;&lt; timer.elapsed() &lt;&lt; endl; return 0; } </code></pre> <p>Compiling this with Boost 1.46.1 using GCC 4.5.1 with the <code>-O4</code> flag enabled I get:</p> <ul> <li>Time: 5.951 (Boost.Split + vector&lt;string&gt;)</li> <li>Time: 3.728 (Boost.Split + vector&lt;string_view&gt;)</li> <li>Time: 1.662 (Custom split + vector&lt;string&gt;)</li> <li>Time: 0.144 (Custom split + vector&lt;string_view&gt;)</li> <li>Time: 2.13 (Strpbrk + vector&lt;string&gt;)</li> <li>Time: 0.527 (Strpbrk + vector&lt;string_view&gt;)</li> </ul> <p>NOTE: There's a slight difference in the output as empty tokens are dropped by my custom function. But you can adapt this code to your needs if you decide to use it.</p>
 

Querying!

 
Guidance

SQuiL has stopped working due to an internal error.

If you are curious you may find further information in the browser console, which is accessible through the devtools (F12).

Reload