Note that there are some explanatory texts on larger screens.

plurals
  1. POFast intersection of sets: C++ vs C#
    primarykey
    data
    text
    <p>On my machine (Quad core, 8gb ram), running Vista x64 Business, with Visual Studio 2008 SP1, I am trying to intersect two sets of numbers very quickly.</p> <p>I've implemented two approaches in C++, and one in C#. The C# approach is faster so far, I'd like to improve the C++ approach so its faster than C#, which I expect C++ can do.</p> <p>Here is the C# output: (Release build)</p> <pre><code>Found the intersection 1000 times, in 4741.407 ms </code></pre> <p>Here is the initial C++ output, for two different approaches (Release x64 build):</p> <pre><code>Found the intersection (using unordered_map) 1000 times, in 21580.7ms Found the intersection (using set_intersection) 1000 times, in 22366.6ms </code></pre> <p>Here is the latest C++ output, for three approaches (Release x64 build):</p> <p>Latest benchmark:</p> <pre><code>Found the intersection of 504 values (using unordered_map) 1000 times, in 28827.6ms Found the intersection of 495 values (using set_intersection) 1000 times, in 9817.69ms Found the intersection of 504 values (using unordered_set) 1000 times, in 24769.1ms </code></pre> <p>So, the set_intersection approach is now approx 2x slower than C#, but 2x faster than the initial C++ approaches.</p> <p>Latest C++ code:</p> <pre><code>Code: // MapPerformance.cpp : Defines the entry point for the console application. // #include "stdafx.h" #include &lt;hash_map&gt; #include &lt;vector&gt; #include &lt;iostream&gt; #include &lt;time.h&gt; #include &lt;algorithm&gt; #include &lt;set&gt; #include &lt;unordered_set&gt; #include &lt;boost\unordered\unordered_map.hpp&gt; #include "timer.h" using namespace std; using namespace stdext; using namespace boost; using namespace tr1; int runIntersectionTest2(const vector&lt;int&gt;&amp; set1, const vector&lt;int&gt;&amp; set2) { // hash_map&lt;int,int&gt; theMap; // map&lt;int,int&gt; theMap; unordered_set&lt;int&gt; theSet; theSet.insert( set1.begin(), set1.end() ); int intersectionSize = 0; vector&lt;int&gt;::const_iterator set2_end = set2.end(); for ( vector&lt;int&gt;::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator ) { if ( theSet.find(*iterator) != theSet.end() ) { intersectionSize++; } } return intersectionSize; } int runIntersectionTest(const vector&lt;int&gt;&amp; set1, const vector&lt;int&gt;&amp; set2) { // hash_map&lt;int,int&gt; theMap; // map&lt;int,int&gt; theMap; unordered_map&lt;int,int&gt; theMap; vector&lt;int&gt;::const_iterator set1_end = set1.end(); // Now intersect the two sets by populating the map for ( vector&lt;int&gt;::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator ) { int value = *iterator; theMap[value] = 1; } int intersectionSize = 0; vector&lt;int&gt;::const_iterator set2_end = set2.end(); for ( vector&lt;int&gt;::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator ) { int value = *iterator; unordered_map&lt;int,int&gt;::iterator foundValue = theMap.find(value); if ( foundValue != theMap.end() ) { theMap[value] = 2; intersectionSize++; } } return intersectionSize; } int runSetIntersection(const vector&lt;int&gt;&amp; set1_unsorted, const vector&lt;int&gt;&amp; set2_unsorted) { // Create two vectors std::vector&lt;int&gt; set1(set1_unsorted.size()); std::vector&lt;int&gt; set2(set2_unsorted.size()); // Copy the unsorted data into them std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin()); std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin()); // Sort the data sort(set1.begin(),set1.end()); sort(set2.begin(),set2.end()); vector&lt;int&gt; intersection; intersection.reserve(1000); set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), back_inserter(intersection)); return intersection.size(); } void createSets( vector&lt;int&gt;&amp; set1, vector&lt;int&gt;&amp; set2 ) { srand ( time(NULL) ); set1.reserve(100000); set2.reserve(1000); // Create 100,000 values for set1 for ( int i = 0; i &lt; 100000; i++ ) { int value = 1000000000 + i; set1.push_back(value); } // Try to get half of our values intersecting float ratio = 200000.0f / RAND_MAX; // Create 1,000 values for set2 for ( int i = 0; i &lt; 1000; i++ ) { int random = rand() * ratio + 1; int value = 1000000000 + random; set2.push_back(value); } // Make sure set1 is in random order (not sorted) random_shuffle(set1.begin(),set1.end()); } int _tmain(int argc, _TCHAR* argv[]) { int intersectionSize = 0; vector&lt;int&gt; set1, set2; createSets( set1, set2 ); Timer timer; for ( int i = 0; i &lt; 1000; i++ ) { intersectionSize = runIntersectionTest(set1, set2); } timer.Stop(); cout &lt;&lt; "Found the intersection of " &lt;&lt; intersectionSize &lt;&lt; " values (using unordered_map) 1000 times, in " &lt;&lt; timer.GetMilliseconds() &lt;&lt; "ms" &lt;&lt; endl; timer.Reset(); for ( int i = 0; i &lt; 1000; i++ ) { intersectionSize = runSetIntersection(set1,set2); } timer.Stop(); cout &lt;&lt; "Found the intersection of " &lt;&lt; intersectionSize &lt;&lt; " values (using set_intersection) 1000 times, in " &lt;&lt; timer.GetMilliseconds() &lt;&lt; "ms" &lt;&lt; endl; timer.Reset(); for ( int i = 0; i &lt; 1000; i++ ) { intersectionSize = runIntersectionTest2(set1,set2); } timer.Stop(); cout &lt;&lt; "Found the intersection of " &lt;&lt; intersectionSize &lt;&lt; " values (using unordered_set) 1000 times, in " &lt;&lt; timer.GetMilliseconds() &lt;&lt; "ms" &lt;&lt; endl; getchar(); return 0; } </code></pre> <p>C# code:</p> <pre><code>using System; using System.Collections.Generic; using System.Linq; using System.Text; namespace DictionaryPerformance { class Program { static void Main(string[] args) { List&lt;int&gt; set1 = new List&lt;int&gt;(100000); List&lt;int&gt; set2 = new List&lt;int&gt;(1000); // Create 100,000 values for set1 for (int i = 0; i &lt; 100000; i++) { int value = 1000000000 + i; set1.Add(value); } Random random = new Random(DateTime.Now.Millisecond); // Create 1,000 values for set2 for (int i = 0; i &lt; 1000; i++) { int value = 1000000000 + (random.Next() % 200000 + 1); set2.Add(value); } long start = System.Diagnostics.Stopwatch.GetTimestamp(); for (int i = 0; i &lt; 1000; i++) { runIntersectionTest(set1,set2); } long duration = System.Diagnostics.Stopwatch.GetTimestamp() - start; Console.WriteLine(String.Format("Found the intersection 1000 times, in {0} ms", ((float) duration * 1000.0f) / System.Diagnostics.Stopwatch.Frequency)); Console.ReadKey(); } static int runIntersectionTest(List&lt;int&gt; set1, List&lt;int&gt; set2) { Dictionary&lt;int,int&gt; theMap = new Dictionary&lt;int,int&gt;(100000); // Now intersect the two sets by populating the map foreach( int value in set1 ) { theMap[value] = 1; } int intersectionSize = 0; foreach ( int value in set2 ) { int count; if ( theMap.TryGetValue(value, out count ) ) { theMap[value] = 2; intersectionSize++; } } return intersectionSize; } } } </code></pre> <p>C++ code:</p> <pre><code>// MapPerformance.cpp : Defines the entry point for the console application. // #include "stdafx.h" #include &lt;hash_map&gt; #include &lt;vector&gt; #include &lt;iostream&gt; #include &lt;time.h&gt; #include &lt;algorithm&gt; #include &lt;set&gt; #include &lt;boost\unordered\unordered_map.hpp&gt; #include "timer.h" using namespace std; using namespace stdext; using namespace boost; int runIntersectionTest(vector&lt;int&gt; set1, vector&lt;int&gt; set2) { // hash_map&lt;int,int&gt; theMap; // map&lt;int,int&gt; theMap; unordered_map&lt;int,int&gt; theMap; // Now intersect the two sets by populating the map for ( vector&lt;int&gt;::iterator iterator = set1.begin(); iterator != set1.end(); iterator++ ) { int value = *iterator; theMap[value] = 1; } int intersectionSize = 0; for ( vector&lt;int&gt;::iterator iterator = set2.begin(); iterator != set2.end(); iterator++ ) { int value = *iterator; unordered_map&lt;int,int&gt;::iterator foundValue = theMap.find(value); if ( foundValue != theMap.end() ) { theMap[value] = 2; intersectionSize++; } } return intersectionSize; } int runSetIntersection(set&lt;int&gt; set1, set&lt;int&gt; set2) { set&lt;int&gt; intersection; set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end())); return intersection.size(); } int _tmain(int argc, _TCHAR* argv[]) { srand ( time(NULL) ); vector&lt;int&gt; set1; vector&lt;int&gt; set2; set1.reserve(10000); set2.reserve(1000); // Create 100,000 values for set1 for ( int i = 0; i &lt; 100000; i++ ) { int value = 1000000000 + i; set1.push_back(value); } // Create 1,000 values for set2 for ( int i = 0; i &lt; 1000; i++ ) { int random = rand() % 200000 + 1; random *= 10; int value = 1000000000 + random; set2.push_back(value); } Timer timer; for ( int i = 0; i &lt; 1000; i++ ) { runIntersectionTest(set1, set2); } timer.Stop(); cout &lt;&lt; "Found the intersection (using unordered_map) 1000 times, in " &lt;&lt; timer.GetMilliseconds() &lt;&lt; "ms" &lt;&lt; endl; set&lt;int&gt; set21; set&lt;int&gt; set22; // Create 100,000 values for set1 for ( int i = 0; i &lt; 100000; i++ ) { int value = 1000000000 + i; set21.insert(value); } // Create 1,000 values for set2 for ( int i = 0; i &lt; 1000; i++ ) { int random = rand() % 200000 + 1; random *= 10; int value = 1000000000 + random; set22.insert(value); } timer.Reset(); for ( int i = 0; i &lt; 1000; i++ ) { runSetIntersection(set21,set22); } timer.Stop(); cout &lt;&lt; "Found the intersection (using set_intersection) 1000 times, in " &lt;&lt; timer.GetMilliseconds() &lt;&lt; "ms" &lt;&lt; endl; getchar(); return 0; } </code></pre> <hr> <p>Ok, here is the latest, with some changes:</p> <ul> <li>The C++ sets are now properly setup so they have a 50% intersection (like the C#)</li> <li>Set1 is shuffled so its not sorted, set2 was already not sorted</li> <li>The set_intersection implementation now uses vectors, and sorts them first</li> </ul> <p>C++ (Release, x64) Results:</p> <pre><code>Found the intersection of 503 values (using unordered_map) 1000 times, in 35131.1ms Found the intersection of 494 values (using set_intersection) 1000 times, in 10317ms </code></pre> <p>So its 2x slower than C#. @Jalf: You're getting some pretty fast numbers, is there something I'm doing wrong here? </p> <p>C++ Code:</p> <pre><code>// MapPerformance.cpp : Defines the entry point for the console application. // #include "stdafx.h" #include &lt;hash_map&gt; #include &lt;vector&gt; #include &lt;iostream&gt; #include &lt;time.h&gt; #include &lt;algorithm&gt; #include &lt;set&gt; #include &lt;boost\unordered\unordered_map.hpp&gt; #include "timer.h" using namespace std; using namespace stdext; using namespace boost; int runIntersectionTest(const vector&lt;int&gt;&amp; set1, const vector&lt;int&gt;&amp; set2) { // hash_map&lt;int,int&gt; theMap; // map&lt;int,int&gt; theMap; unordered_map&lt;int,int&gt; theMap; vector&lt;int&gt;::const_iterator set1_end = set1.end(); // Now intersect the two sets by populating the map for ( vector&lt;int&gt;::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator ) { int value = *iterator; theMap[value] = 1; } int intersectionSize = 0; vector&lt;int&gt;::const_iterator set2_end = set2.end(); for ( vector&lt;int&gt;::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator ) { int value = *iterator; unordered_map&lt;int,int&gt;::iterator foundValue = theMap.find(value); if ( foundValue != theMap.end() ) { theMap[value] = 2; intersectionSize++; } } return intersectionSize; } int runSetIntersection(const vector&lt;int&gt; set1_unsorted, const vector&lt;int&gt; set2_unsorted) { // Create two vectors std::vector&lt;int&gt; set1(set1_unsorted.size()); std::vector&lt;int&gt; set2(set2_unsorted.size()); // Copy the unsorted data into them std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin()); std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin()); // Sort the data sort(set1.begin(),set1.end()); sort(set2.begin(),set2.end()); vector&lt;int&gt; intersection; intersection.reserve(1000); set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end())); return intersection.size(); } void createSets( vector&lt;int&gt;&amp; set1, vector&lt;int&gt;&amp; set2 ) { srand ( time(NULL) ); set1.reserve(100000); set2.reserve(1000); // Create 100,000 values for set1 for ( int i = 0; i &lt; 100000; i++ ) { int value = 1000000000 + i; set1.push_back(value); } // Try to get half of our values intersecting float ratio = 200000.0f / RAND_MAX; // Create 1,000 values for set2 for ( int i = 0; i &lt; 1000; i++ ) { int random = rand() * ratio + 1; int value = 1000000000 + random; set2.push_back(value); } // Make sure set1 is in random order (not sorted) random_shuffle(set1.begin(),set1.end()); } int _tmain(int argc, _TCHAR* argv[]) { int intersectionSize = 0; vector&lt;int&gt; set1, set2; createSets( set1, set2 ); Timer timer; for ( int i = 0; i &lt; 1000; i++ ) { intersectionSize = runIntersectionTest(set1, set2); } timer.Stop(); cout &lt;&lt; "Found the intersection of " &lt;&lt; intersectionSize &lt;&lt; " values (using unordered_map) 1000 times, in " &lt;&lt; timer.GetMilliseconds() &lt;&lt; "ms" &lt;&lt; endl; timer.Reset(); for ( int i = 0; i &lt; 1000; i++ ) { intersectionSize = runSetIntersection(set1,set2); } timer.Stop(); cout &lt;&lt; "Found the intersection of " &lt;&lt; intersectionSize &lt;&lt; " values (using set_intersection) 1000 times, in " &lt;&lt; timer.GetMilliseconds() &lt;&lt; "ms" &lt;&lt; endl; getchar(); return 0; } </code></pre>
    singulars
    1. This table or related slice is empty.
    plurals
    1. This table or related slice is empty.
    1. This table or related slice is empty.
 

Querying!

 
Guidance

SQuiL has stopped working due to an internal error.

If you are curious you may find further information in the browser console, which is accessible through the devtools (F12).

Reload