Note that there are some explanatory texts on larger screens.

plurals
  1. PO Class using std::vector allocation slower than pointer allocation by a LOT
    primarykey
    data
    text
    <p>I have a class for handling allocation of arrays. My class is simple and it is defined as follows:</p> <p><strong>DimArray.hpp:</strong></p> <pre><code>#ifndef DIMARRAY_HPP_INCLUDED #define DIMARRAY_HPP_INCLUDED #include &lt;vector&gt; template&lt;typename T&gt; class DimArray { private: int Width, Height; std::vector&lt;T&gt; Data; public: DimArray(int Width, int Height); DimArray(T* Data, int Width, int Height); DimArray(T** Data, int Width, int Height); DimArray(const DimArray &amp;da); DimArray(DimArray &amp;&amp;da); inline int size() {return Width * Height;} inline int size() const {return Width * Height;} inline int width() {return Width;} inline int width() const {return Width;} inline int height() {return Height;} inline int height() const {return Height;} inline T* operator [](int Index) {return const_cast&lt;T*&gt;(Data.data()) + Height * Index;} inline const T* operator [](int Index) const {return Data.data() + Height * Index;} DimArray&amp; operator = (DimArray da); }; template&lt;typename T&gt; DimArray&lt;T&gt;::DimArray(int Width, int Height) : Width(Width), Height(Height), Data(Width * Height, 0) {} template&lt;typename T&gt; DimArray&lt;T&gt;::DimArray(T* Data, int Width, int Height) : Width(Width), Height(Height), Data(Width * Height, 0) {std::copy(Data, Data + Width * Height, const_cast&lt;T*&gt;(this-&gt;Data.data()));} template&lt;typename T&gt; DimArray&lt;T&gt;::DimArray(T** Data, int Width, int Height) : Width(Width), Height(Height), Data(Width * Height, 0) {std::copy(Data[0], Data[0] + Width * Height, const_cast&lt;T*&gt;(this-&gt;Data.data()));} template&lt;typename T&gt; DimArray&lt;T&gt;::DimArray(const DimArray &amp;da) : Width(da.Width), Height(da.Height), Data(da.Data) {} template&lt;typename T&gt; DimArray&lt;T&gt;::DimArray(DimArray &amp;&amp;da) : Width(std::move(da.Width)), Height(std::move(da.Height)), Data(std::move(da.Data)) {} template&lt;typename T&gt; DimArray&lt;T&gt;&amp; DimArray&lt;T&gt;::operator = 
(DimArray&lt;T&gt; da) { this-&gt;Width = da.Width; this-&gt;Height = da.Height; this-&gt;Data.swap(da.Data); return *this; } #endif // DIMARRAY_HPP_INCLUDED </code></pre> <p>For benchmark timing, I use the following:</p> <p><strong>Timer.hpp:</strong></p> <pre><code>#ifndef TIME_HPP_INCLUDED #define TIME_HPP_INCLUDED #include &lt;chrono&gt; #if defined _WIN32 || defined _WIN64 #include &lt;windows.h&gt; template&lt;typename T&gt; class Timer { private: typedef T duration; typedef typename T::rep rep; typedef typename T::period period; typedef std::chrono::time_point&lt;Timer, duration&gt; time_point; std::chrono::time_point&lt;Timer, duration&gt; Time; static const bool is_steady = true; const rep g_Frequency = []() -&gt; rep { LARGE_INTEGER frequency; QueryPerformanceFrequency(&amp;frequency); return frequency.QuadPart; }(); inline std::chrono::time_point&lt;Timer, duration&gt; now() { LARGE_INTEGER count; QueryPerformanceCounter(&amp;count); return time_point(duration(count.QuadPart * static_cast&lt;rep&gt;(period::den) / g_Frequency)); } public: inline void Start() {this-&gt;Time = this-&gt;now();} inline rep End() {return std::chrono::duration_cast&lt;T&gt;(this-&gt;now() - this-&gt;Time).count();} }; #else template&lt;typename T&gt; class Timer { private: static const bool is_steady = true; std::chrono::high_resolution_clock Clock; std::chrono::time_point&lt;std::chrono::high_resolution_clock&gt; Time; public: inline void Start() {this-&gt;Time = this-&gt;Clock.now();} inline T::rep End() {return std::chrono::duration_cast&lt;T&gt;(this-&gt;Clock.now() - this-&gt;Time).count();} }; #endif #endif // TIME_HPP_INCLUDED </code></pre> <p>And my benchmark is as follows:</p> <pre><code>int main() { Timer&lt;std::chrono::nanoseconds&gt; T; T.Start(); for (int i = 0; i &lt; 100; ++i) { int** T2DArray = new int*[10000]; for (int i = 0; i &lt; 10000; ++i) { T2DArray[i] = new int[10000]; } for (int i = 0; i &lt; 10000; ++i) { delete[] T2DArray[i]; } delete[] T2DArray; } 
std::cout&lt;&lt;T.End()&lt;&lt;" us\n\n"; T.Start(); for (int i = 0; i &lt; 100; ++i) { DimArray&lt;int&gt; TwoDArray(10000, 10000); } std::cout&lt;&lt;T.End()&lt;&lt;" us\n\n"; } </code></pre> <p>The results it printed were:</p> <pre><code>4.9599256 seconds //for int** 42.9303941 seconds //for DimArray&lt;int&gt; </code></pre> <p>That's a huge difference! I cannot figure out why?!</p> <p>So I changed it to:</p> <pre><code>int main() { Timer&lt;std::chrono::nanoseconds&gt; T; T.Start(); for (int i = 0; i &lt; 100; ++i) { int** T2DArray = new int*[10000]; for (int i = 0; i &lt; 10000; ++i) { T2DArray[i] = new int[10000]; } for (int i = 0; i &lt; 10000; ++i) { delete[] T2DArray[i]; } delete[] T2DArray; } std::cout&lt;&lt;T.End()&lt;&lt;" us\n\n"; T.Start(); for (int i = 0; i &lt; 100; ++i) { int* TwoDArray = new int[10000 * 10000]; delete[] TwoDArray; } std::cout&lt;&lt;T.End()&lt;&lt;" us\n\n"; } </code></pre> <p>and the results were:</p> <pre><code>4.6085725 seconds //for int** 0.1142958 seconds //for int* </code></pre> <p>Any ideas why my class that uses <code>std::vector</code> is so slow compared to using a raw pointer?</p>
    singulars
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    plurals
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    1. This table or related slice is empty.
 

Querying!

 
Guidance

SQuiL has stopped working due to an internal error.

If you are curious, you may find further information in the browser console, which is accessible through the devtools (F12).

Reload