Note that there are some explanatory texts on larger screens.

plurals
  1. PONegative Speedup on Multithreading my Program
    primarykey
    data
    text
    <p>On my laptop with Intel Pentium dual-core processor T2370 (Acer Extensa) I ran a simple multithreading speedup test. I am using Linux. The code is pasted below. While I was expecting a speedup of 2-3 times, I was surprised to see a <strong>slowdown</strong> by a factor of 2. I tried the same with gcc optimization levels -O0 ... -O3, but every time I got the same result. I am using pthreads. I also tried the same with only two threads (instead of 3 threads in the code), but the performance was similar. </p> <p>What could be the reason? The faster version took reasonably long - about 20 secs - so it seems it is not an issue of startup overhead. </p> <p>NOTE: This code is quite buggy (indeed it does not make much sense as the output of serial and parallel versions would be different). The intention was just to "get" a speedup comparison for the same number of instructions. </p> <pre><code>#include &lt;stdio.h&gt; #include &lt;time.h&gt; #include &lt;unistd.h&gt; #include &lt;pthread.h&gt; class Thread{ private: pthread_t thread; static void *thread_func(void *d){((Thread *)d)-&gt;run();} public: Thread(){} virtual ~Thread(){} virtual void run(){} int start(){return pthread_create(&amp;thread, NULL, Thread::thread_func, (void*)this);} int wait(){return pthread_join(thread, NULL);} }; #include &lt;iostream&gt; const int ARR_SIZE = 100000000; const int N = 20; int arr[ARR_SIZE]; int main(void) { class Thread_a:public Thread{ public: Thread_a(int* a): arr_(a) {} void run() { for(int n = 0; n&lt;N; n++) for(int i=0; i&lt;ARR_SIZE/3; i++){ arr_[i] += arr_[i-1];} } private: int* arr_; }; class Thread_b:public Thread{ public: Thread_b(int* a): arr_(a) {} void run() { for(int n = 0; n&lt;N; n++) for(int i=ARR_SIZE/3; i&lt;2*ARR_SIZE/3; i++){ arr_[i] += arr_[i-1];} } private: int* arr_; }; class Thread_c:public Thread{ public: Thread_c(int* a): arr_(a) {} void run() { for(int n = 0; n&lt;N; n++) for(int i=2*ARR_SIZE/3; i&lt;ARR_SIZE; i++){ arr_[i] += arr_[i-1];} } private: 
int* arr_; }; { Thread *a=new Thread_a(arr); Thread *b=new Thread_b(arr); Thread *c=new Thread_c(arr); clock_t start = clock(); if (a-&gt;start() != 0) { return 1; } if (b-&gt;start() != 0) { return 1; } if (c-&gt;start() != 0) { return 1; } if (a-&gt;wait() != 0) { return 1; } if (b-&gt;wait() != 0) { return 1; } if (c-&gt;wait() != 0) { return 1; } clock_t end = clock(); double duration = (double)(end - start) / CLOCKS_PER_SEC; std::cout &lt;&lt; duration &lt;&lt; "seconds\n"; delete a; delete b; } { clock_t start = clock(); for(int n = 0; n&lt;N; n++) for(int i=0; i&lt;ARR_SIZE; i++){ arr[i] += arr[i-1];} clock_t end = clock(); double duration = (double)(end - start) / CLOCKS_PER_SEC; std::cout &lt;&lt; "serial: " &lt;&lt; duration &lt;&lt; "seconds\n"; } return 0; } </code></pre> <hr> <p>See also: <a href="https://stackoverflow.com/questions/612860/what-can-make-a-program-run-slower-when-using-more-threads">What can make a program run slower when using more threads?</a></p>
    singulars
    1. This table or related slice is empty.
    plurals
    1. This table or related slice is empty.
    1. This table or related slice is empty.
 

Querying!

 
Guidance

SQuiL has stopped working due to an internal error.

If you are curious you may find further information in the browser console, which is accessible through the devtools (F12).

Reload