Note that there are some explanatory texts on larger screens.

plurals
  1. PO vastly different run times on different machines
    primarykey
    data
    text
    <p>Background: I was tasked to write an MPI program in C that calculates all the primes up to a given number. This program runs correctly.</p> <p>I compile the program using openmpi and the -O3 optimization.</p> <p>When running it on my personal computer (Ubuntu 11.10 x64) using 1 process, I get the results I expect (~13 seconds for all the primes up to 4E9). The same is true for my CS department's machines.</p> <p>However, when I run it on Carver at NERSC, the time jumps dramatically (~61 seconds for 1 process).</p> <p>I have tried using both the openmpi and intel compilers...no difference. I got it to run with the proper times once, but I don't remember what (if anything) I did differently and there was a slight index error in my code that I have since fixed (not related to actually performing the computation, so the timings were accurate).</p> <p>I've tried to be as clear as possible; if you have any more questions, I'd be happy to answer. Thanks!</p> <pre><code>#include &lt;stdio.h&gt; #include &lt;mpi.h&gt; #include &lt;stdlib.h&gt; #include &lt;math.h&gt; #define MAX(x,y) ((x)&gt;(y) ? (x) : (y) ) #define MIN(x,y) ((x)&gt;(y) ? 
(y) : (x) ) #define A(i,j) A[(i)*M+j] #define b(i) b[i] #define c(i) c[i] long* preamble(long N,char* mark){ N = sqrt(N)+1; long size; long curr, index; long i, j,n; long count; long* primes; //Pierre Dusart proven upper bound for number of primes up to N //found at http://primes.utm.edu/howmany.shtml size = (N/log(N))*(1+(1.2762/log(N)))*sizeof(long); primes = (long *)malloc(size); if(N%2) n=N/2 - 2; else n=(N-1)/2 -1; index = 0; curr = 3; while (curr*curr&lt;=N) { for (i=(curr*curr-3)/2; i&lt;=n; i+=curr){ mark[i]=1; } while (mark[++index]) ; curr = index*2+3; } /*number of primes*/ count = 0; for(i = 0; i &lt;=n; i+=1){ if(mark[i] == 0) { primes[++count]=i*2+3; } } primes[0]=count; return primes; } long FMIB(long p, long b){ if(b%p==0 &amp;&amp; b!=p) return b; long i = b + p - b % p; if(i%2){return i;}else{return i+p;} } int main(int argc, char **argv) { long N = 4000000000; long BKSIZE = 500000; char *mark; long *primes; long *loopprimes; long size, offset; long numprimes; long i, j, n, ii, start, index; long count, total; double time; if ( argc &gt; 1 ) N = atol(argv[1]); if ( argc &gt; 2 ) BKSIZE = atol(argv[2]); int id, p; BKSIZE = (BKSIZE-3)/2 +1; if(N%2) n=N/2 - 2; else n=(N-1)/2 -1; MPI_Init(&amp;argc, &amp;argv); MPI_Comm_rank(MPI_COMM_WORLD, &amp;id); MPI_Comm_size(MPI_COMM_WORLD, &amp;p); MPI_Barrier(MPI_COMM_WORLD); if(id==0) time = MPI_Wtime(); size = (n/p+1)*sizeof(char); mark = (char *)malloc(size); for (i=1; i&lt;=n/p+1; i++){ mark[i]=0; } primes = preamble(N,mark); if(id!=0){ for (i=0; i&lt;=n/p+1; i++){ mark[i]=0; } } offset = (1+n/p)*id; numprimes=primes[0]; if(id==0){ start = (sqrt(N)-3)/2+1; //mark index to start at }else{ start = offset; } //MAIN COMPUTATION - BLOCKING for(ii=start; ii&lt;=MIN(ii+BKSIZE,offset+n/p); ii+=BKSIZE){ for(j=0; j &lt; numprimes; j++){ for(i=(FMIB(primes[j+1],ii*2+3)-3)/2; i&lt;=MIN(ii+BKSIZE,offset+n/p); i+=primes[j+1]){ mark[i-offset]=1; } } } /*number of primes*/ if(id==0){ count = 1; }else{ count = 0; } for(i = 
0; i &lt;= n/p &amp;&amp; (i+offset)*2+3 &lt;= N; i++){ if(mark[i] == 0) { ++count; } } MPI_Barrier(MPI_COMM_WORLD); MPI_Reduce(&amp;count, &amp;total, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); if(id==0){ time = MPI_Wtime() - time; printf("There are %ld primes less than %ld\n", total, N); printf("First three primes:"); j = 1; printf(" %d", 2); for ( i=0 ; i &lt;= n &amp;&amp; j &lt; 3; i+=1 ) { if (mark[i]==0){ printf(" %ld", (i*2+3)); ++j; } } printf("\n"); } MPI_Barrier(MPI_COMM_WORLD); if(id == p-1){ printf("Last three primes:"); j = 0; for (i = n-offset; i &gt;= 0 &amp;&amp; j &lt; 3; i--){ if (mark[i]==0){ printf(" %ld", ((offset+i)*2+3)); j++; } } if(j &lt; 3){ printf(" %d",2); } printf("\n"); } MPI_Barrier(MPI_COMM_WORLD); if(id == 0){ printf("Elapsed time: %f seconds\n",time); fflush(stdout); } MPI_Finalize(); //free(mark); return 0; } </code></pre> <p>Script:</p> <pre><code>#!/bin/csh #used for intel compiler #module unload pgi openmpi #module load intel openmpi-intel mkl make clean make set x = "sieve_mpi" set n = 4000000000 set b = 500000 foreach p ( 1 2 3 4 5 6 7 8 ) cat &gt; ${x}${p}.pbs &lt;&lt;EOF #PBS -q regular #PBS -l nodes=1:ppn=8 #PBS -l walltime=00:01:00 #PBS -N ${x}${p} #PBS -e err/${x}${p}.err #PBS -o out/${x}${p}.out #used when using intel compiler #module unload pgi openmpi #module load intel openmpi-intel mkl cd \$PBS_O_WORKDIR echo ${x} echo ${p} mpirun -np ${p} ${x} ${n} ${b} EOF qsub ${x}${p}.pbs end </code></pre> <p>Makefile:</p> <pre><code>CC = mpicc EXEC = pi_cyc pi_block sieve_mpi OBJS = H_FILE = MATHFLAG = -lm FLAGS = -O3 SEQFLAGS = -O3 all: $(EXEC) pi_cyc: pi_cyc.c $(OBJS) $(H_FILE) $(CC) $(FLAGS) -o $@ pi_cyc.c $(OBJS) $(MATHFLAG) pi_block: pi_block.c $(OBJS) $(H_FILE) $(CC) $(FLAGS) -o $@ pi_block.c $(OBJS) $(MATHFLAG) sieve_mpi: sieve_mpi.c $(OBJS) $(H_FILE) $(CC) $(FLAGS) -o $@ sieve_mpi.c $(OBJS) $(MATHFLAG) clean: rm -f *.o *.pgm $(OBJS) $(EXEC) </code></pre>
    singulars
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    plurals
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    1. This table or related slice is empty.
 

Querying!

 
Guidance

SQuiL has stopped working due to an internal error.

If you are curious you may find further information in the browser console, which is accessible through the devtools (F12).

Reload