Note that there are some explanatory texts on larger screens.

plurals
  1. How Can I Improve/Speed Up This FrequentFunction in C?
    text
    copied!<p>How can I improve / speed up this frequent function?</p> <pre><code>#include &lt;math.h&gt; #include &lt;stdio.h&gt; #include &lt;stdlib.h&gt; #include &lt;time.h&gt; #define M 10 // This is fixed #define N 8 // This is NOT fixed // Assumptions: 1. x, a, b and c are all arrays of 10 (M). // 2. y and z are all matrices of 8 x 10 (N x M). // Requirement: 1. return the value of ret; // 2. get all elements of array c float fnFrequentFunction(const float* x, const float* const* y, const float* const* z, const float* a, const float* b, float *c, int n) { register float tmp; register float sum; register float ret = 0; register const float* yy; register const float* zz; int i; for (i = 0; i &lt; n; i++) // M == 1, 2, 4, or 8 { sum = 0; yy = y[i]; zz = z[i]; tmp = x[0] - yy[0]; sum += tmp * tmp * zz[0]; tmp = x[1] - yy[1]; sum += tmp * tmp * zz[1]; tmp = x[2] - yy[2]; sum += tmp * tmp * zz[2]; tmp = x[3] - yy[3]; sum += tmp * tmp * zz[3]; tmp = x[4] - yy[4]; sum += tmp * tmp * zz[4]; tmp = x[5] - yy[5]; sum += tmp * tmp * zz[5]; tmp = x[6] - yy[6]; sum += tmp * tmp * zz[6]; tmp = x[7] - yy[7]; sum += tmp * tmp * zz[7]; tmp = x[8] - yy[8]; sum += tmp * tmp * zz[8]; tmp = x[9] - yy[9]; sum += tmp * tmp * zz[9]; ret += (c[i] = log(a[i] * b[i]) + sum); } return ret; } // In the main function, all values are just example data. 
int main() { float x[M] = {0.001251f, 0.563585f, 0.193304f, 0.808741f, 0.585009f, 0.479873f, 0.350291f, 0.895962f, 0.622840f, 0.746605f}; float* y[N]; float* z[N]; float a[M] = {0.870205f, 0.733879f, 0.711386f, 0.588244f, 0.484176f, 0.852962f, 0.168126f, 0.684286f, 0.072573f, 0.632160f}; float b[M] = {0.871487f, 0.998108f, 0.798608f, 0.134831f, 0.576281f, 0.410779f, 0.402936f, 0.522935f, 0.623218f, 0.193030f}; float c[N]; float t1[M] = {0.864406f, 0.709006f, 0.091433f, 0.995727f, 0.227180f, 0.902585f, 0.659047f, 0.865627f, 0.846767f, 0.514359f}; float t2[M] = {0.866817f, 0.581347f, 0.175542f, 0.620197f, 0.781823f, 0.778588f, 0.938688f, 0.721610f, 0.940214f, 0.811353f}; int i, j; int n = 10000000; long start; // Initialize y, z for test example: for(i = 0; i &lt; N; ++i) { y[i] = (float*)malloc(sizeof(float) * M); z[i] = (float*)malloc(sizeof(float) * M); for(j = 0; j &lt; M; ++j) { y[i][j] = t1[j] * j; z[i][j] = t2[j] * j; } } // Speed test here: start = clock(); while(--n) fnFrequentFunction(x, y, z, a, b, c, 8); printf("Time used: %ld\n", clock() - start); // Output the result here: printf("fnFrequentFunction == %f\n", fnFrequentFunction(x, y, z, a, b, c, 8)); for(j = 0; j &lt; N; ++j) printf(" c[%d] == %f\n", j, c[j]); printf("\n"); // Free memory for(j = 0; j &lt; N; ++j) { free(y[j]); free(z[j]); } return 0; } </code></pre> <p>Any suggestions are welcome :-)</p> <p>I feel terrible that I made a big mistake in my function. The above code is the new one. I'm rechecking it now to make sure that is what I need.</p>
 

Querying!

 
Guidance

SQuiL has stopped working due to an internal error.

If you are curious, you may find further information in the browser console, which is accessible through the devtools (F12).

Reload