diff options
Diffstat (limited to 'benchtests/bench-skeleton.c')
-rw-r--r-- | benchtests/bench-skeleton.c | 101 |
1 files changed, 68 insertions, 33 deletions
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c index 13f986d817..4e3a50704d 100644 --- a/benchtests/bench-skeleton.c +++ b/benchtests/bench-skeleton.c @@ -21,55 +21,90 @@ #include <stdio.h> #include <time.h> #include <inttypes.h> +#include "bench-timing.h" +volatile unsigned int dontoptimize = 0; + +void +startup (void) +{ + /* This loop should cause CPU to switch to maximal freqency. + This makes subsequent measurement more accurate. We need a side effect + to prevent the loop being deleted by compiler. + This should be enough to cause CPU to speed up and it is simpler than + running loop for constant time. This is used when user does not have root + access to set a constant freqency. */ + for (int k = 0; k < 10000000; k++) + dontoptimize += 23 * dontoptimize + 2; +} + +#define TIMESPEC_AFTER(a, b) \ + (((a).tv_sec == (b).tv_sec) ? \ + ((a).tv_nsec > (b).tv_nsec) : \ + ((a).tv_sec > (b).tv_sec)) int main (int argc, char **argv) { - unsigned long i, j, k; - uint64_t total = 0, max = 0, min = 0x7fffffffffffffff; - struct timespec start, end; + unsigned long i, k; + struct timespec runtime; + timing_t start, end; + + startup(); - memset (&start, 0, sizeof (start)); - memset (&end, 0, sizeof (end)); + memset (&runtime, 0, sizeof (runtime)); - clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start); + unsigned long iters; - /* Measure 1000 times the resolution of the clock. So for a 1ns resolution - clock, we measure 1000 iterations of the function call at a time. - Measurements close to the minimum clock resolution won't make much sense, - but it's better than having nothing at all. */ - unsigned long iters = 1000 * start.tv_nsec; - unsigned long total_iters = ITER / iters; + TIMING_INIT (iters); - for (i = 0; i < NUM_SAMPLES; i++) + for (int v = 0; v < NUM_VARIANTS; v++) { - for (j = 0; j < total_iters; j ++) + /* Run for approximately DURATION seconds. */ + clock_gettime (CLOCK_MONOTONIC_RAW, &runtime); + runtime.tv_sec += DURATION; + + double d_total_i = 0; + timing_t total = 0, max = 0, min = 0x7fffffffffffffff; + while (1) { - clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start); - for (k = 0; k < iters; k++) - BENCH_FUNC(i); - clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &end); + for (i = 0; i < NUM_SAMPLES (v); i++) + { + uint64_t cur; + TIMING_NOW (start); + for (k = 0; k < iters; k++) + BENCH_FUNC (v, i); + TIMING_NOW (end); - uint64_t cur = (end.tv_nsec - start.tv_nsec - + ((end.tv_sec - start.tv_sec) - * (uint64_t) 1000000000)); + TIMING_DIFF (cur, start, end); - if (cur > max) - max = cur; + if (cur > max) + max = cur; - if (cur < min) - min = cur; + if (cur < min) + min = cur; - total += cur; + TIMING_ACCUM (total, cur); + + d_total_i += iters; + } + struct timespec curtime; + + memset (&curtime, 0, sizeof (curtime)); + clock_gettime (CLOCK_MONOTONIC_RAW, &curtime); + if (TIMESPEC_AFTER (curtime, runtime)) + goto done; } - } - double d_total_s = total * 1e-9; - double d_iters = iters; - double d_total_i = (double)ITER * NUM_SAMPLES; - printf (FUNCNAME ": ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n", - d_total_i, d_total_s, max / d_iters, min / d_iters, - d_total_i / d_total_s); + double d_total_s; + double d_iters; + + done: + d_total_s = total; + d_iters = iters; + + TIMING_PRINT_STATS (VARIANT (v), d_total_s, d_iters, d_total_i, max, + min); + } return 0; } |