summary | refs | log | tree | commit | diff
path: root/benchtests/bench-skeleton.c
diff options
context:
space:
mode:
Diffstat (limited to 'benchtests/bench-skeleton.c')
-rw-r--r-- benchtests/bench-skeleton.c 101
1 files changed, 68 insertions, 33 deletions
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 13f986d817..4e3a50704d 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -21,55 +21,90 @@
#include <stdio.h>
#include <time.h>
#include <inttypes.h>
+#include "bench-timing.h"
+volatile unsigned int dontoptimize = 0; /* Written by startup (). */
+
+void
+startup (void)
+{
+ /* Busy loop intended to make the CPU switch to its maximum frequency,
+ which makes the subsequent measurements more accurate. The writes to the
+ volatile DONTOPTIMIZE are a side effect that keeps the compiler from
+ deleting the loop. A fixed iteration count should be enough for the CPU
+ to speed up and is simpler than running the loop for a constant time.
+ This helps when the user lacks root access to set a constant frequency. */
+ for (int k = 0; k < 10000000; k++)
+ dontoptimize += 23 * dontoptimize + 2;
+}
+
+#define TIMESPEC_AFTER(a, b) \
+ (((a).tv_sec == (b).tv_sec) ? \
+ ((a).tv_nsec > (b).tv_nsec) : \
+ ((a).tv_sec > (b).tv_sec)) /* Nonzero iff A is strictly later than B; A and B are each evaluated twice. */
int
main (int argc, char **argv)
{
- unsigned long i, j, k;
- uint64_t total = 0, max = 0, min = 0x7fffffffffffffff;
- struct timespec start, end;
+ unsigned long i, k;
+ struct timespec runtime; /* Deadline for the variant currently being run. */
+ timing_t start, end;
+
+ startup(); /* Warm up the CPU before any timing is done. */
- memset (&start, 0, sizeof (start));
- memset (&end, 0, sizeof (end));
+ memset (&runtime, 0, sizeof (runtime));
- clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start);
+ unsigned long iters;
- /* Measure 1000 times the resolution of the clock. So for a 1ns resolution
- clock, we measure 1000 iterations of the function call at a time.
- Measurements close to the minimum clock resolution won't make much sense,
- but it's better than having nothing at all. */
- unsigned long iters = 1000 * start.tv_nsec;
- unsigned long total_iters = ITER / iters;
+ TIMING_INIT (iters); /* Presumably sets ITERS, the number of calls per timed sample -- confirm in bench-timing.h. */
- for (i = 0; i < NUM_SAMPLES; i++)
+ for (int v = 0; v < NUM_VARIANTS; v++)
 {
- for (j = 0; j < total_iters; j ++)
+ /* Set the deadline: each variant runs for approximately DURATION seconds, checked after every full pass over its samples. */
+ clock_gettime (CLOCK_MONOTONIC_RAW, &runtime);
+ runtime.tv_sec += DURATION;
+
+ double d_total_i = 0;
+ timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
+ while (1) /* Left via `goto done' once the deadline has passed. */
 {
- clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start);
- for (k = 0; k < iters; k++)
- BENCH_FUNC(i);
- clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &end);
+ for (i = 0; i < NUM_SAMPLES (v); i++)
+ {
+ uint64_t cur;
+ TIMING_NOW (start);
+ for (k = 0; k < iters; k++)
+ BENCH_FUNC (v, i);
+ TIMING_NOW (end);
- uint64_t cur = (end.tv_nsec - start.tv_nsec
- + ((end.tv_sec - start.tv_sec)
- * (uint64_t) 1000000000));
+ TIMING_DIFF (cur, start, end);
- if (cur > max)
- max = cur;
+ if (cur > max)
+ max = cur;
- if (cur < min)
- min = cur;
+ if (cur < min)
+ min = cur;
- total += cur;
+ TIMING_ACCUM (total, cur);
+
+ d_total_i += iters; /* Running count of BENCH_FUNC calls for this variant. */
+ }
+ struct timespec curtime;
+
+ memset (&curtime, 0, sizeof (curtime));
+ clock_gettime (CLOCK_MONOTONIC_RAW, &curtime);
+ if (TIMESPEC_AFTER (curtime, runtime))
+ goto done;
 }
- }
- double d_total_s = total * 1e-9;
- double d_iters = iters;
- double d_total_i = (double)ITER * NUM_SAMPLES;
- printf (FUNCNAME ": ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n",
- d_total_i, d_total_s, max / d_iters, min / d_iters,
- d_total_i / d_total_s);
+ double d_total_s;
+ double d_iters;
+
+ done:
+ d_total_s = total;
+ d_iters = iters;
+
+ TIMING_PRINT_STATS (VARIANT (v), d_total_s, d_iters, d_total_i, max,
+ min);
+ }
 return 0;
 }