From bfdda211c6b082d12ec448a5a65bfba02e208b98 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Wed, 20 Apr 2016 10:58:20 +0530 Subject: benchtests: Update README to include instructions for bench-build target --- benchtests/README | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'benchtests/README') diff --git a/benchtests/README b/benchtests/README index 999d268cf8..847df87c96 100644 --- a/benchtests/README +++ b/benchtests/README @@ -34,6 +34,23 @@ the benchmark to use clock_gettime by invoking make as follows: Again, one must run `make bench-clean' before changing the measurement method. +Running benchmarks on another target: +==================================== + +If the target where you want to run benchmarks is not capable of building the +code or you're cross-building, you could build and execute the benchmark in +separate steps. On the build system run: + + $ make bench-build + +and then copy the source and build directories to the target and run the +benchmarks from the build directory as usual: + + $ make bench + +make sure the copy preserves timestamps by using either rsync or scp -p +otherwise the above command may try to build the benchmark again. + Adding a function to benchtests: =============================== -- cgit v1.2.3 From 2d304f3c6f5c34b4dd34c7ab0663d93adec14c26 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Wed, 20 Apr 2016 13:19:01 +0530 Subject: benchtests: Support for cross-building benchmarks This patch adds full support for cross-building benchmarks. Some benchmarks like those that need locales to be generated cannot be built and are hence skipped for cross builds. Tested by cross building for aarch64 on x86_64 and then running the generated benchmark on aarch64. * benchtests/Makefile (wcsmbs-benchset): Include only for native builds and runs. (LOCALES): Likewise. (bench-build): Build timing-type here instead of the bench target. Generate locale only for native builds. 
* benchtests/README: Add note for cross-building. --- ChangeLog | 7 +++++++ benchtests/Makefile | 23 +++++++++++++++++++---- benchtests/README | 4 +++- 3 files changed, 29 insertions(+), 5 deletions(-) (limited to 'benchtests/README') diff --git a/ChangeLog b/ChangeLog index f26f8f8f1a..ded10937fa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,12 @@ 2016-04-20 Siddhesh Poyarekar + * benchtests/Makefile (wcsmbs-benchset): Include only for + native builds and runs. + (LOCALES): Likewise. + (bench-build): Build timing-type here instead of the bench + target. Generate locale only for native builds. + * benchtests/README: Add note for cross-building. + * benchtests/Makefile (bench-clean): Clean up extra-objs. * benchtests/README: Update README to include instructions on diff --git a/benchtests/Makefile b/benchtests/Makefile index 779eb395d5..144b32ea38 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -39,11 +39,19 @@ string-benchset := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \ strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \ strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \ strcoll memcpy-large memmove-large memset-large + +# Build and run locale-dependent benchmarks only if we're building natively. 
+ifeq (no,$(cross-compiling)) wcsmbs-benchset := wcslen wcsnlen wcscpy wcpcpy wcsncpy wcpncpy wcscat wcsncat \ wcscmp wcsncmp wcschr wcschrnul wcsrchr wcsspn wcspbrk wcscspn \ wmemchr wmemset wmemcmp +else +wcsmbs-benchset := +endif + string-benchset-all := $(string-benchset) ${wcsmbs-benchset} +ifeq (no,$(cross-compiling)) # We have to generate locales LOCALES := en_US.UTF-8 tr_TR.UTF-8 cs_CZ.UTF-8 fa_IR.UTF-8 fr_FR.UTF-8 \ ja_JP.UTF-8 si_LK.UTF-8 en_GB.UTF-8 vi_VN.UTF-8 ar_SA.UTF-8 \ @@ -51,6 +59,7 @@ LOCALES := en_US.UTF-8 tr_TR.UTF-8 cs_CZ.UTF-8 fa_IR.UTF-8 fr_FR.UTF-8 \ he_IL.UTF-8 is_IS.UTF-8 es_ES.UTF-8 hi_IN.UTF-8 sv_SE.UTF-8 \ hu_HU.UTF-8 it_IT.UTF-8 sr_RS.UTF-8 zh_CN.UTF-8 include ../gen-locales.mk +endif stdlib-benchset := strtod @@ -131,10 +140,16 @@ bench-clean: rm -f $(timing-type) $(addsuffix .o,$(timing-type)) rm -f $(addprefix $(objpfx),$(bench-extra-objs)) -bench: $(timing-type) $(gen-locales) bench-build bench-set bench-func \ - bench-malloc -# Target to only build the benchmark without running it. -bench-build: $(binaries-bench) $(binaries-benchset) $(binaries-bench-malloc) +bench: bench-build bench-set bench-func bench-malloc +# Target to only build the benchmark without running it. We generate locales +# only if we're building natively. +ifeq (no,$(cross-compiling)) +bench-build: $(gen-locales) $(timing-type) $(binaries-bench) \ + $(binaries-benchset) $(binaries-bench-malloc) +else +bench-build: $(timing-type) $(binaries-bench) $(binaries-benchset) \ + $(binaries-bench-malloc) +endif bench-set: $(binaries-benchset) for run in $^; do \ diff --git a/benchtests/README b/benchtests/README index 847df87c96..2c5f381135 100644 --- a/benchtests/README +++ b/benchtests/README @@ -49,7 +49,9 @@ benchmarks from the build directory as usual: $ make bench make sure the copy preserves timestamps by using either rsync or scp -p -otherwise the above command may try to build the benchmark again. 
+otherwise the above command may try to build the benchmark again. Benchmarks +that require generated code to be executed during the build are skipped when +cross-building. Adding a function to benchtests: =============================== -- cgit v1.2.3 From beb52f502f0477465313675d2a0fbf3962e130b8 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Tue, 20 Jun 2017 16:26:26 +0100 Subject: Improve math benchmark infrastructure Improve support for math function benchmarking. This patch adds a feature that allows accurate benchmarking of traces extracted from real workloads. This is done by iterating over all samples rather than repeating each sample many times (which completely ignores branch prediction and cache effects). A trace can be added to existing math function inputs via "## name: workload-<name>", followed by the trace. * benchtests/README: Describe workload feature. * benchtests/bench-skeleton.c (main): Add support for benchmarking traces from workloads. --- ChangeLog | 6 +++++ benchtests/README | 6 +++++ benchtests/bench-skeleton.c | 57 +++++++++++++++++++++++++++++++-------------- 3 files changed, 51 insertions(+), 18 deletions(-) (limited to 'benchtests/README') diff --git a/ChangeLog b/ChangeLog index 2d81375287..80bb1f847e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2017-06-20 Wilco Dijkstra + + * benchtests/README: Describe workload feature. + * benchtests/bench-skeleton.c (main): Add support for + benchmarking traces from workloads. + 2017-06-20 Zack Weinberg * string/string.h (__mempcpy_inline): Delete. diff --git a/benchtests/README b/benchtests/README index 2c5f381135..b015acfd53 100644 --- a/benchtests/README +++ b/benchtests/README @@ -102,6 +102,12 @@ the same file by using the `name' directive that looks something like this: See the pow-inputs file for an example of what such a partitioned input file would look like. +It is also possible to measure throughput of a (partial) trace extracted from +a real workload. 
In this case the whole trace is iterated over multiple times +rather than repeating every input multiple times. This can be done via: + + ##name: workload-<name> + Benchmark Sets: ============== diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c index 09eb78df1b..3c6dad7055 100644 --- a/benchtests/bench-skeleton.c +++ b/benchtests/bench-skeleton.c @@ -68,34 +68,50 @@ main (int argc, char **argv) clock_gettime (CLOCK_MONOTONIC_RAW, &runtime); runtime.tv_sec += DURATION; + bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0; double d_total_i = 0; timing_t total = 0, max = 0, min = 0x7fffffffffffffff; int64_t c = 0; + uint64_t cur; while (1) { - for (i = 0; i < NUM_SAMPLES (v); i++) + if (is_bench) { - uint64_t cur; + /* Benchmark a real trace of calls - all samples are iterated + over once before repeating. This models actual use more + accurately than repeating the same sample many times. */ TIMING_NOW (start); for (k = 0; k < iters; k++) - BENCH_FUNC (v, i); + for (i = 0; i < NUM_SAMPLES (v); i++) + BENCH_FUNC (v, i); TIMING_NOW (end); - TIMING_DIFF (cur, start, end); + TIMING_ACCUM (total, cur); + d_total_i += iters * NUM_SAMPLES (v); + } + else + for (i = 0; i < NUM_SAMPLES (v); i++) + { + TIMING_NOW (start); + for (k = 0; k < iters; k++) + BENCH_FUNC (v, i); + TIMING_NOW (end); - if (cur > max) - max = cur; + TIMING_DIFF (cur, start, end); - if (cur < min) - min = cur; + if (cur > max) + max = cur; - TIMING_ACCUM (total, cur); - /* Accumulate timings for the value. In the end we will divide - by the total iterations. 
*/ + RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters); + + d_total_i += iters; + } c++; struct timespec curtime; @@ -117,11 +133,16 @@ main (int argc, char **argv) json_attr_double (&json_ctx, "duration", d_total_s); json_attr_double (&json_ctx, "iterations", d_total_i); - json_attr_double (&json_ctx, "max", max / d_iters); - json_attr_double (&json_ctx, "min", min / d_iters); - json_attr_double (&json_ctx, "mean", d_total_s / d_total_i); + if (is_bench) + json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i); + else + { + json_attr_double (&json_ctx, "max", max / d_iters); + json_attr_double (&json_ctx, "min", min / d_iters); + json_attr_double (&json_ctx, "mean", d_total_s / d_total_i); + } - if (detailed) + if (detailed && !is_bench) { json_array_begin (&json_ctx, "timings"); -- cgit v1.2.3 From 06b1de237801402c7da327f0a36f4e6aa2f06cc2 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Sat, 16 Sep 2017 11:47:32 +0530 Subject: benchtests: Use argparse to parse arguments Make the script more usable by adding proper command line options along with a way to query the options. The script is capable of doing a bunch of things right now like choosing a base for comparison, choosing to generate graphs, etc. and they should be accessible via command line switches. * benchtests/scripts/compare_strings.py: Use argparse. * benchtests/README: Document existence of compare_strings.py. --- ChangeLog | 5 +++++ benchtests/README | 11 +++++++++++ benchtests/scripts/compare_strings.py | 37 +++++++++++++++++++++++------------ 3 files changed, 40 insertions(+), 13 deletions(-) (limited to 'benchtests/README') diff --git a/ChangeLog b/ChangeLog index 58acb54b0a..fd9cc0ce99 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2017-09-16 Siddhesh Poyarekar + + * benchtests/scripts/compare_strings.py: Use argparse. + * benchtests/README: Document existence of compare_strings.py. + 2017-09-15 Joseph Myers * math/s_fma.c: Include . 
diff --git a/benchtests/README b/benchtests/README index b015acfd53..9aa750a519 100644 --- a/benchtests/README +++ b/benchtests/README @@ -122,3 +122,14 @@ To add a benchset for `foo': - Write your bench-foo.c that prints out the measurements to stdout. - On execution, a bench-foo.out is created in $(objpfx) with the contents of stdout. + +Reading String Benchmark Results: +================================ + +Some of the string benchmark results are now in JSON to make it easier to read +in scripts. Use the benchtests/scripts/compare_strings.py script to show the results +in a tabular format, generate graphs and more. Run + + benchtests/scripts/compare_strings.py -h + +for usage information. diff --git a/benchtests/scripts/compare_strings.py b/benchtests/scripts/compare_strings.py index b3c57e2b34..3ca9429d04 100755 --- a/benchtests/scripts/compare_strings.py +++ b/benchtests/scripts/compare_strings.py @@ -28,6 +28,7 @@ import sys import os import json import pylab +import argparse try: import jsonschema as validator @@ -118,22 +119,32 @@ def main(args): Take a string benchmark output file and compare timings. """ - if len(args) < 3: - print('Usage: %s [-base=ifunc_name] attr1 [attr2 ...]' % sys.argv[0]) - sys.exit(os.EX_USAGE) base_func = None - filename = args[0] - schema_filename = args[1] - if args[2].find('-base=') == 0: - base_func = args[2][6:] - attrs = args[3:] - else: - attrs = args[2:] - - results = parse_file(filename, schema_filename) + filename = args.input + schema_filename = args.schema + base_func = args.base + attrs = args.attributes.split(',') + + results = parse_file(args.input, args.schema) process_results(results, attrs, base_func) if __name__ == '__main__': - main(sys.argv[1:]) + parser = argparse.ArgumentParser() + + # The required arguments. 
+ req = parser.add_argument_group(title='required arguments') + req.add_argument('-a', '--attributes', required=True, + help='Comma separated list of benchmark attributes.') + req.add_argument('-i', '--input', required=True, + help='Input JSON benchmark result file.') + req.add_argument('-s', '--schema', required=True, + help='Schema file to validate the result file.') + + # Optional arguments. + parser.add_argument('-b', '--base', + help='IFUNC variant to set as baseline.') + + args = parser.parse_args() + main(args) -- cgit v1.2.3 From 0422ed1e84d923023f3bd57e723d3d4dc7569901 Mon Sep 17 00:00:00 2001 From: Victor Rodriguez Date: Tue, 28 Nov 2017 19:57:46 +0530 Subject: benchtests: Enable BENCHSET to run subset of tests This patch adds BENCHSET variable to benchtests/Makefile in order to provide the capability to run a list of subsets of benchmark tests, i.e.: make bench BENCHSET="bench-pthread bench-math malloc-thread" This helps users benchmark specific glibc areas. ChangeLog: * benchtests/Makefile: Add BENCHSET to allow subsets of benchmarks to be run. * benchtests/README: Add documentation for: Running subsets of benchmarks. Signed-off-by: Victor Rodriguez Signed-off-by: Icarus Sparry Reviewed-By: Siddhesh Poyarekar --- ChangeLog | 8 +++++++ benchtests/Makefile | 69 +++++++++++++++++++++++++++++++++++++---------------- benchtests/README | 19 +++++++++++++++ 3 files changed, 76 insertions(+), 20 deletions(-) (limited to 'benchtests/README') diff --git a/ChangeLog b/ChangeLog index 9685aec392..f7616dcb2b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2017-11-28 Victor Rodriguez + Icarus Sparry + + * benchtests/Makefile: Add BENCHSET to allow subsets of + benchmarks to be run. + * benchtests/README: Add documentation for: Running subsets of + benchmarks. 
+ 2017-11-28 Victor Rodriguez * benchtests/scripts/benchout.schema.json: Fix regex to accept a wider diff --git a/benchtests/Makefile b/benchtests/Makefile index 4157f8ce22..74b3821ccf 100644 --- a/benchtests/Makefile +++ b/benchtests/Makefile @@ -30,7 +30,11 @@ bench-pthread := pthread_once thread_create bench-string := ffs ffsll +ifeq (${BENCHSET},) bench := $(bench-math) $(bench-pthread) $(bench-string) +else +bench := $(foreach B,$(filter bench-%,${BENCHSET}), ${${B}}) +endif # String function benchmarks. string-benchset := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \ @@ -68,8 +72,12 @@ stdio-common-benchset := sprintf math-benchset := math-inlines +ifeq (${BENCHSET},) benchset := $(string-benchset-all) $(stdlib-benchset) $(stdio-common-benchset) \ $(math-benchset) +else +benchset := $(foreach B,$(filter %-benchset,${BENCHSET}), ${${B}}) +endif CFLAGS-bench-ffs.c += -fno-builtin CFLAGS-bench-ffsll.c += -fno-builtin @@ -81,7 +89,11 @@ CFLAGS-bench-fmaxf.c += -fno-builtin CFLAGS-bench-trunc.c += -fno-builtin CFLAGS-bench-truncf.c += -fno-builtin +ifeq (${BENCHSET},) bench-malloc := malloc-thread +else +bench-malloc := $(filter malloc-%,${BENCHSET}) +endif $(addprefix $(objpfx)bench-,$(bench-math)): $(libm) $(addprefix $(objpfx)bench-,$(math-benchset)): $(libm) @@ -149,6 +161,19 @@ bench-clean: rm -f $(timing-type) $(addsuffix .o,$(timing-type)) rm -f $(addprefix $(objpfx),$(bench-extra-objs)) +# Validate the passed in BENCHSET +ifneq ($(strip ${BENCHSET}),) +VALIDBENCHSETNAMES := bench-pthread bench-math bench-string string-benchset \ + wcsmbs-benchset stdlib-benchset stdio-common-benchset math-benchset \ + malloc-thread +INVALIDBENCHSETNAMES := $(filter-out ${VALIDBENCHSETNAMES},${BENCHSET}) +ifneq (${INVALIDBENCHSETNAMES},) +$(info The following values in BENCHSET are invalid: ${INVALIDBENCHSETNAMES}) +$(info The valid ones are: ${VALIDBENCHSETNAMES}) +$(error Invalid BENCHSET value) +endif +endif + # Define the bench target only if the target has 
a usable python installation. ifdef PYTHON bench: bench-build bench-set bench-func bench-malloc @@ -175,10 +200,11 @@ bench-set: $(binaries-benchset) done bench-malloc: $(binaries-bench-malloc) - run=$(objpfx)bench-malloc-thread; \ - for thr in 1 8 16 32; do \ - echo "Running $${run} $${thr}"; \ + for run in $^; do \ + for thr in 1 8 16 32; do \ + echo "Running $${run} $${thr}"; \ $(run-bench) $${thr} > $${run}-$${thr}.out; \ + done;\ done # Build and execute the benchmark functions. This target generates JSON @@ -186,25 +212,28 @@ bench-malloc: $(binaries-bench-malloc) # so one could even execute them individually and process it using any JSON # capable language or tool. bench-func: $(binaries-bench) + if [ -n '$^' ] ; then \ { timing_type=$$($(timing-type)); \ - echo "{\"timing_type\": \"$${timing_type}\","; \ - echo " \"functions\": {"; \ - for run in $^; do \ - if ! [ "x$${run}" = "x$<" ]; then \ - echo ","; \ + echo "{\"timing_type\": \"$${timing_type}\","; \ + echo " \"functions\": {"; \ + for run in $^; do \ + if ! 
[ "x$${run}" = "x$<" ]; then \ + echo ","; \ + fi; \ + echo "Running $${run}" >&2; \ + $(run-bench) $(DETAILED_OPT); \ + done; \ + echo; \ + echo " }"; \ + echo "}"; \ + } > $(objpfx)bench.out-tmp; \ + if [ -f $(objpfx)bench.out ]; then \ + mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \ fi; \ - echo "Running $${run}" >&2; \ - $(run-bench) $(DETAILED_OPT); \ - done; \ - echo; \ - echo " }"; \ - echo "}"; } > $(objpfx)bench.out-tmp; \ - if [ -f $(objpfx)bench.out ]; then \ - mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \ - fi; \ - mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out - $(PYTHON) scripts/validate_benchout.py $(objpfx)bench.out \ - scripts/benchout.schema.json + mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out; \ + $(PYTHON) scripts/validate_benchout.py $(objpfx)bench.out \ + scripts/benchout.schema.json; \ + fi $(timing-type) $(binaries-bench) $(binaries-benchset) \ $(binaries-bench-malloc): %: %.o $(objpfx)json-lib.o \ diff --git a/benchtests/README b/benchtests/README index 9aa750a519..4ddff794d1 100644 --- a/benchtests/README +++ b/benchtests/README @@ -53,6 +53,25 @@ otherwise the above command may try to build the benchmark again. Benchmarks that require generated code to be executed during the build are skipped when cross-building. +Running subsets of benchmarks: +============================== + +To run only a subset of benchmarks, one may invoke make as follows + + $ make bench BENCHSET="bench-pthread bench-math malloc-thread" + +where BENCHSET may be a space-separated list of the following values: + + bench-math + bench-pthread + bench-string + string-benchset + wcsmbs-benchset + stdlib-benchset + stdio-common-benchset + math-benchset + malloc-thread + Adding a function to benchtests: =============================== -- cgit v1.2.3