From bfdda211c6b082d12ec448a5a65bfba02e208b98 Mon Sep 17 00:00:00 2001
From: Siddhesh Poyarekar <sid@reserved-bit.com>
Date: Wed, 20 Apr 2016 10:58:20 +0530
Subject: benchtests: Update README to include instructions for bench-build
 target

---
 benchtests/README | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'benchtests/README')

diff --git a/benchtests/README b/benchtests/README
index 999d268cf8..847df87c96 100644
--- a/benchtests/README
+++ b/benchtests/README
@@ -34,6 +34,23 @@ the benchmark to use clock_gettime by invoking make as follows:
 
 Again, one must run `make bench-clean' before changing the measurement method.
 
+Running benchmarks on another target:
+====================================
+
+If the target where you want to run benchmarks is not capable of building the
+code or you're cross-building, you could build and execute the benchmark in
+separate steps.  On the build system run:
+
+  $ make bench-build
+
+and then copy the source and build directories to the target and run the
+benchmarks from the build directory as usual:
+
+  $ make bench
+
+make sure the copy preserves timestamps by using either rsync or scp -p
+otherwise the above command may try to build the benchmark again.
+
 Adding a function to benchtests:
 ===============================
 
-- 
cgit v1.2.3


From 2d304f3c6f5c34b4dd34c7ab0663d93adec14c26 Mon Sep 17 00:00:00 2001
From: Siddhesh Poyarekar <sid@reserved-bit.com>
Date: Wed, 20 Apr 2016 13:19:01 +0530
Subject: benchtests: Support for cross-building benchmarks

This patch adds full support for cross-building benchmarks.  Some
benchmarks like those that need locales to be generated cannot be
built and are hence skipped for cross builds.

Tested by cross building for aarch64 on x86_64 and then running the
generated benchmark on aarch64.

	* benchtests/Makefile (wcsmbs-benchset): Include only for
	native builds and runs.
	(LOCALES): Likewise.
	(bench-build): Build timing-type here instead of the bench
	target.  Generate locale only for native builds.
	* benchtests/README: Add note for cross-building.
---
 ChangeLog           |  7 +++++++
 benchtests/Makefile | 23 +++++++++++++++++++----
 benchtests/README   |  4 +++-
 3 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'benchtests/README')

diff --git a/ChangeLog b/ChangeLog
index f26f8f8f1a..ded10937fa 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
 2016-04-20  Siddhesh Poyarekar  <sid@reserved-bit.com>
 
+	* benchtests/Makefile (wcsmbs-benchset): Include only for
+	native builds and runs.
+	(LOCALES): Likewise.
+	(bench-build): Build timing-type here instead of the bench
+	target.  Generate locale only for native builds.
+	* benchtests/README: Add note for cross-building.
+
 	* benchtests/Makefile (bench-clean): Clean up extra-objs.
 
 	* benchtests/README: Update README to include instructions on
diff --git a/benchtests/Makefile b/benchtests/Makefile
index 779eb395d5..144b32ea38 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -39,11 +39,19 @@ string-benchset := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
 		   strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
 		   strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
 		   strcoll memcpy-large memmove-large memset-large
+
+# Build and run locale-dependent benchmarks only if we're building natively.
+ifeq (no,$(cross-compiling))
 wcsmbs-benchset := wcslen wcsnlen wcscpy wcpcpy wcsncpy wcpncpy wcscat wcsncat \
 		   wcscmp wcsncmp wcschr wcschrnul wcsrchr wcsspn wcspbrk wcscspn \
 		   wmemchr wmemset wmemcmp
+else
+wcsmbs-benchset :=
+endif
+
 string-benchset-all := $(string-benchset) ${wcsmbs-benchset}
 
+ifeq (no,$(cross-compiling))
 # We have to generate locales
 LOCALES := en_US.UTF-8 tr_TR.UTF-8 cs_CZ.UTF-8 fa_IR.UTF-8 fr_FR.UTF-8 \
 	   ja_JP.UTF-8 si_LK.UTF-8 en_GB.UTF-8 vi_VN.UTF-8 ar_SA.UTF-8 \
@@ -51,6 +59,7 @@ LOCALES := en_US.UTF-8 tr_TR.UTF-8 cs_CZ.UTF-8 fa_IR.UTF-8 fr_FR.UTF-8 \
 	   he_IL.UTF-8 is_IS.UTF-8 es_ES.UTF-8 hi_IN.UTF-8 sv_SE.UTF-8 \
 	   hu_HU.UTF-8 it_IT.UTF-8 sr_RS.UTF-8 zh_CN.UTF-8
 include ../gen-locales.mk
+endif
 
 stdlib-benchset := strtod
 
@@ -131,10 +140,16 @@ bench-clean:
 	rm -f $(timing-type) $(addsuffix .o,$(timing-type))
 	rm -f $(addprefix $(objpfx),$(bench-extra-objs))
 
-bench: $(timing-type) $(gen-locales) bench-build bench-set bench-func \
-	bench-malloc
-# Target to only build the benchmark without running it.
-bench-build: $(binaries-bench) $(binaries-benchset) $(binaries-bench-malloc)
+bench: bench-build bench-set bench-func bench-malloc
+# Target to only build the benchmark without running it.  We generate locales
+# only if we're building natively.
+ifeq (no,$(cross-compiling))
+bench-build: $(gen-locales) $(timing-type) $(binaries-bench) \
+	$(binaries-benchset) $(binaries-bench-malloc)
+else
+bench-build: $(timing-type) $(binaries-bench) $(binaries-benchset) \
+	$(binaries-bench-malloc)
+endif
 
 bench-set: $(binaries-benchset)
 	for run in $^; do \
diff --git a/benchtests/README b/benchtests/README
index 847df87c96..2c5f381135 100644
--- a/benchtests/README
+++ b/benchtests/README
@@ -49,7 +49,9 @@ benchmarks from the build directory as usual:
   $ make bench
 
 make sure the copy preserves timestamps by using either rsync or scp -p
-otherwise the above command may try to build the benchmark again.
+otherwise the above command may try to build the benchmark again.  Benchmarks
+that require generated code to be executed during the build are skipped when
+cross-building.
 
 Adding a function to benchtests:
 ===============================
-- 
cgit v1.2.3


From beb52f502f0477465313675d2a0fbf3962e130b8 Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wdijkstr@arm.com>
Date: Tue, 20 Jun 2017 16:26:26 +0100
Subject: Improve math benchmark infrastructure

Improve support for math function benchmarking.  This patch adds
a feature that allows accurate benchmarking of traces extracted
from real workloads.  This is done by iterating over all samples
rather than repeating each sample many times (which completely
ignores branch prediction and cache effects).  A trace can be
added to existing math function inputs via
"## name: workload-<name>", followed by the trace.

        * benchtests/README: Describe workload feature.
        * benchtests/bench-skeleton.c (main): Add support for
        benchmarking traces from workloads.
---
 ChangeLog                   |  6 +++++
 benchtests/README           |  6 +++++
 benchtests/bench-skeleton.c | 57 +++++++++++++++++++++++++++++++--------------
 3 files changed, 51 insertions(+), 18 deletions(-)

(limited to 'benchtests/README')

diff --git a/ChangeLog b/ChangeLog
index 2d81375287..80bb1f847e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2017-06-20  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	* benchtests/README: Describe workload feature.
+	* benchtests/bench-skeleton.c (main): Add support for
+	benchmarking traces from workloads.
+
 2017-06-20  Zack Weinberg  <zackw@panix.com>
 
 	* string/string.h (__mempcpy_inline): Delete.
diff --git a/benchtests/README b/benchtests/README
index 2c5f381135..b015acfd53 100644
--- a/benchtests/README
+++ b/benchtests/README
@@ -102,6 +102,12 @@ the same file by using the `name' directive that looks something like this:
 See the pow-inputs file for an example of what such a partitioned input file
 would look like.
 
+It is also possible to measure throughput of a (partial) trace extracted from
+a real workload.  In this case the whole trace is iterated over multiple times
+rather than repeating every input multiple times.  This can be done via:
+
+  ##name: workload-<name>
+
 Benchmark Sets:
 ==============
 
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 09eb78df1b..3c6dad7055 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -68,34 +68,50 @@ main (int argc, char **argv)
       clock_gettime (CLOCK_MONOTONIC_RAW, &runtime);
       runtime.tv_sec += DURATION;
 
+      bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;
       double d_total_i = 0;
       timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
       int64_t c = 0;
+      uint64_t cur;
       while (1)
 	{
-	  for (i = 0; i < NUM_SAMPLES (v); i++)
+	  if (is_bench)
 	    {
-	      uint64_t cur;
+	      /* Benchmark a real trace of calls - all samples are iterated
+		 over once before repeating.  This models actual use more
+		 accurately than repeating the same sample many times.  */
 	      TIMING_NOW (start);
 	      for (k = 0; k < iters; k++)
-		BENCH_FUNC (v, i);
+		for (i = 0; i < NUM_SAMPLES (v); i++)
+		  BENCH_FUNC (v, i);
 	      TIMING_NOW (end);
-
 	      TIMING_DIFF (cur, start, end);
+	      TIMING_ACCUM (total, cur);
+	      d_total_i += iters * NUM_SAMPLES (v);
+	    }
+	  else
+	    for (i = 0; i < NUM_SAMPLES (v); i++)
+	      {
+		TIMING_NOW (start);
+		for (k = 0; k < iters; k++)
+		  BENCH_FUNC (v, i);
+		TIMING_NOW (end);
 
-	      if (cur > max)
-		max = cur;
+		TIMING_DIFF (cur, start, end);
 
-	      if (cur < min)
-		min = cur;
+		if (cur > max)
+		  max = cur;
 
-	      TIMING_ACCUM (total, cur);
-	      /* Accumulate timings for the value.  In the end we will divide
-	         by the total iterations.  */
-	      RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters);
+		if (cur < min)
+		  min = cur;
 
-	      d_total_i += iters;
-	    }
+		TIMING_ACCUM (total, cur);
+		/* Accumulate timings for the value.  In the end we will divide
+		   by the total iterations.  */
+		RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters);
+
+		d_total_i += iters;
+	      }
 	  c++;
 	  struct timespec curtime;
 
@@ -117,11 +133,16 @@ main (int argc, char **argv)
 
       json_attr_double (&json_ctx, "duration", d_total_s);
       json_attr_double (&json_ctx, "iterations", d_total_i);
-      json_attr_double (&json_ctx, "max", max / d_iters);
-      json_attr_double (&json_ctx, "min", min / d_iters);
-      json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
+      if (is_bench)
+	json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);
+      else
+	{
+	  json_attr_double (&json_ctx, "max", max / d_iters);
+	  json_attr_double (&json_ctx, "min", min / d_iters);
+	  json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
+	}
 
-      if (detailed)
+      if (detailed && !is_bench)
 	{
 	  json_array_begin (&json_ctx, "timings");
 
-- 
cgit v1.2.3


From 06b1de237801402c7da327f0a36f4e6aa2f06cc2 Mon Sep 17 00:00:00 2001
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
Date: Sat, 16 Sep 2017 11:47:32 +0530
Subject: benchtests: Use argparse to parse arguments

Make the script more usable by adding proper command line options
along with a way to query the options.  The script is capable of doing
a bunch of things right now like choosing a base for comparison,
choosing to generate graphs, etc. and they should be accessible via
command line switches.

	* benchtests/scripts/compare_strings.py: Use argparse.
	* benchtests/README: Document existence of compare_strings.py.
---
 ChangeLog                             |  5 +++++
 benchtests/README                     | 11 +++++++++++
 benchtests/scripts/compare_strings.py | 37 +++++++++++++++++++++++------------
 3 files changed, 40 insertions(+), 13 deletions(-)

(limited to 'benchtests/README')

diff --git a/ChangeLog b/ChangeLog
index 58acb54b0a..fd9cc0ce99 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2017-09-16  Siddhesh Poyarekar  <siddhesh@sourceware.org>
+
+	* benchtests/scripts/compare_strings.py: Use argparse.
+	* benchtests/README: Document existence of compare_strings.py.
+
 2017-09-15  Joseph Myers  <joseph@codesourcery.com>
 
 	* math/s_fma.c: Include <libm-alias-double.h>.
diff --git a/benchtests/README b/benchtests/README
index b015acfd53..9aa750a519 100644
--- a/benchtests/README
+++ b/benchtests/README
@@ -122,3 +122,14 @@ To add a benchset for `foo':
 - Write your bench-foo.c that prints out the measurements to stdout.
 - On execution, a bench-foo.out is created in $(objpfx) with the contents of
   stdout.
+
+Reading String Benchmark Results:
+================================
+
+Some of the string benchmark results are now in JSON to make it easier to read
+in scripts.  Use the benchtests/scripts/compare_strings.py script to show the
+results in a tabular format, generate graphs and more.  Run
+
+    benchtests/scripts/compare_strings.py -h
+
+for usage information.
diff --git a/benchtests/scripts/compare_strings.py b/benchtests/scripts/compare_strings.py
index b3c57e2b34..3ca9429d04 100755
--- a/benchtests/scripts/compare_strings.py
+++ b/benchtests/scripts/compare_strings.py
@@ -28,6 +28,7 @@ import sys
 import os
 import json
 import pylab
+import argparse
 
 try:
     import jsonschema as validator
@@ -118,22 +119,32 @@ def main(args):
 
     Take a string benchmark output file and compare timings.
     """
-    if len(args) < 3:
-        print('Usage: %s <input file> <schema file> [-base=ifunc_name] attr1 [attr2 ...]' % sys.argv[0])
-        sys.exit(os.EX_USAGE)
 
     base_func = None
-    filename = args[0]
-    schema_filename = args[1]
-    if args[2].find('-base=') == 0:
-        base_func = args[2][6:]
-        attrs = args[3:]
-    else:
-        attrs = args[2:]
-
-    results = parse_file(filename, schema_filename)
+    filename = args.input
+    schema_filename = args.schema
+    base_func = args.base
+    attrs = args.attributes.split(',')
+
+    results = parse_file(args.input, args.schema)
     process_results(results, attrs, base_func)
 
 
 if __name__ == '__main__':
-    main(sys.argv[1:])
+    parser = argparse.ArgumentParser()
+
+    # The required arguments.
+    req = parser.add_argument_group(title='required arguments')
+    req.add_argument('-a', '--attributes', required=True,
+                        help='Comma separated list of benchmark attributes.')
+    req.add_argument('-i', '--input', required=True,
+                        help='Input JSON benchmark result file.')
+    req.add_argument('-s', '--schema', required=True,
+                        help='Schema file to validate the result file.')
+
+    # Optional arguments.
+    parser.add_argument('-b', '--base',
+                        help='IFUNC variant to set as baseline.')
+
+    args = parser.parse_args()
+    main(args)
-- 
cgit v1.2.3


From 0422ed1e84d923023f3bd57e723d3d4dc7569901 Mon Sep 17 00:00:00 2001
From: Victor Rodriguez <victor.rodriguez.bahena@intel.com>
Date: Tue, 28 Nov 2017 19:57:46 +0530
Subject: benchtests: Enable BENCHSET to run subset of tests

This patch adds BENCHSET variable to benchtests/Makefile in order to
provide the capability to run a list of subsets of benchmark tests, i.e.:

    make bench BENCHSET="bench-pthread bench-math malloc-thread"

This helps users to benchmark specific glibc areas.

ChangeLog:

        * benchtests/Makefile: Add BENCHSET to allow subsets of
        benchmarks to be run.
        * benchtests/README: Add documentation for: Running subsets of
        benchmarks.

Signed-off-by: Victor Rodriguez <victor.rodriguez.bahena@intel.com>
Signed-off-by: Icarus Sparry <icarus.w.sparry@intel.com>
Reviewed-By: Siddhesh Poyarekar <siddhesh@sourceware.org>
---
 ChangeLog           |  8 +++++++
 benchtests/Makefile | 69 +++++++++++++++++++++++++++++++++++++----------------
 benchtests/README   | 19 +++++++++++++++
 3 files changed, 76 insertions(+), 20 deletions(-)

(limited to 'benchtests/README')

diff --git a/ChangeLog b/ChangeLog
index 9685aec392..f7616dcb2b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2017-11-28  Victor Rodriguez  <victor.rodriguez.bahena@intel.com>
+	    Icarus Sparry  <icarus.w.sparry@intel.com>
+
+	* benchtests/Makefile: Add BENCHSET to allow subsets of
+	benchmarks to be run.
+	* benchtests/README: Add documentation for: Running subsets of
+	benchmarks.
+
 2017-11-28  Victor Rodriguez  <victor.rodriguez.bahena@intel.com>
 
 	* benchtests/scripts/benchout.schema.json: Fix regex to accept a wider
diff --git a/benchtests/Makefile b/benchtests/Makefile
index 4157f8ce22..74b3821ccf 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -30,7 +30,11 @@ bench-pthread := pthread_once thread_create
 
 bench-string := ffs ffsll
 
+ifeq (${BENCHSET},)
 bench := $(bench-math) $(bench-pthread) $(bench-string)
+else
+bench := $(foreach B,$(filter bench-%,${BENCHSET}), ${${B}})
+endif
 
 # String function benchmarks.
 string-benchset := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
@@ -68,8 +72,12 @@ stdio-common-benchset := sprintf
 
 math-benchset := math-inlines
 
+ifeq (${BENCHSET},)
 benchset := $(string-benchset-all) $(stdlib-benchset) $(stdio-common-benchset) \
 	    $(math-benchset)
+else
+benchset := $(foreach B,$(filter %-benchset,${BENCHSET}), ${${B}})
+endif
 
 CFLAGS-bench-ffs.c += -fno-builtin
 CFLAGS-bench-ffsll.c += -fno-builtin
@@ -81,7 +89,11 @@ CFLAGS-bench-fmaxf.c += -fno-builtin
 CFLAGS-bench-trunc.c += -fno-builtin
 CFLAGS-bench-truncf.c += -fno-builtin
 
+ifeq (${BENCHSET},)
 bench-malloc := malloc-thread
+else
+bench-malloc := $(filter malloc-%,${BENCHSET})
+endif
 
 $(addprefix $(objpfx)bench-,$(bench-math)): $(libm)
 $(addprefix $(objpfx)bench-,$(math-benchset)): $(libm)
@@ -149,6 +161,19 @@ bench-clean:
 	rm -f $(timing-type) $(addsuffix .o,$(timing-type))
 	rm -f $(addprefix $(objpfx),$(bench-extra-objs))
 
+# Validate the passed in BENCHSET
+ifneq ($(strip ${BENCHSET}),)
+VALIDBENCHSETNAMES := bench-pthread bench-math bench-string string-benchset \
+   wcsmbs-benchset stdlib-benchset stdio-common-benchset math-benchset \
+   malloc-thread
+INVALIDBENCHSETNAMES := $(filter-out ${VALIDBENCHSETNAMES},${BENCHSET})
+ifneq (${INVALIDBENCHSETNAMES},)
+$(info The following values in BENCHSET are invalid: ${INVALIDBENCHSETNAMES})
+$(info The valid ones are: ${VALIDBENCHSETNAMES})
+$(error Invalid BENCHSET value)
+endif
+endif
+
 # Define the bench target only if the target has a usable python installation.
 ifdef PYTHON
 bench: bench-build bench-set bench-func bench-malloc
@@ -175,10 +200,11 @@ bench-set: $(binaries-benchset)
 	done
 
 bench-malloc: $(binaries-bench-malloc)
-	run=$(objpfx)bench-malloc-thread; \
-	for thr in 1 8 16 32; do \
-	  echo "Running $${run} $${thr}"; \
+	for run in $^; do \
+		for thr in 1 8 16 32; do \
+			echo "Running $${run} $${thr}"; \
 	  $(run-bench) $${thr} > $${run}-$${thr}.out; \
+	  done;\
 	done
 
 # Build and execute the benchmark functions.  This target generates JSON
@@ -186,25 +212,28 @@ bench-malloc: $(binaries-bench-malloc)
 # so one could even execute them individually and process it using any JSON
 # capable language or tool.
 bench-func: $(binaries-bench)
+	if [ -n '$^' ] ; then \
 	{ timing_type=$$($(timing-type)); \
-	echo "{\"timing_type\": \"$${timing_type}\","; \
-	echo " \"functions\": {"; \
-	for run in $^; do \
-	  if ! [ "x$${run}" = "x$<" ]; then \
-	    echo ","; \
+	  echo "{\"timing_type\": \"$${timing_type}\","; \
+	  echo " \"functions\": {"; \
+	  for run in $^; do \
+	    if ! [ "x$${run}" = "x$<" ]; then \
+	      echo ","; \
+	    fi; \
+	    echo "Running $${run}" >&2; \
+	    $(run-bench) $(DETAILED_OPT); \
+	  done; \
+	  echo; \
+	  echo " }"; \
+	  echo "}"; \
+	  } > $(objpfx)bench.out-tmp; \
+	  if [ -f $(objpfx)bench.out ]; then \
+	    mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
 	  fi; \
-	  echo "Running $${run}" >&2; \
-	  $(run-bench) $(DETAILED_OPT); \
-	done; \
-	echo; \
-	echo " }"; \
-	echo "}"; } > $(objpfx)bench.out-tmp; \
-	if [ -f $(objpfx)bench.out ]; then \
-	  mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
-	fi; \
-	mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out
-	$(PYTHON) scripts/validate_benchout.py $(objpfx)bench.out \
-		scripts/benchout.schema.json
+	  mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out; \
+	  $(PYTHON) scripts/validate_benchout.py $(objpfx)bench.out \
+	  scripts/benchout.schema.json; \
+	fi
 
 $(timing-type) $(binaries-bench) $(binaries-benchset) \
 	$(binaries-bench-malloc): %: %.o $(objpfx)json-lib.o \
diff --git a/benchtests/README b/benchtests/README
index 9aa750a519..4ddff794d1 100644
--- a/benchtests/README
+++ b/benchtests/README
@@ -53,6 +53,25 @@ otherwise the above command may try to build the benchmark again.  Benchmarks
 that require generated code to be executed during the build are skipped when
 cross-building.
 
+Running subsets of benchmarks:
+==============================
+
+To run only a subset of benchmarks, one may invoke make as follows:
+
+  $ make bench BENCHSET="bench-pthread bench-math malloc-thread"
+
+where BENCHSET may be a space-separated list of the following values:
+
+    bench-math
+    bench-pthread
+    bench-string
+    string-benchset
+    wcsmbs-benchset
+    stdlib-benchset
+    stdio-common-benchset
+    math-benchset
+    malloc-thread
+
 Adding a function to benchtests:
 ===============================
 
-- 
cgit v1.2.3