summaryrefslogtreecommitdiff
path: root/benchtests/scripts/import_bench.py
diff options
context:
space:
mode:
Diffstat (limited to 'benchtests/scripts/import_bench.py')
-rw-r--r--benchtests/scripts/import_bench.py141
1 files changed, 141 insertions, 0 deletions
diff --git a/benchtests/scripts/import_bench.py b/benchtests/scripts/import_bench.py
new file mode 100644
index 0000000000..d37ff62383
--- /dev/null
+++ b/benchtests/scripts/import_bench.py
@@ -0,0 +1,141 @@
+#!/usr/bin/python
+# Copyright (C) 2015 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+"""Functions to import benchmark data and process it"""
+
+import json
+try:
+ import jsonschema as validator
+except ImportError:
+ print('Could not find jsonschema module.')
+ raise
+
+
def mean(lst):
    """Return the arithmetic mean of the numbers in a list

    This is a deliberately plain sum-over-count: the numpy average function
    has horrible performance, so we carry our own implementation.

    Args:
        lst: The list of numbers to average.  An empty list results in a
             ZeroDivisionError.
    Return:
        The sum of the members divided by the number of members.
    """
    total = sum(lst)
    count = len(lst)
    return total / count
+
+
def split_list(bench, func, var):
    """Split the list into a smaller set of more distinct points

    The timings list is consumed from its large end and is expected to be
    sorted in ascending order.  On every pass the longest trailing run of
    points whose mean is greater than 0.75 times the largest remaining point
    is replaced by that mean, and the pass is repeated on what is left.  The
    timings list in the benchmark object is replaced, in place, by the
    resulting list of means (still in ascending order).

    Background on this kind of grouping, described for the smallest element:

    Group together points such that the difference between the smallest
    point and the mean is less than 1/3rd of the mean.  This means that
    the mean is at most 1.5x the smallest member of that group.

    mean - xmin < mean / 3
    i.e. 2 * mean / 3 < xmin
    i.e. mean < 3 * xmin / 2

    For an evenly distributed group, the largest member will be less than
    twice the smallest member of the group.
    Derivation:

    An evenly distributed series would be xmin, xmin + d, xmin + 2d...

    mean = (2 * n * xmin + n * (n - 1) * d) / (2 * n)
    and max element is xmin + (n - 1) * d

    Now, mean < 3 * xmin / 2

    3 * xmin > 2 * mean
    3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
    3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
    n * xmin > n * (n - 1) * d
    xmin > (n - 1) * d
    2 * xmin > xmin + (n-1) * d
    2 * xmin > xmax

    Hence, proved.

    Similarly, it is trivial to prove that for a similar aggregation by using
    the maximum element, the maximum element in the group must be less than
    4/3 times the mean.  That maximum based variant (mean > 0.75 * xmax) is
    the one implemented here.

    Args:
        bench: The benchmark object
        func: The function name
        var: The function variant name
    """
    means = []
    lst = bench['functions'][func][var]['timings']
    while lst:
        threshold = 0.75 * lst[-1]
        # Find the longest trailing run whose mean clears the threshold.
        for start in range(len(lst)):
            tail = lst[start:]
            avg = sum(tail) / len(tail)
            if avg > threshold:
                break
        else:
            # No run qualifies, which can only happen when the largest
            # remaining value is zero or negative (a positive value always
            # qualifies on its own).  Club everything that is left into one
            # group so that the loop is guaranteed to terminate.
            start = 0
            avg = sum(lst) / len(lst)
        means.append(avg)
        lst = lst[:start]
    # Groups were collected from the largest downwards; restore ascending
    # order.
    means.reverse()
    bench['functions'][func][var]['timings'] = means
+
+
def do_for_all_timings(bench, callback):
    """Call a function for all timing objects for each function and its
    variants.

    Every variant of every function in bench['functions'] is visited.
    Variants that have no 'timings' entry are skipped.  For the others the
    callback is invoked as callback(bench, func, variant); its return value
    is ignored.

    Args:
        bench: The benchmark object
        callback: The callback function, taking the benchmark object, the
                  function name and the variant name
    """
    for func, variants in bench['functions'].items():
        for variant, properties in variants.items():
            if 'timings' in properties:
                callback(bench, func, variant)
+
+
def compress_timings(points):
    """Club points with close enough values into a single mean value

    Runs split_list over every timings list found in the benchmark object,
    modifying the object in place.  See split_list for details on how the
    clubbing is done.

    Args:
        points: The set of points.
    """
    do_for_all_timings(bench=points, callback=split_list)
+
+
def parse_bench(filename, schema_filename):
    """Parse the input file

    Load the json file containing the benchmark outputs, validate it against
    the json schema and sort every timings list in place in ascending order.
    Errors from opening or decoding either file, or from the validator, are
    not caught here.

    Args:
        filename: Name of the benchmark output file.
        schema_filename: Name of the json schema file to validate against.
    Return:
        The bench dictionary.
    """
    def sort_timings(b, f, v):
        b['functions'][f][v]['timings'].sort()

    with open(schema_filename, 'r') as schemafile:
        schema = json.load(schemafile)
    with open(filename, 'r') as benchfile:
        bench = json.load(benchfile)

    validator.validate(bench, schema)
    do_for_all_timings(bench, sort_timings)
    return bench