bpf/benchs: Add benchmarks for comparing hashmap lookups w/ vs. w/out bloom filter

This patch adds benchmark tests for comparing the performance of hashmap lookups without the bloom filter vs. hashmap lookups with the bloom filter. Checking the bloom filter first for whether the element exists should overall enable a higher throughput for hashmap lookups, since if the element does not exist in the bloom filter, we can avoid a costly lookup in the hashmap. On average, using 5 hash functions in the bloom filter tended to perform the best across the widest range of different entry sizes. The benchmark results using 5 hash functions (running on 8 threads on a machine with one numa node, and taking the average of 3 runs) were roughly as follows: value_size = 4 bytes - 10k entries: 30% faster 50k entries: 40% faster 100k entries: 40% faster 500k entres: 70% faster 1 million entries: 90% faster 5 million entries: 140% faster value_size = 8 bytes - 10k entries: 30% faster 50k entries: 40% faster 100k entries: 50% faster 500k entres: 80% faster 1 million entries: 100% faster 5 million entries: 150% faster value_size = 16 bytes - 10k entries: 20% faster 50k entries: 30% faster 100k entries: 35% faster 500k entres: 65% faster 1 million entries: 85% faster 5 million entries: 110% faster value_size = 40 bytes - 10k entries: 5% faster 50k entries: 15% faster 100k entries: 20% faster 500k entres: 65% faster 1 million entries: 75% faster 5 million entries: 120% faster Signed-off-by: Joanne Koong <joannekoong@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20211027234504.30744-6-joannekoong@fb.com
author: Joanne Koong <joannekoong@fb.com> 2021-10-27 16:45:04 -0700
committer: Alexei Starovoitov <ast@kernel.org> 2021-10-28 13:22:49 -0700
commit: f44bc543a079c2ebc534cbfabd6fbfcfc2b09f72 (patch)
tree: 829ff328e26b552bd254d7b3b77ce0067e85f678 /tools/testing/selftests/bpf/bench.c
parent: 57fd1c63c9a687c5fdc86fa628c490d6733e8d0b (diff)
1 files changed, 18 insertions, 5 deletions
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index a1d5dffe5ef61..cc4722f693e9a 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -92,20 +92,22 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
 	printf("Iter %3d (%7.3lfus): ",
 	       iter, (delta_ns - 1000000000) / 1000.0);
 
-	printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n",
-	       hits_per_sec, hits_per_prod, drops_per_sec);
+	printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s, total operations %8.3lfM/s\n",
+	       hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec);
 }
 
 void hits_drops_report_final(struct bench_res res[], int res_cnt)
 {
 	int i;
-	double hits_mean = 0.0, drops_mean = 0.0;
-	double hits_stddev = 0.0, drops_stddev = 0.0;
+	double hits_mean = 0.0, drops_mean = 0.0, total_ops_mean = 0.0;
+	double hits_stddev = 0.0, drops_stddev = 0.0, total_ops_stddev = 0.0;
+	double total_ops;
 
 	for (i = 0; i < res_cnt; i++) {
 		hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
 		drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt);
 	}
+	total_ops_mean = hits_mean + drops_mean;
 
 	if (res_cnt > 1)  {
 		for (i = 0; i < res_cnt; i++) {
@@ -115,14 +117,21 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt)
 			drops_stddev += (drops_mean - res[i].drops / 1000000.0) *
 					(drops_mean - res[i].drops / 1000000.0) /
 					(res_cnt - 1.0);
+			total_ops = res[i].hits + res[i].drops;
+			total_ops_stddev += (total_ops_mean - total_ops / 1000000.0) *
+					(total_ops_mean - total_ops / 1000000.0) /
+					(res_cnt - 1.0);
 		}
 		hits_stddev = sqrt(hits_stddev);
 		drops_stddev = sqrt(drops_stddev);
+		total_ops_stddev = sqrt(total_ops_stddev);
 	}
 	printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ",
 	       hits_mean, hits_stddev, hits_mean / env.producer_cnt);
-	printf("drops %8.3lf \u00B1 %5.3lfM/s\n",
+	printf("drops %8.3lf \u00B1 %5.3lfM/s, ",
 	       drops_mean, drops_stddev);
+	printf("total operations %8.3lf \u00B1 %5.3lfM/s\n",
+	       total_ops_mean, total_ops_stddev);
 }
 
 const char *argp_program_version = "benchmark";
@@ -357,6 +366,8 @@ extern const struct bench bench_pb_custom;
 extern const struct bench bench_bloom_lookup;
 extern const struct bench bench_bloom_update;
 extern const struct bench bench_bloom_false_positive;
+extern const struct bench bench_hashmap_without_bloom;
+extern const struct bench bench_hashmap_with_bloom;
 
 static const struct bench *benchs[] = {
 	&bench_count_global,
@@ -381,6 +392,8 @@ static const struct bench *benchs[] = {
 	&bench_bloom_lookup,
 	&bench_bloom_update,
 	&bench_bloom_false_positive,
+	&bench_hashmap_without_bloom,
+	&bench_hashmap_with_bloom,
 };
 
 static void setup_benchmark()
author	Joanne Koong <joannekoong@fb.com>	2021-10-27 16:45:04 -0700
committer	Alexei Starovoitov <ast@kernel.org>	2021-10-28 13:22:49 -0700
commit	f44bc543a079c2ebc534cbfabd6fbfcfc2b09f72 (patch)
tree	829ff328e26b552bd254d7b3b77ce0067e85f678 /tools/testing/selftests/bpf/bench.c
parent	57fd1c63c9a687c5fdc86fa628c490d6733e8d0b (diff)