summaryrefslogtreecommitdiff
path: root/tools/perf
diff options
context:
space:
mode:
authorIan Rogers <irogers@google.com>2025-02-10 11:12:31 -0800
committerNamhyung Kim <namhyung@kernel.org>2025-02-27 08:47:25 -0800
commitc760174401f605cf63314d3817069a40945f0e0b (patch)
tree2654c020096f7ee1986c7dc7d5524ea0428faa58 /tools/perf
parent2337b7251dd5d3c58af877e6eee85a594c6b51f5 (diff)
perf cpumap: Reduce cpu size from int to int16_t
Fewer than 32k logical CPUs are currently supported by perf. A cpumap is indexed by an integer (see perf_cpu_map__cpu) yielding a perf_cpu that wraps a 4-byte int for the logical CPU - the wrapping is done deliberately to avoid confusing a logical CPU with an index into a cpumap. Using a 4-byte int within the perf_cpu is larger than required so this patch reduces it to the 2-byte int16_t. For a cpumap containing 16 entries this will reduce the array size from 64 to 32 bytes. For very large servers with lots of logical CPUs the size savings will be greater. Signed-off-by: Ian Rogers <irogers@google.com> Reviewed-by: James Clark <james.clark@linaro.org> Link: https://lore.kernel.org/r/20250210191231.156294-1-irogers@google.com Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/util/cpumap.c68
-rw-r--r--tools/perf/util/env.c2
2 files changed, 48 insertions, 22 deletions
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 5c329ad614e9b..9bc5e03702346 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -67,19 +67,23 @@ static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_m
struct perf_cpu_map *map;
map = perf_cpu_map__empty_new(data->cpus_data.nr);
- if (map) {
- unsigned i;
-
- for (i = 0; i < data->cpus_data.nr; i++) {
- /*
- * Special treatment for -1, which is not real cpu number,
- * and we need to use (int) -1 to initialize map[i],
- * otherwise it would become 65535.
- */
- if (data->cpus_data.cpu[i] == (u16) -1)
- RC_CHK_ACCESS(map)->map[i].cpu = -1;
- else
- RC_CHK_ACCESS(map)->map[i].cpu = (int) data->cpus_data.cpu[i];
+ if (!map)
+ return NULL;
+
+ for (unsigned int i = 0; i < data->cpus_data.nr; i++) {
+ /*
+ * Special treatment for -1, which is not real cpu number,
+ * and we need to use (int) -1 to initialize map[i],
+ * otherwise it would become 65535.
+ */
+ if (data->cpus_data.cpu[i] == (u16) -1) {
+ RC_CHK_ACCESS(map)->map[i].cpu = -1;
+ } else if (data->cpus_data.cpu[i] < INT16_MAX) {
+ RC_CHK_ACCESS(map)->map[i].cpu = (int16_t) data->cpus_data.cpu[i];
+ } else {
+ pr_err("Invalid cpumap entry %u\n", data->cpus_data.cpu[i]);
+ perf_cpu_map__put(map);
+ return NULL;
}
}
@@ -106,8 +110,15 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_
int cpu;
perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
- for_each_set_bit(cpu, local_copy, 64)
- RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
+ for_each_set_bit(cpu, local_copy, 64) {
+ if (cpu + cpus_per_i < INT16_MAX) {
+ RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
+ } else {
+ pr_err("Invalid cpumap entry %d\n", cpu + cpus_per_i);
+ perf_cpu_map__put(map);
+ return NULL;
+ }
+ }
}
return map;
@@ -127,8 +138,15 @@ static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map
RC_CHK_ACCESS(map)->map[i++].cpu = -1;
for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu;
- i++, cpu++)
- RC_CHK_ACCESS(map)->map[i].cpu = cpu;
+ i++, cpu++) {
+ if (cpu < INT16_MAX) {
+ RC_CHK_ACCESS(map)->map[i].cpu = cpu;
+ } else {
+ pr_err("Invalid cpumap entry %d\n", cpu);
+ perf_cpu_map__put(map);
+ return NULL;
+ }
+ }
return map;
}
@@ -427,7 +445,7 @@ static void set_max_cpu_num(void)
{
const char *mnt;
char path[PATH_MAX];
- int ret = -1;
+ int max, ret = -1;
/* set up default */
max_cpu_num.cpu = 4096;
@@ -444,10 +462,12 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_cpu_num.cpu);
+ ret = get_max_num(path, &max);
if (ret)
goto out;
+ max_cpu_num.cpu = max;
+
/* get the highest present cpu number for a sparse allocation */
ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
if (ret >= PATH_MAX) {
@@ -455,8 +475,14 @@ static void set_max_cpu_num(void)
goto out;
}
- ret = get_max_num(path, &max_present_cpu_num.cpu);
+ ret = get_max_num(path, &max);
+ if (!ret && max > INT16_MAX) {
+ pr_err("Read out of bounds max cpus of %d\n", max);
+ ret = -1;
+ }
+ if (!ret)
+ max_present_cpu_num.cpu = (int16_t)max;
out:
if (ret)
pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
@@ -606,7 +632,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
#define COMMA first ? "" : ","
for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
- struct perf_cpu cpu = { .cpu = INT_MAX };
+ struct perf_cpu cpu = { .cpu = INT16_MAX };
bool last = i == perf_cpu_map__nr(map);
if (!last)
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index cae4f6d63318f..36411749e0077 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -543,7 +543,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
for (i = 0; i < env->nr_numa_nodes; i++) {
nn = &env->numa_nodes[i];
- nr = max(nr, perf_cpu_map__max(nn->map).cpu);
+ nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
}
nr++;