From 329687a03d18143f491b535d22be1cccc291bb58 Mon Sep 17 00:00:00 2001 From: Jiajian Ye Date: Thu, 28 Apr 2022 23:15:56 -0700 Subject: tools/vm/page_owner_sort.c: use fprintf() to send error messages to stderr Error messages should be sent to stderr using fprintf() instead of printf(). This work is coauthored by Yixuan Cao, Shenghong Han, Yinan Zhang, Chongxi Zhao, Yuhong Feng and Yongqiang Liu. Link: https://lkml.kernel.org/r/20220401024856.767-1-yejiajian2018@email.szu.edu.cn Signed-off-by: Jiajian Ye Cc: Shenghong Han Cc: Yixuan Cao Cc: Yinan Zhang Cc: Chongxi Zhao Cc: Yuhong Feng Cc: Yongqiang Liu Cc: Haowen Bai Cc: Sean Anderson Signed-off-by: Andrew Morton --- tools/vm/page_owner_sort.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c index 7d98e76c2291..6771003ed5f1 100644 --- a/tools/vm/page_owner_sort.c +++ b/tools/vm/page_owner_sort.c @@ -186,7 +186,7 @@ static int search_pattern(regex_t *pattern, char *pattern_str, char *buf) err = regexec(pattern, buf, 2, pmatch, REG_NOTBOL); if (err != 0 || pmatch[1].rm_so == -1) { - printf("no matching pattern in %s\n", buf); + fprintf(stderr, "no matching pattern in %s\n", buf); return -1; } val_len = pmatch[1].rm_eo - pmatch[1].rm_so; @@ -202,7 +202,7 @@ static void check_regcomp(regex_t *pattern, const char *regex) err = regcomp(pattern, regex, REG_EXTENDED | REG_NEWLINE); if (err != 0 || pattern->re_nsub != 1) { - printf("Invalid pattern %s code %d\n", regex, err); + fprintf(stderr, "Invalid pattern %s code %d\n", regex, err); exit(1); } } @@ -251,7 +251,7 @@ static int get_page_num(char *buf) errno = 0; order_val = strtol(order_str, &endptr, 10); if (order_val > 64 || errno != 0 || endptr == order_str || *endptr != '\0') { - printf("wrong order in follow buf:\n%s\n", buf); + fprintf(stderr, "wrong order in follow buf:\n%s\n", buf); return 0; } @@ -268,7 +268,7 @@ static pid_t get_pid(char *buf) 
errno = 0; pid = strtol(pid_str, &endptr, 10); if (errno != 0 || endptr == pid_str || *endptr != '\0') { - printf("wrong/invalid pid in follow buf:\n%s\n", buf); + fprintf(stderr, "wrong/invalid pid in follow buf:\n%s\n", buf); return -1; } @@ -286,7 +286,7 @@ static pid_t get_tgid(char *buf) errno = 0; tgid = strtol(tgid_str, &endptr, 10); if (errno != 0 || endptr == tgid_str || *endptr != '\0') { - printf("wrong/invalid tgid in follow buf:\n%s\n", buf); + fprintf(stderr, "wrong/invalid tgid in follow buf:\n%s\n", buf); return -1; } @@ -304,7 +304,7 @@ static __u64 get_ts_nsec(char *buf) errno = 0; ts_nsec = strtoull(ts_nsec_str, &endptr, 10); if (errno != 0 || endptr == ts_nsec_str || *endptr != '\0') { - printf("wrong ts_nsec in follow buf:\n%s\n", buf); + fprintf(stderr, "wrong ts_nsec in follow buf:\n%s\n", buf); return -1; } @@ -321,7 +321,7 @@ static __u64 get_free_ts_nsec(char *buf) errno = 0; free_ts_nsec = strtoull(free_ts_nsec_str, &endptr, 10); if (errno != 0 || endptr == free_ts_nsec_str || *endptr != '\0') { - printf("wrong free_ts_nsec in follow buf:\n%s\n", buf); + fprintf(stderr, "wrong free_ts_nsec in follow buf:\n%s\n", buf); return -1; } @@ -337,7 +337,7 @@ static char *get_comm(char *buf) search_pattern(&comm_pattern, comm_str, buf); errno = 0; if (errno != 0) { - printf("wrong comm in follow buf:\n%s\n", buf); + fprintf(stderr, "wrong comm in follow buf:\n%s\n", buf); return NULL; } @@ -373,7 +373,7 @@ static void add_list(char *buf, int len) return; } if (list_size == max_size) { - printf("max_size too small??\n"); + fprintf(stderr, "max_size too small??\n"); exit(1); } if (!is_need(buf)) @@ -383,7 +383,7 @@ static void add_list(char *buf, int len) list[list_size].comm = get_comm(buf); list[list_size].txt = malloc(len+1); if (!list[list_size].txt) { - printf("Out of memory\n"); + fprintf(stderr, "Out of memory\n"); exit(1); } memcpy(list[list_size].txt, buf, len); @@ -499,7 +499,8 @@ int main(int argc, char **argv) errno = 0; fc.pid = 
strtol(optarg, &endptr, 10); if (errno != 0 || endptr == optarg || *endptr != '\0') { - printf("wrong/invalid pid in from the command line:%s\n", optarg); + fprintf(stderr, "wrong/invalid pid in from the command line:%s\n", + optarg); exit(1); } break; @@ -508,7 +509,8 @@ int main(int argc, char **argv) errno = 0; fc.tgid = strtol(optarg, &endptr, 10); if (errno != 0 || endptr == optarg || *endptr != '\0') { - printf("wrong/invalid tgid in from the command line:%s\n", optarg); + fprintf(stderr, "wrong/invalid tgid in from the command line:%s\n", + optarg); exit(1); } break; @@ -519,7 +521,7 @@ int main(int argc, char **argv) break; case 4: if (!parse_cull_args(optarg)) { - printf("wrong argument after --cull in from the command line:%s\n", + fprintf(stderr, "wrong argument after --cull option:%s\n", optarg); exit(1); } @@ -554,7 +556,7 @@ int main(int argc, char **argv) list = malloc(max_size * sizeof(*list)); buf = malloc(BUF_SIZE); if (!list || !buf) { - printf("Out of memory\n"); + fprintf(stderr, "Out of memory\n"); exit(1); } -- cgit v1.2.3 From 75382a2dca0e9e9e57e88b479cf537549461a934 Mon Sep 17 00:00:00 2001 From: Jiajian Ye Date: Thu, 28 Apr 2022 23:15:56 -0700 Subject: tools/vm/page_owner_sort.c: support for multi-value selection in single argument When viewing page owner information, we may want to select blocks whose PID/TGID/TASK_COMM_NAME appears in a user-specified list for data analysis and aggregation. But currently page_owner_sort only supports selecting blocks associated with only one specified PID/TGID/TASK_COMM_NAME. Therefore, following adjustments are made to fix the problem: 1. Enhance selecting function to support the selection of multiple PIDs/TGIDs/TASK_COMM_NAMEs. The enhanced usages are as follows: --pid Select by pid. This selects the blocks whose PID numbers appear in . --tgid Select by tgid. This selects the blocks whose TGID numbers appear in . --name Select by task command name. 
This selects the blocks whose task command name appear in . Where , , are single arguments in the form of a comma-separated list,which offers a way to specify individual selecting rules. For example, if you want to select blocks whose tgids are 1, 2 or 3, you have to use 4 commands as follows: ./page_owner_sort --tgid=1 ./page_owner_sort --tgid=2 ./page_owner_sort --tgid=3 cat > With this patch, you can use only 1 command to obtain the same result as above: ./page_owner_sort --tgid=1,2,3 2. Update explanations of --pid, --tgid and --name in the function usage() and the document(Documents/vm/page_owner.rst). This work is coauthored by Yixuan Cao Shenghong Han Yinan Zhang Chongxi Zhao Yuhong Feng Yongqiang Liu Link: https://lkml.kernel.org/r/20220401024856.767-2-yejiajian2018@email.szu.edu.cn Signed-off-by: Jiajian Ye Cc: Chongxi Zhao Cc: Shenghong Han Cc: Yinan Zhang Cc: Yixuan Cao Cc: Yongqiang Liu Cc: Yuhong Feng Cc: Haowen Bai Cc: Sean Anderson Signed-off-by: Andrew Morton --- Documentation/vm/page_owner.rst | 20 ++++++++--- tools/vm/page_owner_sort.c | 78 ++++++++++++++++++++++++++++++----------- 2 files changed, 72 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst index 7e0c3f574e78..3102f91d635c 100644 --- a/Documentation/vm/page_owner.rst +++ b/Documentation/vm/page_owner.rst @@ -129,7 +129,6 @@ Usage Specify culling rules.Culling syntax is key[,key[,...]].Choose a multi-letter key from the **STANDARD FORMAT SPECIFIERS** section. - is a single argument in the form of a comma-separated list, which offers a way to specify individual culling rules. The recognized keywords are described in the **STANDARD FORMAT SPECIFIERS** section below. @@ -137,7 +136,6 @@ Usage the STANDARD SORT KEYS section below. Mixed use of abbreviated and complete-form of keys is allowed. 
- Examples: ./page_owner_sort --cull=stacktrace ./page_owner_sort --cull=st,pid,name @@ -147,9 +145,21 @@ Usage -f Filter out the information of blocks whose memory has been released. Select: - --pid Select by pid. - --tgid Select by tgid. - --name Select by task command name. + --pid Select by pid. This selects the blocks whose process ID + numbers appear in . + --tgid Select by tgid. This selects the blocks whose thread + group ID numbers appear in . + --name Select by task command name. This selects the blocks whose + task command name appear in . + + , , are single arguments in the form of a comma-separated list, + which offers a way to specify individual selecting rules. + + + Examples: + ./page_owner_sort --pid=1 + ./page_owner_sort --tgid=1,2,3 + ./page_owner_sort --name name1,name2 STANDARD FORMAT SPECIFIERS ========================== diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c index 6771003ed5f1..16fb034c6a4e 100644 --- a/tools/vm/page_owner_sort.c +++ b/tools/vm/page_owner_sort.c @@ -54,9 +54,12 @@ enum CULL_BIT { CULL_STACKTRACE = 1<<5 }; struct filter_condition { - pid_t tgid; - pid_t pid; - char comm[TASK_COMM_LEN]; + pid_t *tgids; + int tgids_size; + pid_t *pids; + int pids_size; + char **comms; + int comms_size; }; static struct filter_condition fc; static regex_t order_pattern; @@ -149,7 +152,6 @@ static int compare_free_ts(const void *p1, const void *p2) return l1->free_ts_nsec < l2->free_ts_nsec ? -1 : 1; } - static int compare_release(const void *p1, const void *p2) { const struct block_list *l1 = p1, *l2 = p2; @@ -161,7 +163,6 @@ static int compare_release(const void *p1, const void *p2) return l1->free_ts_nsec ? 
1 : -1; } - static int compare_cull_condition(const void *p1, const void *p2) { if (cull == 0) @@ -344,22 +345,40 @@ static char *get_comm(char *buf) return comm_str; } +static bool match_num_list(int num, int *list, int list_size) +{ + for (int i = 0; i < list_size; ++i) + if (list[i] == num) + return true; + return false; +} + +static bool match_str_list(const char *str, char **list, int list_size) +{ + for (int i = 0; i < list_size; ++i) + if (!strcmp(list[i], str)) + return true; + return false; +} + static bool is_need(char *buf) { if ((filter & FILTER_UNRELEASE) && get_free_ts_nsec(buf) != 0) return false; - if ((filter & FILTER_PID) && get_pid(buf) != fc.pid) + if ((filter & FILTER_PID) && !match_num_list(get_pid(buf), fc.pids, fc.pids_size)) return false; - if ((filter & FILTER_TGID) && get_tgid(buf) != fc.tgid) + if ((filter & FILTER_TGID) && + !match_num_list(get_tgid(buf), fc.tgids, fc.tgids_size)) return false; char *comm = get_comm(buf); if ((filter & FILTER_COMM) && - strncmp(comm, fc.comm, TASK_COMM_LEN) != 0) { + !match_str_list(comm, fc.comms, fc.comms_size)) { free(comm); return false; } + free(comm); return true; } @@ -428,6 +447,27 @@ static bool parse_cull_args(const char *arg_str) return true; } +static int *parse_nums_list(char *arg_str, int *list_size) +{ + int size = 0; + char **args = explode(',', arg_str, &size); + int *list = calloc(size, sizeof(int)); + + errno = 0; + for (int i = 0; i < size; ++i) { + char *endptr = NULL; + + list[i] = strtol(args[i], &endptr, 10); + if (errno != 0 || endptr == args[i] || *endptr != '\0') { + free(list); + return NULL; + } + } + *list_size = size; + free_explode(args, size); + return list; +} + #define BUF_SIZE (128 * 1024) static void usage(void) @@ -442,9 +482,9 @@ static void usage(void) "-a\t\tSort by memory allocate time.\n" "-r\t\tSort by memory release time.\n" "-f\t\tFilter out the information of blocks whose memory has been released.\n" - "--pid \tSelect by pid. 
This selects the information of blocks whose process ID number equals to .\n" - "--tgid \tSelect by tgid. This selects the information of blocks whose Thread Group ID number equals to .\n" - "--name \n\t\tSelect by command name. This selects the information of blocks whose command name identical to .\n" + "--pid \tSelect by pid. This selects the information of blocks whose process ID numbers appear in .\n" + "--tgid \tSelect by tgid. This selects the information of blocks whose Thread Group ID numbers appear in .\n" + "--name \n\t\tSelect by command name. This selects the information of blocks whose command name appears in .\n" "--cull \tCull by user-defined rules. is a single argument in the form of a comma-separated list with some common fields predefined\n" ); } @@ -453,7 +493,7 @@ int main(int argc, char **argv) { int (*cmp)(const void *, const void *) = compare_num; FILE *fin, *fout; - char *buf, *endptr; + char *buf; int ret, i, count; struct stat st; int opt; @@ -496,9 +536,8 @@ int main(int argc, char **argv) break; case 1: filter = filter | FILTER_PID; - errno = 0; - fc.pid = strtol(optarg, &endptr, 10); - if (errno != 0 || endptr == optarg || *endptr != '\0') { + fc.pids = parse_nums_list(optarg, &fc.pids_size); + if (fc.pids == NULL) { fprintf(stderr, "wrong/invalid pid in from the command line:%s\n", optarg); exit(1); @@ -506,9 +545,8 @@ int main(int argc, char **argv) break; case 2: filter = filter | FILTER_TGID; - errno = 0; - fc.tgid = strtol(optarg, &endptr, 10); - if (errno != 0 || endptr == optarg || *endptr != '\0') { + fc.tgids = parse_nums_list(optarg, &fc.tgids_size); + if (fc.tgids == NULL) { fprintf(stderr, "wrong/invalid tgid in from the command line:%s\n", optarg); exit(1); @@ -516,8 +554,7 @@ int main(int argc, char **argv) break; case 3: filter = filter | FILTER_COMM; - strncpy(fc.comm, optarg, TASK_COMM_LEN); - fc.comm[TASK_COMM_LEN-1] = '\0'; + fc.comms = explode(',', optarg, &fc.comms_size); break; case 4: if 
(!parse_cull_args(optarg)) { @@ -564,7 +601,6 @@ int main(int argc, char **argv) ret = read_block(buf, BUF_SIZE, fin); if (ret < 0) break; - add_list(buf, ret); } -- cgit v1.2.3 From ebbeae36387ccf1326c896167872c3acf6c3c956 Mon Sep 17 00:00:00 2001 From: Jiajian Ye Date: Thu, 28 Apr 2022 23:15:57 -0700 Subject: tools/vm/page_owner_sort.c: support sorting blocks by multiple keys When viewing page owner information, we may want to sort blocks of information by multiple keys, since one single key does not uniquely identify a block. Therefore, the following adjustments are made: 1. Add a new --sort option to support sorting blocks of information by multiple keys. ./page_owner_sort --sort=<order> ./page_owner_sort --sort <order> Here <order> is a single argument in the form of a comma-separated list, which offers a way to specify sorting order. Sorting syntax is [+|-]key[,[+|-]key[,...]]. The ascending or descending order can be specified by adding the + (ascending, default) or - (descending) prefix to the key: ./page_owner_sort [option] --sort -key1,+key2,key3... For example, to sort the blocks first by task command name in lexicographic order and then by pid in ascending numerical order, use the following: ./page_owner_sort --sort=name,+pid To sort the blocks first by pid in ascending order and then by timestamp of the page when it is allocated in descending order, use the following: ./page_owner_sort --sort=pid,-alloc_ts 2. Add an explanation of the newly added --sort option to the function usage() and to the documentation (Documentation/vm/page_owner.rst). 
This work is coauthored by Yixuan Cao Shenghong Han Yinan Zhang Chongxi Zhao Yuhong Feng Yongqiang Liu Link: https://lkml.kernel.org/r/20220401024856.767-3-yejiajian2018@email.szu.edu.cn Signed-off-by: Jiajian Ye Cc: Chongxi Zhao Cc: Shenghong Han Cc: Yinan Zhang Cc: Yixuan Cao Cc: Yongqiang Liu Cc: Yuhong Feng Cc: Haowen Bai Cc: Sean Anderson Signed-off-by: Andrew Morton --- Documentation/vm/page_owner.rst | 24 +++++- tools/vm/page_owner_sort.c | 164 ++++++++++++++++++++++++++++++++++------ 2 files changed, 165 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst index 3102f91d635c..523bf3419512 100644 --- a/Documentation/vm/page_owner.rst +++ b/Documentation/vm/page_owner.rst @@ -121,6 +121,14 @@ Usage -r Sort by memory release time. -s Sort by stack trace. -t Sort by times (default). + --sort Specify sorting order. Sorting syntax is [+|-]key[,[+|-]key[,...]]. + Choose a key from the **STANDARD FORMAT SPECIFIERS** section. The "+" is + optional since default direction is increasing numerical or lexicographic + order. Mixed use of abbreviated and complete-form of keys is allowed. 
+ + Examples: + ./page_owner_sort --sort=n,+pid,-tgid + ./page_owner_sort --sort=at additional function:: @@ -165,9 +173,23 @@ STANDARD FORMAT SPECIFIERS ========================== :: +For --sort option: + + KEY LONG DESCRIPTION + p pid process ID + tg tgid thread group ID + n name task command name + st stacktrace stack trace of the page allocation + T txt full text of block + ft free_ts timestamp of the page when it was released + at alloc_ts timestamp of the page when it was allocated + +For --curl option: + KEY LONG DESCRIPTION p pid process ID tg tgid thread group ID n name task command name f free whether the page has been released or not - st stacktrace stace trace of the page allocation + st stacktrace stack trace of the page allocation + diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c index 16fb034c6a4e..beca990707fb 100644 --- a/tools/vm/page_owner_sort.c +++ b/tools/vm/page_owner_sort.c @@ -53,15 +53,29 @@ enum CULL_BIT { CULL_COMM = 1<<4, CULL_STACKTRACE = 1<<5 }; +enum ARG_TYPE { + ARG_TXT, ARG_COMM, ARG_STACKTRACE, ARG_ALLOC_TS, ARG_FREE_TS, + ARG_CULL_TIME, ARG_PAGE_NUM, ARG_PID, ARG_TGID, ARG_UNKNOWN, ARG_FREE +}; +enum SORT_ORDER { + SORT_ASC = 1, + SORT_DESC = -1, +}; struct filter_condition { - pid_t *tgids; - int tgids_size; pid_t *pids; - int pids_size; + pid_t *tgids; char **comms; + int pids_size; + int tgids_size; int comms_size; }; +struct sort_condition { + int (**cmps)(const void *, const void *); + int *signs; + int size; +}; static struct filter_condition fc; +static struct sort_condition sc; static regex_t order_pattern; static regex_t pid_pattern; static regex_t tgid_pattern; @@ -107,14 +121,14 @@ static int compare_num(const void *p1, const void *p2) { const struct block_list *l1 = p1, *l2 = p2; - return l2->num - l1->num; + return l1->num - l2->num; } static int compare_page_num(const void *p1, const void *p2) { const struct block_list *l1 = p1, *l2 = p2; - return l2->page_num - l1->page_num; + return 
l1->page_num - l2->page_num; } static int compare_pid(const void *p1, const void *p2) @@ -180,6 +194,16 @@ static int compare_cull_condition(const void *p1, const void *p2) return 0; } +static int compare_sort_condition(const void *p1, const void *p2) +{ + int cmp = 0; + + for (int i = 0; i < sc.size; ++i) + if (cmp == 0) + cmp = sc.signs[i] * sc.cmps[i](p1, p2); + return cmp; +} + static int search_pattern(regex_t *pattern, char *pattern_str, char *buf) { int err, val_len; @@ -345,6 +369,29 @@ static char *get_comm(char *buf) return comm_str; } +static int get_arg_type(const char *arg) +{ + if (!strcmp(arg, "pid") || !strcmp(arg, "p")) + return ARG_PID; + else if (!strcmp(arg, "tgid") || !strcmp(arg, "tg")) + return ARG_TGID; + else if (!strcmp(arg, "name") || !strcmp(arg, "n")) + return ARG_COMM; + else if (!strcmp(arg, "stacktrace") || !strcmp(arg, "st")) + return ARG_STACKTRACE; + else if (!strcmp(arg, "free") || !strcmp(arg, "f")) + return ARG_FREE; + else if (!strcmp(arg, "txt") || !strcmp(arg, "T")) + return ARG_TXT; + else if (!strcmp(arg, "free_ts") || !strcmp(arg, "ft")) + return ARG_FREE_TS; + else if (!strcmp(arg, "alloc_ts") || !strcmp(arg, "at")) + return ARG_ALLOC_TS; + else { + return ARG_UNKNOWN; + } +} + static bool match_num_list(int num, int *list, int list_size) { for (int i = 0; i < list_size; ++i) @@ -428,21 +475,86 @@ static bool parse_cull_args(const char *arg_str) int size = 0; char **args = explode(',', arg_str, &size); - for (int i = 0; i < size; ++i) - if (!strcmp(args[i], "pid") || !strcmp(args[i], "p")) + for (int i = 0; i < size; ++i) { + int arg_type = get_arg_type(args[i]); + + if (arg_type == ARG_PID) cull |= CULL_PID; - else if (!strcmp(args[i], "tgid") || !strcmp(args[i], "tg")) + else if (arg_type == ARG_TGID) cull |= CULL_TGID; - else if (!strcmp(args[i], "name") || !strcmp(args[i], "n")) + else if (arg_type == ARG_COMM) cull |= CULL_COMM; - else if (!strcmp(args[i], "stacktrace") || !strcmp(args[i], "st")) + else if (arg_type 
== ARG_STACKTRACE) cull |= CULL_STACKTRACE; - else if (!strcmp(args[i], "free") || !strcmp(args[i], "f")) + else if (arg_type == ARG_FREE) cull |= CULL_UNRELEASE; else { free_explode(args, size); return false; } + } + free_explode(args, size); + return true; +} + +static void set_single_cmp(int (*cmp)(const void *, const void *), int sign) +{ + if (sc.signs == NULL || sc.size < 1) + sc.signs = calloc(1, sizeof(int)); + sc.signs[0] = sign; + if (sc.cmps == NULL || sc.size < 1) + sc.cmps = calloc(1, sizeof(int *)); + sc.cmps[0] = cmp; + sc.size = 1; +} + +static bool parse_sort_args(const char *arg_str) +{ + int size = 0; + + if (sc.size != 0) { /* reset sort_condition */ + free(sc.signs); + free(sc.cmps); + size = 0; + } + + char **args = explode(',', arg_str, &size); + + sc.signs = calloc(size, sizeof(int)); + sc.cmps = calloc(size, sizeof(int *)); + for (int i = 0; i < size; ++i) { + int offset = 0; + + sc.signs[i] = SORT_ASC; + if (args[i][0] == '-' || args[i][0] == '+') { + if (args[i][0] == '-') + sc.signs[i] = SORT_DESC; + offset = 1; + } + + int arg_type = get_arg_type(args[i]+offset); + + if (arg_type == ARG_PID) + sc.cmps[i] = compare_pid; + else if (arg_type == ARG_TGID) + sc.cmps[i] = compare_tgid; + else if (arg_type == ARG_COMM) + sc.cmps[i] = compare_comm; + else if (arg_type == ARG_STACKTRACE) + sc.cmps[i] = compare_stacktrace; + else if (arg_type == ARG_ALLOC_TS) + sc.cmps[i] = compare_ts; + else if (arg_type == ARG_FREE_TS) + sc.cmps[i] = compare_free_ts; + else if (arg_type == ARG_TXT) + sc.cmps[i] = compare_txt; + else { + free_explode(args, size); + sc.size = 0; + return false; + } + } + sc.size = size; free_explode(args, size); return true; } @@ -485,13 +597,13 @@ static void usage(void) "--pid \tSelect by pid. This selects the information of blocks whose process ID numbers appear in .\n" "--tgid \tSelect by tgid. This selects the information of blocks whose Thread Group ID numbers appear in .\n" "--name \n\t\tSelect by command name. 
This selects the information of blocks whose command name appears in .\n" - "--cull \tCull by user-defined rules. is a single argument in the form of a comma-separated list with some common fields predefined\n" + "--cull \tCull by user-defined rules. is a single argument in the form of a comma-separated list with some common fields predefined\n" + "--sort \tSpecify sort order as: [+|-]key[,[+|-]key[,...]]\n" ); } int main(int argc, char **argv) { - int (*cmp)(const void *, const void *) = compare_num; FILE *fin, *fout; char *buf; int ret, i, count; @@ -502,37 +614,38 @@ int main(int argc, char **argv) { "tgid", required_argument, NULL, 2 }, { "name", required_argument, NULL, 3 }, { "cull", required_argument, NULL, 4 }, + { "sort", required_argument, NULL, 5 }, { 0, 0, 0, 0}, }; while ((opt = getopt_long(argc, argv, "afmnprstP", longopts, NULL)) != -1) switch (opt) { case 'a': - cmp = compare_ts; + set_single_cmp(compare_ts, SORT_ASC); break; case 'f': filter = filter | FILTER_UNRELEASE; break; case 'm': - cmp = compare_page_num; + set_single_cmp(compare_page_num, SORT_DESC); break; case 'p': - cmp = compare_pid; + set_single_cmp(compare_pid, SORT_ASC); break; case 'r': - cmp = compare_free_ts; + set_single_cmp(compare_free_ts, SORT_ASC); break; case 's': - cmp = compare_stacktrace; + set_single_cmp(compare_stacktrace, SORT_ASC); break; case 't': - cmp = compare_num; + set_single_cmp(compare_num, SORT_DESC); break; case 'P': - cmp = compare_tgid; + set_single_cmp(compare_tgid, SORT_ASC); break; case 'n': - cmp = compare_comm; + set_single_cmp(compare_comm, SORT_ASC); break; case 1: filter = filter | FILTER_PID; @@ -563,6 +676,13 @@ int main(int argc, char **argv) exit(1); } break; + case 5: + if (!parse_sort_args(optarg)) { + fprintf(stderr, "wrong argument after --sort option:%s\n", + optarg); + exit(1); + } + break; default: usage(); exit(1); @@ -622,7 +742,7 @@ int main(int argc, char **argv) } } - qsort(list, count, sizeof(list[0]), cmp); + qsort(list, count, 
sizeof(list[0]), compare_sort_condition); for (i = 0; i < count; i++) { if (cull == 0) -- cgit v1.2.3 From a72469aa593881c2a5ad3a38cfb3e7871c50f169 Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 28 Apr 2022 23:15:57 -0700 Subject: tools/vm/page_owner: support debug log to avoid huge log print In normal usage, the tool prints a huge parser log and spends a lot of time printing it, so it would be preferable to add a "-d" debug control to avoid this problem. Link: https://lkml.kernel.org/r/1649672446-5685-1-git-send-email-baihaowen@meizu.com Signed-off-by: Haowen Bai Cc: Chongxi Zhao Cc: Jiajian Ye Cc: Shenghong Han Cc: Yinan Zhang Cc: Yixuan Cao Cc: Yongqiang Liu Cc: Yuhong Feng Cc: Sean Anderson Signed-off-by: Andrew Morton --- tools/vm/page_owner_sort.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c index beca990707fb..a32e446e5bb2 100644 --- a/tools/vm/page_owner_sort.c +++ b/tools/vm/page_owner_sort.c @@ -87,6 +87,7 @@ static int list_size; static int max_size; static int cull; static int filter; +static bool debug_on; int read_block(char *buf, int buf_size, FILE *fin) { @@ -211,7 +212,8 @@ static int search_pattern(regex_t *pattern, char *pattern_str, char *buf) err = regexec(pattern, buf, 2, pmatch, REG_NOTBOL); if (err != 0 || pmatch[1].rm_so == -1) { - fprintf(stderr, "no matching pattern in %s\n", buf); + if (debug_on) + fprintf(stderr, "no matching pattern in %s\n", buf); return -1; } val_len = pmatch[1].rm_eo - pmatch[1].rm_so; @@ -276,7 +278,8 @@ static int get_page_num(char *buf) errno = 0; order_val = strtol(order_str, &endptr, 10); if (order_val > 64 || errno != 0 || endptr == order_str || *endptr != '\0') { - fprintf(stderr, "wrong order in follow buf:\n%s\n", buf); + if (debug_on) + fprintf(stderr, "wrong order in follow buf:\n%s\n", buf); return 0; } @@ -293,7 +296,8 @@ static pid_t get_pid(char *buf) errno = 0; pid = 
strtol(pid_str, &endptr, 10); if (errno != 0 || endptr == pid_str || *endptr != '\0') { - fprintf(stderr, "wrong/invalid pid in follow buf:\n%s\n", buf); + if (debug_on) + fprintf(stderr, "wrong/invalid pid in follow buf:\n%s\n", buf); return -1; } @@ -311,7 +315,8 @@ static pid_t get_tgid(char *buf) errno = 0; tgid = strtol(tgid_str, &endptr, 10); if (errno != 0 || endptr == tgid_str || *endptr != '\0') { - fprintf(stderr, "wrong/invalid tgid in follow buf:\n%s\n", buf); + if (debug_on) + fprintf(stderr, "wrong/invalid tgid in follow buf:\n%s\n", buf); return -1; } @@ -329,7 +334,8 @@ static __u64 get_ts_nsec(char *buf) errno = 0; ts_nsec = strtoull(ts_nsec_str, &endptr, 10); if (errno != 0 || endptr == ts_nsec_str || *endptr != '\0') { - fprintf(stderr, "wrong ts_nsec in follow buf:\n%s\n", buf); + if (debug_on) + fprintf(stderr, "wrong ts_nsec in follow buf:\n%s\n", buf); return -1; } @@ -346,7 +352,8 @@ static __u64 get_free_ts_nsec(char *buf) errno = 0; free_ts_nsec = strtoull(free_ts_nsec_str, &endptr, 10); if (errno != 0 || endptr == free_ts_nsec_str || *endptr != '\0') { - fprintf(stderr, "wrong free_ts_nsec in follow buf:\n%s\n", buf); + if (debug_on) + fprintf(stderr, "wrong free_ts_nsec in follow buf:\n%s\n", buf); return -1; } @@ -362,7 +369,8 @@ static char *get_comm(char *buf) search_pattern(&comm_pattern, comm_str, buf); errno = 0; if (errno != 0) { - fprintf(stderr, "wrong comm in follow buf:\n%s\n", buf); + if (debug_on) + fprintf(stderr, "wrong comm in follow buf:\n%s\n", buf); return NULL; } @@ -594,6 +602,7 @@ static void usage(void) "-a\t\tSort by memory allocate time.\n" "-r\t\tSort by memory release time.\n" "-f\t\tFilter out the information of blocks whose memory has been released.\n" + "-d\t\tPrint debug information.\n" "--pid \tSelect by pid. This selects the information of blocks whose process ID numbers appear in .\n" "--tgid \tSelect by tgid. 
This selects the information of blocks whose Thread Group ID numbers appear in .\n" "--name \n\t\tSelect by command name. This selects the information of blocks whose command name appears in .\n" @@ -618,11 +627,14 @@ int main(int argc, char **argv) { 0, 0, 0, 0}, }; - while ((opt = getopt_long(argc, argv, "afmnprstP", longopts, NULL)) != -1) + while ((opt = getopt_long(argc, argv, "adfmnprstP", longopts, NULL)) != -1) switch (opt) { case 'a': set_single_cmp(compare_ts, SORT_ASC); break; + case 'd': + debug_on = true; + break; case 'f': filter = filter | FILTER_UNRELEASE; break; -- cgit v1.2.3 From f09654bb88127473b4baf3bc0b68d4d4695aca7b Mon Sep 17 00:00:00 2001 From: Yixuan Cao Date: Thu, 28 Apr 2022 23:15:57 -0700 Subject: tools/vm/page_owner_sort.c: provide allocator labelling and update --cull and --sort options An application is suspected of having memory leak when its memory consumption is high and keeps increasing. There are several commonly used memory allocators: slab, cma, vmalloc, etc. The memory leak identification can be sped up if the page information allocated by an allocator can be analyzed separately. This patch provides supports for memory allocator labelling for slab, vmalloc, and cma. The pages allocated by slab and cma can be confirmed from the "PFN" line according to the kernel codes, and the label of the vmalloc allocator can be obtained by analyzing the stack trace. Thanks for Vlastimil Babka's constructive suggestions. Based on Yinan Zhang's study, the call chain of vmalloc() is vmalloc() -> ... -> __vmalloc_node_range() -> __vmalloc_area_node(). __vmalloc_area_node() requests memory through the interface of buddy allocation system. In the current version, __vmalloc_area_node() uses four interfaces: alloc_pages_bulk_array_mempolicy(), alloc_pages_bulk_array_node(), alloc_pages() and alloc_pages_node(). By disassembling the code, we find that __vmalloc_area_node() is expanded in __vmalloc_node_range(). 
So __vmalloc_area_node() does not appear in the stack trace. On the test machine, the stack trace of pages allocated by vmalloc has the following four forms, each a pair of consecutive frames: (1) __alloc_pages_bulk+0x230/0x6a0 followed by __vmalloc_node_range+0x19c/0x598; (2) alloc_pages_bulk_array_mempolicy+0xbc/0x278 followed by __vmalloc_node_range+0x1e8/0x598; (3) __alloc_pages+0x160/0x2b0 followed by __vmalloc_node_range+0x234/0x598; (4) alloc_pages+0xac/0x150 followed by __vmalloc_node_range+0x44c/0x598. Therefore, in two consecutive lines of stacktrace, if the first line contains the word "alloc_pages" and the second line contains the word "__vmalloc_node_range", it can be determined that the page is allocated by vmalloc. The function offsets and sizes differ between machines, so there is no need to match them. At the same time, this patch updates the --cull and --sort options to support allocator-based merge statistics and sorting. The added functions are fully compatible with the original work. The key can be given as "allocator" or abbreviated as "ator". Relevant updates have also been made in the documentation (Documentation/vm/page_owner.rst). Examples: ./page_owner_sort --cull=st,pid,name,allocator ./page_owner_sort --sort=ator,pid,name This work is coauthored by Jiajian Ye, Yinan Zhang, Shenghong Han, Chongxi Zhao, Yuhong Feng and Yongqiang Liu. 
Link: https://lkml.kernel.org/r/20220410132932.9402-1-caoyixuan2019@email.szu.edu.cn Signed-off-by: Yixuan Cao Cc: Chongxi Zhao Cc: Haowen Bai Cc: Jiajian Ye Cc: Sean Anderson Cc: Shenghong Han Cc: Yinan Zhang Cc: Yongqiang Liu Cc: Yuhong Feng Signed-off-by: Andrew Morton --- Documentation/vm/page_owner.rst | 3 +- tools/vm/page_owner_sort.c | 112 ++++++++++++++++++++++++++++++++++------ 2 files changed, 99 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst index 523bf3419512..25622c715823 100644 --- a/Documentation/vm/page_owner.rst +++ b/Documentation/vm/page_owner.rst @@ -183,6 +183,7 @@ For --sort option: T txt full text of block ft free_ts timestamp of the page when it was released at alloc_ts timestamp of the page when it was allocated + ator allocator memory allocator for pages For --curl option: @@ -192,4 +193,4 @@ For --curl option: n name task command name f free whether the page has been released or not st stacktrace stack trace of the page allocation - + ator allocator memory allocator for pages diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c index a32e446e5bb2..fa2e4d2a9d68 100644 --- a/tools/vm/page_owner_sort.c +++ b/tools/vm/page_owner_sort.c @@ -39,6 +39,7 @@ struct block_list { int page_num; pid_t pid; pid_t tgid; + int allocator; }; enum FILTER_BIT { FILTER_UNRELEASE = 1<<1, @@ -51,11 +52,19 @@ enum CULL_BIT { CULL_PID = 1<<2, CULL_TGID = 1<<3, CULL_COMM = 1<<4, - CULL_STACKTRACE = 1<<5 + CULL_STACKTRACE = 1<<5, + CULL_ALLOCATOR = 1<<6 +}; +enum ALLOCATOR_BIT { + ALLOCATOR_CMA = 1<<1, + ALLOCATOR_SLAB = 1<<2, + ALLOCATOR_VMALLOC = 1<<3, + ALLOCATOR_OTHERS = 1<<4 }; enum ARG_TYPE { ARG_TXT, ARG_COMM, ARG_STACKTRACE, ARG_ALLOC_TS, ARG_FREE_TS, - ARG_CULL_TIME, ARG_PAGE_NUM, ARG_PID, ARG_TGID, ARG_UNKNOWN, ARG_FREE + ARG_CULL_TIME, ARG_PAGE_NUM, ARG_PID, ARG_TGID, ARG_UNKNOWN, ARG_FREE, + ARG_ALLOCATOR }; enum SORT_ORDER { SORT_ASC = 1, @@ -89,15 
+98,20 @@ static int cull; static int filter; static bool debug_on; -int read_block(char *buf, int buf_size, FILE *fin) +static void set_single_cmp(int (*cmp)(const void *, const void *), int sign); + +int read_block(char *buf, char *ext_buf, int buf_size, FILE *fin) { char *curr = buf, *const buf_end = buf + buf_size; while (buf_end - curr > 1 && fgets(curr, buf_end - curr, fin)) { - if (*curr == '\n') /* empty line */ + if (*curr == '\n') { /* empty line */ return curr - buf; - if (!strncmp(curr, "PFN", 3)) + } + if (!strncmp(curr, "PFN", 3)) { + strcpy(ext_buf, curr); continue; + } curr += strlen(curr); } @@ -146,6 +160,13 @@ static int compare_tgid(const void *p1, const void *p2) return l1->tgid - l2->tgid; } +static int compare_allocator(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return l1->allocator - l2->allocator; +} + static int compare_comm(const void *p1, const void *p2) { const struct block_list *l1 = p1, *l2 = p2; @@ -192,6 +213,8 @@ static int compare_cull_condition(const void *p1, const void *p2) return compare_comm(p1, p2); if ((cull & CULL_UNRELEASE) && compare_release(p1, p2)) return compare_release(p1, p2); + if ((cull & CULL_ALLOCATOR) && compare_allocator(p1, p2)) + return compare_allocator(p1, p2); return 0; } @@ -395,11 +418,42 @@ static int get_arg_type(const char *arg) return ARG_FREE_TS; else if (!strcmp(arg, "alloc_ts") || !strcmp(arg, "at")) return ARG_ALLOC_TS; + else if (!strcmp(arg, "allocator") || !strcmp(arg, "ator")) + return ARG_ALLOCATOR; else { return ARG_UNKNOWN; } } +static int get_allocator(const char *buf, const char *migrate_info) +{ + char *tmp, *first_line, *second_line; + int allocator = 0; + + if (strstr(migrate_info, "CMA")) + allocator |= ALLOCATOR_CMA; + if (strstr(migrate_info, "slab")) + allocator |= ALLOCATOR_SLAB; + tmp = strstr(buf, "__vmalloc_node_range"); + if (tmp) { + second_line = tmp; + while (*tmp != '\n') + tmp--; + tmp--; + while (*tmp != '\n') + tmp--; + 
first_line = ++tmp; + tmp = strstr(tmp, "alloc_pages"); + if (tmp) { + if (tmp && first_line <= tmp && tmp < second_line) + allocator |= ALLOCATOR_VMALLOC; + } + } + if (allocator == 0) + allocator = ALLOCATOR_OTHERS; + return allocator; +} + static bool match_num_list(int num, int *list, int list_size) { for (int i = 0; i < list_size; ++i) @@ -437,7 +491,7 @@ static bool is_need(char *buf) return true; } -static void add_list(char *buf, int len) +static void add_list(char *buf, int len, char *ext_buf) { if (list_size != 0 && len == list[list_size-1].len && @@ -471,6 +525,7 @@ static void add_list(char *buf, int len) list[list_size].stacktrace++; list[list_size].ts_nsec = get_ts_nsec(buf); list[list_size].free_ts_nsec = get_free_ts_nsec(buf); + list[list_size].allocator = get_allocator(buf, ext_buf); list_size++; if (list_size % 1000 == 0) { printf("loaded %d\r", list_size); @@ -496,12 +551,16 @@ static bool parse_cull_args(const char *arg_str) cull |= CULL_STACKTRACE; else if (arg_type == ARG_FREE) cull |= CULL_UNRELEASE; + else if (arg_type == ARG_ALLOCATOR) + cull |= CULL_ALLOCATOR; else { free_explode(args, size); return false; } } free_explode(args, size); + if (sc.size == 0) + set_single_cmp(compare_num, SORT_DESC); return true; } @@ -556,6 +615,8 @@ static bool parse_sort_args(const char *arg_str) sc.cmps[i] = compare_free_ts; else if (arg_type == ARG_TXT) sc.cmps[i] = compare_txt; + else if (arg_type == ARG_ALLOCATOR) + sc.cmps[i] = compare_allocator; else { free_explode(args, size); sc.size = 0; @@ -588,6 +649,19 @@ static int *parse_nums_list(char *arg_str, int *list_size) return list; } +static void print_allocator(FILE *out, int allocator) +{ + fprintf(out, "allocated by "); + if (allocator & ALLOCATOR_CMA) + fprintf(out, "CMA "); + if (allocator & ALLOCATOR_SLAB) + fprintf(out, "SLAB "); + if (allocator & ALLOCATOR_VMALLOC) + fprintf(out, "VMALLOC "); + if (allocator & ALLOCATOR_OTHERS) + fprintf(out, "OTHERS "); +} + #define BUF_SIZE (128 * 1024) 
static void usage(void) @@ -614,8 +688,8 @@ static void usage(void) int main(int argc, char **argv) { FILE *fin, *fout; - char *buf; - int ret, i, count; + char *buf, *ext_buf; + int i, count; struct stat st; int opt; struct option longopts[] = { @@ -724,16 +798,18 @@ int main(int argc, char **argv) list = malloc(max_size * sizeof(*list)); buf = malloc(BUF_SIZE); - if (!list || !buf) { + ext_buf = malloc(BUF_SIZE); + if (!list || !buf || !ext_buf) { fprintf(stderr, "Out of memory\n"); exit(1); } for ( ; ; ) { - ret = read_block(buf, BUF_SIZE, fin); - if (ret < 0) + int buf_len = read_block(buf, ext_buf, BUF_SIZE, fin); + + if (buf_len < 0) break; - add_list(buf, ret); + add_list(buf, buf_len, ext_buf); } printf("loaded %d\n", list_size); @@ -757,9 +833,11 @@ int main(int argc, char **argv) qsort(list, count, sizeof(list[0]), compare_sort_condition); for (i = 0; i < count; i++) { - if (cull == 0) - fprintf(fout, "%d times, %d pages:\n%s\n", - list[i].num, list[i].page_num, list[i].txt); + if (cull == 0) { + fprintf(fout, "%d times, %d pages, ", list[i].num, list[i].page_num); + print_allocator(fout, list[i].allocator); + fprintf(fout, ":\n%s\n", list[i].txt); + } else { fprintf(fout, "%d times, %d pages", list[i].num, list[i].page_num); @@ -769,6 +847,10 @@ int main(int argc, char **argv) fprintf(fout, ", TGID %d", list[i].pid); if (cull & CULL_COMM || filter & FILTER_COMM) fprintf(fout, ", task_comm_name: %s", list[i].comm); + if (cull & CULL_ALLOCATOR) { + fprintf(fout, ", "); + print_allocator(fout, list[i].allocator); + } if (cull & CULL_UNRELEASE) fprintf(fout, " (%s)", list[i].free_ts_nsec ? "UNRELEASED" : "RELEASED"); -- cgit v1.2.3 From c7c4ab859642830a14c45785ca7866659b65fc44 Mon Sep 17 00:00:00 2001 From: Yixuan Cao Date: Thu, 28 Apr 2022 23:15:57 -0700 Subject: tools/vm/page_owner_sort.c: avoid repeated judgments I noticed a detail that needs to be adjusted. 
When judging whether a page is allocated by vmalloc, the value of the variable "tmp" was repeatedly judged, so the code was adjusted. This work is coauthored by Yinan Zhang, Jiajian Ye, Shenghong Han, Chongxi Zhao, Yuhong Feng and Yongqiang Liu. Link: https://lkml.kernel.org/r/20220414042744.13896-1-caoyixuan2019@email.szu.edu.cn Signed-off-by: Yixuan Cao Cc: Chongxi Zhao Cc: Haowen Bai Cc: Jiajian Ye Cc: Sean Anderson Cc: Shenghong Han Cc: Yinan Zhang Cc: Yongqiang Liu Cc: Yuhong Feng Signed-off-by: Andrew Morton --- tools/vm/page_owner_sort.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c index fa2e4d2a9d68..c149427eb1c9 100644 --- a/tools/vm/page_owner_sort.c +++ b/tools/vm/page_owner_sort.c @@ -444,10 +444,8 @@ static int get_allocator(const char *buf, const char *migrate_info) tmp--; first_line = ++tmp; tmp = strstr(tmp, "alloc_pages"); - if (tmp) { - if (tmp && first_line <= tmp && tmp < second_line) - allocator |= ALLOCATOR_VMALLOC; - } + if (tmp && first_line <= tmp && tmp < second_line) + allocator |= ALLOCATOR_VMALLOC; } if (allocator == 0) allocator = ALLOCATOR_OTHERS; -- cgit v1.2.3 From c85bcc912f4f404bf6eaf4b6bdb8480ef2c2faa1 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 28 Apr 2022 23:15:59 -0700 Subject: kselftests: memcg: update the oom group leaf events test Patch series "mm: memcg kselftests fixes". This patch (of 4): Commit 9852ae3fe529 ("mm, memcg: consider subtrees in memory.events") made memory.events recursive: all events are propagated upwards by the tree. It was a change in semantics. It broke the oom group leaf events test: it assumes that after an OOM the oom_kill counter is zero on parent's level. Let's adjust the test: it should have similar expectations for the child and parent levels. The test passes after this fix. 
Link: https://lkml.kernel.org/r/20220415000133.3955987-2-roman.gushchin@linux.dev Link: https://lkml.kernel.org/r/20220415000133.3955987-1-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Reviewed-by: David Vernet Cc: Chris Down Cc: Johannes Weiner Cc: Michal Hocko Cc: Shakeel Butt Cc: Tejun Heo Cc: Zefan Li Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_memcontrol.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 36ccf2322e21..00b430e7f2a2 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -1079,7 +1079,8 @@ cleanup: /* * This test disables swapping and tries to allocate anonymous memory * up to OOM with memory.group.oom set. Then it checks that all - * processes in the leaf (but not the parent) were killed. + * processes in the leaf were killed. It also checks that oom_events + * were propagated to the parent level. */ static int test_memcg_oom_group_leaf_events(const char *root) { @@ -1122,7 +1123,7 @@ static int test_memcg_oom_group_leaf_events(const char *root) if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) goto cleanup; - if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0) + if (cg_read_key_long(parent, "memory.events", "oom_kill ") <= 0) goto cleanup; ret = KSFT_PASS; -- cgit v1.2.3 From be74553f250fb2154375b8e14e9f9b58aafd23b0 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 28 Apr 2022 23:15:59 -0700 Subject: kselftests: memcg: speed up the memory.high test After commit 0e4b01df8659 ("mm, memcg: throttle allocators when failing reclaim over memory.high") allocating memory over memory.high became very time consuming. But it's exactly what the memory.high test from cgroup kselftests is doing: it tries to allocate 100M with 30M memory.high value. It takes forever to complete. 
In order to keep it passing (or failing) in a reasonable amount of time let's try to allocate only a little over 30M: 31M to be precise. With this change test_memcontrol finishes in a reasonable amount of time: $ time ./test_memcontrol ok 1 test_memcg_subtree_control ok 2 test_memcg_current ok 3 test_memcg_min ok 4 test_memcg_low ok 5 test_memcg_high ok 6 test_memcg_max ok 7 test_memcg_oom_events ok 8 test_memcg_swap_max ok 9 test_memcg_sock ok 10 test_memcg_oom_group_leaf_events ok 11 test_memcg_oom_group_parent_events ok 12 test_memcg_oom_group_score_events real 0m2.273s user 0m0.064s sys 0m0.739s Link: https://lkml.kernel.org/r/20220415000133.3955987-3-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Reviewed-by: David Vernet Cc: Chris Down Cc: Johannes Weiner Cc: Michal Hocko Cc: Shakeel Butt Cc: Tejun Heo Cc: Zefan Li Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 00b430e7f2a2..9c1f19fe2e37 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -607,7 +607,7 @@ static int test_memcg_high(const char *root) if (cg_write(memcg, "memory.high", "30M")) goto cleanup; - if (cg_run(memcg, alloc_anon, (void *)MB(100))) + if (cg_run(memcg, alloc_anon, (void *)MB(31))) goto cleanup; if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) -- cgit v1.2.3 From 0c2d08728470b93a3f05416f53222f38a89868e2 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 28 Apr 2022 23:16:07 -0700 Subject: mm: add selftests for migration entries Add some basic migration tests and in particular tests that will stress both the pte and pmd migration entry wait paths. 
Link: https://lkml.kernel.org/r/20220324014349.229253-1-apopple@nvidia.com Signed-off-by: Alistair Popple Cc: Hugh Dickins Cc: Jan Kara Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox (Oracle) Cc: Ralph Campbell Cc: Muchun Song Cc: John Hubbard Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/Makefile | 3 + tools/testing/selftests/vm/migration.c | 193 +++++++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100644 tools/testing/selftests/vm/migration.c (limited to 'tools') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 04a49e876a46..ff0c7a87785b 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -41,6 +41,7 @@ TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate TEST_GEN_FILES += memfd_secret +TEST_GEN_FILES += migration TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock2-tests TEST_GEN_FILES += mremap_dontunmap @@ -149,6 +150,8 @@ $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS) $(OUTPUT)/ksm_tests: LDLIBS += -lnuma +$(OUTPUT)/migration: LDLIBS += -lnuma + local_config.mk local_config.h: check_config.sh /bin/sh ./check_config.sh $(CC) diff --git a/tools/testing/selftests/vm/migration.c b/tools/testing/selftests/vm/migration.c new file mode 100644 index 000000000000..1cec8425e3ca --- /dev/null +++ b/tools/testing/selftests/vm/migration.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The main purpose of the tests here is to exercise the migration entry code + * paths in the kernel. 
+ */ + +#include "../kselftest_harness.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#define TWOMEG (2<<20) +#define RUNTIME (60) + +#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) + +FIXTURE(migration) +{ + pthread_t *threads; + pid_t *pids; + int nthreads; + int n1; + int n2; +}; + +FIXTURE_SETUP(migration) +{ + int n; + + ASSERT_EQ(numa_available(), 0); + self->nthreads = numa_num_task_cpus() - 1; + self->n1 = -1; + self->n2 = -1; + + for (n = 0; n < numa_max_possible_node(); n++) + if (numa_bitmask_isbitset(numa_all_nodes_ptr, n)) { + if (self->n1 == -1) { + self->n1 = n; + } else { + self->n2 = n; + break; + } + } + + self->threads = malloc(self->nthreads * sizeof(*self->threads)); + ASSERT_NE(self->threads, NULL); + self->pids = malloc(self->nthreads * sizeof(*self->pids)); + ASSERT_NE(self->pids, NULL); +}; + +FIXTURE_TEARDOWN(migration) +{ + free(self->threads); + free(self->pids); +} + +int migrate(uint64_t *ptr, int n1, int n2) +{ + int ret, tmp; + int status = 0; + struct timespec ts1, ts2; + + if (clock_gettime(CLOCK_MONOTONIC, &ts1)) + return -1; + + while (1) { + if (clock_gettime(CLOCK_MONOTONIC, &ts2)) + return -1; + + if (ts2.tv_sec - ts1.tv_sec >= RUNTIME) + return 0; + + ret = move_pages(0, 1, (void **) &ptr, &n2, &status, + MPOL_MF_MOVE_ALL); + if (ret) { + if (ret > 0) + printf("Didn't migrate %d pages\n", ret); + else + perror("Couldn't migrate pages"); + return -2; + } + + tmp = n2; + n2 = n1; + n1 = tmp; + } + + return 0; +} + +void *access_mem(void *ptr) +{ + uint64_t y = 0; + volatile uint64_t *x = ptr; + + while (1) { + pthread_testcancel(); + y += *x; + } + + return NULL; +} + +/* + * Basic migration entry testing. One thread will move pages back and forth + * between nodes whilst other threads try and access them triggering the + * migration entry wait paths in the kernel. 
+ */ +TEST_F_TIMEOUT(migration, private_anon, 2*RUNTIME) +{ + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) + if (pthread_create(&self->threads[i], NULL, access_mem, ptr)) + perror("Couldn't create thread"); + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(pthread_cancel(self->threads[i]), 0); +} + +/* + * Same as the previous test but with shared memory. + */ +TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME) +{ + pid_t pid; + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) { + pid = fork(); + if (!pid) + access_mem(ptr); + else + self->pids[i] = pid; + } + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(kill(self->pids[i], SIGTERM), 0); +} + +/* + * Tests the pmd migration entry paths. 
+ */ +TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME) +{ + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, 2*TWOMEG, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG); + ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0); + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) + if (pthread_create(&self->threads[i], NULL, access_mem, ptr)) + perror("Couldn't create thread"); + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(pthread_cancel(self->threads[i]), 0); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From 62e80f2b5072ed80a41fc6a272e44e8e17fdcf66 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 28 Apr 2022 23:16:10 -0700 Subject: tools/testing/selftests/vm/gup_test.c: clarify error statement Print three possible reasons /sys/kernel/debug/gup_test cannot be opened to help users of this test diagnose failures. 
Link: https://lkml.kernel.org/r/20220405214809.3351223-1-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/gup_test.c | 22 +++++++++++++++++++-- tools/testing/selftests/vm/run_vmtests.sh | 33 ++++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c index cda837a14736..593262555e18 100644 --- a/tools/testing/selftests/vm/gup_test.c +++ b/tools/testing/selftests/vm/gup_test.c @@ -1,7 +1,9 @@ #include +#include #include #include #include +#include #include #include #include @@ -9,6 +11,7 @@ #include #include #include "../../../../mm/gup_test.h" +#include "../kselftest.h" #include "util.h" @@ -206,8 +209,23 @@ int main(int argc, char **argv) gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); if (gup_fd == -1) { - perror("open"); - exit(1); + switch (errno) { + case EACCES: + if (getuid()) + printf("Please run this test as root\n"); + break; + case ENOENT: + if (opendir("/sys/kernel/debug") == NULL) { + printf("mount debugfs at /sys/kernel/debug\n"); + break; + } + printf("check if CONFIG_GUP_TEST is enabled in kernel config\n"); + break; + default: + perror("failed to open /sys/kernel/debug/gup_test"); + break; + } + exit(KSFT_SKIP); } p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0); diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 352ba00cf26b..8865ff365cc6 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -162,22 +162,32 @@ echo "------------------------------------------------------" echo "running: gup_test -u # get_user_pages_fast() benchmark" echo "------------------------------------------------------" ./gup_test -u -if [ $? -ne 0 ]; then +ret_val=$? 
+ +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else echo "[FAIL]" exitcode=1 -else - echo "[PASS]" fi echo "------------------------------------------------------" echo "running: gup_test -a # pin_user_pages_fast() benchmark" echo "------------------------------------------------------" ./gup_test -a -if [ $? -ne 0 ]; then +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else echo "[FAIL]" exitcode=1 -else - echo "[PASS]" fi echo "------------------------------------------------------------" @@ -185,11 +195,16 @@ echo "# Dump pages 0, 19, and 4096, using pin_user_pages:" echo "running: gup_test -ct -F 0x1 0 19 0x1000 # dump_page() test" echo "------------------------------------------------------------" ./gup_test -ct -F 0x1 0 19 0x1000 -if [ $? -ne 0 ]; then +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else echo "[FAIL]" exitcode=1 -else - echo "[PASS]" fi echo "-------------------" -- cgit v1.2.3 From 642bc52aed9c99e8c9c9cfb6781f77719717a36c Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Thu, 28 Apr 2022 23:16:11 -0700 Subject: selftests: vm: bring common functions to a new file Bring common functions to a new file while keeping code as much same as possible. These functions can be used in the new tests. This helps in avoiding code duplication. 
Link: https://lkml.kernel.org/r/20220420084036.4101604-1-usama.anjum@collabora.com Signed-off-by: Muhammad Usama Anjum Acked-by: David Hildenbrand Cc: Gabriel Krisman Bertazi Cc: Shuah Khan Cc: Will Deacon Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/Makefile | 7 +- tools/testing/selftests/vm/madv_populate.c | 34 +------ tools/testing/selftests/vm/split_huge_page_test.c | 79 +--------------- tools/testing/selftests/vm/vm_util.c | 108 ++++++++++++++++++++++ tools/testing/selftests/vm/vm_util.h | 9 ++ 5 files changed, 124 insertions(+), 113 deletions(-) create mode 100644 tools/testing/selftests/vm/vm_util.c create mode 100644 tools/testing/selftests/vm/vm_util.h (limited to 'tools') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index ff0c7a87785b..6fd967839ccd 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -36,7 +36,7 @@ TEST_GEN_FILES += hugepage-mremap TEST_GEN_FILES += hugepage-shm TEST_GEN_FILES += hugepage-vmemmap TEST_GEN_FILES += khugepaged -TEST_GEN_FILES += madv_populate +TEST_GEN_PROGS = madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate @@ -50,7 +50,7 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd -TEST_GEN_FILES += split_huge_page_test +TEST_GEN_PROGS += split_huge_page_test TEST_GEN_FILES += ksm_tests ifeq ($(MACHINE),x86_64) @@ -94,6 +94,9 @@ TEST_FILES := test_vmalloc.sh KSFT_KHDR_INSTALL := 1 include ../lib.mk +$(OUTPUT)/madv_populate: vm_util.c +$(OUTPUT)/split_huge_page_test: vm_util.c + ifeq ($(MACHINE),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c index 3ee0e8275600..715a42e8e2cd 100644 --- 
a/tools/testing/selftests/vm/madv_populate.c +++ b/tools/testing/selftests/vm/madv_populate.c @@ -18,6 +18,7 @@ #include #include "../kselftest.h" +#include "vm_util.h" /* * For now, we're using 2 MiB of private anonymous memory for all tests. @@ -26,18 +27,6 @@ static size_t pagesize; -static uint64_t pagemap_get_entry(int fd, char *start) -{ - const unsigned long pfn = (unsigned long)start / pagesize; - uint64_t entry; - int ret; - - ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry)); - if (ret != sizeof(entry)) - ksft_exit_fail_msg("reading pagemap failed\n"); - return entry; -} - static bool pagemap_is_populated(int fd, char *start) { uint64_t entry = pagemap_get_entry(fd, start); @@ -46,13 +35,6 @@ static bool pagemap_is_populated(int fd, char *start) return entry & 0xc000000000000000ull; } -static bool pagemap_is_softdirty(int fd, char *start) -{ - uint64_t entry = pagemap_get_entry(fd, start); - - return entry & 0x0080000000000000ull; -} - static void sense_support(void) { char *addr; @@ -258,20 +240,6 @@ static bool range_is_not_softdirty(char *start, ssize_t size) return ret; } -static void clear_softdirty(void) -{ - int fd = open("/proc/self/clear_refs", O_WRONLY); - const char *ctrl = "4"; - int ret; - - if (fd < 0) - ksft_exit_fail_msg("opening clear_refs failed\n"); - ret = write(fd, ctrl, strlen(ctrl)); - if (ret != strlen(ctrl)) - ksft_exit_fail_msg("writing clear_refs failed\n"); - close(fd); -} - static void test_softdirty(void) { char *addr; diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c index 52497b7b9f1d..6aa2b8253aed 100644 --- a/tools/testing/selftests/vm/split_huge_page_test.c +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -16,14 +16,13 @@ #include #include #include +#include "vm_util.h" uint64_t pagesize; unsigned int pageshift; uint64_t pmd_pagesize; -#define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" #define SPLIT_DEBUGFS 
"/sys/kernel/debug/split_huge_pages" -#define SMAP_PATH "/proc/self/smaps" #define INPUT_MAX 80 #define PID_FMT "%d,0x%lx,0x%lx" @@ -51,30 +50,6 @@ int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) return 0; } - -static uint64_t read_pmd_pagesize(void) -{ - int fd; - char buf[20]; - ssize_t num_read; - - fd = open(PMD_SIZE_PATH, O_RDONLY); - if (fd == -1) { - perror("Open hpage_pmd_size failed"); - exit(EXIT_FAILURE); - } - num_read = read(fd, buf, 19); - if (num_read < 1) { - close(fd); - perror("Read hpage_pmd_size failed"); - exit(EXIT_FAILURE); - } - buf[num_read] = '\0'; - close(fd); - - return strtoul(buf, NULL, 10); -} - static int write_file(const char *path, const char *buf, size_t buflen) { int fd; @@ -113,58 +88,6 @@ static void write_debugfs(const char *fmt, ...) } } -#define MAX_LINE_LENGTH 500 - -static bool check_for_pattern(FILE *fp, const char *pattern, char *buf) -{ - while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { - if (!strncmp(buf, pattern, strlen(pattern))) - return true; - } - return false; -} - -static uint64_t check_huge(void *addr) -{ - uint64_t thp = 0; - int ret; - FILE *fp; - char buffer[MAX_LINE_LENGTH]; - char addr_pattern[MAX_LINE_LENGTH]; - - ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", - (unsigned long) addr); - if (ret >= MAX_LINE_LENGTH) { - printf("%s: Pattern is too long\n", __func__); - exit(EXIT_FAILURE); - } - - - fp = fopen(SMAP_PATH, "r"); - if (!fp) { - printf("%s: Failed to open file %s\n", __func__, SMAP_PATH); - exit(EXIT_FAILURE); - } - if (!check_for_pattern(fp, addr_pattern, buffer)) - goto err_out; - - /* - * Fetch the AnonHugePages: in the same block and check the number of - * hugepages. 
- */ - if (!check_for_pattern(fp, "AnonHugePages:", buffer)) - goto err_out; - - if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) { - printf("Reading smap error\n"); - exit(EXIT_FAILURE); - } - -err_out: - fclose(fp); - return thp; -} - void split_pmd_thp(void) { char *one_page; diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c new file mode 100644 index 000000000000..b58ab11a7a30 --- /dev/null +++ b/tools/testing/selftests/vm/vm_util.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "../kselftest.h" +#include "vm_util.h" + +#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" +#define SMAP_FILE_PATH "/proc/self/smaps" +#define MAX_LINE_LENGTH 500 + +uint64_t pagemap_get_entry(int fd, char *start) +{ + const unsigned long pfn = (unsigned long)start / getpagesize(); + uint64_t entry; + int ret; + + ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry)); + if (ret != sizeof(entry)) + ksft_exit_fail_msg("reading pagemap failed\n"); + return entry; +} + +bool pagemap_is_softdirty(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + // Check if dirty bit (55th bit) is set + return entry & 0x0080000000000000ull; +} + +void clear_softdirty(void) +{ + int ret; + const char *ctrl = "4"; + int fd = open("/proc/self/clear_refs", O_WRONLY); + + if (fd < 0) + ksft_exit_fail_msg("opening clear_refs failed\n"); + ret = write(fd, ctrl, strlen(ctrl)); + close(fd); + if (ret != strlen(ctrl)) + ksft_exit_fail_msg("writing clear_refs failed\n"); +} + +static bool check_for_pattern(FILE *fp, const char *pattern, char *buf) +{ + while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { + if (!strncmp(buf, pattern, strlen(pattern))) + return true; + } + return false; +} + +uint64_t read_pmd_pagesize(void) +{ + int fd; + char buf[20]; + ssize_t num_read; + + fd = open(PMD_SIZE_FILE_PATH, O_RDONLY); + if (fd == -1) + ksft_exit_fail_msg("Open hpage_pmd_size 
failed\n"); + + num_read = read(fd, buf, 19); + if (num_read < 1) { + close(fd); + ksft_exit_fail_msg("Read hpage_pmd_size failed\n"); + } + buf[num_read] = '\0'; + close(fd); + + return strtoul(buf, NULL, 10); +} + +uint64_t check_huge(void *addr) +{ + uint64_t thp = 0; + int ret; + FILE *fp; + char buffer[MAX_LINE_LENGTH]; + char addr_pattern[MAX_LINE_LENGTH]; + + ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", + (unsigned long) addr); + if (ret >= MAX_LINE_LENGTH) + ksft_exit_fail_msg("%s: Pattern is too long\n", __func__); + + fp = fopen(SMAP_FILE_PATH, "r"); + if (!fp) + ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH); + + if (!check_for_pattern(fp, addr_pattern, buffer)) + goto err_out; + + /* + * Fetch the AnonHugePages: in the same block and check the number of + * hugepages. + */ + if (!check_for_pattern(fp, "AnonHugePages:", buffer)) + goto err_out; + + if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) + ksft_exit_fail_msg("Reading smap error\n"); + +err_out: + fclose(fp); + return thp; +} diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h new file mode 100644 index 000000000000..2e512bd57ae1 --- /dev/null +++ b/tools/testing/selftests/vm/vm_util.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include + +uint64_t pagemap_get_entry(int fd, char *start); +bool pagemap_is_softdirty(int fd, char *start); +void clear_softdirty(void); +uint64_t read_pmd_pagesize(void); +uint64_t check_huge(void *addr); -- cgit v1.2.3 From 9f3265db6ae87de27a5e382410b8eb9af53b161e Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 28 Apr 2022 23:16:11 -0700 Subject: selftests: vm: add test for Soft-Dirty PTE bit This introduces three tests: 1) Sanity check soft dirty basic semantics: allocate area, clean, dirty, check if the SD bit is flipped. 
2) Check VMA reuse: validate the VM_SOFTDIRTY usage 3) Check soft-dirty on huge pages This was motivated by Will Deacon's fix commit 912efa17e512 ("mm: proc: Invalidate TLB after clearing soft-dirty page state"). I was tracking the same issue that he fixed, and this test would have caught it. Link: https://lkml.kernel.org/r/20220420084036.4101604-2-usama.anjum@collabora.com Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Muhammad Usama Anjum Co-developed-by: Muhammad Usama Anjum Cc: Will Deacon Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 2 + tools/testing/selftests/vm/config | 2 + tools/testing/selftests/vm/soft-dirty.c | 145 ++++++++++++++++++++++++++++++++ 4 files changed, 150 insertions(+) create mode 100644 tools/testing/selftests/vm/soft-dirty.c (limited to 'tools') diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index d7507f3c7c76..3cb4fa771ec2 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -29,5 +29,6 @@ write_to_hugetlbfs hmm-tests memfd_secret local_config.* +soft-dirty split_huge_page_test ksm_tests diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 6fd967839ccd..f1228370e99b 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -50,6 +50,7 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd +TEST_GEN_PROGS += soft-dirty TEST_GEN_PROGS += split_huge_page_test TEST_GEN_FILES += ksm_tests @@ -95,6 +96,7 @@ KSFT_KHDR_INSTALL := 1 include ../lib.mk $(OUTPUT)/madv_populate: vm_util.c +$(OUTPUT)/soft-dirty: vm_util.c $(OUTPUT)/split_huge_page_test: vm_util.c ifeq ($(MACHINE),x86_64) diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config index 60e82da0de85..be087c4bc396 100644 --- 
a/tools/testing/selftests/vm/config +++ b/tools/testing/selftests/vm/config @@ -4,3 +4,5 @@ CONFIG_TEST_VMALLOC=m CONFIG_DEVICE_PRIVATE=y CONFIG_TEST_HMM=m CONFIG_GUP_TEST=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_MEM_SOFT_DIRTY=y diff --git a/tools/testing/selftests/vm/soft-dirty.c b/tools/testing/selftests/vm/soft-dirty.c new file mode 100644 index 000000000000..08ab62a4a9d0 --- /dev/null +++ b/tools/testing/selftests/vm/soft-dirty.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include "../kselftest.h" +#include "vm_util.h" + +#define PAGEMAP_FILE_PATH "/proc/self/pagemap" +#define TEST_ITERATIONS 10000 + +static void test_simple(int pagemap_fd, int pagesize) +{ + int i; + char *map; + + map = aligned_alloc(pagesize, pagesize); + if (!map) + ksft_exit_fail_msg("mmap failed\n"); + + clear_softdirty(); + + for (i = 0 ; i < TEST_ITERATIONS; i++) { + if (pagemap_is_softdirty(pagemap_fd, map) == 1) { + ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i); + break; + } + + clear_softdirty(); + // Write something to the page to get the dirty bit enabled on the page + map[0]++; + + if (pagemap_is_softdirty(pagemap_fd, map) == 0) { + ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i); + break; + } + + clear_softdirty(); + } + free(map); + + ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__); +} + +static void test_vma_reuse(int pagemap_fd, int pagesize) +{ + char *map, *map2; + + map = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0); + if (map == MAP_FAILED) + ksft_exit_fail_msg("mmap failed"); + + // The kernel always marks new regions as soft dirty + ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1, + "Test %s dirty bit of allocated page\n", __func__); + + clear_softdirty(); + munmap(map, pagesize); + + map2 = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0); + if (map2 == MAP_FAILED) 
+ ksft_exit_fail_msg("mmap failed"); + + // Dirty bit is set for new regions even if they are reused + if (map == map2) + ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1, + "Test %s dirty bit of reused address page\n", __func__); + else + ksft_test_result_skip("Test %s dirty bit of reused address page\n", __func__); + + munmap(map2, pagesize); +} + +static void test_hugepage(int pagemap_fd, int pagesize) +{ + char *map; + int i, ret; + size_t hpage_len = read_pmd_pagesize(); + + map = memalign(hpage_len, hpage_len); + if (!map) + ksft_exit_fail_msg("memalign failed\n"); + + ret = madvise(map, hpage_len, MADV_HUGEPAGE); + if (ret) + ksft_exit_fail_msg("madvise failed %d\n", ret); + + for (i = 0; i < hpage_len; i++) + map[i] = (char)i; + + if (check_huge(map)) { + ksft_test_result_pass("Test %s huge page allocation\n", __func__); + + clear_softdirty(); + for (i = 0 ; i < TEST_ITERATIONS ; i++) { + if (pagemap_is_softdirty(pagemap_fd, map) == 1) { + ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i); + break; + } + + clear_softdirty(); + // Write something to the page to get the dirty bit enabled on the page + map[0]++; + + if (pagemap_is_softdirty(pagemap_fd, map) == 0) { + ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i); + break; + } + clear_softdirty(); + } + + ksft_test_result(i == TEST_ITERATIONS, "Test %s huge page dirty bit\n", __func__); + } else { + // hugepage allocation failed. 
skip these tests + ksft_test_result_skip("Test %s huge page allocation\n", __func__); + ksft_test_result_skip("Test %s huge page dirty bit\n", __func__); + } + free(map); +} + +int main(int argc, char **argv) +{ + int pagemap_fd; + int pagesize; + + ksft_print_header(); + ksft_set_plan(5); + + pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH); + + pagesize = getpagesize(); + + test_simple(pagemap_fd, pagesize); + test_vma_reuse(pagemap_fd, pagesize); + test_hugepage(pagemap_fd, pagesize); + + close(pagemap_fd); + + return ksft_exit_pass(); +} -- cgit v1.2.3 From b67bd551201a3e2c7e1def84980e9b2f0b3a3c77 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Thu, 28 Apr 2022 23:16:11 -0700 Subject: selftests: vm: refactor run_vmtests.sh to reduce boilerplate Previously, each test printed out its own header, dealt with its own return code, etc. By just putting this standard stuff in a function, we can delete > 300 lines from the script. This also makes adding future tests easier. And, it gets rid of various inconsistencies that already exist: - Some tests correctly deal with ksft_skip, but others don't. - Some tests just print the executable name, others print arguments, and yet others print some comment in the header. - Most tests print out a header with two separator lines, but not the HMM smoke test or the memfd_secret test, which only print one. - We had a redundant "exit" at the end, with all the boilerplate it's an easy oversight. 
Link: https://lkml.kernel.org/r/20220421224928.1848230-1-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/run_vmtests.sh | 479 ++++-------------------------- 1 file changed, 64 insertions(+), 415 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 8865ff365cc6..2d5a3da42cbe 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -66,467 +66,116 @@ fi VADDR64=0 echo "$ARCH64STR" | grep $ARCH && VADDR64=1 +# Usage: run_test [test binary] [arbitrary test arguments...] +run_test() { + local title="running $*" + local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) + printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" + + "$@" + local ret=$? + if [ $ret -eq 0 ]; then + echo "[PASS]" + elif [ $ret -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip + else + echo "[FAIL]" + exitcode=1 + fi +} + mkdir $mnt mount -t hugetlbfs none $mnt -echo "---------------------" -echo "running hugepage-mmap" -echo "---------------------" -./hugepage-mmap -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./hugepage-mmap shmmax=`cat /proc/sys/kernel/shmmax` shmall=`cat /proc/sys/kernel/shmall` echo 268435456 > /proc/sys/kernel/shmmax echo 4194304 > /proc/sys/kernel/shmall -echo "--------------------" -echo "running hugepage-shm" -echo "--------------------" -./hugepage-shm -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./hugepage-shm echo $shmmax > /proc/sys/kernel/shmmax echo $shmall > /proc/sys/kernel/shmall -echo "-------------------" -echo "running map_hugetlb" -echo "-------------------" -./map_hugetlb -if [ $? 
-ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./map_hugetlb -echo "-----------------------" -echo "running hugepage-mremap" -echo "-----------------------" -./hugepage-mremap $mnt/huge_mremap -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./hugepage-mremap $mnt/huge_mremap rm -f $mnt/huge_mremap -echo "------------------------" -echo "running hugepage-vmemmap" -echo "------------------------" -./hugepage-vmemmap -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./hugepage-vmemmap -echo "-----------------------" -echo "running hugetlb-madvise" -echo "-----------------------" -./hugetlb-madvise $mnt/madvise-test -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./hugetlb-madvise $mnt/madvise-test rm -f $mnt/madvise-test echo "NOTE: The above hugetlb tests provide minimal coverage. Use" echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" echo " hugetlb regression testing." -echo "---------------------------" -echo "running map_fixed_noreplace" -echo "---------------------------" -./map_fixed_noreplace -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "------------------------------------------------------" -echo "running: gup_test -u # get_user_pages_fast() benchmark" -echo "------------------------------------------------------" -./gup_test -u -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +run_test ./map_fixed_noreplace -echo "------------------------------------------------------" -echo "running: gup_test -a # pin_user_pages_fast() benchmark" -echo "------------------------------------------------------" -./gup_test -a -ret_val=$? 
- -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +# get_user_pages_fast() benchmark +run_test ./gup_test -u +# pin_user_pages_fast() benchmark +run_test ./gup_test -a +# Dump pages 0, 19, and 4096, using pin_user_pages: +run_test ./gup_test -ct -F 0x1 0 19 0x1000 -echo "------------------------------------------------------------" -echo "# Dump pages 0, 19, and 4096, using pin_user_pages:" -echo "running: gup_test -ct -F 0x1 0 19 0x1000 # dump_page() test" -echo "------------------------------------------------------------" -./gup_test -ct -F 0x1 0 19 0x1000 -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "-------------------" -echo "running userfaultfd" -echo "-------------------" -./userfaultfd anon 20 16 -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "---------------------------" -echo "running userfaultfd_hugetlb" -echo "---------------------------" +run_test ./userfaultfd anon 20 16 # Test requires source and destination huge pages. Size of source # (half_ufd_size_MB) is passed as argument to test. -./userfaultfd hugetlb $half_ufd_size_MB 32 -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "-------------------------" -echo "running userfaultfd_shmem" -echo "-------------------------" -./userfaultfd shmem 20 16 -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./userfaultfd hugetlb $half_ufd_size_MB 32 +run_test ./userfaultfd shmem 20 16 #cleanup umount $mnt rm -rf $mnt echo $nr_hugepgs > /proc/sys/vm/nr_hugepages -echo "-----------------------" -echo "running compaction_test" -echo "-----------------------" -./compaction_test -if [ $? 
-ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./compaction_test -echo "----------------------" -echo "running on-fault-limit" -echo "----------------------" -sudo -u nobody ./on-fault-limit -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test sudo -u nobody ./on-fault-limit -echo "--------------------" -echo "running map_populate" -echo "--------------------" -./map_populate -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./map_populate -echo "-------------------------" -echo "running mlock-random-test" -echo "-------------------------" -./mlock-random-test -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./mlock-random-test -echo "--------------------" -echo "running mlock2-tests" -echo "--------------------" -./mlock2-tests -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./mlock2-tests -echo "-------------------" -echo "running mremap_test" -echo "-------------------" -./mremap_test -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +run_test ./mremap_test -echo "-----------------" -echo "running thuge-gen" -echo "-----------------" -./thuge-gen -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi +run_test ./thuge-gen if [ $VADDR64 -ne 0 ]; then -echo "-----------------------------" -echo "running virtual_address_range" -echo "-----------------------------" -./virtual_address_range -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi + run_test ./virtual_address_range -echo "-----------------------------" -echo "running virtual address 128TB switch test" -echo "-----------------------------" -./va_128TBswitch -if [ $? 
-ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi + # virtual address 128TB switch test + run_test ./va_128TBswitch fi # VADDR64 -echo "------------------------------------" -echo "running vmalloc stability smoke test" -echo "------------------------------------" -./test_vmalloc.sh smoke -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "------------------------------------" -echo "running MREMAP_DONTUNMAP smoke test" -echo "------------------------------------" -./mremap_dontunmap -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "running HMM smoke test" -echo "------------------------------------" -./test_hmm.sh smoke -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "--------------------------------------------------------" -echo "running MADV_POPULATE_READ and MADV_POPULATE_WRITE tests" -echo "--------------------------------------------------------" -./madv_populate -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "running memfd_secret test" -echo "------------------------------------" -./memfd_secret -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "-------------------------------------------------------" -echo "running KSM MADV_MERGEABLE test with 10 identical pages" -echo "-------------------------------------------------------" -./ksm_tests -M -p 10 -ret_val=$? 
- -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi - -echo "------------------------" -echo "running KSM unmerge test" -echo "------------------------" -./ksm_tests -U -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +# vmalloc stability smoke test +run_test ./test_vmalloc.sh smoke -echo "----------------------------------------------------------" -echo "running KSM test with 10 zero pages and use_zero_pages = 0" -echo "----------------------------------------------------------" -./ksm_tests -Z -p 10 -z 0 -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +run_test ./mremap_dontunmap -echo "----------------------------------------------------------" -echo "running KSM test with 10 zero pages and use_zero_pages = 1" -echo "----------------------------------------------------------" -./ksm_tests -Z -p 10 -z 1 -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +run_test ./test_hmm.sh smoke -echo "-------------------------------------------------------------" -echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 1" -echo "-------------------------------------------------------------" -./ksm_tests -N -m 1 -ret_val=$? 
- -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests +run_test ./madv_populate -echo "-------------------------------------------------------------" -echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 0" -echo "-------------------------------------------------------------" -./ksm_tests -N -m 0 -ret_val=$? - -if [ $ret_val -eq 0 ]; then - echo "[PASS]" -elif [ $ret_val -eq $ksft_skip ]; then - echo "[SKIP]" - exitcode=$ksft_skip -else - echo "[FAIL]" - exitcode=1 -fi +run_test ./memfd_secret -exit $exitcode +# KSM MADV_MERGEABLE test with 10 identical pages +run_test ./ksm_tests -M -p 10 +# KSM unmerge test +run_test ./ksm_tests -U +# KSM test with 10 zero pages and use_zero_pages = 0 +run_test ./ksm_tests -Z -p 10 -z 0 +# KSM test with 10 zero pages and use_zero_pages = 1 +run_test ./ksm_tests -Z -p 10 -z 1 +# KSM test with 2 NUMA nodes and merge_across_nodes = 1 +run_test ./ksm_tests -N -m 1 +# KSM test with 2 NUMA nodes and merge_across_nodes = 0 +run_test ./ksm_tests -N -m 0 exit $exitcode -- cgit v1.2.3 From 241ec63a9a0fbb39292ea1dd2d07f8dabedfe3df Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Thu, 28 Apr 2022 23:16:11 -0700 Subject: selftests: vm: fix shellcheck warnings in run_vmtests.sh These might not be issues yet, but they make the script more fragile. Also by fixing them we give a better example to future readers, who might copy/paste or otherwise re-use snippets from our script. - Use "read -r", since we don't ever want read to be interpreting '\' characters as escape sequences... - Quote variables, to deal with spaces properly. - Use $() instead of the older and harder-to-nest ``. - Get rid of superfluous "$" prefixes inside arithmetic $(()). 
Link: https://lkml.kernel.org/r/20220421224928.1848230-2-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/run_vmtests.sh | 55 +++++++++++++++---------------- 1 file changed, 27 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 2d5a3da42cbe..a2302b5faaf2 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -9,12 +9,12 @@ mnt=./huge exitcode=0 #get huge pagesize and freepages from /proc/meminfo -while read name size unit; do +while read -r name size unit; do if [ "$name" = "HugePages_Free:" ]; then - freepgs=$size + freepgs="$size" fi if [ "$name" = "Hugepagesize:" ]; then - hpgsize_KB=$size + hpgsize_KB="$size" fi done < /proc/meminfo @@ -30,27 +30,26 @@ needmem_KB=$((half_ufd_size_MB * 2 * 1024)) #set proper nr_hugepages if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then - nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` + nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) needpgs=$((needmem_KB / hpgsize_KB)) tries=2 - while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do - lackpgs=$(( $needpgs - $freepgs )) + while [ "$tries" -gt 0 ] && [ "$freepgs" -lt "$needpgs" ]; do + lackpgs=$((needpgs - freepgs)) echo 3 > /proc/sys/vm/drop_caches - echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages - if [ $? -ne 0 ]; then + if ! 
echo $((lackpgs + nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then echo "Please run this test as root" exit $ksft_skip fi - while read name size unit; do + while read -r name size unit; do if [ "$name" = "HugePages_Free:" ]; then freepgs=$size fi done < /proc/meminfo tries=$((tries - 1)) done - if [ $freepgs -lt $needpgs ]; then + if [ "$freepgs" -lt "$needpgs" ]; then printf "Not enough huge pages available (%d < %d)\n" \ - $freepgs $needpgs + "$freepgs" "$needpgs" exit 1 fi else @@ -60,11 +59,11 @@ fi #filter 64bit architectures ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64" -if [ -z $ARCH ]; then - ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'` +if [ -z "$ARCH" ]; then + ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/') fi VADDR64=0 -echo "$ARCH64STR" | grep $ARCH && VADDR64=1 +echo "$ARCH64STR" | grep "$ARCH" && VADDR64=1 # Usage: run_test [test binary] [arbitrary test arguments...] run_test() { @@ -85,28 +84,28 @@ run_test() { fi } -mkdir $mnt -mount -t hugetlbfs none $mnt +mkdir "$mnt" +mount -t hugetlbfs none "$mnt" run_test ./hugepage-mmap -shmmax=`cat /proc/sys/kernel/shmmax` -shmall=`cat /proc/sys/kernel/shmall` +shmmax=$(cat /proc/sys/kernel/shmmax) +shmall=$(cat /proc/sys/kernel/shmall) echo 268435456 > /proc/sys/kernel/shmmax echo 4194304 > /proc/sys/kernel/shmall run_test ./hugepage-shm -echo $shmmax > /proc/sys/kernel/shmmax -echo $shmall > /proc/sys/kernel/shmall +echo "$shmmax" > /proc/sys/kernel/shmmax +echo "$shmall" > /proc/sys/kernel/shmall run_test ./map_hugetlb -run_test ./hugepage-mremap $mnt/huge_mremap -rm -f $mnt/huge_mremap +run_test ./hugepage-mremap "$mnt"/huge_mremap +rm -f "$mnt"/huge_mremap run_test ./hugepage-vmemmap -run_test ./hugetlb-madvise $mnt/madvise-test -rm -f $mnt/madvise-test +run_test ./hugetlb-madvise "$mnt"/madvise-test +rm -f "$mnt"/madvise-test echo "NOTE: The above hugetlb tests provide minimal coverage. 
Use" echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" @@ -124,13 +123,13 @@ run_test ./gup_test -ct -F 0x1 0 19 0x1000 run_test ./userfaultfd anon 20 16 # Test requires source and destination huge pages. Size of source # (half_ufd_size_MB) is passed as argument to test. -run_test ./userfaultfd hugetlb $half_ufd_size_MB 32 +run_test ./userfaultfd hugetlb "$half_ufd_size_MB" 32 run_test ./userfaultfd shmem 20 16 #cleanup -umount $mnt -rm -rf $mnt -echo $nr_hugepgs > /proc/sys/vm/nr_hugepages +umount "$mnt" +rm -rf "$mnt" +echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages run_test ./compaction_test -- cgit v1.2.3 From 6c26df84e1f2f9181c0741865105a53537da842c Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Fri, 29 Apr 2022 14:36:59 -0700 Subject: selftests: cgroup: return -errno from cg_read()/cg_write() on failure Currently, cg_read()/cg_write() returns 0 on success and -1 on failure. Modify them to return the -errno on failure. Link: https://lkml.kernel.org/r/20220425190040.2475377-3-yosryahmed@google.com Signed-off-by: Yosry Ahmed Acked-by: Shakeel Butt Acked-by: David Rientjes Acked-by: Roman Gushchin Cc: Chen Wandun Cc: Dave Hansen Cc: Greg Thelen Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Michal Hocko Cc: "Michal Koutný" Cc: Shuah Khan Cc: Tejun Heo Cc: Tim Chen Cc: Vaibhav Jain Cc: Wei Xu Cc: Yu Zhao Cc: Zefan Li Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/cgroup_util.c | 44 ++++++++++++---------------- 1 file changed, 19 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index dbaa7aabbb4a..e6f3679cdcc0 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -19,6 +19,7 @@ #include "cgroup_util.h" #include "../clone3/clone3_selftests.h" +/* Returns read len on success, or -errno on failure.
*/ static ssize_t read_text(const char *path, char *buf, size_t max_len) { ssize_t len; @@ -26,35 +27,29 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len) fd = open(path, O_RDONLY); if (fd < 0) - return fd; + return -errno; len = read(fd, buf, max_len - 1); - if (len < 0) - goto out; - buf[len] = 0; -out: + if (len >= 0) + buf[len] = 0; + close(fd); - return len; + return len < 0 ? -errno : len; } +/* Returns written len on success, or -errno on failure. */ static ssize_t write_text(const char *path, char *buf, ssize_t len) { int fd; fd = open(path, O_WRONLY | O_APPEND); if (fd < 0) - return fd; + return -errno; len = write(fd, buf, len); - if (len < 0) { - close(fd); - return len; - } - close(fd); - - return len; + return len < 0 ? -errno : len; } char *cg_name(const char *root, const char *name) @@ -87,16 +82,16 @@ char *cg_control(const char *cgroup, const char *control) return ret; } +/* Returns 0 on success, or -errno on failure. */ int cg_read(const char *cgroup, const char *control, char *buf, size_t len) { char path[PATH_MAX]; + ssize_t ret; snprintf(path, sizeof(path), "%s/%s", cgroup, control); - if (read_text(path, buf, len) >= 0) - return 0; - - return -1; + ret = read_text(path, buf, len); + return ret >= 0 ? 0 : ret; } int cg_read_strcmp(const char *cgroup, const char *control, @@ -177,17 +172,15 @@ long cg_read_lc(const char *cgroup, const char *control) return cnt; } +/* Returns 0 on success, or -errno on failure. */ int cg_write(const char *cgroup, const char *control, char *buf) { char path[PATH_MAX]; - ssize_t len = strlen(buf); + ssize_t len = strlen(buf), ret; snprintf(path, sizeof(path), "%s/%s", cgroup, control); - - if (write_text(path, buf, len) == len) - return 0; - - return -1; + ret = write_text(path, buf, len); + return ret == len ? 
0 : ret; } int cg_find_unified_root(char *root, size_t len) @@ -545,7 +538,8 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t else snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); - return read_text(path, buf, size); + size = read_text(path, buf, size); + return size < 0 ? -1 : size; } int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) -- cgit v1.2.3 From a3622a53e620700053b648478dbc638ad373be3b Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Fri, 29 Apr 2022 14:36:59 -0700 Subject: selftests: cgroup: fix alloc_anon_noexit() instantly freeing memory Currently, alloc_anon_noexit() calls alloc_anon() which instantly frees the allocated memory. alloc_anon_noexit() is usually used with cg_run_nowait() to run a process in the background that allocates memory. It makes sense for the background process to keep the memory allocated and not instantly free it (otherwise there is no point of running it in the background). Link: https://lkml.kernel.org/r/20220425190040.2475377-4-yosryahmed@google.com Signed-off-by: Yosry Ahmed Acked-by: Roman Gushchin Acked-by: Shakeel Butt Acked-by: David Rientjes Cc: Chen Wandun Cc: Dave Hansen Cc: Greg Thelen Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Michal Hocko Cc: "Michal Koutný" Cc: Shuah Khan Cc: Tejun Heo Cc: Tim Chen Cc: Vaibhav Jain Cc: Wei Xu Cc: Yu Zhao Cc: Zefan Li Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_memcontrol.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 9c1f19fe2e37..a639bf49d396 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -211,13 +211,17 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) static int alloc_anon_noexit(const char *cgroup, void *arg) { int ppid = getppid(); +
size_t size = (unsigned long)arg; + char *buf, *ptr; - if (alloc_anon(cgroup, arg)) - return -1; + buf = malloc(size); + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) + *ptr = 0; while (getppid() == ppid) sleep(1); + free(buf); return 0; } -- cgit v1.2.3 From eae3cb2e87ff84547e66211b81301a8f9122840f Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Fri, 29 Apr 2022 14:37:00 -0700 Subject: selftests: cgroup: add a selftest for memory.reclaim Add a new test for memory.reclaim that verifies that the interface correctly reclaims memory as intended, from both anon and file pages. Link: https://lkml.kernel.org/r/20220425190040.2475377-5-yosryahmed@google.com Signed-off-by: Yosry Ahmed Acked-by: Roman Gushchin Acked-by: David Rientjes Cc: Chen Wandun Cc: Dave Hansen Cc: Greg Thelen Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Michal Hocko Cc: "Michal Koutný" Cc: Shakeel Butt Cc: Shuah Khan Cc: Tejun Heo Cc: Tim Chen Cc: Vaibhav Jain Cc: Wei Xu Cc: Yu Zhao Cc: Zefan Li Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_memcontrol.c | 106 +++++++++++++++++++++++ 1 file changed, 106 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index a639bf49d396..ee7defb17280 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -760,6 +760,111 @@ cleanup: return ret; } +/* + * This test checks that memory.reclaim reclaims the given + * amount of memory (from both anon and file, if possible).
+ */ +static int test_memcg_reclaim(const char *root) +{ + int ret = KSFT_FAIL, fd, retries; + char *memcg; + long current, expected_usage, to_reclaim; + char buf[64]; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + current = cg_read_long(memcg, "memory.current"); + if (current != 0) + goto cleanup; + + fd = get_temp_fd(); + if (fd < 0) + goto cleanup; + + cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); + + /* + * If swap is enabled, try to reclaim from both anon and file, else try + * to reclaim from file only. + */ + if (is_swap_enabled()) { + cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); + expected_usage = MB(100); + } else + expected_usage = MB(50); + + /* + * Wait until current usage reaches the expected usage (or we run out of + * retries). + */ + retries = 5; + while (!values_close(cg_read_long(memcg, "memory.current"), + expected_usage, 10)) { + if (retries--) { + sleep(1); + continue; + } else { + fprintf(stderr, + "failed to allocate %ld for memcg reclaim test\n", + expected_usage); + goto cleanup; + } + } + + /* + * Reclaim until current reaches 30M, this makes sure we hit both anon + * and file if swap is enabled. + */ + retries = 5; + while (true) { + int err; + + current = cg_read_long(memcg, "memory.current"); + to_reclaim = current - MB(30); + + /* + * We only keep looping if we get EAGAIN, which means we could + * not reclaim the full amount. + */ + if (to_reclaim <= 0) + goto cleanup; + + + snprintf(buf, sizeof(buf), "%ld", to_reclaim); + err = cg_write(memcg, "memory.reclaim", buf); + if (!err) { + /* + * If writing succeeds, then the written amount should have been + * fully reclaimed (and maybe more). + */ + current = cg_read_long(memcg, "memory.current"); + if (!values_close(current, MB(30), 3) && current > MB(30)) + goto cleanup; + break; + } + + /* The kernel could not reclaim the full amount, try again. 
*/ + if (err == -EAGAIN && retries--) + continue; + + /* We got an unexpected error or ran out of retries. */ + goto cleanup; + } + + ret = KSFT_PASS; +cleanup: + cg_destroy(memcg); + free(memcg); + close(fd); + + return ret; +} + static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) { long mem_max = (long)arg; @@ -1264,6 +1369,7 @@ struct memcg_test { T(test_memcg_high), T(test_memcg_high_sync), T(test_memcg_max), + T(test_memcg_reclaim), T(test_memcg_oom_events), T(test_memcg_swap_max), T(test_memcg_sock), -- cgit v1.2.3 From 78fbe906cc900b33ce078102e13e0e99b5b8c406 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 9 May 2022 18:20:44 -0700 Subject: mm/page-flags: reuse PG_mappedtodisk as PG_anon_exclusive for PageAnon() pages The basic question we would like to have a reliable and efficient answer to is: is this anonymous page exclusive to a single process or might it be shared? We need that information for ordinary/single pages, hugetlb pages, and possibly each subpage of a THP. Introduce a way to mark an anonymous page as exclusive, with the ultimate goal of teaching our COW logic to not do "wrong COWs", whereby GUP pins lose consistency with the pages mapped into the page table, resulting in reported memory corruptions. Most pageflags already have semantics for anonymous pages, however, PG_mappedtodisk should never apply to pages in the swapcache, so let's reuse that flag. As PG_has_hwpoisoned also uses that flag on the second tail page of a compound page, convert it to PG_error instead, which is marked as PF_NO_TAIL, so never used for tail pages. Use custom page flag modification functions such that we can do additional sanity checks. The semantics we'll put into some kernel doc in the future are: " PG_anon_exclusive is *usually* only expressive in combination with a page table entry. 
Depending on the page table entry type it might store the following information: Is what's mapped via this page table entry exclusive to the single process and can be mapped writable without further checks? If not, it might be shared and we might have to COW. For now, we only expect PTE-mapped THPs to make use of PG_anon_exclusive in subpages. For other anonymous compound folios (i.e., hugetlb), only the head page is logically mapped and holds this information. For example, an exclusive, PMD-mapped THP only has PG_anon_exclusive set on the head page. When replacing the PMD by a page table full of PTEs, PG_anon_exclusive, if set on the head page, will be set on all tail pages accordingly. Note that converting from a PTE-mapping to a PMD mapping using the same compound page is currently not possible and consequently doesn't require care. If GUP wants to take a reliable pin (FOLL_PIN) on an anonymous page, it should only pin if the relevant PG_anon_exclusive is set. In that case, the pin will be fully reliable and stay consistent with the pages mapped into the page table, as the bit cannot get cleared (e.g., by fork(), KSM) while the page is pinned. For anonymous pages that are mapped R/W, PG_anon_exclusive can be assumed to always be set because such pages cannot possibly be shared. The page table lock protecting the page table entry is the primary synchronization mechanism for PG_anon_exclusive; GUP-fast that does not take the PT lock needs special care when trying to clear the flag. Page table entry types and PG_anon_exclusive: * Present: PG_anon_exclusive applies. * Swap: the information is lost. PG_anon_exclusive was cleared. * Migration: the entry holds this information instead. PG_anon_exclusive was cleared. * Device private: PG_anon_exclusive applies. * Device exclusive: PG_anon_exclusive applies. * HW Poison: PG_anon_exclusive is stale and not changed. 
If the page may be pinned (FOLL_PIN), clearing PG_anon_exclusive is not allowed and the flag will stick around until the page is freed and folio->mapping is cleared. " We won't be clearing PG_anon_exclusive on destructive unmapping (i.e., zapping) of page table entries; page freeing code will handle that when it also invalidates page->mapping so that it no longer indicates PageAnon(). Letting information about exclusivity stick around will be an important property when adding sanity checks to unpinning code. Note that we properly clear the flag in free_pages_prepare() via PAGE_FLAGS_CHECK_AT_PREP for each individual subpage of a compound page, so there is no need to manually clear the flag. Link: https://lkml.kernel.org/r/20220428083441.37290-12-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: David Rientjes Cc: Don Dutile Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: John Hubbard Cc: Khalid Aziz Cc: "Kirill A. Shutemov" Cc: Liang Zhang Cc: "Matthew Wilcox (Oracle)" Cc: Michal Hocko Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Cc: Oded Gabbay Cc: Oleg Nesterov Cc: Pedro Demarchi Gomes Cc: Peter Xu Cc: Rik van Riel Cc: Roman Gushchin Cc: Shakeel Butt Cc: Yang Shi Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 39 ++++++++++++++++++++++++++++++++++++++- mm/hugetlb.c | 2 ++ mm/memory.c | 11 +++++++++++ mm/memremap.c | 9 +++++++++ mm/swapfile.c | 4 ++++ tools/vm/page-types.c | 8 +++++++- 6 files changed, 71 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1ea896887ee4..b70124b9c7c1 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -142,6 +142,15 @@ enum pageflags { PG_readahead = PG_reclaim, + /* + * Depending on the way an anonymous folio can be mapped into a page + * table (e.g., single PMD/PUD/CONT of the head page vs.
PTE-mapped + * THP), PG_anon_exclusive may be set only for the head page or for + * tail pages of an anonymous folio. For now, we only expect it to be + * set on tail pages for PTE-mapped THP. + */ + PG_anon_exclusive = PG_mappedtodisk, + /* Filesystems */ PG_checked = PG_owner_priv_1, @@ -176,7 +185,7 @@ enum pageflags { * Indicates that at least one subpage is hwpoisoned in the * THP. */ - PG_has_hwpoisoned = PG_mappedtodisk, + PG_has_hwpoisoned = PG_error, #endif /* non-lru isolated movable page */ @@ -1002,6 +1011,34 @@ extern bool is_free_buddy_page(struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); +static __always_inline int PageAnonExclusive(struct page *page) +{ + VM_BUG_ON_PGFLAGS(!PageAnon(page), page); + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); +} + +static __always_inline void SetPageAnonExclusive(struct page *page) +{ + VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); +} + +static __always_inline void ClearPageAnonExclusive(struct page *page) +{ + VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); +} + +static __always_inline void __ClearPageAnonExclusive(struct page *page) +{ + VM_BUG_ON_PGFLAGS(!PageAnon(page), page); + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); +} + #ifdef CONFIG_MMU #define __PG_MLOCKED (1UL << PG_mlocked) #else diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7a6052a984e9..03cbb75bcb54 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1677,6 +1677,8 @@ void free_huge_page(struct page *page) VM_BUG_ON_PAGE(page_mapcount(page), page); hugetlb_set_page_subpool(page, NULL); + if (PageAnon(page)) + 
__ClearPageAnonExclusive(page); page->mapping = NULL; restore_reserve = HPageRestoreReserve(page); ClearHPageRestoreReserve(page); diff --git a/mm/memory.c b/mm/memory.c index d4dba178e130..0b0727758c86 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3667,6 +3667,17 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) goto out_nomap; } + /* + * PG_anon_exclusive reuses PG_mappedtodisk for anon pages. A swap pte + * must never point at an anonymous page in the swapcache that is + * PG_anon_exclusive. Sanity check that this holds and especially, that + * no filesystem set PG_mappedtodisk on a page in the swapcache. Sanity + * check after taking the PT lock and making sure that nobody + * concurrently faulted in this page and set PG_anon_exclusive. + */ + BUG_ON(!PageAnon(page) && PageMappedToDisk(page)); + BUG_ON(PageAnon(page) && PageAnonExclusive(page)); + /* * Remove the swap entry and conditionally try to free up the swapcache. * We're already holding a reference on the page but haven't mapped it diff --git a/mm/memremap.c b/mm/memremap.c index c33bcd0398c9..2b92e97cb25b 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -459,6 +459,15 @@ void free_zone_device_page(struct page *page) mem_cgroup_uncharge(page_folio(page)); + /* + * Note: we don't expect anonymous compound pages yet. Once supported + * and we could PTE-map them similar to THP, we'd have to clear + * PG_anon_exclusive on all tail pages. + */ + VM_BUG_ON_PAGE(PageAnon(page) && PageCompound(page), page); + if (PageAnon(page)) + __ClearPageAnonExclusive(page); + /* * When a device managed page is freed, the page->mapping field * may still contain a (stale) mapping value. 
For example, the diff --git a/mm/swapfile.c b/mm/swapfile.c index 0ad7ed7ded21..a7847324d476 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1796,6 +1796,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, goto out; } + /* See do_swap_page() */ + BUG_ON(!PageAnon(page) && PageMappedToDisk(page)); + BUG_ON(PageAnon(page) && PageAnonExclusive(page)); + dec_mm_counter(vma->vm_mm, MM_SWAPENTS); inc_mm_counter(vma->vm_mm, MM_ANONPAGES); get_page(page); diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index b1ed76d9a979..381dcc00cb62 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -80,9 +80,10 @@ #define KPF_SOFTDIRTY 40 #define KPF_ARCH_2 41 -/* [48-] take some arbitrary free slots for expanding overloaded flags +/* [47-] take some arbitrary free slots for expanding overloaded flags * not part of kernel API */ +#define KPF_ANON_EXCLUSIVE 47 #define KPF_READAHEAD 48 #define KPF_SLOB_FREE 49 #define KPF_SLUB_FROZEN 50 @@ -138,6 +139,7 @@ static const char * const page_flag_names[] = { [KPF_SOFTDIRTY] = "f:softdirty", [KPF_ARCH_2] = "H:arch_2", + [KPF_ANON_EXCLUSIVE] = "d:anon_exclusive", [KPF_READAHEAD] = "I:readahead", [KPF_SLOB_FREE] = "P:slob_free", [KPF_SLUB_FROZEN] = "A:slub_frozen", @@ -472,6 +474,10 @@ static int bit_mask_ok(uint64_t flags) static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme) { + /* Anonymous pages overload PG_mappedtodisk */ + if ((flags & BIT(ANON)) && (flags & BIT(MAPPEDTODISK))) + flags ^= BIT(MAPPEDTODISK) | BIT(ANON_EXCLUSIVE); + /* SLOB/SLUB overload several page flags */ if (flags & BIT(SLAB)) { if (flags & BIT(PRIVATE)) -- cgit v1.2.3 From 17de1e559cf1eb01d5d90afd3064d5a280060f6f Mon Sep 17 00:00:00 2001 From: Joel Savitz Date: Mon, 9 May 2022 18:20:47 -0700 Subject: selftests: clarify common error when running gup_test The gup_test binary will fail showing only the output of perror("open") in the case that /sys/kernel/debug/gup_test is not found. 
This will almost always be due to CONFIG_GUP_TEST not being set, which enables compilation of a kernel that provides this file. Add a short error message to clarify this failure and point the user to the solution. Link: https://lkml.kernel.org/r/20220502224942.995427-1-jsavitz@redhat.com Signed-off-by: Joel Savitz Cc: Shuah Khan Cc: Nico Pache Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/gup_test.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c index 593262555e18..6bb36ca71cb5 100644 --- a/tools/testing/selftests/vm/gup_test.c +++ b/tools/testing/selftests/vm/gup_test.c @@ -21,6 +21,8 @@ #define FOLL_WRITE 0x01 /* check pte is writable */ #define FOLL_TOUCH 0x02 /* mark page accessed */ +#define GUP_TEST_FILE "/sys/kernel/debug/gup_test" + static unsigned long cmd = GUP_FAST_BENCHMARK; static int gup_fd, repeats = 1; static unsigned long size = 128 * MB; -- cgit v1.2.3 From f893abbd6997f9a95815acfb84aa865f0c996373 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 9 May 2022 18:20:51 -0700 Subject: selftets/damon/sysfs: test existence and permission of avail_operations This commit adds a selftest test case for ensuring the existence and the permission (read-only) of the 'avail_operations' DAMON sysfs file.
Link: https://lkml.kernel.org/r/20220426203843.45238-4-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.sh | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index 2e3ae77cb6db..89592c64462f 100644 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -231,6 +231,7 @@ test_context() { context_dir=$1 ensure_dir "$context_dir" "exist" + ensure_file "$context_dir/avail_operations" "exit" 400 ensure_file "$context_dir/operations" "exist" 600 test_monitoring_attrs "$context_dir/monitoring_attrs" test_targets "$context_dir/targets" -- cgit v1.2.3 From 9994715333515e82865e533250e488496b9742f4 Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Mon, 9 May 2022 18:20:54 -0700 Subject: selftest/vm: test that mremap fails on non-existent vma Add a regression test that validates that mremap fails for vma's that don't exist. 
Link: https://lkml.kernel.org/r/20220427224439.23828-3-dossche.niels@gmail.com Signed-off-by: Niels Dossche Cc: Mina Almasry Cc: Mike Kravetz Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/hugepage-mremap.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c index 1d689084a54b..585978f181ed 100644 --- a/tools/testing/selftests/vm/hugepage-mremap.c +++ b/tools/testing/selftests/vm/hugepage-mremap.c @@ -178,6 +178,12 @@ int main(int argc, char *argv[]) munmap(addr, length); + addr = mremap(addr, length, length, 0); + if (addr != MAP_FAILED) { + printf("mremap: Expected failure, but call succeeded\n"); + exit(1); + } + close(fd); unlink(argv[argc-1]); -- cgit v1.2.3 From c0eeeb02d9df878c71a457008900b650d94bd0d9 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 12 May 2022 20:22:56 -0700 Subject: selftests/uffd: enable uffd-wp for shmem/hugetlbfs After we added support for shmem and hugetlbfs, we can turn uffd-wp test on always now. Link: https://lkml.kernel.org/r/20220405014932.15212-1-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alistair Popple Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jerome Glisse Cc: "Kirill A . 
Shutemov" Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/userfaultfd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 92a4516f8f0d..bbc4a6d8cf7b 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -82,7 +82,7 @@ static int test_type; static volatile bool test_uffdio_copy_eexist = true; static volatile bool test_uffdio_zeropage_eexist = true; /* Whether to test uffd write-protection */ -static bool test_uffdio_wp = false; +static bool test_uffdio_wp = true; /* Whether to test uffd minor faults */ static bool test_uffdio_minor = false; @@ -1594,8 +1594,6 @@ static void set_test_type(const char *type) if (!strcmp(type, "anon")) { test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; - /* Only enable write-protect test for anonymous test */ - test_uffdio_wp = true; } else if (!strcmp(type, "hugetlb")) { test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; -- cgit v1.2.3 From 1bf0831383c6b372ff870d061ee62156635035c2 Mon Sep 17 00:00:00 2001 From: Guo Zhengkui Date: Thu, 12 May 2022 20:22:56 -0700 Subject: userfaultfd/selftests: use swap() instead of open coding it Address the following coccicheck warning: tools/testing/selftests/vm/userfaultfd.c:1536:21-22: WARNING opportunity for swap(). tools/testing/selftests/vm/userfaultfd.c:1540:33-34: WARNING opportunity for swap(). by using swap() for the swapping of variable values and drop `tmp_area` that is not needed any more. `swap()` macro in userfaultfd.c is introduced in commit 681696862bc18 ("selftests: vm: remove dependecy from internal kernel macros") It has been tested with gcc (Debian 8.3.0-6) 8.3.0. 
Link: https://lkml.kernel.org/r/20220407123141.4998-1-guozhengkui@vivo.com Signed-off-by: Guo Zhengkui Reviewed-by: Muchun Song Reviewed-by: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/userfaultfd.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index bbc4a6d8cf7b..0bdfc1955229 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -1422,7 +1422,6 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize) static int userfaultfd_stress(void) { void *area; - char *tmp_area; unsigned long nr; struct uffdio_register uffdio_register; struct uffd_stats uffd_stats[nr_cpus]; @@ -1533,13 +1532,9 @@ static int userfaultfd_stress(void) count_verify[nr], nr); /* prepare next bounce */ - tmp_area = area_src; - area_src = area_dst; - area_dst = tmp_area; + swap(area_src, area_dst); - tmp_area = area_src_alias; - area_src_alias = area_dst_alias; - area_dst_alias = tmp_area; + swap(area_src_alias, area_dst_alias); uffd_stats_report(uffd_stats, nr_cpus); } -- cgit v1.2.3 From f0cdaa5687d3178f3759033a7ff8411720b61647 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 12 May 2022 20:22:56 -0700 Subject: cgroups: refactor children cgroups in memcg tests Patch series "Fix bugs in memcontroller cgroup tests", v2. tools/testing/selftests/cgroup/test_memcontrol.c contains a set of testcases which validate expected behavior of the cgroup memory controller. Roman Gushchin recently sent out a patchset that fixed a few issues in the test. This patchset continues that effort by fixing a few more issues that were causing non-deterministic failures in the suite. With this patchset, I'm unable to reproduce any more errors after running the tests in a continuous loop for many iterations. 
Before, I was able to reproduce at least one of the errors fixed in this patchset with just one or two runs. This patch (of 5): In test_memcg_min() and test_memcg_low(), there is an array of four sibling cgroups. All but one of these sibling groups does a 50MB allocation, and the group that does no allocation is the third of four in the array. This is not a problem per se, but makes it a bit tricky to do some assertions in test_memcg_low(), as we want to make assertions on the siblings based on whether or not they performed allocations. Having a static index before which all groups have performed an allocation makes this cleaner. This patch therefore reorders the sibling groups so that the group that performs no allocations is the last in the array. A follow-on patch will leverage this to fix a bug in the test that incorrectly asserts that a sibling group that had performed an allocation, but only had protection from its parent, will not observe any memory.events.low events during reclaim. 
Link: https://lkml.kernel.org/r/20220423155619.3669555-1-void@manifault.com Link: https://lkml.kernel.org/r/20220423155619.3669555-2-void@manifault.com Signed-off-by: David Vernet Acked-by: Roman Gushchin Cc: Tejun Heo Cc: Johannes Weiner Cc: Michal Hocko Cc: Shakeel Butt Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_memcontrol.c | 28 ++++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index ee7defb17280..d240a391f99e 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -248,8 +248,8 @@ static int cg_test_proc_killed(const char *cgroup) * A/B memory.min = 50M, memory.current = 50M * A/B/C memory.min = 75M, memory.current = 50M * A/B/D memory.min = 25M, memory.current = 50M - * A/B/E memory.min = 500M, memory.current = 0 - * A/B/F memory.min = 0, memory.current = 50M + * A/B/E memory.min = 0, memory.current = 50M + * A/B/F memory.min = 500M, memory.current = 0 * * Usages are pagecache, but the test keeps a running * process in every leaf cgroup. 
@@ -259,7 +259,7 @@ static int cg_test_proc_killed(const char *cgroup) * A/B memory.current ~= 50M * A/B/C memory.current ~= 33M * A/B/D memory.current ~= 17M - * A/B/E memory.current ~= 0 + * A/B/F memory.current ~= 0 * * After that it tries to allocate more than there is * unprotected memory in A available, and checks @@ -325,7 +325,7 @@ static int test_memcg_min(const char *root) if (cg_create(children[i])) goto cleanup; - if (i == 2) + if (i > 2) continue; cg_run_nowait(children[i], alloc_pagecache_50M_noexit, @@ -340,9 +340,9 @@ static int test_memcg_min(const char *root) goto cleanup; if (cg_write(children[1], "memory.min", "25M")) goto cleanup; - if (cg_write(children[2], "memory.min", "500M")) + if (cg_write(children[2], "memory.min", "0")) goto cleanup; - if (cg_write(children[3], "memory.min", "0")) + if (cg_write(children[3], "memory.min", "500M")) goto cleanup; attempts = 0; @@ -368,7 +368,7 @@ static int test_memcg_min(const char *root) if (!values_close(c[1], MB(17), 10)) goto cleanup; - if (!values_close(c[2], 0, 1)) + if (c[3] != 0) goto cleanup; if (!cg_run(parent[2], alloc_anon, (void *)MB(170))) @@ -405,8 +405,8 @@ cleanup: * A/B memory.low = 50M, memory.current = 50M * A/B/C memory.low = 75M, memory.current = 50M * A/B/D memory.low = 25M, memory.current = 50M - * A/B/E memory.low = 500M, memory.current = 0 - * A/B/F memory.low = 0, memory.current = 50M + * A/B/E memory.low = 0, memory.current = 50M + * A/B/F memory.low = 500M, memory.current = 0 * * Usages are pagecache. 
* Then it creates A/G an creates a significant @@ -416,7 +416,7 @@ cleanup: * A/B memory.current ~= 50M * A/B/ memory.current ~= 33M * A/B/D memory.current ~= 17M - * A/B/E memory.current ~= 0 + * A/B/F memory.current ~= 0 * * After that it tries to allocate more than there is * unprotected memory in A available, @@ -480,7 +480,7 @@ static int test_memcg_low(const char *root) if (cg_create(children[i])) goto cleanup; - if (i == 2) + if (i > 2) continue; if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd)) @@ -495,9 +495,9 @@ static int test_memcg_low(const char *root) goto cleanup; if (cg_write(children[1], "memory.low", "25M")) goto cleanup; - if (cg_write(children[2], "memory.low", "500M")) + if (cg_write(children[2], "memory.low", "0")) goto cleanup; - if (cg_write(children[3], "memory.low", "0")) + if (cg_write(children[3], "memory.low", "500M")) goto cleanup; if (cg_run(parent[2], alloc_anon, (void *)MB(148))) @@ -515,7 +515,7 @@ static int test_memcg_low(const char *root) if (!values_close(c[1], MB(17), 10)) goto cleanup; - if (!values_close(c[2], 0, 1)) + if (c[3] != 0) goto cleanup; if (cg_run(parent[2], alloc_anon, (void *)MB(166))) { -- cgit v1.2.3 From cdc69458a5f3d4cf31372efd45fe92cec6b167e4 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 12 May 2022 20:22:57 -0700 Subject: cgroup: account for memory_recursiveprot in test_memcg_low() The test_memcg_low() testcase in test_memcontrol.c verifies the expected behavior of groups using the memory.low knob. Part of the testcase verifies that a group with memory.low that experiences reclaim due to memory pressure elsewhere in the system, observes memory.events.low events as a result of that reclaim. In commit 8a931f801340 ("mm: memcontrol: recursive memory.low protection"), the memory controller was updated to propagate memory.low and memory.min protection from a parent group to its children via a configurable memory_recursiveprot mount option. 
This unfortunately broke the memcg tests, which asserts that a sibling that experienced reclaim but had a memory.low value of 0, would not observe any memory.low events. This patch updates test_memcg_low() to account for the new behavior introduced by memory_recursiveprot. So as to make the test resilient to multiple configurations, the patch also adds a new proc_mount_contains() helper that checks for a string in /proc/mounts, and is used to toggle behavior based on whether the default memory_recursiveprot was present. Link: https://lkml.kernel.org/r/20220423155619.3669555-3-void@manifault.com Signed-off-by: David Vernet Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Shakeel Butt Cc: Tejun Heo Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/cgroup_util.c | 12 ++++++++++++ tools/testing/selftests/cgroup/cgroup_util.h | 1 + tools/testing/selftests/cgroup/test_memcontrol.c | 16 +++++++++++++--- 3 files changed, 26 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index e6f3679cdcc0..b4d7027a44c3 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -528,6 +528,18 @@ int set_oom_adj_score(int pid, int score) return 0; } +int proc_mount_contains(const char *option) +{ + char buf[4 * PAGE_SIZE]; + ssize_t read; + + read = read_text("/proc/mounts", buf, sizeof(buf)); + if (read < 0) + return read; + + return strstr(buf, option) != NULL; +} + ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) { char path[PATH_MAX]; diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 628738532ac9..756f76052b44 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -48,6 +48,7 @@ extern int is_swap_enabled(void); extern int set_oom_adj_score(int 
pid, int score); extern int cg_wait_for_proc_count(const char *cgroup, int count); extern int cg_killall(const char *cgroup); +int proc_mount_contains(const char *option); extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size); extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle); extern pid_t clone_into_cgroup(int cgroup_fd); diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index d240a391f99e..4da138d05acb 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -21,6 +21,8 @@ #include "../kselftest.h" #include "cgroup_util.h" +static bool has_recursiveprot; + /* * This test creates two nested cgroups with and without enabling * the memory controller. @@ -525,15 +527,18 @@ static int test_memcg_low(const char *root) } for (i = 0; i < ARRAY_SIZE(children); i++) { + int no_low_events_index = has_recursiveprot ? 
2 : 1; + oom = cg_read_key_long(children[i], "memory.events", "oom "); low = cg_read_key_long(children[i], "memory.events", "low "); if (oom) goto cleanup; - if (i < 2 && low <= 0) + if (i <= no_low_events_index && low <= 0) goto cleanup; - if (i >= 2 && low) + if (i > no_low_events_index && low) goto cleanup; + } ret = KSFT_PASS; @@ -1382,7 +1387,7 @@ struct memcg_test { int main(int argc, char **argv) { char root[PATH_MAX]; - int i, ret = EXIT_SUCCESS; + int i, proc_status, ret = EXIT_SUCCESS; if (cg_find_unified_root(root, sizeof(root))) ksft_exit_skip("cgroup v2 isn't mounted\n"); @@ -1398,6 +1403,11 @@ int main(int argc, char **argv) if (cg_write(root, "cgroup.subtree_control", "+memory")) ksft_exit_skip("Failed to set memory controller\n"); + proc_status = proc_mount_contains("memory_recursiveprot"); + if (proc_status < 0) + ksft_exit_skip("Failed to query cgroup mount option\n"); + has_recursiveprot = proc_status; + for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: -- cgit v1.2.3 From 72b1e03aa7255094d15752952a7e56c5f39b6e37 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 12 May 2022 20:22:57 -0700 Subject: cgroup: account for memory_localevents in test_memcg_oom_group_leaf_events() The test_memcg_oom_group_leaf_events() testcase in the cgroup memcg tests validates that processes in a group that perform allocations exceeding memory.oom.group are killed. It also validates that the memory.events.oom_kill events are properly propagated in this case. Commit 06e11c907ea4 ("kselftests: memcg: update the oom group leaf events test") fixed test_memcg_oom_group_leaf_events() to account for the fact that the memory.events.oom_kill events in a child cgroup is propagated up to its parent. This behavior can actually be configured by the memory_localevents mount option, so this patch updates the testcase to properly account for the possible presence of this mount option. 
Link: https://lkml.kernel.org/r/20220423155619.3669555-4-void@manifault.com Signed-off-by: David Vernet Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Shakeel Butt Cc: Tejun Heo Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_memcontrol.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 4da138d05acb..31d5c3f9b2b2 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -21,6 +21,7 @@ #include "../kselftest.h" #include "cgroup_util.h" +static bool has_localevents; static bool has_recursiveprot; /* @@ -1200,6 +1201,7 @@ static int test_memcg_oom_group_leaf_events(const char *root) { int ret = KSFT_FAIL; char *parent, *child; + long parent_oom_events; parent = cg_name(root, "memcg_test_0"); child = cg_name(root, "memcg_test_0/memcg_test_1"); @@ -1345,14 +1347,20 @@ static int test_memcg_oom_group_score_events(const char *root) if (!cg_run(memcg, alloc_anon, (void *)MB(100))) goto cleanup; - if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) - goto cleanup; + parent_oom_events = cg_read_key_long( + parent, "memory.events", "oom_kill "); + /* + * If memory_localevents is not enabled (the default), the parent should + * count OOM events in its children groups. Otherwise, it should not + * have observed any events. 
+ */ + if ((has_localevents && parent_oom_events == 0) || + parent_oom_events > 0) + ret = KSFT_PASS; if (kill(safe_pid, SIGKILL)) goto cleanup; - ret = KSFT_PASS; - cleanup: if (memcg) cg_destroy(memcg); @@ -1361,7 +1369,6 @@ cleanup: return ret; } - #define T(x) { x, #x } struct memcg_test { int (*fn)(const char *root); @@ -1408,6 +1415,11 @@ int main(int argc, char **argv) ksft_exit_skip("Failed to query cgroup mount option\n"); has_recursiveprot = proc_status; + proc_status = proc_mount_contains("memory_localevents"); + if (proc_status < 0) + ksft_exit_skip("Failed to query cgroup mount option\n"); + has_localevents = proc_status; + for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: -- cgit v1.2.3 From 830316807e0275146cbd5d2ae66fd338d0dfd09e Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 12 May 2022 20:22:57 -0700 Subject: cgroup: remove racy check in test_memcg_sock() test_memcg_sock() in the cgroup memcg tests, verifies expected memory accounting for sockets. The test forks a process which functions as a TCP server, and sends large buffers back and forth between itself (as the TCP client) and the forked TCP server. While doing so, it verifies that memory.current and memory.stat.sock look correct. There is currently a check in tcp_client() which asserts memory.current >= memory.stat.sock. This check is racy, as between memory.current and memory.stat.sock being queried, a packet could come in which causes mem_cgroup_charge_skmem() to be invoked. This could cause memory.stat.sock to exceed memory.current. Reversing the order of querying doesn't address the problem either, as memory may be reclaimed between the two calls. Instead, this patch just removes that assertion altogether, and instead relies on the values_close() check that follows to validate the expected accounting. 
Link: https://lkml.kernel.org/r/20220423155619.3669555-5-void@manifault.com Signed-off-by: David Vernet Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Shakeel Butt Cc: Tejun Heo Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_memcontrol.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 31d5c3f9b2b2..6d3aace6be53 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -1102,9 +1102,6 @@ static int tcp_client(const char *cgroup, unsigned short port) if (current < 0 || sock < 0) goto close_sk; - if (current < sock) - goto close_sk; - if (values_close(current, sock, 10)) { ret = KSFT_PASS; break; -- cgit v1.2.3 From c1a31a2f7a9c08665f95a16c40b3551af43cb95c Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 12 May 2022 20:22:57 -0700 Subject: cgroup: fix racy check in alloc_pagecache_max_30M() helper function alloc_pagecache_max_30M() in the cgroup memcg tests performs a 50MB pagecache allocation, which it expects to be capped at 30MB due to the calling process having a memory.high setting of 30MB. After the allocation, the function contains a check that verifies that MB(29) < memory.current <= MB(30). This check can actually fail non-deterministically. The testcases that use this function are test_memcg_high() and test_memcg_max(), which set memory.high and memory.max to 30MB respectively for the cgroup under test. The allocation can slightly exceed this number in both cases, and for memory.max, the process performing the allocation will not have the OOM killer invoked as it's performing a pagecache allocation. This patchset therefore updates the above check to instead use the values_close() helper function.
Link: https://lkml.kernel.org/r/20220423155619.3669555-6-void@manifault.com Signed-off-by: David Vernet Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Shakeel Butt Cc: Tejun Heo Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_memcontrol.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 6d3aace6be53..6ab94317c87b 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -568,9 +568,14 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg) { size_t size = MB(50); int ret = -1; - long current; + long current, high, max; int fd; + high = cg_read_long(cgroup, "memory.high"); + max = cg_read_long(cgroup, "memory.max"); + if (high != MB(30) && max != MB(30)) + goto cleanup; + fd = get_temp_fd(); if (fd < 0) return -1; @@ -579,7 +584,7 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg) goto cleanup; current = cg_read_long(cgroup, "memory.current"); - if (current <= MB(29) || current > MB(30)) + if (!values_close(current, MB(30), 5)) goto cleanup; ret = 0; -- cgit v1.2.3 From 7fb6378701dc0d8f19c1ac4623b55f5125f0e286 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 22 May 2022 16:18:51 +0200 Subject: cgroup: fix an error handling path in alloc_pagecache_max_30M() If the first goto is taken, 'fd' is not opened yet (and is un-initialized). So a direct return is safer. 
Link: https://lkml.kernel.org/r/628312312eb40e0e39463a2c06415fde5295c716.1653229120.git.christophe.jaillet@wanadoo.fr Fixes: c1a31a2f7a9c ("cgroup: fix racy check in alloc_pagecache_max_30M() helper function") Signed-off-by: Christophe JAILLET Reviewed-by: Andrew Morton Cc: Dan Carpenter Cc: Johannes Weiner Cc: Michal Hocko Cc: Roman Gushchin Cc: Shakeel Butt Cc: Muchun Song Cc: Tejun Heo Cc: Zefan Li Cc: Shuah Khan Cc: David Vernet Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 6ab94317c87b..44a974ec472c 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -574,7 +574,7 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg) high = cg_read_long(cgroup, "memory.high"); max = cg_read_long(cgroup, "memory.max"); if (high != MB(30) && max != MB(30)) - goto cleanup; + return -1; fd = get_temp_fd(); if (fd < 0) -- cgit v1.2.3 From 33776141b81296e604a39a8065b28b89c61c7f74 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 18 May 2022 13:43:16 -0700 Subject: selftests: vm: add process_mrelease tests Introduce process_mrelease syscall sanity tests, which include tests that are expected to fail: - process_mrelease with invalid pidfd and flags inputs - process_mrelease on a live process with no pending signals and valid process_mrelease usage which is expected to succeed. Because process_mrelease has to be used against a process with a pending SIGKILL, it's possible that the process exits before process_mrelease gets called. In such cases we retry the test with a victim that allocates twice as much memory, up to 1GB.
This would require the victim process to spend more time during exit and process_mrelease has a better chance of catching the process before it exits and succeeding. On success the test reports the amount of memory the child had to allocate for reaping to succeed. Sample output: $ mrelease_test Success reaping a child with 1MB of memory allocations On failure the test reports the failure. Sample outputs: $ mrelease_test All process_mrelease attempts failed! $ mrelease_test process_mrelease: Invalid argument Link: https://lkml.kernel.org/r/20220518204316.13131-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Shuah Khan Acked-by: Christian Brauner (Microsoft) Reviewed-by: Muhammad Usama Anjum Cc: Michal Hocko Cc: David Rientjes Cc: Matthew Wilcox (Oracle) Cc: Johannes Weiner Cc: Roman Gushchin Cc: Minchan Kim Cc: "Kirill A . Shutemov" Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: Oleg Nesterov Cc: David Hildenbrand Cc: Jann Horn Cc: Shakeel Butt Cc: Peter Xu Cc: John Hubbard Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/mrelease_test.c | 200 +++++++++++++++++++++++++++++ tools/testing/selftests/vm/run_vmtests.sh | 2 + 4 files changed, 204 insertions(+) create mode 100644 tools/testing/selftests/vm/mrelease_test.c (limited to 'tools') diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 3cb4fa771ec2..6c2ac4208c27 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -10,6 +10,7 @@ map_populate thuge-gen compaction_test mlock2-tests +mrelease_test mremap_dontunmap mremap_test on-fault-limit diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index f1228370e99b..8111a33e4824 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -44,6 +44,7 @@ TEST_GEN_FILES += memfd_secret TEST_GEN_FILES += 
migration TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock2-tests +TEST_GEN_FILES += mrelease_test TEST_GEN_FILES += mremap_dontunmap TEST_GEN_FILES += mremap_test TEST_GEN_FILES += on-fault-limit diff --git a/tools/testing/selftests/vm/mrelease_test.c b/tools/testing/selftests/vm/mrelease_test.c new file mode 100644 index 000000000000..96671c2f7d48 --- /dev/null +++ b/tools/testing/selftests/vm/mrelease_test.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2022 Google LLC + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +#include "util.h" + +#include "../kselftest.h" + +#ifndef __NR_pidfd_open +#define __NR_pidfd_open -1 +#endif + +#ifndef __NR_process_mrelease +#define __NR_process_mrelease -1 +#endif + +#define MB(x) (x << 20) +#define MAX_SIZE_MB 1024 + +static int alloc_noexit(unsigned long nr_pages, int pipefd) +{ + int ppid = getppid(); + int timeout = 10; /* 10sec timeout to get killed */ + unsigned long i; + char *buf; + + buf = (char *)mmap(NULL, nr_pages * PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, 0, 0); + if (buf == MAP_FAILED) { + perror("mmap failed, halting the test"); + return KSFT_FAIL; + } + + for (i = 0; i < nr_pages; i++) + *((unsigned long *)(buf + (i * PAGE_SIZE))) = i; + + /* Signal the parent that the child is ready */ + if (write(pipefd, "", 1) < 0) { + perror("write"); + return KSFT_FAIL; + } + + /* Wait to be killed (when reparenting happens) */ + while (getppid() == ppid && timeout > 0) { + sleep(1); + timeout--; + } + + munmap(buf, nr_pages * PAGE_SIZE); + + return (timeout > 0) ? KSFT_PASS : KSFT_FAIL; +} + +/* The process_mrelease calls in this test are expected to fail */ +static void run_negative_tests(int pidfd) +{ + /* Test invalid flags. Expect to fail with EINVAL error code. */ + if (!syscall(__NR_process_mrelease, pidfd, (unsigned int)-1) || + errno != EINVAL) { + perror("process_mrelease with wrong flags"); + exit(errno == ENOSYS ? 
KSFT_SKIP : KSFT_FAIL); + } + /* + * Test reaping while process is alive with no pending SIGKILL. + * Expect to fail with EINVAL error code. + */ + if (!syscall(__NR_process_mrelease, pidfd, 0) || errno != EINVAL) { + perror("process_mrelease on a live process"); + exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + } +} + +static int child_main(int pipefd[], size_t size) +{ + int res; + + /* Allocate and fault-in memory and wait to be killed */ + close(pipefd[0]); + res = alloc_noexit(MB(size) / PAGE_SIZE, pipefd[1]); + close(pipefd[1]); + return res; +} + +int main(void) +{ + int pipefd[2], pidfd; + bool success, retry; + size_t size; + pid_t pid; + char byte; + int res; + + /* Test a wrong pidfd */ + if (!syscall(__NR_process_mrelease, -1, 0) || errno != EBADF) { + perror("process_mrelease with wrong pidfd"); + exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + } + + /* Start the test with 1MB child memory allocation */ + size = 1; +retry: + /* + * Pipe for the child to signal when it's done allocating + * memory + */ + if (pipe(pipefd)) { + perror("pipe"); + exit(KSFT_FAIL); + } + pid = fork(); + if (pid < 0) { + perror("fork"); + close(pipefd[0]); + close(pipefd[1]); + exit(KSFT_FAIL); + } + + if (pid == 0) { + /* Child main routine */ + res = child_main(pipefd, size); + exit(res); + } + + /* + * Parent main routine: + * Wait for the child to finish allocations, then kill and reap + */ + close(pipefd[1]); + /* Block until the child is ready */ + res = read(pipefd[0], &byte, 1); + close(pipefd[0]); + if (res < 0) { + perror("read"); + if (!kill(pid, SIGKILL)) + waitpid(pid, NULL, 0); + exit(KSFT_FAIL); + } + + pidfd = syscall(__NR_pidfd_open, pid, 0); + if (pidfd < 0) { + perror("pidfd_open"); + if (!kill(pid, SIGKILL)) + waitpid(pid, NULL, 0); + exit(KSFT_FAIL); + } + + /* Run negative tests which require a live child */ + run_negative_tests(pidfd); + + if (kill(pid, SIGKILL)) { + perror("kill"); + exit(errno == ENOSYS ? 
KSFT_SKIP : KSFT_FAIL); + } + + success = (syscall(__NR_process_mrelease, pidfd, 0) == 0); + if (!success) { + /* + * If we failed to reap because the child exited too soon, + * before we could call process_mrelease. Double child's memory + * which causes it to spend more time on cleanup and increases + * our chances of reaping its memory before it exits. + * Retry until we succeed or reach MAX_SIZE_MB. + */ + if (errno == ESRCH) { + retry = (size <= MAX_SIZE_MB); + } else { + perror("process_mrelease"); + waitpid(pid, NULL, 0); + exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + } + } + + /* Cleanup to prevent zombies */ + if (waitpid(pid, NULL, 0) < 0) { + perror("waitpid"); + exit(KSFT_FAIL); + } + close(pidfd); + + if (!success) { + if (retry) { + size *= 2; + goto retry; + } + printf("All process_mrelease attempts failed!\n"); + exit(KSFT_FAIL); + } + + printf("Success reaping a child with %zuMB of memory allocations\n", + size); + return KSFT_PASS; +} diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index a2302b5faaf2..41fce8bea929 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -141,6 +141,8 @@ run_test ./mlock-random-test run_test ./mlock2-tests +run_test ./mrelease_test + run_test ./mremap_test run_test ./thuge-gen -- cgit v1.2.3 From 75c96ccea2e1de1342996722ee505d2cadedc0dd Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 May 2022 13:11:30 +0200 Subject: selftests/vm/pkeys: fix typo in comment Spelling mistake (triple letters) in comment. Detected with the help of Coccinelle. 
Link: https://lkml.kernel.org/r/20220521111145.81697-80-Julia.Lawall@inria.fr Signed-off-by: Julia Lawall Reviewed-by: Muchun Song Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/protection_keys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 2d0ae88665db..291bc1e07842 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1523,7 +1523,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) /* * Reset the shadow, assuming that the above mprotect() * correctly changed PKRU, but to an unknown value since - * the actual alllocated pkey is unknown. + * the actual allocated pkey is unknown. */ shadow_pkey_reg = __read_pkey_reg(); -- cgit v1.2.3 From 3d3921ed271b0e23d60c91fcad089f2f5e71af98 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Sat, 21 May 2022 14:43:13 +0500 Subject: selftests: vm: add migration to the .gitignore Add newly added migration test object to .gitignore file. 
Link: https://lkml.kernel.org/r/20220521094313.166505-1-usama.anjum@collabora.com Fixes: 0c2d08728470 ("mm: add selftests for migration entries") Signed-off-by: Muhammad Usama Anjum Reviewed-by: Alistair Popple Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 6c2ac4208c27..31e5eea2a9b9 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -9,6 +9,7 @@ map_hugetlb map_populate thuge-gen compaction_test +migration mlock2-tests mrelease_test mremap_dontunmap -- cgit v1.2.3 From 9aa1af954db02a3228763015356684a169503c68 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Sat, 21 May 2022 16:38:23 +0800 Subject: selftests: vm: check numa_available() before operating "merge_across_nodes" in ksm_tests Patch series "selftests: vm: a few fixup patches". This series contains three fixup patches for vm selftests. They are independent. Please see the patches. This patch (of 3): Currently, ksm_tests operates "merge_across_nodes" with NUMA either enabled or disabled. In a system with NUMA disabled, these operations will fail and output a misleading report given "merge_across_nodes" does not exist in sysfs: ---------------------------- running ./ksm_tests -M -p 10 ---------------------------- f /sys/kernel/mm/ksm/merge_across_nodes fopen: No such file or directory Cannot save default tunables [FAIL] ---------------------- So check numa_available() before those operations to skip them if NUMA is disabled. 
Link: https://lkml.kernel.org/r/20220521083825.319654-1-patrick.wang.shcn@gmail.com Link: https://lkml.kernel.org/r/20220521083825.319654-2-patrick.wang.shcn@gmail.com Signed-off-by: Patrick Wang Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/ksm_tests.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index fd85f15869d1..2fcf24312da8 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -221,7 +221,8 @@ static bool assert_ksm_pages_count(long dupl_page_count) static int ksm_save_def(struct ksm_sysfs *ksm_sysfs) { if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) || - ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) || + numa_available() ? 0 : + ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) || ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) || ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) || ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) || @@ -236,7 +237,8 @@ static int ksm_save_def(struct ksm_sysfs *ksm_sysfs) static int ksm_restore(struct ksm_sysfs *ksm_sysfs) { if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) || - ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) || + numa_available() ? 0 : + ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) || ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) || ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) || ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) || @@ -720,7 +722,8 @@ int main(int argc, char *argv[]) if (ksm_write_sysfs(KSM_FP("run"), 2) || ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) || - ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) || + numa_available() ? 
0 : + ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) || ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count)) return KSFT_FAIL; -- cgit v1.2.3 From ccd2a1201d267bf6f1950bf31cfd55fb4e17a231 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Sat, 21 May 2022 16:38:24 +0800 Subject: selftests: vm: add "test_hmm.sh" to TEST_FILES The "test_hmm.sh" file used by run_vmtests.sh is not installed into INSTALL_PATH. Thus run_vmtests.sh cannot call it in INSTALL_PATH: --------------------------- running ./test_hmm.sh smoke --------------------------- ./run_vmtests.sh: line 74: ./test_hmm.sh: No such file or directory [FAIL] ----------------------- Add "test_hmm.sh" to TEST_FILES so that it will be installed. Link: https://lkml.kernel.org/r/20220521083825.319654-3-patrick.wang.shcn@gmail.com Signed-off-by: Patrick Wang Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 8111a33e4824..064bfae6dd0d 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -92,6 +92,7 @@ endif TEST_PROGS := run_vmtests.sh TEST_FILES := test_vmalloc.sh +TEST_FILES += test_hmm.sh KSFT_KHDR_INSTALL := 1 include ../lib.mk -- cgit v1.2.3 From 0598739900071feff82b89f1515f963a6889b330 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Sat, 21 May 2022 16:38:25 +0800 Subject: selftests: vm: add the "settings" file with timeout variable The default "timeout" for one kselftest is 45 seconds, while some cases in run_vmtests.sh require more time. This causes the test run to time out, for example: not ok 4 selftests: vm: run_vmtests.sh # TIMEOUT 45 seconds Therefore, add the "settings" file with timeout variable so users can set the "timeout" value.
Link: https://lkml.kernel.org/r/20220521083825.319654-4-patrick.wang.shcn@gmail.com Signed-off-by: Patrick Wang Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/settings | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/vm/settings (limited to 'tools') diff --git a/tools/testing/selftests/vm/settings b/tools/testing/selftests/vm/settings new file mode 100644 index 000000000000..9abfc60e9e6f --- /dev/null +++ b/tools/testing/selftests/vm/settings @@ -0,0 +1 @@ +timeout=45 -- cgit v1.2.3