diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpu.c | 28 | ||||
-rw-r--r-- | kernel/cpuset.c | 58 | ||||
-rw-r--r-- | kernel/exit.c | 2 | ||||
-rw-r--r-- | kernel/module.c | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 25 | ||||
-rw-r--r-- | kernel/sysctl_binary.c | 9 |
6 files changed, 108 insertions, 16 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 545777574779..124ad9d6be16 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -326,6 +326,12 @@ out_notify: int __cpuinit cpu_up(unsigned int cpu) { int err = 0; + +#ifdef CONFIG_MEMORY_HOTPLUG + int nid; + pg_data_t *pgdat; +#endif + if (!cpu_possible(cpu)) { printk(KERN_ERR "can't online cpu %d because it is not " "configured as may-hotadd at boot time\n", cpu); @@ -336,6 +342,28 @@ int __cpuinit cpu_up(unsigned int cpu) return -EINVAL; } +#ifdef CONFIG_MEMORY_HOTPLUG + nid = cpu_to_node(cpu); + if (!node_online(nid)) { + err = mem_online_node(nid); + if (err) + return err; + } + + pgdat = NODE_DATA(nid); + if (!pgdat) { + printk(KERN_ERR + "Can't online cpu %d due to NULL pgdat\n", cpu); + return -ENOMEM; + } + + if (pgdat->node_zonelists->_zonerefs->zone == NULL) { + mutex_lock(&zonelists_mutex); + build_all_zonelists(NULL); + mutex_unlock(&zonelists_mutex); + } +#endif + cpu_maps_update_begin(); if (cpu_hotplug_disabled) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9a50c5f6e727..61d6af7fa676 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -946,16 +946,62 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, * In order to avoid seeing no nodes if the old and new nodes are disjoint, * we structure updates as setting all new allowed nodes, then clearing newly * disallowed ones. - * - * Called with task's alloc_lock held */ static void cpuset_change_task_nodemask(struct task_struct *tsk, nodemask_t *newmems) { +repeat: + /* + * Allow tasks that have access to memory reserves because they have + * been OOM killed to get memory anywhere. 
+ */ + if (unlikely(test_thread_flag(TIF_MEMDIE))) + return; + if (current->flags & PF_EXITING) /* Let dying task have memory */ + return; + + task_lock(tsk); nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); - mpol_rebind_task(tsk, &tsk->mems_allowed); - mpol_rebind_task(tsk, newmems); + mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); + + + /* + * Ensure that ->mems_allowed_change_disable is checked after setting all + * new allowed nodes. + * + * The read-side task can see a nodemask with both the new allowed nodes + * and the old allowed nodes, so if it allocates a page while cpuset goes + * on to clear the newly disallowed ones, it can still see the new allowed bits. + * + * But if the new allowed nodes were set after the check, setting all new + * allowed nodes and clearing the newly disallowed ones would be done + * back to back, and the read-side task might find no node to allocate a page from. + */ + smp_mb(); + + /* + * Memory allocation is very fast, so we needn't sleep while waiting + * for the read-side. + */ + while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) { + task_unlock(tsk); + if (!task_curr(tsk)) + yield(); + goto repeat; + } + + /* + * Ensure that ->mems_allowed_change_disable is checked before clearing + * all newly disallowed nodes. + * + * If the newly disallowed bits were cleared before the check, the read-side + * task might find no node to allocate a page from. 
+ */ + smp_mb(); + + mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); tsk->mems_allowed = *newmems; + task_unlock(tsk); } /* @@ -978,9 +1024,7 @@ static void cpuset_change_nodemask(struct task_struct *p, cs = cgroup_cs(scan->cg); guarantee_online_mems(cs, newmems); - task_lock(p); cpuset_change_task_nodemask(p, newmems); - task_unlock(p); NODEMASK_FREE(newmems); @@ -1383,9 +1427,7 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, err = set_cpus_allowed_ptr(tsk, cpus_attach); WARN_ON_ONCE(err); - task_lock(tsk); cpuset_change_task_nodemask(tsk, to); - task_unlock(tsk); cpuset_update_task_spread_flag(cs, tsk); } diff --git a/kernel/exit.c b/kernel/exit.c index eabca5a73a85..019a2843bf95 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1002,8 +1002,10 @@ NORET_TYPE void do_exit(long code) exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA + task_lock(tsk); mpol_put(tsk->mempolicy); tsk->mempolicy = NULL; + task_unlock(tsk); #endif #ifdef CONFIG_FUTEX if (unlikely(current->pi_state_cache)) diff --git a/kernel/module.c b/kernel/module.c index a8014bfb5a4e..625985e70e9d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -180,8 +180,6 @@ extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; extern const struct kernel_symbol __start___ksymtab_gpl_future[]; extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; -extern const struct kernel_symbol __start___ksymtab_gpl_future[]; -extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; extern const unsigned long __start___kcrctab[]; extern const unsigned long __start___kcrctab_gpl[]; extern const unsigned long __start___kcrctab_gpl_future[]; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4c93486b45d1..84ff5e75c084 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -37,6 +37,7 @@ #include <linux/highuid.h> #include <linux/writeback.h> #include <linux/ratelimit.h> +#include <linux/compaction.h> #include 
<linux/hugetlb.h> #include <linux/initrd.h> #include <linux/key.h> @@ -262,6 +263,11 @@ static int min_sched_shares_ratelimit = 100000; /* 100 usec */ static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */ #endif +#ifdef CONFIG_COMPACTION +static int min_extfrag_threshold; +static int max_extfrag_threshold = 1000; +#endif + static struct ctl_table kern_table[] = { { .procname = "sched_child_runs_first", @@ -1121,6 +1127,25 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = drop_caches_sysctl_handler, }, +#ifdef CONFIG_COMPACTION + { + .procname = "compact_memory", + .data = &sysctl_compact_memory, + .maxlen = sizeof(int), + .mode = 0200, + .proc_handler = sysctl_compaction_handler, + }, + { + .procname = "extfrag_threshold", + .data = &sysctl_extfrag_threshold, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sysctl_extfrag_handler, + .extra1 = &min_extfrag_threshold, + .extra2 = &max_extfrag_threshold, + }, + +#endif /* CONFIG_COMPACTION */ { .procname = "min_free_kbytes", .data = &min_free_kbytes, diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 937d31dc8566..1357c5786064 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -13,6 +13,7 @@ #include <linux/file.h> #include <linux/ctype.h> #include <linux/netdevice.h> +#include <linux/kernel.h> #include <linux/slab.h> #ifdef CONFIG_SYSCTL_SYSCALL @@ -1124,11 +1125,6 @@ out: return result; } -static unsigned hex_value(int ch) -{ - return isdigit(ch) ? ch - '0' : ((ch | 0x20) - 'a') + 10; -} - static ssize_t bin_uuid(struct file *file, void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) { @@ -1156,7 +1152,8 @@ static ssize_t bin_uuid(struct file *file, if (!isxdigit(str[0]) || !isxdigit(str[1])) goto out; - uuid[i] = (hex_value(str[0]) << 4) | hex_value(str[1]); + uuid[i] = (hex_to_bin(str[0]) << 4) | + hex_to_bin(str[1]); str += 2; if (*str == '-') str++; |