summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu.c28
-rw-r--r--kernel/cpuset.c58
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/module.c2
-rw-r--r--kernel/sysctl.c25
-rw-r--r--kernel/sysctl_binary.c9
6 files changed, 108 insertions, 16 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 545777574779..124ad9d6be16 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -326,6 +326,12 @@ out_notify:
int __cpuinit cpu_up(unsigned int cpu)
{
int err = 0;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ int nid;
+ pg_data_t *pgdat;
+#endif
+
if (!cpu_possible(cpu)) {
printk(KERN_ERR "can't online cpu %d because it is not "
"configured as may-hotadd at boot time\n", cpu);
@@ -336,6 +342,28 @@ int __cpuinit cpu_up(unsigned int cpu)
return -EINVAL;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+ nid = cpu_to_node(cpu);
+ if (!node_online(nid)) {
+ err = mem_online_node(nid);
+ if (err)
+ return err;
+ }
+
+ pgdat = NODE_DATA(nid);
+ if (!pgdat) {
+ printk(KERN_ERR
+ "Can't online cpu %d due to NULL pgdat\n", cpu);
+ return -ENOMEM;
+ }
+
+ if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
+ mutex_lock(&zonelists_mutex);
+ build_all_zonelists(NULL);
+ mutex_unlock(&zonelists_mutex);
+ }
+#endif
+
cpu_maps_update_begin();
if (cpu_hotplug_disabled) {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9a50c5f6e727..61d6af7fa676 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -946,16 +946,62 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
* In order to avoid seeing no nodes if the old and new nodes are disjoint,
* we structure updates as setting all new allowed nodes, then clearing newly
* disallowed ones.
- *
- * Called with task's alloc_lock held
*/
static void cpuset_change_task_nodemask(struct task_struct *tsk,
nodemask_t *newmems)
{
+repeat:
+ /*
+ * Allow tasks that have access to memory reserves because they have
+ * been OOM killed to get memory anywhere.
+ */
+ if (unlikely(test_thread_flag(TIF_MEMDIE)))
+ return;
+ if (current->flags & PF_EXITING) /* Let dying task have memory */
+ return;
+
+ task_lock(tsk);
nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
- mpol_rebind_task(tsk, &tsk->mems_allowed);
- mpol_rebind_task(tsk, newmems);
+ mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
+
+
+ /*
+ * ensure checking ->mems_allowed_change_disable after setting all new
+ * allowed nodes.
+ *
+ * the read-side task can see an nodemask with new allowed nodes and
+ * old allowed nodes. and if it allocates page when cpuset clears newly
+ * disallowed ones continuous, it can see the new allowed bits.
+ *
+ * And if setting all new allowed nodes is after the checking, setting
+ * all new allowed nodes and clearing newly disallowed ones will be done
+ * continuous, and the read-side task may find no node to alloc page.
+ */
+ smp_mb();
+
+ /*
+ * Allocation of memory is very fast, we needn't sleep when waiting
+ * for the read-side.
+ */
+ while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
+ task_unlock(tsk);
+ if (!task_curr(tsk))
+ yield();
+ goto repeat;
+ }
+
+ /*
+ * ensure checking ->mems_allowed_change_disable before clearing all new
+ * disallowed nodes.
+ *
+ * if clearing newly disallowed bits before the checking, the read-side
+ * task may find no node to alloc page.
+ */
+ smp_mb();
+
+ mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
tsk->mems_allowed = *newmems;
+ task_unlock(tsk);
}
/*
@@ -978,9 +1024,7 @@ static void cpuset_change_nodemask(struct task_struct *p,
cs = cgroup_cs(scan->cg);
guarantee_online_mems(cs, newmems);
- task_lock(p);
cpuset_change_task_nodemask(p, newmems);
- task_unlock(p);
NODEMASK_FREE(newmems);
@@ -1383,9 +1427,7 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
err = set_cpus_allowed_ptr(tsk, cpus_attach);
WARN_ON_ONCE(err);
- task_lock(tsk);
cpuset_change_task_nodemask(tsk, to);
- task_unlock(tsk);
cpuset_update_task_spread_flag(cs, tsk);
}
diff --git a/kernel/exit.c b/kernel/exit.c
index eabca5a73a85..019a2843bf95 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1002,8 +1002,10 @@ NORET_TYPE void do_exit(long code)
exit_notify(tsk, group_dead);
#ifdef CONFIG_NUMA
+ task_lock(tsk);
mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
+ task_unlock(tsk);
#endif
#ifdef CONFIG_FUTEX
if (unlikely(current->pi_state_cache))
diff --git a/kernel/module.c b/kernel/module.c
index a8014bfb5a4e..625985e70e9d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -180,8 +180,6 @@ extern const struct kernel_symbol __start___ksymtab_gpl[];
extern const struct kernel_symbol __stop___ksymtab_gpl[];
extern const struct kernel_symbol __start___ksymtab_gpl_future[];
extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
-extern const struct kernel_symbol __start___ksymtab_gpl_future[];
-extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
extern const unsigned long __start___kcrctab[];
extern const unsigned long __start___kcrctab_gpl[];
extern const unsigned long __start___kcrctab_gpl_future[];
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4c93486b45d1..84ff5e75c084 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -37,6 +37,7 @@
#include <linux/highuid.h>
#include <linux/writeback.h>
#include <linux/ratelimit.h>
+#include <linux/compaction.h>
#include <linux/hugetlb.h>
#include <linux/initrd.h>
#include <linux/key.h>
@@ -262,6 +263,11 @@ static int min_sched_shares_ratelimit = 100000; /* 100 usec */
static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
#endif
+#ifdef CONFIG_COMPACTION
+static int min_extfrag_threshold;
+static int max_extfrag_threshold = 1000;
+#endif
+
static struct ctl_table kern_table[] = {
{
.procname = "sched_child_runs_first",
@@ -1121,6 +1127,25 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = drop_caches_sysctl_handler,
},
+#ifdef CONFIG_COMPACTION
+ {
+ .procname = "compact_memory",
+ .data = &sysctl_compact_memory,
+ .maxlen = sizeof(int),
+ .mode = 0200,
+ .proc_handler = sysctl_compaction_handler,
+ },
+ {
+ .procname = "extfrag_threshold",
+ .data = &sysctl_extfrag_threshold,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sysctl_extfrag_handler,
+ .extra1 = &min_extfrag_threshold,
+ .extra2 = &max_extfrag_threshold,
+ },
+
+#endif /* CONFIG_COMPACTION */
{
.procname = "min_free_kbytes",
.data = &min_free_kbytes,
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 937d31dc8566..1357c5786064 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -13,6 +13,7 @@
#include <linux/file.h>
#include <linux/ctype.h>
#include <linux/netdevice.h>
+#include <linux/kernel.h>
#include <linux/slab.h>
#ifdef CONFIG_SYSCTL_SYSCALL
@@ -1124,11 +1125,6 @@ out:
return result;
}
-static unsigned hex_value(int ch)
-{
- return isdigit(ch) ? ch - '0' : ((ch | 0x20) - 'a') + 10;
-}
-
static ssize_t bin_uuid(struct file *file,
void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
{
@@ -1156,7 +1152,8 @@ static ssize_t bin_uuid(struct file *file,
if (!isxdigit(str[0]) || !isxdigit(str[1]))
goto out;
- uuid[i] = (hex_value(str[0]) << 4) | hex_value(str[1]);
+ uuid[i] = (hex_to_bin(str[0]) << 4) |
+ hex_to_bin(str[1]);
str += 2;
if (*str == '-')
str++;