Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--  kernel/workqueue.c | 903
1 files changed, 566 insertions, 337 deletions
| diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f28849394791..586ad91300b0 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -159,6 +159,7 @@ struct worker_pool {  	/* see manage_workers() for details on the two manager mutexes */  	struct mutex		manager_arb;	/* manager arbitration */ +	struct worker		*manager;	/* L: purely informational */  	struct mutex		attach_mutex;	/* attach/detach exclusion */  	struct list_head	workers;	/* A: attached workers */  	struct completion	*detach_completion; /* all workers detached */ @@ -230,7 +231,7 @@ struct wq_device;   */  struct workqueue_struct {  	struct list_head	pwqs;		/* WR: all pwqs of this wq */ -	struct list_head	list;		/* PL: list of all workqueues */ +	struct list_head	list;		/* PR: list of all workqueues */  	struct mutex		mutex;		/* protects this wq */  	int			work_color;	/* WQ: current work color */ @@ -257,6 +258,13 @@ struct workqueue_struct {  #endif  	char			name[WQ_NAME_LEN]; /* I: workqueue name */ +	/* +	 * Destruction of workqueue_struct is sched-RCU protected to allow +	 * walking the workqueues list without grabbing wq_pool_mutex. +	 * This is used to dump all workqueues from sysrq. +	 */ +	struct rcu_head		rcu; +  	/* hot fields used during command issue, aligned to cacheline */  	unsigned int		flags ____cacheline_aligned; /* WQ: WQ_* flags */  	struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */ @@ -288,7 +296,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;  static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */  static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */ -static LIST_HEAD(workqueues);		/* PL: list of all workqueues */ +static LIST_HEAD(workqueues);		/* PR: list of all workqueues */  static bool workqueue_freezing;		/* PL: have wqs started freezing? 
*/  /* the per-cpu worker pools */ @@ -324,6 +332,7 @@ EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);  static int worker_thread(void *__worker);  static void copy_workqueue_attrs(struct workqueue_attrs *to,  				 const struct workqueue_attrs *from); +static void workqueue_sysfs_unregister(struct workqueue_struct *wq);  #define CREATE_TRACE_POINTS  #include <trace/events/workqueue.h> @@ -1911,9 +1920,11 @@ static bool manage_workers(struct worker *worker)  	 */  	if (!mutex_trylock(&pool->manager_arb))  		return false; +	pool->manager = worker;  	maybe_create_worker(pool); +	pool->manager = NULL;  	mutex_unlock(&pool->manager_arb);  	return true;  } @@ -2303,6 +2314,7 @@ repeat:  struct wq_barrier {  	struct work_struct	work;  	struct completion	done; +	struct task_struct	*task;	/* purely informational */  };  static void wq_barrier_func(struct work_struct *work) @@ -2351,6 +2363,7 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,  	INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);  	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));  	init_completion(&barr->done); +	barr->task = current;  	/*  	 * If @target is currently being executed, schedule the @@ -2728,19 +2741,57 @@ bool flush_work(struct work_struct *work)  }  EXPORT_SYMBOL_GPL(flush_work); +struct cwt_wait { +	wait_queue_t		wait; +	struct work_struct	*work; +}; + +static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ +	struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait); + +	if (cwait->work != key) +		return 0; +	return autoremove_wake_function(wait, mode, sync, key); +} +  static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)  { +	static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);  	unsigned long flags;  	int ret;  	do {  		ret = try_to_grab_pending(work, is_dwork, &flags);  		/* -		 * If someone else is canceling, wait for the same event it -		 * would be waiting for before retrying. +		 * If someone else is already canceling, wait for it to +		 * finish.  flush_work() doesn't work for PREEMPT_NONE +		 * because we may get scheduled between @work's completion +		 * and the other canceling task resuming and clearing +		 * CANCELING - flush_work() will return false immediately +		 * as @work is no longer busy, try_to_grab_pending() will +		 * return -ENOENT as @work is still being canceled and the +		 * other canceling task won't be able to clear CANCELING as +		 * we're hogging the CPU. +		 * +		 * Let's wait for completion using a waitqueue.  As this +		 * may lead to the thundering herd problem, use a custom +		 * wake function which matches @work along with exclusive +		 * wait and wakeup.  		 */ -		if (unlikely(ret == -ENOENT)) -			flush_work(work); +		if (unlikely(ret == -ENOENT)) { +			struct cwt_wait cwait; + +			init_wait(&cwait.wait); +			cwait.wait.func = cwt_wakefn; +			cwait.work = work; + +			prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait, +						  TASK_UNINTERRUPTIBLE); +			if (work_is_canceling(work)) +				schedule(); +			finish_wait(&cancel_waitq, &cwait.wait); +		}  	} while (unlikely(ret < 0));  	/* tell other tasks trying to grab @work to back off */ @@ -2749,6 +2800,16 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)  	flush_work(work);  	clear_work_data(work); + +	/* +	 * Paired with prepare_to_wait() above so that either +	 * waitqueue_active() is visible here or !work_is_canceling() is +	 * visible there. 
+	 */ +	smp_mb(); +	if (waitqueue_active(&cancel_waitq)) +		__wake_up(&cancel_waitq, TASK_NORMAL, 1, work); +  	return ret;  } @@ -2941,323 +3002,6 @@ int execute_in_process_context(work_func_t fn, struct execute_work *ew)  }  EXPORT_SYMBOL_GPL(execute_in_process_context); -#ifdef CONFIG_SYSFS -/* - * Workqueues with WQ_SYSFS flag set is visible to userland via - * /sys/bus/workqueue/devices/WQ_NAME.  All visible workqueues have the - * following attributes. - * - *  per_cpu	RO bool	: whether the workqueue is per-cpu or unbound - *  max_active	RW int	: maximum number of in-flight work items - * - * Unbound workqueues have the following extra attributes. - * - *  id		RO int	: the associated pool ID - *  nice	RW int	: nice value of the workers - *  cpumask	RW mask	: bitmask of allowed CPUs for the workers - */ -struct wq_device { -	struct workqueue_struct		*wq; -	struct device			dev; -}; - -static struct workqueue_struct *dev_to_wq(struct device *dev) -{ -	struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); - -	return wq_dev->wq; -} - -static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr, -			    char *buf) -{ -	struct workqueue_struct *wq = dev_to_wq(dev); - -	return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND)); -} -static DEVICE_ATTR_RO(per_cpu); - -static ssize_t max_active_show(struct device *dev, -			       struct device_attribute *attr, char *buf) -{ -	struct workqueue_struct *wq = dev_to_wq(dev); - -	return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active); -} - -static ssize_t max_active_store(struct device *dev, -				struct device_attribute *attr, const char *buf, -				size_t count) -{ -	struct workqueue_struct *wq = dev_to_wq(dev); -	int val; - -	if (sscanf(buf, "%d", &val) != 1 || val <= 0) -		return -EINVAL; - -	workqueue_set_max_active(wq, val); -	return count; -} -static DEVICE_ATTR_RW(max_active); - -static struct attribute *wq_sysfs_attrs[] = { -	&dev_attr_per_cpu.attr, -	&dev_attr_max_active.attr, -	NULL, -}; -ATTRIBUTE_GROUPS(wq_sysfs); - -static ssize_t wq_pool_ids_show(struct device *dev, -				struct device_attribute *attr, char *buf) -{ -	struct workqueue_struct *wq = dev_to_wq(dev); -	const char *delim = ""; -	int node, written = 0; - -	rcu_read_lock_sched(); -	for_each_node(node) { -		written += scnprintf(buf + written, PAGE_SIZE - written, -				     "%s%d:%d", delim, node, -				     unbound_pwq_by_node(wq, node)->pool->id); -		delim = " "; -	} -	written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); -	rcu_read_unlock_sched(); - -	return written; -} - -static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr, -			    char *buf) -{ -	struct workqueue_struct *wq = dev_to_wq(dev); -	int written; - -	mutex_lock(&wq->mutex); -	written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice); -	mutex_unlock(&wq->mutex); - -	return written; -} - -/* prepare workqueue_attrs for sysfs store operations */ -static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq) -{ -	struct workqueue_attrs *attrs; - -	attrs = alloc_workqueue_attrs(GFP_KERNEL); -	if (!attrs) -		return NULL; - -	mutex_lock(&wq->mutex); -	copy_workqueue_attrs(attrs, wq->unbound_attrs); -	mutex_unlock(&wq->mutex); -	return attrs; -} - -static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr, -			     const char *buf, size_t count) -{ -	struct workqueue_struct *wq = dev_to_wq(dev); -	struct workqueue_attrs *attrs; -	int ret; - -	attrs = wq_sysfs_prep_attrs(wq); -	if 
(!attrs) -		return -ENOMEM; - -	if (sscanf(buf, "%d", &attrs->nice) == 1 && -	    attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) -		ret = apply_workqueue_attrs(wq, attrs); -	else -		ret = -EINVAL; - -	free_workqueue_attrs(attrs); -	return ret ?: count; -} - -static ssize_t wq_cpumask_show(struct device *dev, -			       struct device_attribute *attr, char *buf) -{ -	struct workqueue_struct *wq = dev_to_wq(dev); -	int written; - -	mutex_lock(&wq->mutex); -	written = scnprintf(buf, PAGE_SIZE, "%*pb\n", -			    cpumask_pr_args(wq->unbound_attrs->cpumask)); -	mutex_unlock(&wq->mutex); -	return written; -} - -static ssize_t wq_cpumask_store(struct device *dev, -				struct device_attribute *attr, -				const char *buf, size_t count) -{ -	struct workqueue_struct *wq = dev_to_wq(dev); -	struct workqueue_attrs *attrs; -	int ret; - -	attrs = wq_sysfs_prep_attrs(wq); -	if (!attrs) -		return -ENOMEM; - -	ret = cpumask_parse(buf, attrs->cpumask); -	if (!ret) -		ret = apply_workqueue_attrs(wq, attrs); - -	free_workqueue_attrs(attrs); -	return ret ?: count; -} - -static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr, -			    char *buf) -{ -	struct workqueue_struct *wq = dev_to_wq(dev); -	int written; - -	mutex_lock(&wq->mutex); -	written = scnprintf(buf, PAGE_SIZE, "%d\n", -			    !wq->unbound_attrs->no_numa); -	mutex_unlock(&wq->mutex); - -	return written; -} - -static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr, -			     const char *buf, size_t count) -{ -	struct workqueue_struct *wq = dev_to_wq(dev); -	struct workqueue_attrs *attrs; -	int v, ret; - -	attrs = wq_sysfs_prep_attrs(wq); -	if (!attrs) -		return -ENOMEM; - -	ret = -EINVAL; -	if (sscanf(buf, "%d", &v) == 1) { -		attrs->no_numa = !v; -		ret = apply_workqueue_attrs(wq, attrs); -	} - -	free_workqueue_attrs(attrs); -	return ret ?: count; -} - -static struct device_attribute wq_sysfs_unbound_attrs[] = { -	__ATTR(pool_ids, 0444, wq_pool_ids_show, NULL), -	__ATTR(nice, 0644, wq_nice_show, wq_nice_store), -	__ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store), -	__ATTR(numa, 0644, wq_numa_show, wq_numa_store), -	__ATTR_NULL, -}; - -static struct bus_type wq_subsys = { -	.name				= "workqueue", -	.dev_groups			= wq_sysfs_groups, -}; - -static int __init wq_sysfs_init(void) -{ -	return subsys_virtual_register(&wq_subsys, NULL); -} -core_initcall(wq_sysfs_init); - -static void wq_device_release(struct device *dev) -{ -	struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); - -	kfree(wq_dev); -} - -/** - * workqueue_sysfs_register - make a workqueue visible in sysfs - * @wq: the workqueue to register - * - * Expose @wq in sysfs under /sys/bus/workqueue/devices. - * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set - * which is the preferred method. - * - * Workqueue user should use this function directly iff it wants to apply - * workqueue_attrs before making the workqueue visible in sysfs; otherwise, - * apply_workqueue_attrs() may race against userland updating the - * attributes. - * - * Return: 0 on success, -errno on failure. - */ -int workqueue_sysfs_register(struct workqueue_struct *wq) -{ -	struct wq_device *wq_dev; -	int ret; - -	/* -	 * Adjusting max_active or creating new pwqs by applyting -	 * attributes breaks ordering guarantee.  Disallow exposing ordered -	 * workqueues. 
-	 */ -	if (WARN_ON(wq->flags & __WQ_ORDERED)) -		return -EINVAL; - -	wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); -	if (!wq_dev) -		return -ENOMEM; - -	wq_dev->wq = wq; -	wq_dev->dev.bus = &wq_subsys; -	wq_dev->dev.init_name = wq->name; -	wq_dev->dev.release = wq_device_release; - -	/* -	 * unbound_attrs are created separately.  Suppress uevent until -	 * everything is ready. -	 */ -	dev_set_uevent_suppress(&wq_dev->dev, true); - -	ret = device_register(&wq_dev->dev); -	if (ret) { -		kfree(wq_dev); -		wq->wq_dev = NULL; -		return ret; -	} - -	if (wq->flags & WQ_UNBOUND) { -		struct device_attribute *attr; - -		for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) { -			ret = device_create_file(&wq_dev->dev, attr); -			if (ret) { -				device_unregister(&wq_dev->dev); -				wq->wq_dev = NULL; -				return ret; -			} -		} -	} - -	dev_set_uevent_suppress(&wq_dev->dev, false); -	kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); -	return 0; -} - -/** - * workqueue_sysfs_unregister - undo workqueue_sysfs_register() - * @wq: the workqueue to unregister - * - * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister. - */ -static void workqueue_sysfs_unregister(struct workqueue_struct *wq) -{ -	struct wq_device *wq_dev = wq->wq_dev; - -	if (!wq->wq_dev) -		return; - -	wq->wq_dev = NULL; -	device_unregister(&wq_dev->dev); -} -#else	/* CONFIG_SYSFS */ -static void workqueue_sysfs_unregister(struct workqueue_struct *wq)	{ } -#endif	/* CONFIG_SYSFS */ -  /**   * free_workqueue_attrs - free a workqueue_attrs   * @attrs: workqueue_attrs to free @@ -3376,6 +3120,20 @@ static int init_worker_pool(struct worker_pool *pool)  	return 0;  } +static void rcu_free_wq(struct rcu_head *rcu) +{ +	struct workqueue_struct *wq = +		container_of(rcu, struct workqueue_struct, rcu); + +	if (!(wq->flags & WQ_UNBOUND)) +		free_percpu(wq->cpu_pwqs); +	else +		free_workqueue_attrs(wq->unbound_attrs); + +	kfree(wq->rescuer); +	kfree(wq); +} +  static void rcu_free_pool(struct rcu_head *rcu)  {  	struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu); @@ -3553,12 +3311,10 @@ static void pwq_unbound_release_workfn(struct work_struct *work)  	/*  	 * If we're the last pwq going away, @wq is already dead and no one -	 * is gonna access it anymore.  Free it. +	 * is gonna access it anymore.  Schedule RCU free.  	 */ -	if (is_last) { -		free_workqueue_attrs(wq->unbound_attrs); -		kfree(wq); -	} +	if (is_last) +		call_rcu_sched(&wq->rcu, rcu_free_wq);  }  /** @@ -4095,7 +3851,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,  		pwq_adjust_max_active(pwq);  	mutex_unlock(&wq->mutex); -	list_add(&wq->list, &workqueues); +	list_add_tail_rcu(&wq->list, &workqueues);  	mutex_unlock(&wq_pool_mutex); @@ -4151,24 +3907,20 @@ void destroy_workqueue(struct workqueue_struct *wq)  	 * flushing is complete in case freeze races us.  	 */  	mutex_lock(&wq_pool_mutex); -	list_del_init(&wq->list); +	list_del_rcu(&wq->list);  	mutex_unlock(&wq_pool_mutex);  	workqueue_sysfs_unregister(wq); -	if (wq->rescuer) { +	if (wq->rescuer)  		kthread_stop(wq->rescuer->task); -		kfree(wq->rescuer); -		wq->rescuer = NULL; -	}  	if (!(wq->flags & WQ_UNBOUND)) {  		/*  		 * The base ref is never dropped on per-cpu pwqs.  Directly -		 * free the pwqs and wq. +		 * schedule RCU free.  		 */ -		free_percpu(wq->cpu_pwqs); -		kfree(wq); +		call_rcu_sched(&wq->rcu, rcu_free_wq);  	} else {  		/*  		 * We're the sole accessor of @wq at this point.  
Directly @@ -4389,6 +4141,166 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)  	}  } +static void pr_cont_pool_info(struct worker_pool *pool) +{ +	pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask); +	if (pool->node != NUMA_NO_NODE) +		pr_cont(" node=%d", pool->node); +	pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice); +} + +static void pr_cont_work(bool comma, struct work_struct *work) +{ +	if (work->func == wq_barrier_func) { +		struct wq_barrier *barr; + +		barr = container_of(work, struct wq_barrier, work); + +		pr_cont("%s BAR(%d)", comma ? "," : "", +			task_pid_nr(barr->task)); +	} else { +		pr_cont("%s %pf", comma ? "," : "", work->func); +	} +} + +static void show_pwq(struct pool_workqueue *pwq) +{ +	struct worker_pool *pool = pwq->pool; +	struct work_struct *work; +	struct worker *worker; +	bool has_in_flight = false, has_pending = false; +	int bkt; + +	pr_info("  pwq %d:", pool->id); +	pr_cont_pool_info(pool); + +	pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active, +		!list_empty(&pwq->mayday_node) ? " MAYDAY" : ""); + +	hash_for_each(pool->busy_hash, bkt, worker, hentry) { +		if (worker->current_pwq == pwq) { +			has_in_flight = true; +			break; +		} +	} +	if (has_in_flight) { +		bool comma = false; + +		pr_info("    in-flight:"); +		hash_for_each(pool->busy_hash, bkt, worker, hentry) { +			if (worker->current_pwq != pwq) +				continue; + +			pr_cont("%s %d%s:%pf", comma ? "," : "", +				task_pid_nr(worker->task), +				worker == pwq->wq->rescuer ? "(RESCUER)" : "", +				worker->current_func); +			list_for_each_entry(work, &worker->scheduled, entry) +				pr_cont_work(false, work); +			comma = true; +		} +		pr_cont("\n"); +	} + +	list_for_each_entry(work, &pool->worklist, entry) { +		if (get_work_pwq(work) == pwq) { +			has_pending = true; +			break; +		} +	} +	if (has_pending) { +		bool comma = false; + +		pr_info("    pending:"); +		list_for_each_entry(work, &pool->worklist, entry) { +			if (get_work_pwq(work) != pwq) +				continue; + +			pr_cont_work(comma, work); +			comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED); +		} +		pr_cont("\n"); +	} + +	if (!list_empty(&pwq->delayed_works)) { +		bool comma = false; + +		pr_info("    delayed:"); +		list_for_each_entry(work, &pwq->delayed_works, entry) { +			pr_cont_work(comma, work); +			comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED); +		} +		pr_cont("\n"); +	} +} + +/** + * show_workqueue_state - dump workqueue state + * + * Called from a sysrq handler and prints out all busy workqueues and + * pools. 
+ */ +void show_workqueue_state(void) +{ +	struct workqueue_struct *wq; +	struct worker_pool *pool; +	unsigned long flags; +	int pi; + +	rcu_read_lock_sched(); + +	pr_info("Showing busy workqueues and worker pools:\n"); + +	list_for_each_entry_rcu(wq, &workqueues, list) { +		struct pool_workqueue *pwq; +		bool idle = true; + +		for_each_pwq(pwq, wq) { +			if (pwq->nr_active || !list_empty(&pwq->delayed_works)) { +				idle = false; +				break; +			} +		} +		if (idle) +			continue; + +		pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); + +		for_each_pwq(pwq, wq) { +			spin_lock_irqsave(&pwq->pool->lock, flags); +			if (pwq->nr_active || !list_empty(&pwq->delayed_works)) +				show_pwq(pwq); +			spin_unlock_irqrestore(&pwq->pool->lock, flags); +		} +	} + +	for_each_pool(pool, pi) { +		struct worker *worker; +		bool first = true; + +		spin_lock_irqsave(&pool->lock, flags); +		if (pool->nr_workers == pool->nr_idle) +			goto next_pool; + +		pr_info("pool %d:", pool->id); +		pr_cont_pool_info(pool); +		pr_cont(" workers=%d", pool->nr_workers); +		if (pool->manager) +			pr_cont(" manager: %d", +				task_pid_nr(pool->manager->task)); +		list_for_each_entry(worker, &pool->idle_list, entry) { +			pr_cont(" %s%d", first ? "idle: " : "", +				task_pid_nr(worker->task)); +			first = false; +		} +		pr_cont("\n"); +	next_pool: +		spin_unlock_irqrestore(&pool->lock, flags); +	} + +	rcu_read_unlock_sched(); +} +  /*   * CPU hotplug.   * @@ -4786,6 +4698,323 @@ out_unlock:  }  #endif /* CONFIG_FREEZER */ +#ifdef CONFIG_SYSFS +/* + * Workqueues with WQ_SYSFS flag set is visible to userland via + * /sys/bus/workqueue/devices/WQ_NAME.  All visible workqueues have the + * following attributes. + * + *  per_cpu	RO bool	: whether the workqueue is per-cpu or unbound + *  max_active	RW int	: maximum number of in-flight work items + * + * Unbound workqueues have the following extra attributes. 
+ * + *  id		RO int	: the associated pool ID + *  nice	RW int	: nice value of the workers + *  cpumask	RW mask	: bitmask of allowed CPUs for the workers + */ +struct wq_device { +	struct workqueue_struct		*wq; +	struct device			dev; +}; + +static struct workqueue_struct *dev_to_wq(struct device *dev) +{ +	struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); + +	return wq_dev->wq; +} + +static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr, +			    char *buf) +{ +	struct workqueue_struct *wq = dev_to_wq(dev); + +	return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND)); +} +static DEVICE_ATTR_RO(per_cpu); + +static ssize_t max_active_show(struct device *dev, +			       struct device_attribute *attr, char *buf) +{ +	struct workqueue_struct *wq = dev_to_wq(dev); + +	return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active); +} + +static ssize_t max_active_store(struct device *dev, +				struct device_attribute *attr, const char *buf, +				size_t count) +{ +	struct workqueue_struct *wq = dev_to_wq(dev); +	int val; + +	if (sscanf(buf, "%d", &val) != 1 || val <= 0) +		return -EINVAL; + +	workqueue_set_max_active(wq, val); +	return count; +} +static DEVICE_ATTR_RW(max_active); + +static struct attribute *wq_sysfs_attrs[] = { +	&dev_attr_per_cpu.attr, +	&dev_attr_max_active.attr, +	NULL, +}; +ATTRIBUTE_GROUPS(wq_sysfs); + +static ssize_t wq_pool_ids_show(struct device *dev, +				struct device_attribute *attr, char *buf) +{ +	struct workqueue_struct *wq = dev_to_wq(dev); +	const char *delim = ""; +	int node, written = 0; + +	rcu_read_lock_sched(); +	for_each_node(node) { +		written += scnprintf(buf + written, PAGE_SIZE - written, +				     "%s%d:%d", delim, node, +				     unbound_pwq_by_node(wq, node)->pool->id); +		delim = " "; +	} +	written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); +	rcu_read_unlock_sched(); + +	return written; +} + +static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr, +			    char *buf) +{ +	struct workqueue_struct *wq = dev_to_wq(dev); +	int written; + +	mutex_lock(&wq->mutex); +	written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice); +	mutex_unlock(&wq->mutex); + +	return written; +} + +/* prepare workqueue_attrs for sysfs store operations */ +static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq) +{ +	struct workqueue_attrs *attrs; + +	attrs = alloc_workqueue_attrs(GFP_KERNEL); +	if (!attrs) +		return NULL; + +	mutex_lock(&wq->mutex); +	copy_workqueue_attrs(attrs, wq->unbound_attrs); +	mutex_unlock(&wq->mutex); +	return attrs; +} + +static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr, +			     const char *buf, size_t count) +{ +	struct workqueue_struct *wq = dev_to_wq(dev); +	struct workqueue_attrs *attrs; +	int ret; + +	attrs = wq_sysfs_prep_attrs(wq); +	if (!attrs) +		return -ENOMEM; + +	if (sscanf(buf, "%d", &attrs->nice) == 1 && +	    attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) +		ret = apply_workqueue_attrs(wq, attrs); +	else +		ret = -EINVAL; + +	free_workqueue_attrs(attrs); +	return ret ?: count; +} + +static ssize_t wq_cpumask_show(struct device *dev, +			       struct device_attribute *attr, char *buf) +{ +	struct workqueue_struct *wq = dev_to_wq(dev); +	int written; + +	mutex_lock(&wq->mutex); +	written = scnprintf(buf, PAGE_SIZE, "%*pb\n", +			    cpumask_pr_args(wq->unbound_attrs->cpumask)); +	mutex_unlock(&wq->mutex); +	return written; +} + +static ssize_t wq_cpumask_store(struct device 
*dev, +				struct device_attribute *attr, +				const char *buf, size_t count) +{ +	struct workqueue_struct *wq = dev_to_wq(dev); +	struct workqueue_attrs *attrs; +	int ret; + +	attrs = wq_sysfs_prep_attrs(wq); +	if (!attrs) +		return -ENOMEM; + +	ret = cpumask_parse(buf, attrs->cpumask); +	if (!ret) +		ret = apply_workqueue_attrs(wq, attrs); + +	free_workqueue_attrs(attrs); +	return ret ?: count; +} + +static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr, +			    char *buf) +{ +	struct workqueue_struct *wq = dev_to_wq(dev); +	int written; + +	mutex_lock(&wq->mutex); +	written = scnprintf(buf, PAGE_SIZE, "%d\n", +			    !wq->unbound_attrs->no_numa); +	mutex_unlock(&wq->mutex); + +	return written; +} + +static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr, +			     const char *buf, size_t count) +{ +	struct workqueue_struct *wq = dev_to_wq(dev); +	struct workqueue_attrs *attrs; +	int v, ret; + +	attrs = wq_sysfs_prep_attrs(wq); +	if (!attrs) +		return -ENOMEM; + +	ret = -EINVAL; +	if (sscanf(buf, "%d", &v) == 1) { +		attrs->no_numa = !v; +		ret = apply_workqueue_attrs(wq, attrs); +	} + +	free_workqueue_attrs(attrs); +	return ret ?: count; +} + +static struct device_attribute wq_sysfs_unbound_attrs[] = { +	__ATTR(pool_ids, 0444, wq_pool_ids_show, NULL), +	__ATTR(nice, 0644, wq_nice_show, wq_nice_store), +	__ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store), +	__ATTR(numa, 0644, wq_numa_show, wq_numa_store), +	__ATTR_NULL, +}; + +static struct bus_type wq_subsys = { +	.name				= "workqueue", +	.dev_groups			= wq_sysfs_groups, +}; + +static int __init wq_sysfs_init(void) +{ +	return subsys_virtual_register(&wq_subsys, NULL); +} +core_initcall(wq_sysfs_init); + +static void wq_device_release(struct device *dev) +{ +	struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); + +	kfree(wq_dev); +} + +/** + * workqueue_sysfs_register - make a workqueue visible in sysfs + * @wq: the workqueue to register + * + * Expose @wq in sysfs under /sys/bus/workqueue/devices. + * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set + * which is the preferred method. + * + * Workqueue user should use this function directly iff it wants to apply + * workqueue_attrs before making the workqueue visible in sysfs; otherwise, + * apply_workqueue_attrs() may race against userland updating the + * attributes. + * + * Return: 0 on success, -errno on failure. + */ +int workqueue_sysfs_register(struct workqueue_struct *wq) +{ +	struct wq_device *wq_dev; +	int ret; + +	/* +	 * Adjusting max_active or creating new pwqs by applyting +	 * attributes breaks ordering guarantee.  Disallow exposing ordered +	 * workqueues. +	 */ +	if (WARN_ON(wq->flags & __WQ_ORDERED)) +		return -EINVAL; + +	wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); +	if (!wq_dev) +		return -ENOMEM; + +	wq_dev->wq = wq; +	wq_dev->dev.bus = &wq_subsys; +	wq_dev->dev.init_name = wq->name; +	wq_dev->dev.release = wq_device_release; + +	/* +	 * unbound_attrs are created separately.  Suppress uevent until +	 * everything is ready. 
+	 */ +	dev_set_uevent_suppress(&wq_dev->dev, true); + +	ret = device_register(&wq_dev->dev); +	if (ret) { +		kfree(wq_dev); +		wq->wq_dev = NULL; +		return ret; +	} + +	if (wq->flags & WQ_UNBOUND) { +		struct device_attribute *attr; + +		for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) { +			ret = device_create_file(&wq_dev->dev, attr); +			if (ret) { +				device_unregister(&wq_dev->dev); +				wq->wq_dev = NULL; +				return ret; +			} +		} +	} + +	dev_set_uevent_suppress(&wq_dev->dev, false); +	kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); +	return 0; +} + +/** + * workqueue_sysfs_unregister - undo workqueue_sysfs_register() + * @wq: the workqueue to unregister + * + * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister. + */ +static void workqueue_sysfs_unregister(struct workqueue_struct *wq) +{ +	struct wq_device *wq_dev = wq->wq_dev; + +	if (!wq->wq_dev) +		return; + +	wq->wq_dev = NULL; +	device_unregister(&wq_dev->dev); +} +#else	/* CONFIG_SYSFS */ +static void workqueue_sysfs_unregister(struct workqueue_struct *wq)	{ } +#endif	/* CONFIG_SYSFS */ +  static void __init wq_numa_init(void)  {  	cpumask_var_t *tbl; | 
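The __cancel_work_timer() change above is the core of the fix: instead of retrying flush_work() when another task is already canceling the same work item (which can livelock on PREEMPT_NONE), a canceling task now sleeps on the shared cancel_waitq and is woken by key. Below is a minimal, self-contained sketch of that keyed exclusive-wait pattern; the my_* names are illustrative placeholders, not identifiers from the patch.

#include <linux/kernel.h>
#include <linux/wait.h>
#include <linux/sched.h>

static DECLARE_WAIT_QUEUE_HEAD(my_waitq);

struct my_waiter {
	wait_queue_t		wait;
	void			*key;	/* object this waiter cares about */
};

static int my_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct my_waiter *w = container_of(wait, struct my_waiter, wait);

	if (w->key != key)
		return 0;	/* not our object, keep scanning the queue */
	return autoremove_wake_function(wait, mode, sync, key);
}

static void wait_for_object(void *obj, bool (*still_busy)(void *))
{
	struct my_waiter w = { .key = obj };

	init_wait(&w.wait);
	w.wait.func = my_wakefn;	/* filter wakeups by key */

	prepare_to_wait_exclusive(&my_waitq, &w.wait, TASK_UNINTERRUPTIBLE);
	if (still_busy(obj))
		schedule();
	finish_wait(&my_waitq, &w.wait);
}

static void release_object(void *obj)
{
	/* pairs with prepare_to_wait_exclusive() in wait_for_object() */
	smp_mb();
	if (waitqueue_active(&my_waitq))
		__wake_up(&my_waitq, TASK_NORMAL, 1, obj);
}

Because the wake side passes the object pointer as the key and wakes at most one exclusive waiter, unrelated cancellations sharing the same waitqueue neither wake each other nor pile up, avoiding the thundering herd the comment in the patch warns about.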
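The other structural change is making destruction of workqueue_struct sched-RCU protected: the global workqueues list is updated with list_add_tail_rcu()/list_del_rcu() and the final free happens via call_rcu_sched(), so show_workqueue_state() can walk every workqueue from sysrq context without taking wq_pool_mutex. A rough sketch of that publish/retire pattern follows, using placeholder item names rather than the actual workqueue structures.

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/printk.h>

struct item {
	struct list_head	list;
	struct rcu_head		rcu;
};

static LIST_HEAD(items);		/* writers hold items_mutex, readers use sched-RCU */
static DEFINE_MUTEX(items_mutex);

static void rcu_free_item(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct item, rcu));
}

static void publish_item(struct item *it)
{
	mutex_lock(&items_mutex);
	list_add_tail_rcu(&it->list, &items);	/* visible to lockless readers */
	mutex_unlock(&items_mutex);
}

static void retire_item(struct item *it)
{
	mutex_lock(&items_mutex);
	list_del_rcu(&it->list);
	mutex_unlock(&items_mutex);
	call_rcu_sched(&it->rcu, rcu_free_item);	/* free once readers drain */
}

static void dump_items(void)
{
	struct item *it;

	rcu_read_lock_sched();			/* e.g. from a sysrq handler */
	list_for_each_entry_rcu(it, &items, list)
		pr_info("item %p\n", it);	/* safe without items_mutex */
	rcu_read_unlock_sched();
}

The same reasoning explains why destroy_workqueue() no longer frees the rescuer or the per-cpu pwqs directly: a sysrq dump may still be traversing the list under rcu_read_lock_sched(), so everything reachable from the list entry must stay valid until the grace period ends and rcu_free_wq() runs.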
