From 021ada7dff22d0d9540ff596cb0f8bb866755ee1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 29 Mar 2013 19:27:05 -0400
Subject: procfs: switch /proc/self away from proc_dir_entry

Just have it pinned in dcache all along and let procfs ->kill_sb()
drop it before kill_anon_super().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/base.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'fs/proc/base.c')
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 69078c7cef1f..593e7c5ddb49 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2794,7 +2794,7 @@ retry:
 	return iter;
 }
 
-#define TGID_OFFSET (FIRST_PROCESS_ENTRY)
+#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1)
 
 static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 	struct tgid_iter iter)
@@ -2817,13 +2817,21 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	struct tgid_iter iter;
 	struct pid_namespace *ns;
 	filldir_t __filldir;
+	loff_t pos = filp->f_pos;
 
-	if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
+	if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
 		goto out;
 
-	ns = filp->f_dentry->d_sb->s_fs_info;
+	if (pos == TGID_OFFSET - 1) {
+		if (proc_fill_cache(filp, dirent, filldir, "self", 4,
+					NULL, NULL, NULL) < 0)
+			goto out;
+		iter.tgid = 0;
+	} else {
+		iter.tgid = pos - TGID_OFFSET;
+	}
 	iter.task = NULL;
-	iter.tgid = filp->f_pos - TGID_OFFSET;
+	ns = filp->f_dentry->d_sb->s_fs_info;
 	for (iter = next_tgid(ns, iter);
 	     iter.task;
 	     iter.tgid += 1, iter = next_tgid(ns, iter)) {
-- 
cgit v1.2.3


From 48f6a7a511ef8823fdff39afee0320092d43a8a0 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Mon, 11 Mar 2013 13:12:45 +0400
Subject: posix-timers: Introduce /proc/PID/timers file

Currently kernel doesn't provide any API for getting info about what
posix timers are configured by processes. It's implied, that a process
which configured some timers, knows what it did. However, for external
tools it's impossible to get this information. In particular, this is
critical for checkpoint-restore project to have this info.

Introduce a per-pid proc file with information about posix
timers. Since these timers are shared between threads, this file is
present on tgid level only, no such thing in tid subdirs.

The file format is expected to be the "/proc/<pid>/smaps"-like,
i.e. each timer will occupy seveal lines to allow for future
extending.

Each new timer entry starts with the

ID: <number>

line which is added by this patch.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Matthew Helsley <matt.helsley@gmail.com>
Link: http://lkml.kernel.org/r/513DA00D.6070009@parallels.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/proc/base.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

(limited to 'fs/proc/base.c')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 69078c7cef1f..01def9f8aa74 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -86,6 +86,7 @@
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
 #include <linux/flex_array.h>
+#include <linux/posix-timers.h>
 #ifdef CONFIG_HARDWALL
 #include <asm/hardwall.h>
 #endif
@@ -2013,6 +2014,85 @@ static const struct file_operations proc_map_files_operations = {
 	.llseek		= default_llseek,
 };
 
+struct timers_private {
+	struct pid *pid;
+	struct task_struct *task;
+	struct sighand_struct *sighand;
+	unsigned long flags;
+};
+
+static void *timers_start(struct seq_file *m, loff_t *pos)
+{
+	struct timers_private *tp = m->private;
+
+	tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
+	if (!tp->task)
+		return ERR_PTR(-ESRCH);
+
+	tp->sighand = lock_task_sighand(tp->task, &tp->flags);
+	if (!tp->sighand)
+		return ERR_PTR(-ESRCH);
+
+	return seq_list_start(&tp->task->signal->posix_timers, *pos);
+}
+
+static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct timers_private *tp = m->private;
+	return seq_list_next(v, &tp->task->signal->posix_timers, pos);
+}
+
+static void timers_stop(struct seq_file *m, void *v)
+{
+	struct timers_private *tp = m->private;
+
+	if (tp->sighand) {
+		unlock_task_sighand(tp->task, &tp->flags);
+		tp->sighand = NULL;
+	}
+
+	if (tp->task) {
+		put_task_struct(tp->task);
+		tp->task = NULL;
+	}
+}
+
+static int show_timer(struct seq_file *m, void *v)
+{
+	struct k_itimer *timer;
+
+	timer = list_entry((struct list_head *)v, struct k_itimer, list);
+	seq_printf(m, "ID: %d\n", timer->it_id);
+
+	return 0;
+}
+
+static const struct seq_operations proc_timers_seq_ops = {
+	.start	= timers_start,
+	.next	= timers_next,
+	.stop	= timers_stop,
+	.show	= show_timer,
+};
+
+static int proc_timers_open(struct inode *inode, struct file *file)
+{
+	struct timers_private *tp;
+
+	tp = __seq_open_private(file, &proc_timers_seq_ops,
+			sizeof(struct timers_private));
+	if (!tp)
+		return -ENOMEM;
+
+	tp->pid = proc_pid(inode);
+	return 0;
+}
+
+static const struct file_operations proc_timers_operations = {
+	.open		= proc_timers_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
 #endif /* CONFIG_CHECKPOINT_RESTORE */
 
 static struct dentry *proc_pident_instantiate(struct inode *dir,
@@ -2583,6 +2663,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
 	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
 #endif
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	REG("timers",	  S_IRUGO, proc_timers_operations),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file * filp,
-- 
cgit v1.2.3


From 57b8015e07a70301e9ec9f324db1a8b73b5a1e2b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Mon, 11 Mar 2013 13:13:08 +0400
Subject: posix-timers: Show sigevent info in proc file

Previous patch added proc file to list posix timers created by task.
Expand the information provided in this file by adding info about
notification method, with which timers were created. I.e. after
the "ID:" line there go

1. "signal:" line, that shows signal number and sigval bits;
2. "notify:" line, that shows the timer notification method.

Thus the timer entry would looke like this:

ID: 123
signal: 14/0000000000b005d0
notify: signal/pid.732

This information is enough to understand how timer_create() was called
for each particular timer.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Matthew Helsley <matt.helsley@gmail.com>
Link: http://lkml.kernel.org/r/513DA024.80404@parallels.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/proc/base.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'fs/proc/base.c')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 01def9f8aa74..a19308604145 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2018,6 +2018,7 @@ struct timers_private {
 	struct pid *pid;
 	struct task_struct *task;
 	struct sighand_struct *sighand;
+	struct pid_namespace *ns;
 	unsigned long flags;
 };
 
@@ -2060,9 +2061,24 @@ static void timers_stop(struct seq_file *m, void *v)
 static int show_timer(struct seq_file *m, void *v)
 {
 	struct k_itimer *timer;
+	struct timers_private *tp = m->private;
+	int notify;
+	static char *nstr[] = {
+		[SIGEV_SIGNAL] = "signal",
+		[SIGEV_NONE] = "none",
+		[SIGEV_THREAD] = "thread",
+	};
 
 	timer = list_entry((struct list_head *)v, struct k_itimer, list);
+	notify = timer->it_sigev_notify;
+
 	seq_printf(m, "ID: %d\n", timer->it_id);
+	seq_printf(m, "signal: %d/%p\n", timer->sigq->info.si_signo,
+			timer->sigq->info.si_value.sival_ptr);
+	seq_printf(m, "notify: %s/%s.%d\n",
+		nstr[notify & ~SIGEV_THREAD_ID],
+		(notify & SIGEV_THREAD_ID) ? "tid" : "pid",
+		pid_nr_ns(timer->it_pid, tp->ns));
 
 	return 0;
 }
@@ -2084,6 +2100,7 @@ static int proc_timers_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 
 	tp->pid = proc_pid(inode);
+	tp->ns = inode->i_sb->s_fs_info;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 830e0fc967a7ee5013d5d1cf6a3cea71a8868466 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 30 Apr 2013 15:28:18 -0700
Subject: fs, proc: truncate /proc/pid/comm writes to first TASK_COMM_LEN bytes

Currently, a write to a procfs file will return the number of bytes
successfully written.  If the actual string is longer than this, the
remainder of the string will not be be written and userspace will
complete the operation by issuing additional write()s.

Hence

	$ echo -n "abcdefghijklmnopqrs" > /proc/self/comm

results in

	$ cat /proc/$$/comm
	pqrs

since the final four bytes were written with a second write() since
TASK_COMM_LEN == 16.  This is obviously an undesired result and not
equivalent to prctl(PR_SET_NAME).  The implementation should not need to
know the definition of TASK_COMM_LEN.

This patch truncates the string to the first TASK_COMM_LEN bytes and
returns the bytes written as the length of the string written so the
second write() is suppressed.

	$ cat /proc/$$/comm
	abcdefghijklmno

Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs/proc/base.c')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index a19308604145..3861bcec41ff 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1348,11 +1348,10 @@ static ssize_t comm_write(struct file *file, const char __user *buf,
 	struct inode *inode = file_inode(file);
 	struct task_struct *p;
 	char buffer[TASK_COMM_LEN];
+	const size_t maxlen = sizeof(buffer) - 1;
 
 	memset(buffer, 0, sizeof(buffer));
-	if (count > sizeof(buffer) - 1)
-		count = sizeof(buffer) - 1;
-	if (copy_from_user(buffer, buf, count))
+	if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count))
 		return -EFAULT;
 
 	p = get_proc_task(inode);
-- 
cgit v1.2.3


From 1dd704b6175f067781807ad4da1b878357dc9755 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 01:08:50 +0100
Subject: proc: Uninline pid_delete_dentry()

Uninline pid_delete_dentry() as it's only used by three function pointers.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/base.c     |  9 +++++++++
 fs/proc/internal.h | 14 +++++---------
 2 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'fs/proc/base.c')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 593e7c5ddb49..f2637c972160 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1621,6 +1621,15 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
 	return 0;
 }
 
+int pid_delete_dentry(const struct dentry *dentry)
+{
+	/* Is the task we represent dead?
+	 * If so, then don't put the dentry on the lru list,
+	 * kill it immediately.
+	 */
+	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
+}
+
 const struct dentry_operations pid_dentry_operations =
 {
 	.d_revalidate	= pid_revalidate,
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 4b13417acfc4..aaf2dd8c2b10 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -114,15 +114,6 @@ static inline int task_dumpable(struct task_struct *task)
 	return 0;
 }
 
-static inline int pid_delete_dentry(const struct dentry * dentry)
-{
-	/* Is the task we represent dead?
-	 * If so, then don't put the dentry on the lru list,
-	 * kill it immediately.
-	 */
-	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
-}
-
 static inline unsigned name_to_int(struct dentry *dentry)
 {
 	const char *name = dentry->d_name.name;
@@ -145,6 +136,11 @@ out:
 	return ~0U;
 }
 
+/*
+ * base.c
+ */
+extern int pid_delete_dentry(const struct dentry *);
+
 struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
 		struct dentry *dentry);
 int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
-- 
cgit v1.2.3


From 8d8b97ba499cb69fccb5fd9f2b439e3265fc3f27 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 19 Apr 2013 23:11:24 -0400
Subject: take cgroup_open() and cpuset_open() to fs/proc/base.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/base.c         | 31 +++++++++++++++++++++++++++++++
 include/linux/cgroup.h |  2 +-
 include/linux/cpuset.h |  3 +--
 kernel/cgroup.c        | 15 +--------------
 kernel/cpuset.c        | 15 +--------------
 5 files changed, 35 insertions(+), 31 deletions(-)

(limited to 'fs/proc/base.c')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index f2637c972160..8281986693be 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -404,6 +404,37 @@ static const struct file_operations proc_lstats_operations = {
 
 #endif
 
+#ifdef CONFIG_CGROUPS
+static int cgroup_open(struct inode *inode, struct file *file)
+{
+	struct pid *pid = PROC_I(inode)->pid;
+	return single_open(file, proc_cgroup_show, pid);
+}
+
+static const struct file_operations proc_cgroup_operations = {
+	.open		= cgroup_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
+#ifdef CONFIG_PROC_PID_CPUSET
+
+static int cpuset_open(struct inode *inode, struct file *file)
+{
+	struct pid *pid = PROC_I(inode)->pid;
+	return single_open(file, proc_cpuset_show, pid);
+}
+
+static const struct file_operations proc_cpuset_operations = {
+	.open		= cpuset_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
 static int proc_oom_score(struct task_struct *task, char *buffer)
 {
 	unsigned long totalpages = totalram_pages + total_swap_pages;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 900af5964f55..68f2157b71d4 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -42,7 +42,7 @@ extern int cgroupstats_build(struct cgroupstats *stats,
 extern int cgroup_load_subsys(struct cgroup_subsys *ss);
 extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
 
-extern const struct file_operations proc_cgroup_operations;
+extern int proc_cgroup_show(struct seq_file *, void *);
 
 /* Define the enumeration of all builtin cgroup subsystems */
 #define SUBSYS(_x) _x ## _subsys_id,
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 8c8a60d29407..22b637c5ecae 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -64,10 +64,9 @@ extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
 extern int cpuset_memory_pressure_enabled;
 extern void __cpuset_memory_pressure_bump(void);
 
-extern const struct file_operations proc_cpuset_operations;
-struct seq_file;
 extern void cpuset_task_status_allowed(struct seq_file *m,
 					struct task_struct *task);
+extern int proc_cpuset_show(struct seq_file *, void *);
 
 extern int cpuset_mem_spread_node(void);
 extern int cpuset_slab_spread_node(void);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a32f9432666c..d5cffe80b469 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4769,7 +4769,7 @@ out:
  */
 
 /* TODO: Use a proper seq_file iterator */
-static int proc_cgroup_show(struct seq_file *m, void *v)
+int proc_cgroup_show(struct seq_file *m, void *v)
 {
 	struct pid *pid;
 	struct task_struct *tsk;
@@ -4821,19 +4821,6 @@ out:
 	return retval;
 }
 
-static int cgroup_open(struct inode *inode, struct file *file)
-{
-	struct pid *pid = PROC_I(inode)->pid;
-	return single_open(file, proc_cgroup_show, pid);
-}
-
-const struct file_operations proc_cgroup_operations = {
-	.open		= cgroup_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /* Display information about each subsystem and each hierarchy */
 static int proc_cgroupstats_show(struct seq_file *m, void *v)
 {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4f9dfe43ecbd..1b6f615be587 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2666,7 +2666,7 @@ void __cpuset_memory_pressure_bump(void)
  *    and we take cpuset_mutex, keeping cpuset_attach() from changing it
  *    anyway.
  */
-static int proc_cpuset_show(struct seq_file *m, void *unused_v)
+int proc_cpuset_show(struct seq_file *m, void *unused_v)
 {
 	struct pid *pid;
 	struct task_struct *tsk;
@@ -2700,19 +2700,6 @@ out_free:
 out:
 	return retval;
 }
-
-static int cpuset_open(struct inode *inode, struct file *file)
-{
-	struct pid *pid = PROC_I(inode)->pid;
-	return single_open(file, proc_cpuset_show, pid);
-}
-
-const struct file_operations proc_cpuset_operations = {
-	.open		= cpuset_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif /* CONFIG_PROC_PID_CPUSET */
 
 /* Display task mems_allowed in /proc/<pid>/status file. */
-- 
cgit v1.2.3