summaryrefslogtreecommitdiff
path: root/kernel/entry/syscall_user_dispatch.c
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2025-05-21 17:04:29 +0200
committerThomas Gleixner <tglx@linutronix.de>2025-06-13 18:36:39 +0200
commita2fc422ed75748eef2985454e97847fb22f873c2 (patch)
tree811388d91b3d50b1b65b3b7230174de8a36448ce /kernel/entry/syscall_user_dispatch.c
parentb89732c8c8357487185f260a723a060b3476144e (diff)
syscall_user_dispatch: Add PR_SYS_DISPATCH_INCLUSIVE_ON
There are two possible scenarios for syscall filtering: - having a trusted/allowed range of PCs, and intercepting everything else - or the opposite: a single untrusted/intercepted range and allowing everything else (this is relevant for any kind of sandboxing scenario, or monitoring behavior of a single library) The current API only allows the former use case due to allowed range wrap-around check. Add PR_SYS_DISPATCH_INCLUSIVE_ON that enables the second use case. Add PR_SYS_DISPATCH_EXCLUSIVE_ON alias for PR_SYS_DISPATCH_ON to make it clear how it's different from the new PR_SYS_DISPATCH_INCLUSIVE_ON. Signed-off-by: Dmitry Vyukov <dvyukov@google.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lore.kernel.org/all/97947cc8e205ff49675826d7b0327ef2e2c66eea.1747839857.git.dvyukov@google.com
Diffstat (limited to 'kernel/entry/syscall_user_dispatch.c')
-rw-r--r--kernel/entry/syscall_user_dispatch.c36
1 files changed, 23 insertions, 13 deletions
diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
index 5340c5aa89e7..a9055eccb27e 100644
--- a/kernel/entry/syscall_user_dispatch.c
+++ b/kernel/entry/syscall_user_dispatch.c
@@ -78,7 +78,7 @@ static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned lon
if (offset || len || selector)
return -EINVAL;
break;
- case PR_SYS_DISPATCH_ON:
+ case PR_SYS_DISPATCH_EXCLUSIVE_ON:
/*
* Validate the direct dispatcher region just for basic
* sanity against overflow and a 0-sized dispatcher
@@ -87,30 +87,40 @@ static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned lon
*/
if (offset && offset + len <= offset)
return -EINVAL;
-
+ break;
+ case PR_SYS_DISPATCH_INCLUSIVE_ON:
+ if (len == 0 || offset + len <= offset)
+ return -EINVAL;
/*
- * access_ok() will clear memory tags for tagged addresses
- * if current has memory tagging enabled.
-
- * To enable a tracer to set a tracees selector the
- * selector address must be untagged for access_ok(),
- * otherwise an untagged tracer will always fail to set a
- * tagged tracees selector.
+ * Invert the range, the check in syscall_user_dispatch()
+ * supports wrap-around.
*/
- if (selector && !access_ok(untagged_addr(selector), sizeof(*selector)))
- return -EFAULT;
-
+ offset = offset + len;
+ len = -len;
break;
default:
return -EINVAL;
}
+ /*
+ * access_ok() will clear memory tags for tagged addresses
+ * if current has memory tagging enabled.
+ *
+ * To enable a tracer to set a tracees selector the
+ * selector address must be untagged for access_ok(),
+ * otherwise an untagged tracer will always fail to set a
+ * tagged tracees selector.
+ */
+ if (mode != PR_SYS_DISPATCH_OFF && selector &&
+ !access_ok(untagged_addr(selector), sizeof(*selector)))
+ return -EFAULT;
+
task->syscall_dispatch.selector = selector;
task->syscall_dispatch.offset = offset;
task->syscall_dispatch.len = len;
task->syscall_dispatch.on_dispatch = false;
- if (mode == PR_SYS_DISPATCH_ON)
+ if (mode != PR_SYS_DISPATCH_OFF)
set_task_syscall_work(task, SYSCALL_USER_DISPATCH);
else
clear_task_syscall_work(task, SYSCALL_USER_DISPATCH);