From 851849824bb5590e61048bdd3b311aadeb6a032a Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Sun, 15 Sep 2024 14:31:29 +0000 Subject: rust: file: add Rust abstraction for `struct file` This abstraction makes it possible to manipulate the open files for a process. The new `File` struct wraps the C `struct file`. When accessing it using the smart pointer `ARef`, the pointer will own a reference count to the file. When accessing it as `&File`, then the reference does not own a refcount, but the borrow checker will ensure that the reference count does not hit zero while the `&File` is live. Since this is intended to manipulate the open files of a process, we introduce an `fget` constructor that corresponds to the C `fget` method. In future patches, it will become possible to create a new fd in a process and bind it to a `File`. Rust Binder will use these to send fds from one process to another. We also provide a method for accessing the file's flags. Rust Binder will use this to access the flags of the Binder fd to check whether the non-blocking flag is set, which affects what the Binder ioctl does. This introduces a struct for the EBADF error type, rather than just using the Error type directly. This has two advantages: * `File::fget` returns a `Result, BadFdError>`, which the compiler will represent as a single pointer, with null being an error. This is possible because the compiler understands that `BadFdError` has only one possible value, and it also understands that the `ARef` smart pointer is guaranteed non-null. * Additionally, we promise to users of the method that the method can only fail with EBADF, which means that they can rely on this promise without having to inspect its implementation. That said, there are also two disadvantages: * Defining additional error types involves boilerplate. * The question mark operator will only utilize the `From` trait once, which prevents you from using the question mark operator on `BadFdError` in methods that return some third error type that the kernel `Error` is convertible into. (However, it works fine in methods that return `Error`.) Signed-off-by: Wedson Almeida Filho Co-developed-by: Daniel Xu Signed-off-by: Daniel Xu Co-developed-by: Alice Ryhl Reviewed-by: Benno Lossin Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20240915-alice-file-v10-3-88484f7a3dcf@google.com Reviewed-by: Gary Guo Signed-off-by: Christian Brauner --- rust/helpers/helpers.c | 1 + 1 file changed, 1 insertion(+) (limited to 'rust/helpers/helpers.c') diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 30f40149f3a96..3f2d0d0c80179 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -12,6 +12,7 @@ #include "build_assert.c" #include "build_bug.c" #include "err.c" +#include "fs.c" #include "kunit.c" #include "mutex.c" #include "page.c" -- cgit v1.2.3 From a3df991d3d0648dabf761cee70bc1a1ef874db8b Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Sun, 15 Sep 2024 14:31:30 +0000 Subject: rust: cred: add Rust abstraction for `struct cred` Add a wrapper around `struct cred` called `Credential`, and provide functionality to get the `Credential` associated with a `File`. Rust Binder must check the credentials of processes when they attempt to perform various operations, and these checks usually take a `&Credential` as parameter. The security_binder_set_context_mgr function would be one example. This patch is necessary to access these security_* methods from Rust. This Rust abstraction makes the following assumptions about the C side: * `struct cred` is refcounted with `get_cred`/`put_cred`. * It's okay to transfer a `struct cred` across threads, that is, you do not need to call `put_cred` on the same thread as where you called `get_cred`. * The `euid` field of a `struct cred` never changes after initialization. * The `f_cred` field of a `struct file` never changes after initialization. Signed-off-by: Wedson Almeida Filho Co-developed-by: Alice Ryhl Reviewed-by: Trevor Gross Reviewed-by: Benno Lossin Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Gary Guo Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20240915-alice-file-v10-4-88484f7a3dcf@google.com Reviewed-by: Kees Cook Reviewed-by: Paul Moore Signed-off-by: Christian Brauner --- rust/bindings/bindings_helper.h | 1 + rust/helpers/cred.c | 13 +++++++ rust/helpers/helpers.c | 1 + rust/kernel/cred.rs | 76 +++++++++++++++++++++++++++++++++++++++++ rust/kernel/fs/file.rs | 13 +++++++ rust/kernel/lib.rs | 1 + 6 files changed, 105 insertions(+) create mode 100644 rust/helpers/cred.c create mode 100644 rust/kernel/cred.rs (limited to 'rust/helpers/helpers.c') diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 4a400a9549791..f74247205cb57 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/helpers/cred.c b/rust/helpers/cred.c new file mode 100644 index 0000000000000..fde7ae20cdd19 --- /dev/null +++ b/rust/helpers/cred.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +const struct cred *rust_helper_get_cred(const struct cred *cred) +{ + return get_cred(cred); +} + +void rust_helper_put_cred(const struct cred *cred) +{ + put_cred(cred); +} diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 3f2d0d0c80179..16e5de352dab7 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -11,6 +11,7 @@ #include "bug.c" #include "build_assert.c" #include "build_bug.c" +#include "cred.c" #include "err.c" #include "fs.c" #include "kunit.c" diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs new file mode 100644 index 0000000000000..acee047689279 --- /dev/null +++ b/rust/kernel/cred.rs @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Credentials management. +//! +//! C header: [`include/linux/cred.h`](srctree/include/linux/cred.h). +//! +//! Reference: + +use crate::{ + bindings, + types::{AlwaysRefCounted, Opaque}, +}; + +/// Wraps the kernel's `struct cred`. +/// +/// Credentials are used for various security checks in the kernel. +/// +/// Most fields of credentials are immutable. When things have their credentials changed, that +/// happens by replacing the credential instead of changing an existing credential. See the [kernel +/// documentation][ref] for more info on this. +/// +/// # Invariants +/// +/// Instances of this type are always ref-counted, that is, a call to `get_cred` ensures that the +/// allocation remains valid at least until the matching call to `put_cred`. +/// +/// [ref]: https://www.kernel.org/doc/html/latest/security/credentials.html +#[repr(transparent)] +pub struct Credential(Opaque); + +// SAFETY: +// - `Credential::dec_ref` can be called from any thread. +// - It is okay to send ownership of `Credential` across thread boundaries. +unsafe impl Send for Credential {} + +// SAFETY: It's OK to access `Credential` through shared references from other threads because +// we're either accessing properties that don't change or that are properly synchronised by C code. +unsafe impl Sync for Credential {} + +impl Credential { + /// Creates a reference to a [`Credential`] from a valid pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid and remains valid for the lifetime of the + /// returned [`Credential`] reference. + pub unsafe fn from_ptr<'a>(ptr: *const bindings::cred) -> &'a Credential { + // SAFETY: The safety requirements guarantee the validity of the dereference, while the + // `Credential` type being transparent makes the cast ok. + unsafe { &*ptr.cast() } + } + + /// Returns the effective UID of the given credential. + pub fn euid(&self) -> bindings::kuid_t { + // SAFETY: By the type invariant, we know that `self.0` is valid. Furthermore, the `euid` + // field of a credential is never changed after initialization, so there is no potential + // for data races. + unsafe { (*self.0.get()).euid } + } +} + +// SAFETY: The type invariants guarantee that `Credential` is always ref-counted. +unsafe impl AlwaysRefCounted for Credential { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference means that the refcount is nonzero. + unsafe { bindings::get_cred(self.0.get()) }; + } + + unsafe fn dec_ref(obj: core::ptr::NonNull) { + // SAFETY: The safety requirements guarantee that the refcount is nonzero. The cast is okay + // because `Credential` has the same representation as `struct cred`. + unsafe { bindings::put_cred(obj.cast().as_ptr()) }; + } +} diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index 6adb7a7199ecc..3c1f517198042 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -9,6 +9,7 @@ use crate::{ bindings, + cred::Credential, error::{code::*, Error, Result}, types::{ARef, AlwaysRefCounted, Opaque}, }; @@ -308,6 +309,18 @@ impl LocalFile { self.inner.get() } + /// Returns the credentials of the task that originally opened the file. + pub fn cred(&self) -> &Credential { + // SAFETY: It's okay to read the `f_cred` field without synchronization because `f_cred` is + // never changed after initialization of the file. + let ptr = unsafe { (*self.as_ptr()).f_cred }; + + // SAFETY: The signature of this function ensures that the caller will only access the + // returned credential while the file is still valid, and the C side ensures that the + // credential stays valid at least as long as the file. + unsafe { Credential::from_ptr(ptr) } + } + /// Returns the flags associated with the file. /// /// The flags are a combination of the constants in [`flags`]. diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index c7d50f245f582..c537d17c6db9b 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -30,6 +30,7 @@ pub mod alloc; #[cfg(CONFIG_BLOCK)] pub mod block; mod build_assert; +pub mod cred; pub mod device; pub mod error; #[cfg(CONFIG_RUST_FW_LOADER_ABSTRACTIONS)] -- cgit v1.2.3 From 94d356c0335f95412575c4fa3954b48722359c8a Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Sun, 15 Sep 2024 14:31:31 +0000 Subject: rust: security: add abstraction for secctx Add an abstraction for viewing the string representation of a security context. This is needed by Rust Binder because it has a feature where a process can view the string representation of the security context for incoming transactions. The process can use that to authenticate incoming transactions, and since the feature is provided by the kernel, the process can trust that the security context is legitimate. This abstraction makes the following assumptions about the C side: * When a call to `security_secid_to_secctx` is successful, it returns a pointer and length. The pointer references a byte string and is valid for reading for that many bytes. * The string may be referenced until `security_release_secctx` is called. * If CONFIG_SECURITY is set, then the three methods mentioned in rust/helpers are available without a helper. (That is, they are not a #define or `static inline`.) Reviewed-by: Benno Lossin Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Trevor Gross Reviewed-by: Gary Guo Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20240915-alice-file-v10-5-88484f7a3dcf@google.com Acked-by: Paul Moore Reviewed-by: Kees Cook Signed-off-by: Christian Brauner --- rust/bindings/bindings_helper.h | 1 + rust/helpers/helpers.c | 1 + rust/helpers/security.c | 20 +++++++++++ rust/kernel/cred.rs | 8 +++++ rust/kernel/lib.rs | 1 + rust/kernel/security.rs | 74 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 105 insertions(+) create mode 100644 rust/helpers/security.c create mode 100644 rust/kernel/security.rs (limited to 'rust/helpers/helpers.c') diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index f74247205cb57..51ec78c355c05 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 16e5de352dab7..62022b18caf5e 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -19,6 +19,7 @@ #include "page.c" #include "rbtree.c" #include "refcount.c" +#include "security.c" #include "signal.c" #include "slab.c" #include "spinlock.c" diff --git a/rust/helpers/security.c b/rust/helpers/security.c new file mode 100644 index 0000000000000..239e5b4745fe8 --- /dev/null +++ b/rust/helpers/security.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#ifndef CONFIG_SECURITY +void rust_helper_security_cred_getsecid(const struct cred *c, u32 *secid) +{ + security_cred_getsecid(c, secid); +} + +int rust_helper_security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +{ + return security_secid_to_secctx(secid, secdata, seclen); +} + +void rust_helper_security_release_secctx(char *secdata, u32 seclen) +{ + security_release_secctx(secdata, seclen); +} +#endif diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs index acee047689279..92659649e9322 100644 --- a/rust/kernel/cred.rs +++ b/rust/kernel/cred.rs @@ -52,6 +52,14 @@ impl Credential { unsafe { &*ptr.cast() } } + /// Get the id for this security context. + pub fn get_secid(&self) -> u32 { + let mut secid = 0; + // SAFETY: The invariants of this type ensures that the pointer is valid. + unsafe { bindings::security_cred_getsecid(self.0.get(), &mut secid) }; + secid + } + /// Returns the effective UID of the given credential. pub fn euid(&self) -> bindings::kuid_t { // SAFETY: By the type invariant, we know that `self.0` is valid. Furthermore, the `euid` diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index c537d17c6db9b..e088c94a5a144 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -47,6 +47,7 @@ pub mod page; pub mod prelude; pub mod print; pub mod rbtree; +pub mod security; mod static_assert; #[doc(hidden)] pub mod std_vendor; diff --git a/rust/kernel/security.rs b/rust/kernel/security.rs new file mode 100644 index 0000000000000..2522868862a1b --- /dev/null +++ b/rust/kernel/security.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Linux Security Modules (LSM). +//! +//! C header: [`include/linux/security.h`](srctree/include/linux/security.h). + +use crate::{ + bindings, + error::{to_result, Result}, +}; + +/// A security context string. +/// +/// # Invariants +/// +/// The `secdata` and `seclen` fields correspond to a valid security context as returned by a +/// successful call to `security_secid_to_secctx`, that has not yet been destroyed by calling +/// `security_release_secctx`. +pub struct SecurityCtx { + secdata: *mut core::ffi::c_char, + seclen: usize, +} + +impl SecurityCtx { + /// Get the security context given its id. + pub fn from_secid(secid: u32) -> Result { + let mut secdata = core::ptr::null_mut(); + let mut seclen = 0u32; + // SAFETY: Just a C FFI call. The pointers are valid for writes. + to_result(unsafe { bindings::security_secid_to_secctx(secid, &mut secdata, &mut seclen) })?; + + // INVARIANT: If the above call did not fail, then we have a valid security context. + Ok(Self { + secdata, + seclen: seclen as usize, + }) + } + + /// Returns whether the security context is empty. + pub fn is_empty(&self) -> bool { + self.seclen == 0 + } + + /// Returns the length of this security context. + pub fn len(&self) -> usize { + self.seclen + } + + /// Returns the bytes for this security context. + pub fn as_bytes(&self) -> &[u8] { + let ptr = self.secdata; + if ptr.is_null() { + debug_assert_eq!(self.seclen, 0); + // We can't pass a null pointer to `slice::from_raw_parts` even if the length is zero. + return &[]; + } + + // SAFETY: The call to `security_secid_to_secctx` guarantees that the pointer is valid for + // `seclen` bytes. Furthermore, if the length is zero, then we have ensured that the + // pointer is not null. + unsafe { core::slice::from_raw_parts(ptr.cast(), self.seclen) } + } +} + +impl Drop for SecurityCtx { + fn drop(&mut self) { + // SAFETY: By the invariant of `Self`, this frees a pointer that came from a successful + // call to `security_secid_to_secctx` and has not yet been destroyed by + // `security_release_secctx`. + unsafe { bindings::security_release_secctx(self.secdata, self.seclen as u32) }; + } +} -- cgit v1.2.3 From e0020ba6cbcbfbaaa50c3d4b610c7caa36459624 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 2 Oct 2024 13:38:10 +0200 Subject: rust: add PidNamespace The lifetime of `PidNamespace` is bound to `Task` and `struct pid`. The `PidNamespace` of a `Task` doesn't ever change once the `Task` is alive. A `unshare(CLONE_NEWPID)` or `setns(fd_pidns/pidfd, CLONE_NEWPID)` will not have an effect on the calling `Task`'s pid namespace. It will only effect the pid namespace of children created by the calling `Task`. This invariant guarantees that after having acquired a reference to a `Task`'s pid namespace it will remain unchanged. When a task has exited and been reaped `release_task()` will be called. This will set the `PidNamespace` of the task to `NULL`. So retrieving the `PidNamespace` of a task that is dead will return `NULL`. Note, that neither holding the RCU lock nor holding a referencing count to the `Task` will prevent `release_task()` being called. In order to retrieve the `PidNamespace` of a `Task` the `task_active_pid_ns()` function can be used. There are two cases to consider: (1) retrieving the `PidNamespace` of the `current` task (2) retrieving the `PidNamespace` of a non-`current` task From system call context retrieving the `PidNamespace` for case (1) is always safe and requires neither RCU locking nor a reference count to be held. Retrieving the `PidNamespace` after `release_task()` for current will return `NULL` but no codepath like that is exposed to Rust. Retrieving the `PidNamespace` from system call context for (2) requires RCU protection. Accessing `PidNamespace` outside of RCU protection requires a reference count that must've been acquired while holding the RCU lock. Note that accessing a non-`current` task means `NULL` can be returned as the non-`current` task could have already passed through `release_task()`. To retrieve (1) the `current_pid_ns!()` macro should be used which ensure that the returned `PidNamespace` cannot outlive the calling scope. The associated `current_pid_ns()` function should not be called directly as it could be abused to created an unbounded lifetime for `PidNamespace`. The `current_pid_ns!()` macro allows Rust to handle the common case of accessing `current`'s `PidNamespace` without RCU protection and without having to acquire a reference count. For (2) the `task_get_pid_ns()` method must be used. This will always acquire a reference on `PidNamespace` and will return an `Option` to force the caller to explicitly handle the case where `PidNamespace` is `None`, something that tends to be forgotten when doing the equivalent operation in `C`. Missing RCU primitives make it difficult to perform operations that are otherwise safe without holding a reference count as long as RCU protection is guaranteed. But it is not important currently. But we do want it in the future. Note for (2) the required RCU protection around calling `task_active_pid_ns()` synchronizes against putting the last reference of the associated `struct pid` of `task->thread_pid`. The `struct pid` stored in that field is used to retrieve the `PidNamespace` of the caller. When `release_task()` is called `task->thread_pid` will be `NULL`ed and `put_pid()` on said `struct pid` will be delayed in `free_pid()` via `call_rcu()` allowing everyone with an RCU protected access to the `struct pid` acquired from `task->thread_pid` to finish. Link: https://lore.kernel.org/r/20241002-brauner-rust-pid_namespace-v5-1-a90e70d44fde@kernel.org Reviewed-by: Alice Ryhl Signed-off-by: Christian Brauner --- rust/helpers/helpers.c | 1 + rust/helpers/pid_namespace.c | 26 +++++++++ rust/kernel/lib.rs | 1 + rust/kernel/pid_namespace.rs | 68 ++++++++++++++++++++++ rust/kernel/task.rs | 135 +++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 225 insertions(+), 6 deletions(-) create mode 100644 rust/helpers/pid_namespace.c create mode 100644 rust/kernel/pid_namespace.rs (limited to 'rust/helpers/helpers.c') diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 62022b18caf5e..d553ad9361ce1 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -17,6 +17,7 @@ #include "kunit.c" #include "mutex.c" #include "page.c" +#include "pid_namespace.c" #include "rbtree.c" #include "refcount.c" #include "security.c" diff --git a/rust/helpers/pid_namespace.c b/rust/helpers/pid_namespace.c new file mode 100644 index 0000000000000..f41482bdec9a7 --- /dev/null +++ b/rust/helpers/pid_namespace.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +struct pid_namespace *rust_helper_get_pid_ns(struct pid_namespace *ns) +{ + return get_pid_ns(ns); +} + +void rust_helper_put_pid_ns(struct pid_namespace *ns) +{ + put_pid_ns(ns); +} + +/* Get a reference on a task's pid namespace. */ +struct pid_namespace *rust_helper_task_get_pid_ns(struct task_struct *task) +{ + struct pid_namespace *pid_ns; + + guard(rcu)(); + pid_ns = task_active_pid_ns(task); + if (pid_ns) + get_pid_ns(pid_ns); + return pid_ns; +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 9843eedd42939..98219a5118c70 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -44,6 +44,7 @@ pub mod list; #[cfg(CONFIG_NET)] pub mod net; pub mod page; +pub mod pid_namespace; pub mod prelude; pub mod print; pub mod rbtree; diff --git a/rust/kernel/pid_namespace.rs b/rust/kernel/pid_namespace.rs new file mode 100644 index 0000000000000..0e93808e4639b --- /dev/null +++ b/rust/kernel/pid_namespace.rs @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (c) 2024 Christian Brauner + +//! Pid namespaces. +//! +//! C header: [`include/linux/pid_namespace.h`](srctree/include/linux/pid_namespace.h) and +//! [`include/linux/pid.h`](srctree/include/linux/pid.h) + +use crate::{ + bindings, + types::{AlwaysRefCounted, Opaque}, +}; +use core::ptr; + +/// Wraps the kernel's `struct pid_namespace`. Thread safe. +/// +/// This structure represents the Rust abstraction for a C `struct pid_namespace`. This +/// implementation abstracts the usage of an already existing C `struct pid_namespace` within Rust +/// code that we get passed from the C side. +#[repr(transparent)] +pub struct PidNamespace { + inner: Opaque, +} + +impl PidNamespace { + /// Returns a raw pointer to the inner C struct. + #[inline] + pub fn as_ptr(&self) -> *mut bindings::pid_namespace { + self.inner.get() + } + + /// Creates a reference to a [`PidNamespace`] from a valid pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid and remains valid for the lifetime of the + /// returned [`PidNamespace`] reference. + pub unsafe fn from_ptr<'a>(ptr: *const bindings::pid_namespace) -> &'a Self { + // SAFETY: The safety requirements guarantee the validity of the dereference, while the + // `PidNamespace` type being transparent makes the cast ok. + unsafe { &*ptr.cast() } + } +} + +// SAFETY: Instances of `PidNamespace` are always reference-counted. +unsafe impl AlwaysRefCounted for PidNamespace { + #[inline] + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference means that the refcount is nonzero. + unsafe { bindings::get_pid_ns(self.as_ptr()) }; + } + + #[inline] + unsafe fn dec_ref(obj: ptr::NonNull) { + // SAFETY: The safety requirements guarantee that the refcount is non-zero. + unsafe { bindings::put_pid_ns(obj.cast().as_ptr()) } + } +} + +// SAFETY: +// - `PidNamespace::dec_ref` can be called from any thread. +// - It is okay to send ownership of `PidNamespace` across thread boundaries. +unsafe impl Send for PidNamespace {} + +// SAFETY: It's OK to access `PidNamespace` through shared references from other threads because +// we're either accessing properties that don't change or that are properly synchronised by C code. +unsafe impl Sync for PidNamespace {} diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 1a36a9f193685..4b8c59a82746d 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -6,7 +6,8 @@ use crate::{ bindings, - types::{NotThreadSafe, Opaque}, + pid_namespace::PidNamespace, + types::{ARef, NotThreadSafe, Opaque}, }; use core::{ cmp::{Eq, PartialEq}, @@ -36,6 +37,16 @@ macro_rules! current { }; } +/// Returns the currently running task's pid namespace. +#[macro_export] +macro_rules! current_pid_ns { + () => { + // SAFETY: Deref + addr-of below create a temporary `PidNamespaceRef` that cannot outlive + // the caller. + unsafe { &*$crate::task::Task::current_pid_ns() } + }; +} + /// Wraps the kernel's `struct task_struct`. /// /// # Invariants @@ -145,6 +156,97 @@ impl Task { } } + /// Returns a PidNamespace reference for the currently executing task's/thread's pid namespace. + /// + /// This function can be used to create an unbounded lifetime by e.g., storing the returned + /// PidNamespace in a global variable which would be a bug. So the recommended way to get the + /// current task's/thread's pid namespace is to use the [`current_pid_ns`] macro because it is + /// safe. + /// + /// # Safety + /// + /// Callers must ensure that the returned object doesn't outlive the current task/thread. + pub unsafe fn current_pid_ns() -> impl Deref { + struct PidNamespaceRef<'a> { + task: &'a PidNamespace, + _not_send: NotThreadSafe, + } + + impl Deref for PidNamespaceRef<'_> { + type Target = PidNamespace; + + fn deref(&self) -> &Self::Target { + self.task + } + } + + // The lifetime of `PidNamespace` is bound to `Task` and `struct pid`. + // + // The `PidNamespace` of a `Task` doesn't ever change once the `Task` is alive. A + // `unshare(CLONE_NEWPID)` or `setns(fd_pidns/pidfd, CLONE_NEWPID)` will not have an effect + // on the calling `Task`'s pid namespace. It will only effect the pid namespace of children + // created by the calling `Task`. This invariant guarantees that after having acquired a + // reference to a `Task`'s pid namespace it will remain unchanged. + // + // When a task has exited and been reaped `release_task()` will be called. This will set + // the `PidNamespace` of the task to `NULL`. So retrieving the `PidNamespace` of a task + // that is dead will return `NULL`. Note, that neither holding the RCU lock nor holding a + // referencing count to + // the `Task` will prevent `release_task()` being called. + // + // In order to retrieve the `PidNamespace` of a `Task` the `task_active_pid_ns()` function + // can be used. There are two cases to consider: + // + // (1) retrieving the `PidNamespace` of the `current` task + // (2) retrieving the `PidNamespace` of a non-`current` task + // + // From system call context retrieving the `PidNamespace` for case (1) is always safe and + // requires neither RCU locking nor a reference count to be held. Retrieving the + // `PidNamespace` after `release_task()` for current will return `NULL` but no codepath + // like that is exposed to Rust. + // + // Retrieving the `PidNamespace` from system call context for (2) requires RCU protection. + // Accessing `PidNamespace` outside of RCU protection requires a reference count that + // must've been acquired while holding the RCU lock. Note that accessing a non-`current` + // task means `NULL` can be returned as the non-`current` task could have already passed + // through `release_task()`. + // + // To retrieve (1) the `current_pid_ns!()` macro should be used which ensure that the + // returned `PidNamespace` cannot outlive the calling scope. The associated + // `current_pid_ns()` function should not be called directly as it could be abused to + // created an unbounded lifetime for `PidNamespace`. The `current_pid_ns!()` macro allows + // Rust to handle the common case of accessing `current`'s `PidNamespace` without RCU + // protection and without having to acquire a reference count. + // + // For (2) the `task_get_pid_ns()` method must be used. This will always acquire a + // reference on `PidNamespace` and will return an `Option` to force the caller to + // explicitly handle the case where `PidNamespace` is `None`, something that tends to be + // forgotten when doing the equivalent operation in `C`. Missing RCU primitives make it + // difficult to perform operations that are otherwise safe without holding a reference + // count as long as RCU protection is guaranteed. But it is not important currently. But we + // do want it in the future. + // + // Note for (2) the required RCU protection around calling `task_active_pid_ns()` + // synchronizes against putting the last reference of the associated `struct pid` of + // `task->thread_pid`. The `struct pid` stored in that field is used to retrieve the + // `PidNamespace` of the caller. When `release_task()` is called `task->thread_pid` will be + // `NULL`ed and `put_pid()` on said `struct pid` will be delayed in `free_pid()` via + // `call_rcu()` allowing everyone with an RCU protected access to the `struct pid` acquired + // from `task->thread_pid` to finish. + // + // SAFETY: The current task's pid namespace is valid as long as the current task is running. + let pidns = unsafe { bindings::task_active_pid_ns(Task::current_raw()) }; + PidNamespaceRef { + // SAFETY: If the current thread is still running, the current task and its associated + // pid namespace are valid. `PidNamespaceRef` is not `Send`, so we know it cannot be + // transferred to another thread (where it could potentially outlive the current + // `Task`). The caller needs to ensure that the PidNamespaceRef doesn't outlive the + // current task/thread. + task: unsafe { PidNamespace::from_ptr(pidns) }, + _not_send: NotThreadSafe, + } + } + /// Returns the group leader of the given task. pub fn group_leader(&self) -> &Task { // SAFETY: By the type invariant, we know that `self.0` is a valid task. Valid tasks always @@ -182,11 +284,32 @@ impl Task { unsafe { bindings::signal_pending(self.0.get()) != 0 } } - /// Returns the given task's pid in the current pid namespace. - pub fn pid_in_current_ns(&self) -> Pid { - // SAFETY: We know that `self.0.get()` is valid by the type invariant, and passing a null - // pointer as the namespace is correct for using the current namespace. - unsafe { bindings::task_tgid_nr_ns(self.0.get(), ptr::null_mut()) } + /// Returns task's pid namespace with elevated reference count + pub fn get_pid_ns(&self) -> Option> { + // SAFETY: By the type invariant, we know that `self.0` is valid. + let ptr = unsafe { bindings::task_get_pid_ns(self.0.get()) }; + if ptr.is_null() { + None + } else { + // SAFETY: `ptr` is valid by the safety requirements of this function. And we own a + // reference count via `task_get_pid_ns()`. + // CAST: `Self` is a `repr(transparent)` wrapper around `bindings::pid_namespace`. + Some(unsafe { ARef::from_raw(ptr::NonNull::new_unchecked(ptr.cast::())) }) + } + } + + /// Returns the given task's pid in the provided pid namespace. + #[doc(alias = "task_tgid_nr_ns")] + pub fn tgid_nr_ns(&self, pidns: Option<&PidNamespace>) -> Pid { + let pidns = match pidns { + Some(pidns) => pidns.as_ptr(), + None => core::ptr::null_mut(), + }; + // SAFETY: By the type invariant, we know that `self.0` is valid. We received a valid + // PidNamespace that we can use as a pointer or we received an empty PidNamespace and + // thus pass a null pointer. The underlying C function is safe to be used with NULL + // pointers. + unsafe { bindings::task_tgid_nr_ns(self.0.get(), pidns) } } /// Wakes up the task. -- cgit v1.2.3 From 61c004781d6b928443052e7a6cf84b35d4f61401 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 4 Oct 2024 17:41:12 +0200 Subject: rust: alloc: implement `Vmalloc` allocator Implement `Allocator` for `Vmalloc`, the kernel's virtually contiguous allocator, typically used for larger objects, (much) larger than page size. All memory allocations made with `Vmalloc` end up in `vrealloc()`. Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Reviewed-by: Gary Guo Signed-off-by: Danilo Krummrich Link: https://lore.kernel.org/r/20241004154149.93856-9-dakr@kernel.org Signed-off-by: Miguel Ojeda --- rust/helpers/helpers.c | 1 + rust/helpers/vmalloc.c | 9 +++++++++ rust/kernel/alloc/allocator.rs | 37 +++++++++++++++++++++++++++++++++++++ rust/kernel/alloc/allocator_test.rs | 1 + 4 files changed, 48 insertions(+) create mode 100644 rust/helpers/vmalloc.c (limited to 'rust/helpers/helpers.c') diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 30f40149f3a96..20a0c69d5cc7b 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -22,5 +22,6 @@ #include "spinlock.c" #include "task.c" #include "uaccess.c" +#include "vmalloc.c" #include "wait.c" #include "workqueue.c" diff --git a/rust/helpers/vmalloc.c b/rust/helpers/vmalloc.c new file mode 100644 index 0000000000000..80d34501bbc01 --- /dev/null +++ b/rust/helpers/vmalloc.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +void * __must_check __realloc_size(2) +rust_helper_vrealloc(const void *p, size_t size, gfp_t flags) +{ + return vrealloc(p, size, flags); +} diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs index 272c581e24a01..ee4c828cbd58b 100644 --- a/rust/kernel/alloc/allocator.rs +++ b/rust/kernel/alloc/allocator.rs @@ -15,6 +15,7 @@ use core::ptr::NonNull; use crate::alloc::{AllocError, Allocator}; use crate::bindings; +use crate::pr_warn; /// The contiguous kernel allocator. /// @@ -24,6 +25,15 @@ use crate::bindings; /// For more details see [self]. pub struct Kmalloc; +/// The virtually contiguous kernel allocator. +/// +/// `Vmalloc` allocates pages from the page level allocator and maps them into the contiguous kernel +/// virtual space. It is typically used for large allocations. The memory allocated with this +/// allocator is not physically contiguous. +/// +/// For more details see [self]. +pub struct Vmalloc; + /// Returns a proper size to alloc a new object aligned to `new_layout`'s alignment. fn aligned_size(new_layout: Layout) -> usize { // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first. @@ -63,6 +73,9 @@ impl ReallocFunc { // INVARIANT: `krealloc` satisfies the type invariants. const KREALLOC: Self = Self(bindings::krealloc); + // INVARIANT: `vrealloc` satisfies the type invariants. + const VREALLOC: Self = Self(bindings::vrealloc); + /// # Safety /// /// This method has the same safety requirements as [`Allocator::realloc`]. @@ -168,6 +181,30 @@ unsafe impl GlobalAlloc for Kmalloc { } } +// SAFETY: `realloc` delegates to `ReallocFunc::call`, which guarantees that +// - memory remains valid until it is explicitly freed, +// - passing a pointer to a valid memory allocation is OK, +// - `realloc` satisfies the guarantees, since `ReallocFunc::call` has the same. +unsafe impl Allocator for Vmalloc { + #[inline] + unsafe fn realloc( + ptr: Option>, + layout: Layout, + old_layout: Layout, + flags: Flags, + ) -> Result, AllocError> { + // TODO: Support alignments larger than PAGE_SIZE. + if layout.align() > bindings::PAGE_SIZE { + pr_warn!("Vmalloc does not support alignments larger than PAGE_SIZE yet.\n"); + return Err(AllocError); + } + + // SAFETY: If not `None`, `ptr` is guaranteed to point to valid memory, which was previously + // allocated with this `Allocator`. + unsafe { ReallocFunc::VREALLOC.call(ptr, layout, old_layout, flags) } + } +} + #[global_allocator] static ALLOCATOR: Kmalloc = Kmalloc; diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs index c5d325506f0c7..32a1450c4d392 100644 --- a/rust/kernel/alloc/allocator_test.rs +++ b/rust/kernel/alloc/allocator_test.rs @@ -7,6 +7,7 @@ use core::alloc::Layout; use core::ptr::NonNull; pub struct Kmalloc; +pub type Vmalloc = Kmalloc; unsafe impl Allocator for Kmalloc { unsafe fn realloc( -- cgit v1.2.3 From 6e59bcc9c8adec9a5bbedfa95a89946c56c510d9 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 30 Oct 2024 16:04:24 +0000 Subject: rust: add static_branch_unlikely for static_key_false Add just enough support for static key so that we can use it from tracepoints. Tracepoints rely on `static_branch_unlikely` with a `struct static_key_false`, so we add the same functionality to Rust. This patch only provides a generic implementation without code patching (matching the one used when CONFIG_JUMP_LABEL is disabled). Later patches add support for inline asm implementations that use runtime patching. When CONFIG_JUMP_LABEL is unset, `static_key_count` is a static inline function, so a Rust helper is defined for `static_key_count` in this case. If Rust is compiled with LTO, this call should get inlined. The helper can be eliminated once we have the necessary inline asm to make atomic operations from Rust. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Jason Baron Cc: Ard Biesheuvel Cc: Miguel Ojeda Cc: Alex Gaynor Cc: Wedson Almeida Filho Cc: " =?utf-8?q?Bj=C3=B6rn_Roy_Baron?= " Cc: Benno Lossin Cc: Andreas Hindborg Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Sean Christopherson Cc: Uros Bizjak Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: Oliver Upton Cc: Mark Rutland Cc: Ryan Roberts Cc: Fuad Tabba Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Anup Patel Cc: Andrew Jones Cc: Alexandre Ghiti Cc: Conor Dooley Cc: Samuel Holland Cc: Huacai Chen Cc: WANG Xuerui Cc: Bibo Mao Cc: Tiezhu Yang Cc: Andrew Morton Cc: Tianrui Zhao Link: https://lore.kernel.org/20241030-tracepoint-v12-1-eec7f0f8ad22@google.com Reviewed-by: Boqun Feng Reviewed-by: Gary Guo Signed-off-by: Alice Ryhl Signed-off-by: Steven Rostedt (Google) --- rust/bindings/bindings_helper.h | 1 + rust/helpers/helpers.c | 1 + rust/helpers/jump_label.c | 14 ++++++++++++++ rust/kernel/jump_label.rs | 30 ++++++++++++++++++++++++++++++ rust/kernel/lib.rs | 1 + 5 files changed, 47 insertions(+) create mode 100644 rust/helpers/jump_label.c create mode 100644 rust/kernel/jump_label.rs (limited to 'rust/helpers/helpers.c') diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index ae82e9c941afa..e0846e7e93e6e 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 30f40149f3a96..17e1b60d178fd 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -12,6 +12,7 @@ #include "build_assert.c" #include "build_bug.c" #include "err.c" +#include "jump_label.c" #include "kunit.c" #include "mutex.c" #include "page.c" diff --git a/rust/helpers/jump_label.c b/rust/helpers/jump_label.c new file mode 100644 index 0000000000000..fc1f1e0df08e7 --- /dev/null +++ b/rust/helpers/jump_label.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2024 Google LLC. + */ + +#include + +#ifndef CONFIG_JUMP_LABEL +int rust_helper_static_key_count(struct static_key *key) +{ + return static_key_count(key); +} +#endif diff --git a/rust/kernel/jump_label.rs b/rust/kernel/jump_label.rs new file mode 100644 index 0000000000000..4b7655b2a0226 --- /dev/null +++ b/rust/kernel/jump_label.rs @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Logic for static keys. +//! +//! C header: [`include/linux/jump_label.h`](srctree/include/linux/jump_label.h). + +/// Branch based on a static key. +/// +/// Takes three arguments: +/// +/// * `key` - the path to the static variable containing the `static_key`. +/// * `keytyp` - the type of `key`. +/// * `field` - the name of the field of `key` that contains the `static_key`. +/// +/// # Safety +/// +/// The macro must be used with a real static key defined by C. +#[macro_export] +macro_rules! static_branch_unlikely { + ($key:path, $keytyp:ty, $field:ident) => {{ + let _key: *const $keytyp = ::core::ptr::addr_of!($key); + let _key: *const $crate::bindings::static_key_false = ::core::ptr::addr_of!((*_key).$field); + let _key: *const $crate::bindings::static_key = _key.cast(); + + $crate::bindings::static_key_count(_key.cast_mut()) > 0 + }}; +} +pub use static_branch_unlikely; diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index b5f4b3ce6b482..708ff817ccc3b 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -36,6 +36,7 @@ pub mod error; pub mod firmware; pub mod init; pub mod ioctl; +pub mod jump_label; #[cfg(CONFIG_KUNIT)] pub mod kunit; pub mod list; -- cgit v1.2.3