diff options
Diffstat (limited to 'rust/kernel')
| -rw-r--r-- | rust/kernel/cred.rs | 85 | ||||
| -rw-r--r-- | rust/kernel/device.rs | 15 | ||||
| -rw-r--r-- | rust/kernel/firmware.rs | 2 | ||||
| -rw-r--r-- | rust/kernel/fs.rs | 8 | ||||
| -rw-r--r-- | rust/kernel/fs/file.rs | 461 | ||||
| -rw-r--r-- | rust/kernel/kunit.rs | 4 | ||||
| -rw-r--r-- | rust/kernel/lib.rs | 6 | ||||
| -rw-r--r-- | rust/kernel/security.rs | 74 | ||||
| -rw-r--r-- | rust/kernel/seq_file.rs | 52 | ||||
| -rw-r--r-- | rust/kernel/sync.rs | 1 | ||||
| -rw-r--r-- | rust/kernel/sync/lock.rs | 13 | ||||
| -rw-r--r-- | rust/kernel/sync/locked_by.rs | 18 | ||||
| -rw-r--r-- | rust/kernel/sync/poll.rs | 121 | ||||
| -rw-r--r-- | rust/kernel/task.rs | 120 | ||||
| -rw-r--r-- | rust/kernel/types.rs | 21 | 
15 files changed, 957 insertions, 44 deletions
| diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs new file mode 100644 index 000000000000..81d67789b16f --- /dev/null +++ b/rust/kernel/cred.rs @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Credentials management. +//! +//! C header: [`include/linux/cred.h`](srctree/include/linux/cred.h). +//! +//! Reference: <https://www.kernel.org/doc/html/latest/security/credentials.html> + +use crate::{ +    bindings, +    task::Kuid, +    types::{AlwaysRefCounted, Opaque}, +}; + +/// Wraps the kernel's `struct cred`. +/// +/// Credentials are used for various security checks in the kernel. +/// +/// Most fields of credentials are immutable. When things have their credentials changed, that +/// happens by replacing the credential instead of changing an existing credential. See the [kernel +/// documentation][ref] for more info on this. +/// +/// # Invariants +/// +/// Instances of this type are always ref-counted, that is, a call to `get_cred` ensures that the +/// allocation remains valid at least until the matching call to `put_cred`. +/// +/// [ref]: https://www.kernel.org/doc/html/latest/security/credentials.html +#[repr(transparent)] +pub struct Credential(Opaque<bindings::cred>); + +// SAFETY: +// - `Credential::dec_ref` can be called from any thread. +// - It is okay to send ownership of `Credential` across thread boundaries. +unsafe impl Send for Credential {} + +// SAFETY: It's OK to access `Credential` through shared references from other threads because +// we're either accessing properties that don't change or that are properly synchronised by C code. +unsafe impl Sync for Credential {} + +impl Credential { +    /// Creates a reference to a [`Credential`] from a valid pointer. +    /// +    /// # Safety +    /// +    /// The caller must ensure that `ptr` is valid and remains valid for the lifetime of the +    /// returned [`Credential`] reference. 
+    pub unsafe fn from_ptr<'a>(ptr: *const bindings::cred) -> &'a Credential { +        // SAFETY: The safety requirements guarantee the validity of the dereference, while the +        // `Credential` type being transparent makes the cast ok. +        unsafe { &*ptr.cast() } +    } + +    /// Get the id for this security context. +    pub fn get_secid(&self) -> u32 { +        let mut secid = 0; +        // SAFETY: The invariants of this type ensures that the pointer is valid. +        unsafe { bindings::security_cred_getsecid(self.0.get(), &mut secid) }; +        secid +    } + +    /// Returns the effective UID of the given credential. +    pub fn euid(&self) -> Kuid { +        // SAFETY: By the type invariant, we know that `self.0` is valid. Furthermore, the `euid` +        // field of a credential is never changed after initialization, so there is no potential +        // for data races. +        Kuid::from_raw(unsafe { (*self.0.get()).euid }) +    } +} + +// SAFETY: The type invariants guarantee that `Credential` is always ref-counted. +unsafe impl AlwaysRefCounted for Credential { +    fn inc_ref(&self) { +        // SAFETY: The existence of a shared reference means that the refcount is nonzero. +        unsafe { bindings::get_cred(self.0.get()) }; +    } + +    unsafe fn dec_ref(obj: core::ptr::NonNull<Credential>) { +        // SAFETY: The safety requirements guarantee that the refcount is nonzero. The cast is okay +        // because `Credential` has the same representation as `struct cred`. +        unsafe { bindings::put_cred(obj.cast().as_ptr()) }; +    } +} diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 851018eef885..c8199ee079ef 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -51,18 +51,9 @@ impl Device {      ///      /// It must also be ensured that `bindings::device::release` can be called from any thread.      /// While not officially documented, this should be the case for any `struct device`. 
-    pub unsafe fn from_raw(ptr: *mut bindings::device) -> ARef<Self> { -        // SAFETY: By the safety requirements, ptr is valid. -        // Initially increase the reference count by one to compensate for the final decrement once -        // this newly created `ARef<Device>` instance is dropped. -        unsafe { bindings::get_device(ptr) }; - -        // CAST: `Self` is a `repr(transparent)` wrapper around `bindings::device`. -        let ptr = ptr.cast::<Self>(); - -        // SAFETY: `ptr` is valid by the safety requirements of this function. By the above call to -        // `bindings::get_device` we also own a reference to the underlying `struct device`. -        unsafe { ARef::from_raw(ptr::NonNull::new_unchecked(ptr)) } +    pub unsafe fn get_device(ptr: *mut bindings::device) -> ARef<Self> { +        // SAFETY: By the safety requirements ptr is valid +        unsafe { Self::as_ref(ptr) }.into()      }      /// Obtain the raw `struct device *`. diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index dee5b4b18aec..13a374a5cdb7 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -44,7 +44,7 @@ impl FwFunc {  ///  /// # fn no_run() -> Result<(), Error> {  /// # // SAFETY: *NOT* safe, just for the example to get an `ARef<Device>` instance -/// # let dev = unsafe { Device::from_raw(core::ptr::null_mut()) }; +/// # let dev = unsafe { Device::get_device(core::ptr::null_mut()) };  ///  /// let fw = Firmware::request(c_str!("path/to/firmware.bin"), &dev)?;  /// let blob = fw.data(); diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs new file mode 100644 index 000000000000..0121b38c59e6 --- /dev/null +++ b/rust/kernel/fs.rs @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Kernel file systems. +//! +//! 
C headers: [`include/linux/fs.h`](srctree/include/linux/fs.h) + +pub mod file; +pub use self::file::{File, LocalFile}; diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs new file mode 100644 index 000000000000..e03dbe14d62a --- /dev/null +++ b/rust/kernel/fs/file.rs @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Files and file descriptors. +//! +//! C headers: [`include/linux/fs.h`](srctree/include/linux/fs.h) and +//! [`include/linux/file.h`](srctree/include/linux/file.h) + +use crate::{ +    bindings, +    cred::Credential, +    error::{code::*, Error, Result}, +    types::{ARef, AlwaysRefCounted, NotThreadSafe, Opaque}, +}; +use core::ptr; + +/// Flags associated with a [`File`]. +pub mod flags { +    /// File is opened in append mode. +    pub const O_APPEND: u32 = bindings::O_APPEND; + +    /// Signal-driven I/O is enabled. +    pub const O_ASYNC: u32 = bindings::FASYNC; + +    /// Close-on-exec flag is set. +    pub const O_CLOEXEC: u32 = bindings::O_CLOEXEC; + +    /// File was created if it didn't already exist. +    pub const O_CREAT: u32 = bindings::O_CREAT; + +    /// Direct I/O is enabled for this file. +    pub const O_DIRECT: u32 = bindings::O_DIRECT; + +    /// File must be a directory. +    pub const O_DIRECTORY: u32 = bindings::O_DIRECTORY; + +    /// Like [`O_SYNC`] except metadata is not synced. +    pub const O_DSYNC: u32 = bindings::O_DSYNC; + +    /// Ensure that this file is created with the `open(2)` call. +    pub const O_EXCL: u32 = bindings::O_EXCL; + +    /// Large file size enabled (`off64_t` over `off_t`). +    pub const O_LARGEFILE: u32 = bindings::O_LARGEFILE; + +    /// Do not update the file last access time. +    pub const O_NOATIME: u32 = bindings::O_NOATIME; + +    /// File should not be used as process's controlling terminal. +    pub const O_NOCTTY: u32 = bindings::O_NOCTTY; + +    /// If basename of path is a symbolic link, fail open. 
+    pub const O_NOFOLLOW: u32 = bindings::O_NOFOLLOW; + +    /// File is using nonblocking I/O. +    pub const O_NONBLOCK: u32 = bindings::O_NONBLOCK; + +    /// File is using nonblocking I/O. +    /// +    /// This is effectively the same flag as [`O_NONBLOCK`] on all architectures +    /// except SPARC64. +    pub const O_NDELAY: u32 = bindings::O_NDELAY; + +    /// Used to obtain a path file descriptor. +    pub const O_PATH: u32 = bindings::O_PATH; + +    /// Write operations on this file will flush data and metadata. +    pub const O_SYNC: u32 = bindings::O_SYNC; + +    /// This file is an unnamed temporary regular file. +    pub const O_TMPFILE: u32 = bindings::O_TMPFILE; + +    /// File should be truncated to length 0. +    pub const O_TRUNC: u32 = bindings::O_TRUNC; + +    /// Bitmask for access mode flags. +    /// +    /// # Examples +    /// +    /// ``` +    /// use kernel::fs::file; +    /// # fn do_something() {} +    /// # let flags = 0; +    /// if (flags & file::flags::O_ACCMODE) == file::flags::O_RDONLY { +    ///     do_something(); +    /// } +    /// ``` +    pub const O_ACCMODE: u32 = bindings::O_ACCMODE; + +    /// File is read only. +    pub const O_RDONLY: u32 = bindings::O_RDONLY; + +    /// File is write only. +    pub const O_WRONLY: u32 = bindings::O_WRONLY; + +    /// File can be both read and written. +    pub const O_RDWR: u32 = bindings::O_RDWR; +} + +/// Wraps the kernel's `struct file`. Thread safe. +/// +/// This represents an open file rather than a file on a filesystem. Processes generally reference +/// open files using file descriptors. However, file descriptors are not the same as files. A file +/// descriptor is just an integer that corresponds to a file, and a single file may be referenced +/// by multiple file descriptors. +/// +/// # Refcounting +/// +/// Instances of this type are reference-counted. The reference count is incremented by the +/// `fget`/`get_file` functions and decremented by `fput`. 
The Rust type `ARef<File>` represents a +/// pointer that owns a reference count on the file. +/// +/// Whenever a process opens a file descriptor (fd), it stores a pointer to the file in its fd +/// table (`struct files_struct`). This pointer owns a reference count to the file, ensuring the +/// file isn't prematurely deleted while the file descriptor is open. In Rust terminology, the +/// pointers in `struct files_struct` are `ARef<File>` pointers. +/// +/// ## Light refcounts +/// +/// Whenever a process has an fd to a file, it may use something called a "light refcount" as a +/// performance optimization. Light refcounts are acquired by calling `fdget` and released with +/// `fdput`. The idea behind light refcounts is that if the fd is not closed between the calls to +/// `fdget` and `fdput`, then the refcount cannot hit zero during that time, as the `struct +/// files_struct` holds a reference until the fd is closed. This means that it's safe to access the +/// file even if `fdget` does not increment the refcount. +/// +/// The requirement that the fd is not closed during a light refcount applies globally across all +/// threads - not just on the thread using the light refcount. For this reason, light refcounts are +/// only used when the `struct files_struct` is not shared with other threads, since this ensures +/// that other unrelated threads cannot suddenly start using the fd and close it. Therefore, +/// calling `fdget` on a shared `struct files_struct` creates a normal refcount instead of a light +/// refcount. +/// +/// Light reference counts must be released with `fdput` before the system call returns to +/// userspace. This means that if you wait until the current system call returns to userspace, then +/// all light refcounts that existed at the time have gone away. +/// +/// ### The file position +/// +/// Each `struct file` has a position integer, which is protected by the `f_pos_lock` mutex. 
+/// However, if the `struct file` is not shared, then the kernel may avoid taking the lock as a +/// performance optimization. +/// +/// The condition for avoiding the `f_pos_lock` mutex is different from the condition for using +/// `fdget`. With `fdget`, you may avoid incrementing the refcount as long as the current fd table +/// is not shared; it is okay if there are other fd tables that also reference the same `struct +/// file`. However, `fdget_pos` can only avoid taking the `f_pos_lock` if the entire `struct file` +/// is not shared, as different processes with an fd to the same `struct file` share the same +/// position. +/// +/// To represent files that are not thread safe due to this optimization, the [`LocalFile`] type is +/// used. +/// +/// ## Rust references +/// +/// The reference type `&File` is similar to light refcounts: +/// +/// * `&File` references don't own a reference count. They can only exist as long as the reference +///   count stays positive, and can only be created when there is some mechanism in place to ensure +///   this. +/// +/// * The Rust borrow-checker normally ensures this by enforcing that the `ARef<File>` from which +///   a `&File` is created outlives the `&File`. +/// +/// * Using the unsafe [`File::from_raw_file`] means that it is up to the caller to ensure that the +///   `&File` only exists while the reference count is positive. +/// +/// * You can think of `fdget` as using an fd to look up an `ARef<File>` in the `struct +///   files_struct` and create an `&File` from it. The "fd cannot be closed" rule is like the Rust +///   rule "the `ARef<File>` must outlive the `&File`". +/// +/// # Invariants +/// +/// * All instances of this type are refcounted using the `f_count` field. +/// * There must not be any active calls to `fdget_pos` on this file that did not take the +///   `f_pos_lock` mutex. 
+#[repr(transparent)] +pub struct File { +    inner: Opaque<bindings::file>, +} + +// SAFETY: This file is known to not have any active `fdget_pos` calls that did not take the +// `f_pos_lock` mutex, so it is safe to transfer it between threads. +unsafe impl Send for File {} + +// SAFETY: This file is known to not have any active `fdget_pos` calls that did not take the +// `f_pos_lock` mutex, so it is safe to access its methods from several threads in parallel. +unsafe impl Sync for File {} + +// SAFETY: The type invariants guarantee that `File` is always ref-counted. This implementation +// makes `ARef<File>` own a normal refcount. +unsafe impl AlwaysRefCounted for File { +    #[inline] +    fn inc_ref(&self) { +        // SAFETY: The existence of a shared reference means that the refcount is nonzero. +        unsafe { bindings::get_file(self.as_ptr()) }; +    } + +    #[inline] +    unsafe fn dec_ref(obj: ptr::NonNull<File>) { +        // SAFETY: To call this method, the caller passes us ownership of a normal refcount, so we +        // may drop it. The cast is okay since `File` has the same representation as `struct file`. +        unsafe { bindings::fput(obj.cast().as_ptr()) } +    } +} + +/// Wraps the kernel's `struct file`. Not thread safe. +/// +/// This type represents a file that is not known to be safe to transfer across thread boundaries. +/// To obtain a thread-safe [`File`], use the [`assume_no_fdget_pos`] conversion. +/// +/// See the documentation for [`File`] for more information. +/// +/// # Invariants +/// +/// * All instances of this type are refcounted using the `f_count` field. +/// * If there is an active call to `fdget_pos` that did not take the `f_pos_lock` mutex, then it +///   must be on the same thread as this file. +/// +/// [`assume_no_fdget_pos`]: LocalFile::assume_no_fdget_pos +pub struct LocalFile { +    inner: Opaque<bindings::file>, +} + +// SAFETY: The type invariants guarantee that `LocalFile` is always ref-counted. 
This implementation +// makes `ARef<File>` own a normal refcount. +unsafe impl AlwaysRefCounted for LocalFile { +    #[inline] +    fn inc_ref(&self) { +        // SAFETY: The existence of a shared reference means that the refcount is nonzero. +        unsafe { bindings::get_file(self.as_ptr()) }; +    } + +    #[inline] +    unsafe fn dec_ref(obj: ptr::NonNull<LocalFile>) { +        // SAFETY: To call this method, the caller passes us ownership of a normal refcount, so we +        // may drop it. The cast is okay since `File` has the same representation as `struct file`. +        unsafe { bindings::fput(obj.cast().as_ptr()) } +    } +} + +impl LocalFile { +    /// Constructs a new `struct file` wrapper from a file descriptor. +    /// +    /// The file descriptor belongs to the current process, and there might be active local calls +    /// to `fdget_pos` on the same file. +    /// +    /// To obtain an `ARef<File>`, use the [`assume_no_fdget_pos`] function to convert. +    /// +    /// [`assume_no_fdget_pos`]: LocalFile::assume_no_fdget_pos +    #[inline] +    pub fn fget(fd: u32) -> Result<ARef<LocalFile>, BadFdError> { +        // SAFETY: FFI call, there are no requirements on `fd`. +        let ptr = ptr::NonNull::new(unsafe { bindings::fget(fd) }).ok_or(BadFdError)?; + +        // SAFETY: `bindings::fget` created a refcount, and we pass ownership of it to the `ARef`. +        // +        // INVARIANT: This file is in the fd table on this thread, so either all `fdget_pos` calls +        // are on this thread, or the file is shared, in which case `fdget_pos` calls took the +        // `f_pos_lock` mutex. +        Ok(unsafe { ARef::from_raw(ptr.cast()) }) +    } + +    /// Creates a reference to a [`LocalFile`] from a valid pointer. +    /// +    /// # Safety +    /// +    /// * The caller must ensure that `ptr` points at a valid file and that the file's refcount is +    ///   positive for the duration of 'a. 
+    /// * The caller must ensure that if there is an active call to `fdget_pos` that did not take +    ///   the `f_pos_lock` mutex, then that call is on the current thread. +    #[inline] +    pub unsafe fn from_raw_file<'a>(ptr: *const bindings::file) -> &'a LocalFile { +        // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the +        // duration of 'a. The cast is okay because `File` is `repr(transparent)`. +        // +        // INVARIANT: The caller guarantees that there are no problematic `fdget_pos` calls. +        unsafe { &*ptr.cast() } +    } + +    /// Assume that there are no active `fdget_pos` calls that prevent us from sharing this file. +    /// +    /// This makes it safe to transfer this file to other threads. No checks are performed, and +    /// using it incorrectly may lead to a data race on the file position if the file is shared +    /// with another thread. +    /// +    /// This method is intended to be used together with [`LocalFile::fget`] when the caller knows +    /// statically that there are no `fdget_pos` calls on the current thread. For example, you +    /// might use it when calling `fget` from an ioctl, since ioctls usually do not touch the file +    /// position. +    /// +    /// # Safety +    /// +    /// There must not be any active `fdget_pos` calls on the current thread. +    #[inline] +    pub unsafe fn assume_no_fdget_pos(me: ARef<LocalFile>) -> ARef<File> { +        // INVARIANT: There are no `fdget_pos` calls on the current thread, and by the type +        // invariants, if there is a `fdget_pos` call on another thread, then it took the +        // `f_pos_lock` mutex. +        // +        // SAFETY: `LocalFile` and `File` have the same layout. +        unsafe { ARef::from_raw(ARef::into_raw(me).cast()) } +    } + +    /// Returns a raw pointer to the inner C struct. 
+    #[inline] +    pub fn as_ptr(&self) -> *mut bindings::file { +        self.inner.get() +    } + +    /// Returns the credentials of the task that originally opened the file. +    pub fn cred(&self) -> &Credential { +        // SAFETY: It's okay to read the `f_cred` field without synchronization because `f_cred` is +        // never changed after initialization of the file. +        let ptr = unsafe { (*self.as_ptr()).f_cred }; + +        // SAFETY: The signature of this function ensures that the caller will only access the +        // returned credential while the file is still valid, and the C side ensures that the +        // credential stays valid at least as long as the file. +        unsafe { Credential::from_ptr(ptr) } +    } + +    /// Returns the flags associated with the file. +    /// +    /// The flags are a combination of the constants in [`flags`]. +    #[inline] +    pub fn flags(&self) -> u32 { +        // This `read_volatile` is intended to correspond to a READ_ONCE call. +        // +        // SAFETY: The file is valid because the shared reference guarantees a nonzero refcount. +        // +        // FIXME(read_once): Replace with `read_once` when available on the Rust side. +        unsafe { core::ptr::addr_of!((*self.as_ptr()).f_flags).read_volatile() } +    } +} + +impl File { +    /// Creates a reference to a [`File`] from a valid pointer. +    /// +    /// # Safety +    /// +    /// * The caller must ensure that `ptr` points at a valid file and that the file's refcount is +    ///   positive for the duration of 'a. +    /// * The caller must ensure that if there are active `fdget_pos` calls on this file, then they +    ///   took the `f_pos_lock` mutex. +    #[inline] +    pub unsafe fn from_raw_file<'a>(ptr: *const bindings::file) -> &'a File { +        // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the +        // duration of 'a. The cast is okay because `File` is `repr(transparent)`. 
+        // +        // INVARIANT: The caller guarantees that there are no problematic `fdget_pos` calls. +        unsafe { &*ptr.cast() } +    } +} + +// Make LocalFile methods available on File. +impl core::ops::Deref for File { +    type Target = LocalFile; +    #[inline] +    fn deref(&self) -> &LocalFile { +        // SAFETY: The caller provides a `&File`, and since it is a reference, it must point at a +        // valid file for the desired duration. +        // +        // By the type invariants, there are no `fdget_pos` calls that did not take the +        // `f_pos_lock` mutex. +        unsafe { LocalFile::from_raw_file(self as *const File as *const bindings::file) } +    } +} + +/// A file descriptor reservation. +/// +/// This allows the creation of a file descriptor in two steps: first, we reserve a slot for it, +/// then we commit or drop the reservation. The first step may fail (e.g., the current process ran +/// out of available slots), but commit and drop never fail (and are mutually exclusive). +/// +/// Dropping the reservation happens in the destructor of this type. +/// +/// # Invariants +/// +/// The fd stored in this struct must correspond to a reserved file descriptor of the current task. +pub struct FileDescriptorReservation { +    fd: u32, +    /// Prevent values of this type from being moved to a different task. +    /// +    /// The `fd_install` and `put_unused_fd` functions assume that the value of `current` is +    /// unchanged since the call to `get_unused_fd_flags`. By adding this marker to this type, we +    /// prevent it from being moved across task boundaries, which ensures that `current` does not +    /// change while this value exists. +    _not_send: NotThreadSafe, +} + +impl FileDescriptorReservation { +    /// Creates a new file descriptor reservation. +    pub fn get_unused_fd_flags(flags: u32) -> Result<Self> { +        // SAFETY: FFI call, there are no safety requirements on `flags`. 
+        let fd: i32 = unsafe { bindings::get_unused_fd_flags(flags) }; +        if fd < 0 { +            return Err(Error::from_errno(fd)); +        } +        Ok(Self { +            fd: fd as u32, +            _not_send: NotThreadSafe, +        }) +    } + +    /// Returns the file descriptor number that was reserved. +    pub fn reserved_fd(&self) -> u32 { +        self.fd +    } + +    /// Commits the reservation. +    /// +    /// The previously reserved file descriptor is bound to `file`. This method consumes the +    /// [`FileDescriptorReservation`], so it will not be usable after this call. +    pub fn fd_install(self, file: ARef<File>) { +        // SAFETY: `self.fd` was previously returned by `get_unused_fd_flags`. We have not yet used +        // the fd, so it is still valid, and `current` still refers to the same task, as this type +        // cannot be moved across task boundaries. +        // +        // Furthermore, the file pointer is guaranteed to own a refcount by its type invariants, +        // and we take ownership of that refcount by not running the destructor below. +        // Additionally, the file is known to not have any non-shared `fdget_pos` calls, so even if +        // this process starts using the file position, this will not result in a data race on the +        // file position. +        unsafe { bindings::fd_install(self.fd, file.as_ptr()) }; + +        // `fd_install` consumes both the file descriptor and the file reference, so we cannot run +        // the destructors. +        core::mem::forget(self); +        core::mem::forget(file); +    } +} + +impl Drop for FileDescriptorReservation { +    fn drop(&mut self) { +        // SAFETY: By the type invariants of this type, `self.fd` was previously returned by +        // `get_unused_fd_flags`. We have not yet used the fd, so it is still valid, and `current` +        // still refers to the same task, as this type cannot be moved across task boundaries. 
+        unsafe { bindings::put_unused_fd(self.fd) }; +    } +} + +/// Represents the `EBADF` error code. +/// +/// Used for methods that can only fail with `EBADF`. +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct BadFdError; + +impl From<BadFdError> for Error { +    #[inline] +    fn from(_: BadFdError) -> Error { +        EBADF +    } +} + +impl core::fmt::Debug for BadFdError { +    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { +        f.pad("EBADF") +    } +} diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 0ba77276ae7e..824da0e9738a 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -18,7 +18,7 @@ pub fn err(args: fmt::Arguments<'_>) {      #[cfg(CONFIG_PRINTK)]      unsafe {          bindings::_printk( -            b"\x013%pA\0".as_ptr() as _, +            c"\x013%pA".as_ptr() as _,              &args as *const _ as *const c_void,          );      } @@ -34,7 +34,7 @@ pub fn info(args: fmt::Arguments<'_>) {      #[cfg(CONFIG_PRINTK)]      unsafe {          bindings::_printk( -            b"\x016%pA\0".as_ptr() as _, +            c"\x016%pA".as_ptr() as _,              &args as *const _ as *const c_void,          );      } diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 22a3bfa5a9e9..9843eedd4293 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -30,10 +30,12 @@ pub mod alloc;  #[cfg(CONFIG_BLOCK)]  pub mod block;  mod build_assert; +pub mod cred;  pub mod device;  pub mod error;  #[cfg(CONFIG_RUST_FW_LOADER_ABSTRACTIONS)]  pub mod firmware; +pub mod fs;  pub mod init;  pub mod ioctl;  #[cfg(CONFIG_KUNIT)] @@ -44,8 +46,10 @@ pub mod net;  pub mod page;  pub mod prelude;  pub mod print; -pub mod sizes;  pub mod rbtree; +pub mod security; +pub mod seq_file; +pub mod sizes;  mod static_assert;  #[doc(hidden)]  pub mod std_vendor; diff --git a/rust/kernel/security.rs b/rust/kernel/security.rs new file mode 100644 index 000000000000..2522868862a1 --- /dev/null +++ b/rust/kernel/security.rs 
@@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Linux Security Modules (LSM). +//! +//! C header: [`include/linux/security.h`](srctree/include/linux/security.h). + +use crate::{ +    bindings, +    error::{to_result, Result}, +}; + +/// A security context string. +/// +/// # Invariants +/// +/// The `secdata` and `seclen` fields correspond to a valid security context as returned by a +/// successful call to `security_secid_to_secctx`, that has not yet been destroyed by calling +/// `security_release_secctx`. +pub struct SecurityCtx { +    secdata: *mut core::ffi::c_char, +    seclen: usize, +} + +impl SecurityCtx { +    /// Get the security context given its id. +    pub fn from_secid(secid: u32) -> Result<Self> { +        let mut secdata = core::ptr::null_mut(); +        let mut seclen = 0u32; +        // SAFETY: Just a C FFI call. The pointers are valid for writes. +        to_result(unsafe { bindings::security_secid_to_secctx(secid, &mut secdata, &mut seclen) })?; + +        // INVARIANT: If the above call did not fail, then we have a valid security context. +        Ok(Self { +            secdata, +            seclen: seclen as usize, +        }) +    } + +    /// Returns whether the security context is empty. +    pub fn is_empty(&self) -> bool { +        self.seclen == 0 +    } + +    /// Returns the length of this security context. +    pub fn len(&self) -> usize { +        self.seclen +    } + +    /// Returns the bytes for this security context. +    pub fn as_bytes(&self) -> &[u8] { +        let ptr = self.secdata; +        if ptr.is_null() { +            debug_assert_eq!(self.seclen, 0); +            // We can't pass a null pointer to `slice::from_raw_parts` even if the length is zero. +            return &[]; +        } + +        // SAFETY: The call to `security_secid_to_secctx` guarantees that the pointer is valid for +        // `seclen` bytes. 
Furthermore, if the length is zero, then we have ensured that the +        // pointer is not null. +        unsafe { core::slice::from_raw_parts(ptr.cast(), self.seclen) } +    } +} + +impl Drop for SecurityCtx { +    fn drop(&mut self) { +        // SAFETY: By the invariant of `Self`, this frees a pointer that came from a successful +        // call to `security_secid_to_secctx` and has not yet been destroyed by +        // `security_release_secctx`. +        unsafe { bindings::security_release_secctx(self.secdata, self.seclen as u32) }; +    } +} diff --git a/rust/kernel/seq_file.rs b/rust/kernel/seq_file.rs new file mode 100644 index 000000000000..6ca29d576d02 --- /dev/null +++ b/rust/kernel/seq_file.rs @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Seq file bindings. +//! +//! C header: [`include/linux/seq_file.h`](srctree/include/linux/seq_file.h) + +use crate::{bindings, c_str, types::NotThreadSafe, types::Opaque}; + +/// A utility for generating the contents of a seq file. +#[repr(transparent)] +pub struct SeqFile { +    inner: Opaque<bindings::seq_file>, +    _not_send: NotThreadSafe, +} + +impl SeqFile { +    /// Creates a new [`SeqFile`] from a raw pointer. +    /// +    /// # Safety +    /// +    /// The caller must ensure that for the duration of 'a the following is satisfied: +    /// * The pointer points at a valid `struct seq_file`. +    /// * The `struct seq_file` is not accessed from any other thread. +    pub unsafe fn from_raw<'a>(ptr: *mut bindings::seq_file) -> &'a SeqFile { +        // SAFETY: The caller ensures that the reference is valid for 'a. There's no way to trigger +        // a data race by using the `&SeqFile` since this is the only thread accessing the seq_file. +        // +        // CAST: The layout of `struct seq_file` and `SeqFile` is compatible. +        unsafe { &*ptr.cast() } +    } + +    /// Used by the [`seq_print`] macro. 
+    pub fn call_printf(&self, args: core::fmt::Arguments<'_>) { +        // SAFETY: Passing a void pointer to `Arguments` is valid for `%pA`. +        unsafe { +            bindings::seq_printf( +                self.inner.get(), +                c_str!("%pA").as_char_ptr(), +                &args as *const _ as *const core::ffi::c_void, +            ); +        } +    } +} + +/// Write to a [`SeqFile`] with the ordinary Rust formatting syntax. +#[macro_export] +macro_rules! seq_print { +    ($m:expr, $($arg:tt)+) => ( +        $m.call_printf(format_args!($($arg)+)) +    ); +} +pub use seq_print; diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index 0ab20975a3b5..bae4a5179c72 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -11,6 +11,7 @@ mod arc;  mod condvar;  pub mod lock;  mod locked_by; +pub mod poll;  pub use arc::{Arc, ArcBorrow, UniqueArc};  pub use condvar::{new_condvar, CondVar, CondVarTimeoutResult}; diff --git a/rust/kernel/sync/lock.rs b/rust/kernel/sync/lock.rs index f6c34ca4d819..d6e9bab114b8 100644 --- a/rust/kernel/sync/lock.rs +++ b/rust/kernel/sync/lock.rs @@ -6,8 +6,13 @@  //! spinlocks, raw spinlocks) to be provided with minimal effort.  use super::LockClassKey; -use crate::{init::PinInit, pin_init, str::CStr, types::Opaque, types::ScopeGuard}; -use core::{cell::UnsafeCell, marker::PhantomData, marker::PhantomPinned}; +use crate::{ +    init::PinInit, +    pin_init, +    str::CStr, +    types::{NotThreadSafe, Opaque, ScopeGuard}, +}; +use core::{cell::UnsafeCell, marker::PhantomPinned};  use macros::pin_data;  pub mod mutex; @@ -139,7 +144,7 @@ impl<T: ?Sized, B: Backend> Lock<T, B> {  pub struct Guard<'a, T: ?Sized, B: Backend> {      pub(crate) lock: &'a Lock<T, B>,      pub(crate) state: B::GuardState, -    _not_send: PhantomData<*mut ()>, +    _not_send: NotThreadSafe,  }  // SAFETY: `Guard` is sync when the data protected by the lock is also sync. 
@@ -191,7 +196,7 @@ impl<'a, T: ?Sized, B: Backend> Guard<'a, T, B> {          Self {              lock,              state, -            _not_send: PhantomData, +            _not_send: NotThreadSafe,          }      }  } diff --git a/rust/kernel/sync/locked_by.rs b/rust/kernel/sync/locked_by.rs index babc731bd5f6..ce2ee8d87865 100644 --- a/rust/kernel/sync/locked_by.rs +++ b/rust/kernel/sync/locked_by.rs @@ -83,8 +83,12 @@ pub struct LockedBy<T: ?Sized, U: ?Sized> {  // SAFETY: `LockedBy` can be transferred across thread boundaries iff the data it protects can.  unsafe impl<T: ?Sized + Send, U: ?Sized> Send for LockedBy<T, U> {} -// SAFETY: `LockedBy` serialises the interior mutability it provides, so it is `Sync` as long as the -// data it protects is `Send`. +// SAFETY: If `T` is not `Sync`, then parallel shared access to this `LockedBy` allows you to use +// `access_mut` to hand out `&mut T` on one thread at a time. The requirement that `T: Send` is +// sufficient to allow that. +// +// If `T` is `Sync`, then the `access` method also becomes available, which allows you to obtain +// several `&T` from several threads at once. However, this is okay as `T` is `Sync`.  unsafe impl<T: ?Sized + Send, U: ?Sized> Sync for LockedBy<T, U> {}  impl<T, U> LockedBy<T, U> { @@ -118,7 +122,10 @@ impl<T: ?Sized, U> LockedBy<T, U> {      ///      /// Panics if `owner` is different from the data protected by the lock used in      /// [`new`](LockedBy::new). -    pub fn access<'a>(&'a self, owner: &'a U) -> &'a T { +    pub fn access<'a>(&'a self, owner: &'a U) -> &'a T +    where +        T: Sync, +    {          build_assert!(              size_of::<U>() > 0,              "`U` cannot be a ZST because `owner` wouldn't be unique" @@ -127,7 +134,10 @@ impl<T: ?Sized, U> LockedBy<T, U> {              panic!("mismatched owners");          } -        // SAFETY: `owner` is evidence that the owner is locked. 
+        // SAFETY: `owner` is evidence that there are only shared references to the owner for the +        // duration of 'a, so it's not possible to use `Self::access_mut` to obtain a mutable +        // reference to the inner value that aliases with this shared reference. The type is `Sync` +        // so there are no other requirements.          unsafe { &*self.data.get() }      } diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs new file mode 100644 index 000000000000..d5f17153b424 --- /dev/null +++ b/rust/kernel/sync/poll.rs @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Utilities for working with `struct poll_table`. + +use crate::{ +    bindings, +    fs::File, +    prelude::*, +    sync::{CondVar, LockClassKey}, +    types::Opaque, +}; +use core::ops::Deref; + +/// Creates a [`PollCondVar`] initialiser with the given name and a newly-created lock class. +#[macro_export] +macro_rules! new_poll_condvar { +    ($($name:literal)?) => { +        $crate::sync::poll::PollCondVar::new( +            $crate::optional_name!($($name)?), $crate::static_lock_class!() +        ) +    }; +} + +/// Wraps the kernel's `struct poll_table`. +/// +/// # Invariants +/// +/// This struct contains a valid `struct poll_table`. +/// +/// For a `struct poll_table` to be valid, its `_qproc` function must follow the safety +/// requirements of `_qproc` functions: +/// +/// * The `_qproc` function is given permission to enqueue a waiter to the provided `poll_table` +///   during the call. Once the waiter is removed and an rcu grace period has passed, it must no +///   longer access the `wait_queue_head`. +#[repr(transparent)] +pub struct PollTable(Opaque<bindings::poll_table>); + +impl PollTable { +    /// Creates a reference to a [`PollTable`] from a valid pointer. 
+    /// +    /// # Safety +    /// +    /// The caller must ensure that for the duration of 'a, the pointer will point at a valid poll +    /// table (as defined in the type invariants). +    /// +    /// The caller must also ensure that the `poll_table` is only accessed via the returned +    /// reference for the duration of 'a. +    pub unsafe fn from_ptr<'a>(ptr: *mut bindings::poll_table) -> &'a mut PollTable { +        // SAFETY: The safety requirements guarantee the validity of the dereference, while the +        // `PollTable` type being transparent makes the cast ok. +        unsafe { &mut *ptr.cast() } +    } + +    fn get_qproc(&self) -> bindings::poll_queue_proc { +        let ptr = self.0.get(); +        // SAFETY: The `ptr` is valid because it originates from a reference, and the `_qproc` +        // field is not modified concurrently with this call since we have an immutable reference. +        unsafe { (*ptr)._qproc } +    } + +    /// Register this [`PollTable`] with the provided [`PollCondVar`], so that it can be notified +    /// using the condition variable. +    pub fn register_wait(&mut self, file: &File, cv: &PollCondVar) { +        if let Some(qproc) = self.get_qproc() { +            // SAFETY: The pointers to `file` and `self` need to be valid for the duration of this +            // call to `qproc`, which they are because they are references. +            // +            // The `cv.wait_queue_head` pointer must be valid until an rcu grace period after the +            // waiter is removed. The `PollCondVar` is pinned, so before `cv.wait_queue_head` can +            // be destroyed, the destructor must run. That destructor first removes all waiters, +            // and then waits for an rcu grace period. Therefore, `cv.wait_queue_head` is valid for +            // long enough. 
+            unsafe { qproc(file.as_ptr() as _, cv.wait_queue_head.get(), self.0.get()) }; +        } +    } +} + +/// A wrapper around [`CondVar`] that makes it usable with [`PollTable`]. +/// +/// [`CondVar`]: crate::sync::CondVar +#[pin_data(PinnedDrop)] +pub struct PollCondVar { +    #[pin] +    inner: CondVar, +} + +impl PollCondVar { +    /// Constructs a new condvar initialiser. +    pub fn new(name: &'static CStr, key: &'static LockClassKey) -> impl PinInit<Self> { +        pin_init!(Self { +            inner <- CondVar::new(name, key), +        }) +    } +} + +// Make the `CondVar` methods callable on `PollCondVar`. +impl Deref for PollCondVar { +    type Target = CondVar; + +    fn deref(&self) -> &CondVar { +        &self.inner +    } +} + +#[pinned_drop] +impl PinnedDrop for PollCondVar { +    fn drop(self: Pin<&mut Self>) { +        // Clear anything registered using `register_wait`. +        // +        // SAFETY: The pointer points at a valid `wait_queue_head`. +        unsafe { bindings::__wake_up_pollfree(self.inner.wait_queue_head.get()) }; + +        // Wait for epoll items to be properly removed. +        // +        // SAFETY: Just an FFI call. +        unsafe { bindings::synchronize_rcu() }; +    } +} diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 55dff7e088bf..080599075875 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -4,10 +4,13 @@  //!  //! C header: [`include/linux/sched.h`](srctree/include/linux/sched.h). -use crate::types::Opaque; +use crate::{ +    bindings, +    types::{NotThreadSafe, Opaque}, +};  use core::{ +    cmp::{Eq, PartialEq},      ffi::{c_int, c_long, c_uint}, -    marker::PhantomData,      ops::Deref,      ptr,  }; @@ -94,7 +97,22 @@ unsafe impl Sync for Task {}  /// The type of process identifiers (PIDs).  type Pid = bindings::pid_t; +/// The type of user identifiers (UIDs). 
+#[derive(Copy, Clone)] +pub struct Kuid { +    kuid: bindings::kuid_t, +} +  impl Task { +    /// Returns a raw pointer to the current task. +    /// +    /// It is up to the user to use the pointer correctly. +    #[inline] +    pub fn current_raw() -> *mut bindings::task_struct { +        // SAFETY: Getting the current pointer is always safe. +        unsafe { bindings::get_current() } +    } +      /// Returns a task reference for the currently executing task/thread.      ///      /// The recommended way to get the current task/thread is to use the @@ -106,7 +124,7 @@ impl Task {      pub unsafe fn current() -> impl Deref<Target = Task> {          struct TaskRef<'a> {              task: &'a Task, -            _not_send: PhantomData<*mut ()>, +            _not_send: NotThreadSafe,          }          impl Deref for TaskRef<'_> { @@ -117,23 +135,27 @@ impl Task {              }          } -        // SAFETY: Just an FFI call with no additional safety requirements. -        let ptr = unsafe { bindings::get_current() }; - +        let current = Task::current_raw();          TaskRef {              // SAFETY: If the current thread is still running, the current task is valid. Given              // that `TaskRef` is not `Send`, we know it cannot be transferred to another thread              // (where it could potentially outlive the caller). -            task: unsafe { &*ptr.cast() }, -            _not_send: PhantomData, +            task: unsafe { &*current.cast() }, +            _not_send: NotThreadSafe,          }      } +    /// Returns a raw pointer to the task. +    #[inline] +    pub fn as_ptr(&self) -> *mut bindings::task_struct { +        self.0.get() +    } +      /// Returns the group leader of the given task.      pub fn group_leader(&self) -> &Task { -        // SAFETY: By the type invariant, we know that `self.0` is a valid task. Valid tasks always -        // have a valid `group_leader`. 
-        let ptr = unsafe { *ptr::addr_of!((*self.0.get()).group_leader) }; +        // SAFETY: The group leader of a task never changes after initialization, so reading this +        // field is not a data race. +        let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) };          // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`,          // and given that a task has a reference to its group leader, we know it must be valid for @@ -143,23 +165,41 @@ impl Task {      /// Returns the PID of the given task.      pub fn pid(&self) -> Pid { -        // SAFETY: By the type invariant, we know that `self.0` is a valid task. Valid tasks always -        // have a valid pid. -        unsafe { *ptr::addr_of!((*self.0.get()).pid) } +        // SAFETY: The pid of a task never changes after initialization, so reading this field is +        // not a data race. +        unsafe { *ptr::addr_of!((*self.as_ptr()).pid) } +    } + +    /// Returns the UID of the given task. +    pub fn uid(&self) -> Kuid { +        // SAFETY: It's always safe to call `task_uid` on a valid task. +        Kuid::from_raw(unsafe { bindings::task_uid(self.as_ptr()) }) +    } + +    /// Returns the effective UID of the given task. +    pub fn euid(&self) -> Kuid { +        // SAFETY: It's always safe to call `task_euid` on a valid task. +        Kuid::from_raw(unsafe { bindings::task_euid(self.as_ptr()) })      }      /// Determines whether the given task has pending signals.      pub fn signal_pending(&self) -> bool { -        // SAFETY: By the type invariant, we know that `self.0` is valid. -        unsafe { bindings::signal_pending(self.0.get()) != 0 } +        // SAFETY: It's always safe to call `signal_pending` on a valid task. +        unsafe { bindings::signal_pending(self.as_ptr()) != 0 } +    } + +    /// Returns the given task's pid in the current pid namespace. 
+    pub fn pid_in_current_ns(&self) -> Pid { +        // SAFETY: It's valid to pass a null pointer as the namespace (defaults to current +        // namespace). The task pointer is also valid. +        unsafe { bindings::task_tgid_nr_ns(self.as_ptr(), ptr::null_mut()) }      }      /// Wakes up the task.      pub fn wake_up(&self) { -        // SAFETY: By the type invariant, we know that `self.0.get()` is non-null and valid. -        // And `wake_up_process` is safe to be called for any valid task, even if the task is +        // SAFETY: It's always safe to call `wake_up_process` on a valid task, even if the task is          // running. -        unsafe { bindings::wake_up_process(self.0.get()) }; +        unsafe { bindings::wake_up_process(self.as_ptr()) };      }  } @@ -167,7 +207,7 @@ impl Task {  unsafe impl crate::types::AlwaysRefCounted for Task {      fn inc_ref(&self) {          // SAFETY: The existence of a shared reference means that the refcount is nonzero. -        unsafe { bindings::get_task_struct(self.0.get()) }; +        unsafe { bindings::get_task_struct(self.as_ptr()) };      }      unsafe fn dec_ref(obj: ptr::NonNull<Self>) { @@ -175,3 +215,43 @@ unsafe impl crate::types::AlwaysRefCounted for Task {          unsafe { bindings::put_task_struct(obj.cast().as_ptr()) }      }  } + +impl Kuid { +    /// Get the current euid. +    #[inline] +    pub fn current_euid() -> Kuid { +        // SAFETY: Just an FFI call. +        Self::from_raw(unsafe { bindings::current_euid() }) +    } + +    /// Create a `Kuid` given the raw C type. +    #[inline] +    pub fn from_raw(kuid: bindings::kuid_t) -> Self { +        Self { kuid } +    } + +    /// Turn this kuid into the raw C type. +    #[inline] +    pub fn into_raw(self) -> bindings::kuid_t { +        self.kuid +    } + +    /// Converts this kernel UID into a userspace UID. +    /// +    /// Uses the namespace of the current task. 
+    #[inline] +    pub fn into_uid_in_current_ns(self) -> bindings::uid_t { +        // SAFETY: Just an FFI call. +        unsafe { bindings::from_kuid(bindings::current_user_ns(), self.kuid) } +    } +} + +impl PartialEq for Kuid { +    #[inline] +    fn eq(&self, other: &Kuid) -> bool { +        // SAFETY: Just an FFI call. +        unsafe { bindings::uid_eq(self.kuid, other.kuid) } +    } +} + +impl Eq for Kuid {} diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 9e7ca066355c..3238ffaab031 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -532,3 +532,24 @@ unsafe impl AsBytes for str {}  // does not have any uninitialized portions either.  unsafe impl<T: AsBytes> AsBytes for [T] {}  unsafe impl<T: AsBytes, const N: usize> AsBytes for [T; N] {} + +/// Zero-sized type to mark types not [`Send`]. +/// +/// Add this type as a field to your struct if your type should not be sent to a different task. +/// Since [`Send`] is an auto trait, adding a single field that is `!Send` will ensure that the +/// whole type is `!Send`. +/// +/// If a type is `!Send` it is impossible to give control over an instance of the type to another +/// task. This is useful to include in types that store or reference task-local information. A file +/// descriptor is an example of such task-local information. +/// +/// This type also makes the type `!Sync`, which prevents immutable access to the value from +/// several threads in parallel. +pub type NotThreadSafe = PhantomData<*mut ()>; + +/// Used to construct instances of type [`NotThreadSafe`] similar to how `PhantomData` is +/// constructed. +/// +/// [`NotThreadSafe`]: type@NotThreadSafe +#[allow(non_upper_case_globals)] +pub const NotThreadSafe: NotThreadSafe = PhantomData; | 
