910 files changed, 15031 insertions, 5169 deletions
@@ -197,6 +197,7 @@ Daniel Borkmann <daniel@iogearbox.net> <daniel.borkmann@tik.ee.ethz.ch>
 Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com>
 Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com>
 Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com>
+Danilo Krummrich <dakr@kernel.org> <dakr@redhat.com>
 David Brownell <david-b@pacbell.net>
 David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org>
 David Heidelberg <david@ixit.cz> <d.okias@gmail.com>
@@ -282,6 +283,7 @@ Gustavo Padovan <gustavo@las.ic.unicamp.br>
 Gustavo Padovan <padovan@profusion.mobi>
 Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> <hamza.mahfooz@amd.com>
 Hanjun Guo <guohanjun@huawei.com> <hanjun.guo@linaro.org>
+Hans de Goede <hansg@kernel.org> <hdegoede@redhat.com>
 Hans Verkuil <hverkuil@xs4all.nl> <hansverk@cisco.com>
 Hans Verkuil <hverkuil@xs4all.nl> <hverkuil-cisco@xs4all.nl>
 Harry Yoo <harry.yoo@oracle.com> <42.hyeyoo@gmail.com>
@@ -426,6 +428,9 @@ Krzysztof Wilczyński <kwilczynski@kernel.org> <krzysztof.wilczynski@linux.com>
 Krzysztof Wilczyński <kwilczynski@kernel.org> <kw@linux.com>
 Kshitiz Godara <quic_kgodara@quicinc.com> <kgodara@codeaurora.org>
 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+Kuniyuki Iwashima <kuniyu@google.com> <kuniyu@amazon.com>
+Kuniyuki Iwashima <kuniyu@google.com> <kuniyu@amazon.co.jp>
+Kuniyuki Iwashima <kuniyu@google.com> <kuni1840@gmail.com>
 Kuogee Hsieh <quic_khsieh@quicinc.com> <khsieh@codeaurora.org>
 Lee Jones <lee@kernel.org> <joneslee@google.com>
 Lee Jones <lee@kernel.org> <lee.jones@canonical.com>
@@ -688,9 +693,10 @@ Serge Hallyn <sergeh@kernel.org> <serge.hallyn@canonical.com>
 Serge Hallyn <sergeh@kernel.org> <serue@us.ibm.com>
 Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
 Shakeel Butt <shakeel.butt@linux.dev> <shakeelb@google.com>
-Shannon Nelson <shannon.nelson@amd.com> <snelson@pensando.io>
-Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@intel.com>
-Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@oracle.com>
+Shannon Nelson <sln@onemain.com> <shannon.nelson@amd.com>
+Shannon Nelson <sln@onemain.com> <snelson@pensando.io>
+Shannon Nelson <sln@onemain.com> <shannon.nelson@intel.com>
+Shannon Nelson <sln@onemain.com> <shannon.nelson@oracle.com>
 Sharath Chandra Vurukala <quic_sharathv@quicinc.com> <sharathv@codeaurora.org>
 Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
 Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
@@ -719,6 +725,7 @@ Srinivas Ramana <quic_sramana@quicinc.com> <sramana@codeaurora.org>
 Sriram R <quic_srirrama@quicinc.com> <srirrama@codeaurora.org>
 Sriram Yagnaraman <sriram.yagnaraman@ericsson.com> <sriram.yagnaraman@est.tech>
 Stanislav Fomichev <sdf@fomichev.me> <sdf@google.com>
+Stanislav Fomichev <sdf@fomichev.me> <stfomichev@gmail.com>
 Stefan Wahren <wahrenst@gmx.net> <stefan.wahren@i2se.com>
 Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
 Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
@@ -2981,6 +2981,11 @@ S: 521 Pleasant Valley Road
 S: Potsdam, New York 13676
 S: USA
+N: Shannon Nelson
+E: sln@onemain.com
+D: Worked on several network drivers including
+D: ixgbe, i40e, ionic, pds_core, pds_vdpa, pds_fwctl
+
 N: Dave Neuer
 E: dave.neuer@pobox.com
 D: Helped implement support for Compaq's H31xx series iPAQs
diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst index c09674a75a9e..d989ae5778ba 100644
--- a/Documentation/admin-guide/cifs/usage.rst
+++ 
b/Documentation/admin-guide/cifs/usage.rst
@@ -270,6 +270,8 @@ configured for Unix Extensions (and the client has not disabled
 illegal Windows/NTFS/SMB characters to a remap range (this mount parameter is
 the default for SMB3). This remap (``mapposix``) range is also compatible with
 Mac (and "Services for Mac" on some older Windows).
+When POSIX Extensions for SMB 3.1.1 are negotiated, remapping is automatically
+disabled.
 CIFS VFS Mount Options ======================
diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst index dee7b6de864f..ee9b790c0d72 100644
--- a/Documentation/arch/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@@ -234,7 +234,7 @@ Before jumping into the kernel, the following conditions must be met:
 - If the kernel is entered at EL1:
- - ICC.SRE_EL2.Enable (bit 3) must be initialised to 0b1
+ - ICC_SRE_EL2.Enable (bit 3) must be initialised to 0b1
 - ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.
 - The DT or ACPI tables must describe a GICv3 interrupt controller.
diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst index c368e1081b41..8c4030bcabb6 100644
--- a/Documentation/block/ublk.rst
+++ b/Documentation/block/ublk.rst
@@ -352,6 +352,83 @@ For reaching best IO performance, ublk server should align its segment
 parameter of `struct ublk_param_segment` with backend for avoiding
 unnecessary IO split, which usually hurts io_uring performance.
+Auto Buffer Registration
+------------------------
+
+The ``UBLK_F_AUTO_BUF_REG`` feature automatically handles buffer registration
+and unregistration for I/O requests, which simplifies the buffer management
+process and reduces overhead in the ublk server implementation.
+
+This is another feature flag for using zero copy, and it is compatible with
+``UBLK_F_SUPPORT_ZERO_COPY``.
+
+Feature Overview
+~~~~~~~~~~~~~~~~
+
+This feature automatically registers request buffers to the io_uring context
+before delivering I/O commands to the ublk server and unregisters them when
+completing I/O commands. This eliminates the need for manual buffer
+registration/unregistration via ``UBLK_IO_REGISTER_IO_BUF`` and
+``UBLK_IO_UNREGISTER_IO_BUF`` commands, so IO handling in the ublk server
+can avoid depending on those two uring_cmd operations.
+
+IOs can't be issued concurrently to io_uring if there is any dependency
+among them. Removing the dependency on the buffer registration &
+unregistration commands therefore not only simplifies the ublk server
+implementation, but also makes concurrent IO handling possible.
+
+Usage Requirements
+~~~~~~~~~~~~~~~~~~
+
+1. The ublk server must create a sparse buffer table on the same ``io_ring_ctx``
+   used for ``UBLK_IO_FETCH_REQ`` and ``UBLK_IO_COMMIT_AND_FETCH_REQ``. If
+   uring_cmd is issued on a different ``io_ring_ctx``, manual buffer
+   unregistration is required.
+
+2. Buffer registration data must be passed via uring_cmd's ``sqe->addr`` with the
+   following structure::
+
+    struct ublk_auto_buf_reg {
+        __u16 index;     /* Buffer index for registration */
+        __u8 flags;      /* Registration flags */
+        __u8 reserved0;  /* Reserved for future use */
+        __u32 reserved1; /* Reserved for future use */
+    };
+
+   ublk_auto_buf_reg_to_sqe_addr() converts the above structure into
+   ``sqe->addr`` (see the sketch after this list).
+
+3. All reserved fields in ``ublk_auto_buf_reg`` must be zeroed.
+
+4. Optional flags can be passed via ``ublk_auto_buf_reg.flags``.
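As a quick illustration of requirement 2, here is a minimal sketch of how a
ublk server might fill the registration data before issuing the fetch command.
The slot choice is hypothetical, and the bit packing (index in bits 0..15,
flags in bits 16..23) is assumed from the structure layout above:

::

    /*
     * Sketch: describe the buffer slot for this I/O and pack it into the
     * uring_cmd SQE before issuing UBLK_IO_FETCH_REQ or
     * UBLK_IO_COMMIT_AND_FETCH_REQ.
     */
    struct ublk_auto_buf_reg reg = {
        .index = tag,                        /* hypothetical slot choice */
        .flags = UBLK_AUTO_BUF_REG_FALLBACK, /* optional, see below */
    };

    sqe->addr = (__u64)reg.index | ((__u64)reg.flags << 16);

+Fallback Behavior
+~~~~~~~~~~~~~~~~~
+
+If auto buffer registration fails: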
+1. When ``UBLK_AUTO_BUF_REG_FALLBACK`` is enabled:
+
+   - The uring_cmd is completed
+   - ``UBLK_IO_F_NEED_REG_BUF`` is set in ``ublksrv_io_desc.op_flags``
+   - The ublk server must handle the failure manually, for example by
+     registering the buffer itself, or by using the user copy feature to
+     retrieve the data for handling the ublk IO
+
+2. If fallback is not enabled:
+
+   - The ublk I/O request fails silently
+   - The uring_cmd won't be completed
+
+Limitations
+~~~~~~~~~~~
+
+- Requires the same ``io_ring_ctx`` for all operations
+- May require manual buffer management in fallback cases
+- The io_ring_ctx buffer table has a max size of 16K, which may not be enough
+  if too many ublk devices are handled by a single io_ring_ctx and each one
+  has a very large queue depth
+
 References ==========
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index 0acb4c9b8d90..45bc5c5cd793 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -611,9 +611,10 @@ Q: I have added a new BPF instruction to the kernel, how can I integrate it into LLVM?
 A: LLVM has a ``-mcpu`` selector for the BPF back end in order to allow
-the selection of BPF instruction set extensions. By default the
-``generic`` processor target is used, which is the base instruction set
-(v1) of BPF.
+the selection of BPF instruction set extensions. Before LLVM version 20,
+the ``generic`` processor target was used by default; ``generic`` is the
+base instruction set (v1) of BPF. Since LLVM 20, the default processor
+target is instruction set v3.
 LLVM has an option to select ``-mcpu=probe`` where it will probe the host kernel for supported BPF instruction set extensions and selects the
diff --git a/Documentation/bpf/map_hash.rst b/Documentation/bpf/map_hash.rst index d2343952f2cb..8606bf958a8c 100644
--- a/Documentation/bpf/map_hash.rst
+++ b/Documentation/bpf/map_hash.rst
@@ -233,10 +233,16 @@ attempts in order to enforce the LRU property which have increasing impacts on
 other CPUs involved in the following operation attempts:
 - Attempt to use CPU-local state to batch operations
-- Attempt to fetch free nodes from global lists
+- Attempt to fetch ``target_free`` free nodes from global lists
 - Attempt to pull any node from a global list and remove it from the hashmap
 - Attempt to pull any node from any CPU's list and remove it from the hashmap
+
+The number of nodes to borrow from the global list in a batch, ``target_free``,
+depends on the size of the map. A larger batch size reduces lock contention,
+but may also exhaust the global structure. The value is computed at map init
+to avoid exhaustion, by limiting the aggregate reservation by all CPUs to half
+the map size, with a minimum of a single element and a maximum budget of 128
+at a time.
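A sketch of that sizing rule follows; this is an illustrative helper with
invented names, not the kernel's actual code:

::

    /*
     * Illustrative target_free computation: cap the aggregate per-CPU
     * reservation at half the map, clamped to the range [1, 128].
     */
    static unsigned int target_free_sketch(unsigned int map_entries,
                                           unsigned int nr_cpus)
    {
        unsigned int n = (map_entries / 2) / nr_cpus;

        if (n < 1)
            n = 1;
        if (n > 128)
            n = 128;
        return n;
    }

+This algorithm is described visually in the following diagram.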
See the description in commit 3a08c2fd7634 ("bpf: LRU List") for a full explanation of the corresponding operations:
diff --git a/Documentation/bpf/map_lru_hash_update.dot b/Documentation/bpf/map_lru_hash_update.dot index a0fee349d29c..ab10058f5b79 100644
--- a/Documentation/bpf/map_lru_hash_update.dot
+++ b/Documentation/bpf/map_lru_hash_update.dot
@@ -35,18 +35,18 @@ digraph {
 fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2, label="Flush local pending, Rotate Global list, move
- LOCAL_FREE_TARGET
+ target_free
 from global -> local"]
 // Also corresponds to:
 // fn__local_list_flush()
 // fn_bpf_lru_list_rotate()
 fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2,
- label="Able to free\nLOCAL_FREE_TARGET\nnodes?"]
+ label="Able to free\ntarget_free\nnodes?"]
 fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3, label="Shrink inactive list up to remaining
- LOCAL_FREE_TARGET
+ target_free
 (global LRU -> local)"]
 fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2, label="> 0 entries in\nlocal free list?"]
diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst index fbe975585236..39c74611752b 100644
--- a/Documentation/bpf/standardization/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -350,9 +350,9 @@ Underflow and overflow are allowed during arithmetic operations, meaning
 the 64-bit or 32-bit value will wrap. If BPF program execution would
 result in division by zero, the destination register is instead set to zero.
 Otherwise, for ``ALU64``, if execution would result in ``LLONG_MIN``
-dividing -1, the desination register is instead set to ``LLONG_MIN``. For
-``ALU``, if execution would result in ``INT_MIN`` dividing -1, the
-desination register is instead set to ``INT_MIN``.
+divided by -1, the destination register is instead set to ``LLONG_MIN``. For
+``ALU``, if execution would result in ``INT_MIN`` divided by -1, the
+destination register is instead set to ``INT_MIN``.
 If execution would result in modulo by zero, for ``ALU64`` the value of the destination register is unchanged whereas for ``ALU`` the upper
diff --git a/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml b/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml index b57ae6963e62..6b6f6762d122 100644
--- a/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml
@@ -97,7 +97,10 @@ properties:
 resets:
 items:
- - description: module reset
+ - description:
+ Module reset. This property is optional for controllers in Tegra194,
+ Tegra234, etc., where an internal software reset is available as an
+ alternative.
reset-names: items: @@ -116,6 +119,13 @@ properties: - const: rx - const: tx +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + allOf: - $ref: /schemas/i2c/i2c-controller.yaml - if: @@ -169,6 +179,18 @@ allOf: properties: power-domains: false + - if: + not: + properties: + compatible: + contains: + enum: + - nvidia,tegra194-i2c + then: + required: + - resets + - reset-names + unevaluatedProperties: false examples: diff --git a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml index b470901f5f56..4dbef86bd958 100644 --- a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml @@ -15,7 +15,7 @@ description: | Some peripherals such as PWM have their I/O go through the 4 "GPIOs". maintainers: - - Jianlong Huang <jianlong.huang@starfivetech.com> + - Hal Feng <hal.feng@starfivetech.com> properties: compatible: diff --git a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml index 222b9e240f8a..e2a25a20f6a6 100644 --- a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml @@ -18,7 +18,7 @@ description: | any GPIO can be set up to be controlled by any of the peripherals. maintainers: - - Jianlong Huang <jianlong.huang@starfivetech.com> + - Hal Feng <hal.feng@starfivetech.com> properties: compatible: diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.txt b/Documentation/devicetree/bindings/pmem/pmem-region.txt deleted file mode 100644 index cd79975e85ec..000000000000 --- a/Documentation/devicetree/bindings/pmem/pmem-region.txt +++ /dev/null @@ -1,65 +0,0 @@ -Device-tree bindings for persistent memory regions ------------------------------------------------------ - -Persistent memory refers to a class of memory devices that are: - - a) Usable as main system memory (i.e. cacheable), and - b) Retain their contents across power failure. - -Given b) it is best to think of persistent memory as a kind of memory mapped -storage device. To ensure data integrity the operating system needs to manage -persistent regions separately to the normal memory pool. To aid with that this -binding provides a standardised interface for discovering where persistent -memory regions exist inside the physical address space. - -Bindings for the region nodes: ------------------------------ - -Required properties: - - compatible = "pmem-region" - - - reg = <base, size>; - The reg property should specify an address range that is - translatable to a system physical address range. This address - range should be mappable as normal system memory would be - (i.e cacheable). - - If the reg property contains multiple address ranges - each address range will be treated as though it was specified - in a separate device node. Having multiple address ranges in a - node implies no special relationship between the two ranges. - -Optional properties: - - Any relevant NUMA associativity properties for the target platform. - - - volatile; This property indicates that this region is actually - backed by non-persistent memory. This lets the OS know that it - may skip the cache flushes required to ensure data is made - persistent after a write. 
- - If this property is absent then the OS must assume that the region - is backed by non-volatile memory. - -Examples: --------------------- - - /* - * This node specifies one 4KB region spanning from - * 0x5000 to 0x5fff that is backed by non-volatile memory. - */ - pmem@5000 { - compatible = "pmem-region"; - reg = <0x00005000 0x00001000>; - }; - - /* - * This node specifies two 4KB regions that are backed by - * volatile (normal) memory. - */ - pmem@6000 { - compatible = "pmem-region"; - reg = < 0x00006000 0x00001000 - 0x00008000 0x00001000 >; - volatile; - }; - diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.yaml b/Documentation/devicetree/bindings/pmem/pmem-region.yaml new file mode 100644 index 000000000000..bd0f0c793f03 --- /dev/null +++ b/Documentation/devicetree/bindings/pmem/pmem-region.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pmem-region.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +maintainers: + - Oliver O'Halloran <oohall@gmail.com> + +title: Persistent Memory Regions + +description: | + Persistent memory refers to a class of memory devices that are: + + a) Usable as main system memory (i.e. cacheable), and + b) Retain their contents across power failure. + + Given b) it is best to think of persistent memory as a kind of memory mapped + storage device. To ensure data integrity the operating system needs to manage + persistent regions separately to the normal memory pool. To aid with that this + binding provides a standardised interface for discovering where persistent + memory regions exist inside the physical address space. + +properties: + compatible: + const: pmem-region + + reg: + maxItems: 1 + + volatile: + description: + Indicates the region is volatile (non-persistent) and the OS can skip + cache flushes for writes + type: boolean + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + pmem@5000 { + compatible = "pmem-region"; + reg = <0x00005000 0x00001000>; + }; diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 3616d7161dab..a5734bdd1cc7 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1249,3 +1249,12 @@ Using try_lookup_noperm() will require linux/namei.h to be included. Calling conventions for ->d_automount() have changed; we should *not* grab an extra reference to new mount - it should be returned with refcount 1. + +--- + +collect_mounts()/drop_collected_mounts()/iterate_mounts() are gone now. +Replacement is collect_paths()/drop_collected_path(), with no special +iterator needed. Instead of a cloned mount tree, the new interface returns +an array of struct path, one for each mount collect_mounts() would've +created. These struct path point to locations in the caller's namespace +that would be roots of the cloned mounts. diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 2a17865dfe39..5236cb52e357 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -584,7 +584,6 @@ encoded manner. The codes are the following: ms may share gd stack segment growns down pf pure PFN range - dw disabled write to the mapped file lo pages are locked in memory io memory mapped I/O area sr sequential read advise provided @@ -607,8 +606,11 @@ encoded manner. 
The codes are the following: mt arm64 MTE allocation tags are enabled um userfaultfd missing tracking uw userfaultfd wr-protect tracking + ui userfaultfd minor fault ss shadow/guarded control stack page sl sealed + lf lock on fault pages + dp always lazily freeable mapping == ======================================= Note that there is no guarantee that every flag and associated mnemonic will diff --git a/Documentation/gpu/nouveau.rst b/Documentation/gpu/nouveau.rst index b8c801e0068c..cab2e81013bc 100644 --- a/Documentation/gpu/nouveau.rst +++ b/Documentation/gpu/nouveau.rst @@ -25,7 +25,7 @@ providing a consistent API to upper layers of the driver stack. GSP Support ------------------------ -.. kernel-doc:: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +.. kernel-doc:: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c :doc: GSP message queue element .. kernel-doc:: drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 4cbfe666e6f5..b29d62eefa16 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -6,6 +6,9 @@ $schema: https://json-schema.org/draft-07/schema # Common defines $defs: + name: + type: string + pattern: ^[0-9a-z-]+$ uint: type: integer minimum: 0 @@ -76,7 +79,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' header: description: For C-compatible languages, header which already defines this value. type: string @@ -103,7 +106,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' value: type: integer doc: @@ -132,7 +135,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' type: description: The netlink attribute type enum: [ u8, u16, u32, u64, s8, s16, s32, s64, string, binary ] @@ -169,7 +172,7 @@ properties: name: description: | Name used when referring to this space in other definitions, not used outside of the spec. - type: string + $ref: '#/$defs/name' name-prefix: description: | Prefix for the C enum name of the attributes. Default family[name]-set[name]-a- @@ -206,7 +209,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' type: &attr-type description: The netlink attribute type enum: [ unused, pad, flag, binary, bitfield32, @@ -348,7 +351,7 @@ properties: properties: name: description: Name of the operation, also defining its C enum value in uAPI. - type: string + $ref: '#/$defs/name' doc: description: Documentation for the command. type: string diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index 40efbbad76ab..7b1ec153e834 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -6,6 +6,9 @@ $schema: https://json-schema.org/draft-07/schema # Common defines $defs: + name: + type: string + pattern: ^[0-9a-z-]+$ uint: type: integer minimum: 0 @@ -29,7 +32,7 @@ additionalProperties: False properties: name: description: Name of the genetlink family. - type: string + $ref: '#/$defs/name' doc: type: string protocol: @@ -48,7 +51,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' header: description: For C-compatible languages, header which already defines this value. 
type: string @@ -75,7 +78,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' value: type: integer doc: @@ -96,7 +99,7 @@ properties: name: description: | Name used when referring to this space in other definitions, not used outside of the spec. - type: string + $ref: '#/$defs/name' name-prefix: description: | Prefix for the C enum name of the attributes. Default family[name]-set[name]-a- @@ -121,7 +124,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' type: &attr-type enum: [ unused, pad, flag, binary, uint, sint, u8, u16, u32, u64, s8, s16, s32, s64, @@ -243,7 +246,7 @@ properties: properties: name: description: Name of the operation, also defining its C enum value in uAPI. - type: string + $ref: '#/$defs/name' doc: description: Documentation for the command. type: string @@ -327,7 +330,7 @@ properties: name: description: | The name for the group, used to form the define and the value of the define. - type: string + $ref: '#/$defs/name' flags: *cmd_flags kernel-family: diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index e34bf23897fa..246fa07bccf6 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -6,6 +6,12 @@ $schema: https://json-schema.org/draft-07/schema # Common defines $defs: + name: + type: string + pattern: ^[0-9a-z-]+$ + name-cap: + type: string + pattern: ^[0-9a-zA-Z-]+$ uint: type: integer minimum: 0 @@ -71,7 +77,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' header: description: For C-compatible languages, header which already defines this value. type: string @@ -98,7 +104,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' value: type: integer doc: @@ -124,7 +130,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name-cap' type: description: | The netlink attribute type. Members of type 'binary' or 'pad' @@ -183,7 +189,7 @@ properties: name: description: | Name used when referring to this space in other definitions, not used outside of the spec. - type: string + $ref: '#/$defs/name' name-prefix: description: | Prefix for the C enum name of the attributes. Default family[name]-set[name]-a- @@ -220,7 +226,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' type: &attr-type description: The netlink attribute type enum: [ unused, pad, flag, binary, bitfield32, @@ -408,7 +414,7 @@ properties: properties: name: description: Name of the operation, also defining its C enum value in uAPI. - type: string + $ref: '#/$defs/name' doc: description: Documentation for the command. 
type: string diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 05fee1b7fe19..38ddc04f9e6d 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -38,15 +38,15 @@ definitions: - name: dsa - - name: pci_pf + name: pci-pf - - name: pci_vf + name: pci-vf - name: virtual - name: unused - - name: pci_sf + name: pci-sf - type: enum name: port-fn-state @@ -220,7 +220,7 @@ definitions: - name: flag - - name: nul_string + name: nul-string value: 10 - name: binary diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index 8feefeae5376..f434140b538e 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -188,7 +188,7 @@ definitions: value: 10000 - type: const - name: pin-frequency-77_5-khz + name: pin-frequency-77-5-khz value: 77500 - type: const diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 9f98715a6512..348c6ad548f5 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -7,6 +7,9 @@ protocol: genetlink-legacy doc: Partial family for Ethtool Netlink. uapi-header: linux/ethtool_netlink_generated.h +c-family-name: ethtool-genl-name +c-version-name: ethtool-genl-version + definitions: - name: udp-tunnel-type @@ -45,7 +48,7 @@ definitions: name: started doc: The firmware flashing process has started. - - name: in_progress + name: in-progress doc: The firmware flashing process is in progress. - name: completed @@ -1419,7 +1422,7 @@ attribute-sets: name: hkey type: binary - - name: input_xfrm + name: input-xfrm type: u32 - name: start-context @@ -2235,7 +2238,7 @@ operations: - hfunc - indir - hkey - - input_xfrm + - input-xfrm dump: request: attributes: diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml index 0af5ab842c04..b02ab19817d3 100644 --- a/Documentation/netlink/specs/fou.yaml +++ b/Documentation/netlink/specs/fou.yaml @@ -15,7 +15,7 @@ kernel-policy: global definitions: - type: enum - name: encap_type + name: encap-type name-prefix: fou-encap- enum-name: entries: [ unspec, direct, gue ] @@ -43,26 +43,26 @@ attribute-sets: name: type type: u8 - - name: remcsum_nopartial + name: remcsum-nopartial type: flag - - name: local_v4 + name: local-v4 type: u32 - - name: local_v6 + name: local-v6 type: binary checks: min-len: 16 - - name: peer_v4 + name: peer-v4 type: u32 - - name: peer_v6 + name: peer-v6 type: binary checks: min-len: 16 - - name: peer_port + name: peer-port type: u16 byte-order: big-endian - @@ -90,12 +90,12 @@ operations: - port - ipproto - type - - remcsum_nopartial - - local_v4 - - peer_v4 - - local_v6 - - peer_v6 - - peer_port + - remcsum-nopartial + - local-v4 + - peer-v4 + - local-v6 + - peer-v6 + - peer-port - ifindex - @@ -112,11 +112,11 @@ operations: - af - ifindex - port - - peer_port - - local_v4 - - peer_v4 - - local_v6 - - peer_v6 + - peer-port + - local-v4 + - peer-v4 + - local-v6 + - peer-v6 - name: get diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml index dfd017780d2f..fb57860fe778 100644 --- a/Documentation/netlink/specs/mptcp_pm.yaml +++ b/Documentation/netlink/specs/mptcp_pm.yaml @@ -57,21 +57,21 @@ definitions: doc: >- A new subflow has been established. 'error' should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - daddr6, sport, dport, backup, if_idx [, error]. 
+ daddr6, sport, dport, backup, if-idx [, error]. - name: sub-closed doc: >- A subflow has been closed. An error (copy of sk_err) could be set if an error has been detected for this subflow. Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - daddr6, sport, dport, backup, if_idx [, error]. + daddr6, sport, dport, backup, if-idx [, error]. - name: sub-priority value: 13 doc: >- The priority of a subflow has changed. 'error' should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - daddr6, sport, dport, backup, if_idx [, error]. + daddr6, sport, dport, backup, if-idx [, error]. - name: listener-created value: 15 @@ -255,7 +255,7 @@ attribute-sets: name: timeout type: u32 - - name: if_idx + name: if-idx type: u32 - name: reset-reason diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml index c87658114852..8d1a3c01708f 100644 --- a/Documentation/netlink/specs/nfsd.yaml +++ b/Documentation/netlink/specs/nfsd.yaml @@ -27,7 +27,7 @@ attribute-sets: name: proc type: u32 - - name: service_time + name: service-time type: s64 - name: pad @@ -139,7 +139,7 @@ operations: - prog - version - proc - - service_time + - service-time - saddr4 - daddr4 - saddr6 diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml index 46f5d1cd8a5f..7974aa7d8905 100644 --- a/Documentation/netlink/specs/ovs_flow.yaml +++ b/Documentation/netlink/specs/ovs_flow.yaml @@ -216,7 +216,7 @@ definitions: type: struct members: - - name: nd_target + name: nd-target type: binary len: 16 byte-order: big-endian @@ -258,12 +258,12 @@ definitions: type: struct members: - - name: vlan_tpid + name: vlan-tpid type: u16 byte-order: big-endian doc: Tag protocol identifier (TPID) to push. - - name: vlan_tci + name: vlan-tci type: u16 byte-order: big-endian doc: Tag control identifier (TCI) to push. diff --git a/Documentation/netlink/specs/rt-link.yaml b/Documentation/netlink/specs/rt-link.yaml index b41b31eebcae..28c4cf66517c 100644 --- a/Documentation/netlink/specs/rt-link.yaml +++ b/Documentation/netlink/specs/rt-link.yaml @@ -603,7 +603,7 @@ definitions: name: optmask type: u32 - - name: if_stats_msg + name: if-stats-msg type: struct members: - @@ -2486,7 +2486,7 @@ operations: name: getstats doc: Get / dump link stats. attribute-set: stats-attrs - fixed-header: if_stats_msg + fixed-header: if-stats-msg do: request: value: 94 diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml index cb7ea7d62e56..42d74c9aeb54 100644 --- a/Documentation/netlink/specs/tc.yaml +++ b/Documentation/netlink/specs/tc.yaml @@ -232,7 +232,7 @@ definitions: type: u8 doc: log(P_max / (qth-max - qth-min)) - - name: Scell_log + name: Scell-log type: u8 doc: cell size for idle damping - @@ -253,7 +253,7 @@ definitions: name: DPs type: u32 - - name: def_DP + name: def-DP type: u32 - name: grio diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst index af7db0e91f6b..a52850602cd8 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst @@ -66,7 +66,7 @@ Admin Function driver As mentioned above RVU PF0 is called the admin function (AF), this driver supports resource provisioning and configuration of functional blocks. Doesn't handle any I/O. 
It sets up few basic stuff but most of the -funcionality is achieved via configuration requests from PFs and VFs. +functionality is achieved via configuration requests from PFs and VFs. PF/VFs communicates with AF via a shared memory region (mailbox). Upon receiving requests AF does resource provisioning and other HW configuration. diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index da6bf0f6d01e..34e00848e0da 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -290,6 +290,7 @@ an involved disclosed party. The current ambassadors list: AMD Tom Lendacky <thomas.lendacky@amd.com> Ampere Darren Hart <darren@os.amperecomputing.com> ARM Catalin Marinas <catalin.marinas@arm.com> + IBM Power Madhavan Srinivasan <maddy@linux.ibm.com> IBM Z Christian Borntraeger <borntraeger@de.ibm.com> Intel Tony Luck <tony.luck@intel.com> Qualcomm Trilok Soni <quic_tsoni@quicinc.com> diff --git a/Documentation/sound/codecs/cs35l56.rst b/Documentation/sound/codecs/cs35l56.rst index 98c6f6c74394..57d1964453e1 100644 --- a/Documentation/sound/codecs/cs35l56.rst +++ b/Documentation/sound/codecs/cs35l56.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0-only -===================================================================== -Audio drivers for Cirrus Logic CS35L54/56/57 Boosted Smart Amplifiers -===================================================================== +======================================================================== +Audio drivers for Cirrus Logic CS35L54/56/57/63 Boosted Smart Amplifiers +======================================================================== :Copyright: 2025 Cirrus Logic, Inc. and Cirrus Logic International Semiconductor Ltd. @@ -13,11 +13,11 @@ Summary The high-level summary of this document is: -**If you have a laptop that uses CS35L54/56/57 amplifiers but audio is not +**If you have a laptop that uses CS35L54/56/57/63 amplifiers but audio is not working, DO NOT ATTEMPT TO USE FIRMWARE AND SETTINGS FROM ANOTHER LAPTOP, EVEN IF THAT LAPTOP SEEMS SIMILAR.** -The CS35L54/56/57 amplifiers must be correctly configured for the power +The CS35L54/56/57/63 amplifiers must be correctly configured for the power supply voltage, speaker impedance, maximum speaker voltage/current, and other external hardware connections. @@ -34,6 +34,7 @@ The cs35l56 drivers support: * CS35L54 * CS35L56 * CS35L57 +* CS35L63 There are two drivers in the kernel @@ -104,6 +105,13 @@ In this example the SSID is 10280c63. The format of the firmware file names is: +SoundWire (except CS35L56 Rev B0): + cs35lxx-b0-dsp1-misc-SSID[-spkidX]-l?u? + +SoundWire CS35L56 Rev B0: + cs35lxx-b0-dsp1-misc-SSID[-spkidX]-ampN + +Non-SoundWire (HDA and I2S): cs35lxx-b0-dsp1-misc-SSID[-spkidX]-ampN Where: @@ -111,12 +119,18 @@ Where: * cs35lxx-b0 is the amplifier model and silicon revision. This information is logged by the driver during initialization. * SSID is the 8-digit hexadecimal SSID value. + * l?u? is the physical address on the SoundWire bus of the amp this + file applies to. * ampN is the amplifier number (for example amp1). This is the same as the prefix on the ALSA control names except that it is always lower-case in the file name. * spkidX is an optional part, used for laptops that have firmware configurations for different makes and models of internal speakers. 
+The CS35L56 Rev B0 continues to use the old filename scheme because a
+large number of firmware files have already been published with these
+names.
+
 Sound Open Firmware and ALSA topology files -------------------------------------------
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 1bd2d42e6424..9abf93ee5f65 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6645,7 +6645,8 @@ to the byte array.
 .. note::
 For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN,
- KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
+ KVM_EXIT_EPR, KVM_EXIT_HYPERCALL, KVM_EXIT_TDX,
+ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
 operations are complete (and guest state is consistent) only after userspace
 has re-entered the kernel with KVM_RUN. The kernel side will first finish
 incomplete operations and then check for pending signals.
@@ -7176,6 +7177,62 @@ The valid value for 'flags' is:
 ::
+
+ /* KVM_EXIT_TDX */
+ struct {
+   __u64 flags;
+   __u64 nr;
+   union {
+     struct {
+       u64 ret;
+       u64 data[5];
+     } unknown;
+     struct {
+       u64 ret;
+       u64 gpa;
+       u64 size;
+     } get_quote;
+     struct {
+       u64 ret;
+       u64 leaf;
+       u64 r11, r12, r13, r14;
+     } get_tdvmcall_info;
+   };
+ } tdx;
+
+Process a TDVMCALL from the guest. KVM forwards select TDVMCALLs based
+on the Guest-Hypervisor Communication Interface (GHCI) specification;
+KVM bridges these requests to the userspace VMM with minimal changes,
+placing the inputs in the union and copying them back to the guest
+on re-entry.
+
+Flags are currently always zero, whereas ``nr`` contains the TDVMCALL
+number from register R11. The remaining fields of the union provide the
+inputs and outputs of the TDVMCALL. Currently the following values of
+``nr`` are defined:
+
+* ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote
+signed by a service hosting a TD-Quoting Enclave operating on the host.
+Parameters and return value are in the ``get_quote`` field of the union.
+The ``gpa`` and ``size`` fields specify the guest physical address
+(without the shared bit set) and the size of a shared-memory buffer, in
+which the TDX guest passes a TD Report. The ``ret`` field represents
+the return value of the GetQuote request. When the request has been
+queued successfully, the TDX guest can poll the status field in the
+shared-memory area to check whether the Quote generation is completed or
+not. When completed, the generated Quote is returned via the same buffer.
+
+* ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support
+status of TDVMCALLs. The output values for the given leaf should be
+placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info``
+field of the union.
+
+KVM may add support for more values in the future that may cause a userspace
+exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case,
+it will enter with output fields already valid; in the common case, the
+``unknown.ret`` field of the union will be ``TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED``.
+Userspace need not do anything if it does not wish to support a TDVMCALL.
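A rough sketch of how a userspace VMM might service the GetQuote exit
described above (illustrative only: ``handle_quote_request()`` is a
hypothetical helper, and error handling is omitted):

::

    /* Sketch: dispatch on a TDX exit in the VMM's vCPU run loop. */
    case KVM_EXIT_TDX:
            if (run->tdx.nr == TDVMCALL_GET_QUOTE) {
                    /*
                     * The shared buffer at get_quote.gpa/size carries the
                     * TD Report; queue Quote generation and report the
                     * request status back via ret.
                     */
                    run->tdx.get_quote.ret =
                            handle_quote_request(run->tdx.get_quote.gpa,
                                                 run->tdx.get_quote.size);
            }
            break;  /* outputs are copied back on the next KVM_RUN */

+::
+
+ /* Fix the size of the union.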
*/ char padding[256]; }; diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..efb51ee92683 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -207,7 +207,7 @@ X: arch/*/include/uapi/ X: include/uapi/ ABIT UGURU 1,2 HARDWARE MONITOR DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/abituguru.c @@ -371,7 +371,7 @@ S: Maintained F: drivers/platform/x86/quickstart.c ACPI SERIAL MULTI INSTANTIATE DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/serial-multi-instantiate.c @@ -1157,7 +1157,6 @@ F: arch/x86/include/asm/amd/node.h F: arch/x86/kernel/amd_node.c AMD PDS CORE DRIVER -M: Shannon Nelson <shannon.nelson@amd.com> M: Brett Creeley <brett.creeley@amd.com> L: netdev@vger.kernel.org S: Maintained @@ -3551,7 +3550,7 @@ F: arch/arm64/boot/Makefile F: scripts/make_fit.py ARM64 PLATFORM DRIVERS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> R: Bryan O'Donoghue <bryan.odonoghue@linaro.org> L: platform-driver-x86@vger.kernel.org @@ -3712,7 +3711,7 @@ F: drivers/platform/x86/asus*.c F: drivers/platform/x86/eeepc*.c ASUS TF103C DOCK DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git @@ -4555,6 +4554,7 @@ BPF [NETWORKING] (tcx & tc BPF, sock_addr) M: Martin KaFai Lau <martin.lau@linux.dev> M: Daniel Borkmann <daniel@iogearbox.net> R: John Fastabend <john.fastabend@gmail.com> +R: Stanislav Fomichev <sdf@fomichev.me> L: bpf@vger.kernel.org L: netdev@vger.kernel.org S: Maintained @@ -5613,14 +5613,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git F: drivers/usb/chipidea/ CHIPONE ICN8318 I2C TOUCHSCREEN DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml F: drivers/input/touchscreen/chipone_icn8318.c CHIPONE ICN8505 I2C TOUCHSCREEN DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained F: drivers/input/touchscreen/chipone_icn8505.c @@ -6254,6 +6254,7 @@ F: include/linux/cpuhotplug.h F: include/linux/smpboot.h F: kernel/cpu.c F: kernel/smpboot.* +F: rust/helper/cpu.c F: rust/kernel/cpu.rs CPU IDLE TIME MANAGEMENT FRAMEWORK @@ -6917,7 +6918,7 @@ F: include/dt-bindings/pmu/exynos_ppmu.h F: include/linux/devfreq-event.h DEVICE RESOURCE MANAGEMENT HELPERS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> R: Matti Vaittinen <mazziesaccount@gmail.com> S: Maintained F: include/linux/devm-helpers.h @@ -7516,7 +7517,7 @@ F: drivers/gpu/drm/gud/ F: include/drm/gud.h DRM DRIVER FOR GRAIN MEDIA GM12U320 PROJECTORS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/tiny/gm12u320.c @@ -7916,7 +7917,7 @@ F: drivers/gpu/drm/ci/xfails/vkms* F: drivers/gpu/drm/vkms/ DRM DRIVER FOR VIRTUALBOX VIRTUAL GPU -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git 
@@ -8317,7 +8318,7 @@ F: drivers/gpu/drm/panel/ F: include/drm/drm_panel.h DRM PRIVACY-SCREEN CLASS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git @@ -9940,7 +9941,6 @@ F: drivers/fwctl/mlx5/ FWCTL PDS DRIVER M: Brett Creeley <brett.creeley@amd.com> -R: Shannon Nelson <shannon.nelson@amd.com> L: linux-kernel@vger.kernel.org S: Maintained F: drivers/fwctl/pds/ @@ -10221,7 +10221,7 @@ S: Maintained F: Documentation/devicetree/bindings/connector/gocontroll,moduline-module-slot.yaml GOODIX TOUCHSCREEN -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained F: drivers/input/touchscreen/goodix* @@ -10260,7 +10260,7 @@ F: include/dt-bindings/clock/google,gs101.h K: [gG]oogle.?[tT]ensor GPD POCKET FAN DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/gpd-pocket-fan.c @@ -10839,7 +10839,7 @@ S: Maintained F: drivers/dma/hisi_dma.c HISILICON GPIO DRIVER -M: Jay Fang <f.fangjian@huawei.com> +M: Yang Shen <shenyang39@huawei.com> L: linux-gpio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml @@ -11155,7 +11155,8 @@ F: include/linux/platform_data/huawei-gaokun-ec.h HUGETLB SUBSYSTEM M: Muchun Song <muchun.song@linux.dev> -R: Oscar Salvador <osalvador@suse.de> +M: Oscar Salvador <osalvador@suse.de> +R: David Hildenbrand <david@redhat.com> L: linux-mm@kvack.org S: Maintained F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages @@ -11166,6 +11167,7 @@ F: fs/hugetlbfs/ F: include/linux/hugetlb.h F: include/trace/events/hugetlbfs.h F: mm/hugetlb.c +F: mm/hugetlb_cgroup.c F: mm/hugetlb_cma.c F: mm/hugetlb_cma.h F: mm/hugetlb_vmemmap.c @@ -11421,7 +11423,7 @@ F: drivers/i2c/busses/i2c-via.c F: drivers/i2c/busses/i2c-viapro.c I2C/SMBUS INTEL CHT WHISKEY COVE PMIC DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-cht-wc.c @@ -12011,13 +12013,13 @@ S: Supported F: sound/soc/intel/ INTEL ATOMISP2 DUMMY / POWER-MANAGEMENT DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/intel/atomisp2/pm.c INTEL ATOMISP2 LED DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/intel/atomisp2/led.c @@ -13345,6 +13347,7 @@ M: Alexander Graf <graf@amazon.com> M: Mike Rapoport <rppt@kernel.org> M: Changyuan Lyu <changyuanl@google.com> L: kexec@lists.infradead.org +L: linux-mm@kvack.org S: Maintained F: Documentation/admin-guide/mm/kho.rst F: Documentation/core-api/kho/* @@ -13678,7 +13681,7 @@ S: Maintained F: drivers/platform/x86/lenovo-wmi-hotkey-utilities.c LETSKETCH HID TABLET DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git @@ -13728,7 +13731,7 @@ F: drivers/ata/sata_gemini.c F: drivers/ata/sata_gemini.h LIBATA SATA AHCI PLATFORM devices support -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-ide@vger.kernel.org S: Maintained F: drivers/ata/ahci_platform.c @@ -13798,7 
+13801,7 @@ M: Oliver O'Halloran <oohall@gmail.com> L: nvdimm@lists.linux.dev S: Supported Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ -F: Documentation/devicetree/bindings/pmem/pmem-region.txt +F: Documentation/devicetree/bindings/pmem/pmem-region.yaml F: drivers/nvdimm/of_pmem.c LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM @@ -14098,7 +14101,7 @@ F: Documentation/admin-guide/ldm.rst F: block/partitions/ldm.* LOGITECH HID GAMING KEYBOARDS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git @@ -14780,7 +14783,7 @@ F: Documentation/devicetree/bindings/power/supply/maxim,max17040.yaml F: drivers/power/supply/max17040_battery.c MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS -R: Hans de Goede <hdegoede@redhat.com> +R: Hans de Goede <hansg@kernel.org> R: Krzysztof Kozlowski <krzk@kernel.org> R: Marek Szyprowski <m.szyprowski@samsung.com> R: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm> @@ -15582,7 +15585,7 @@ Q: https://patchwork.kernel.org/project/netdevbpf/list/ F: drivers/net/ethernet/mellanox/mlxfw/ MELLANOX HARDWARE PLATFORM SUPPORT -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> M: Vadim Pasternak <vadimp@nvidia.com> L: platform-driver-x86@vger.kernel.org @@ -15676,8 +15679,11 @@ S: Maintained F: Documentation/core-api/boot-time-mm.rst F: Documentation/core-api/kho/bindings/memblock/* F: include/linux/memblock.h +F: mm/bootmem_info.c F: mm/memblock.c +F: mm/memtest.c F: mm/mm_init.c +F: mm/rodata_test.c F: tools/testing/memblock/ MEMORY ALLOCATION PROFILING @@ -15732,7 +15738,6 @@ F: Documentation/admin-guide/mm/ F: Documentation/mm/ F: include/linux/gfp.h F: include/linux/gfp_types.h -F: include/linux/memfd.h F: include/linux/memory_hotplug.h F: include/linux/memory-tiers.h F: include/linux/mempolicy.h @@ -15792,6 +15797,10 @@ S: Maintained W: http://www.linux-mm.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm F: mm/gup.c +F: mm/gup_test.c +F: mm/gup_test.h +F: tools/testing/selftests/mm/gup_longterm.c +F: tools/testing/selftests/mm/gup_test.c MEMORY MANAGEMENT - KSM (Kernel Samepage Merging) M: Andrew Morton <akpm@linux-foundation.org> @@ -15868,6 +15877,7 @@ L: linux-mm@kvack.org S: Maintained F: mm/pt_reclaim.c F: mm/vmscan.c +F: mm/workingset.c MEMORY MANAGEMENT - RMAP (REVERSE MAPPING) M: Andrew Morton <akpm@linux-foundation.org> @@ -15880,6 +15890,7 @@ R: Harry Yoo <harry.yoo@oracle.com> L: linux-mm@kvack.org S: Maintained F: include/linux/rmap.h +F: mm/page_vma_mapped.c F: mm/rmap.c MEMORY MANAGEMENT - SECRETMEM @@ -15919,6 +15930,7 @@ R: Liam R. 
Howlett <Liam.Howlett@oracle.com> R: Nico Pache <npache@redhat.com> R: Ryan Roberts <ryan.roberts@arm.com> R: Dev Jain <dev.jain@arm.com> +R: Barry Song <baohua@kernel.org> L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org @@ -15971,11 +15983,14 @@ S: Maintained W: http://www.linux-mm.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm F: include/trace/events/mmap.h +F: mm/mincore.c F: mm/mlock.c F: mm/mmap.c F: mm/mprotect.c F: mm/mremap.c F: mm/mseal.c +F: mm/msync.c +F: mm/nommu.c F: mm/vma.c F: mm/vma.h F: mm/vma_exec.c @@ -16538,7 +16553,7 @@ S: Maintained F: drivers/platform/surface/surface_gpe.c MICROSOFT SURFACE HARDWARE PLATFORM SUPPORT -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> M: Maximilian Luz <luzmaximilian@gmail.com> L: platform-driver-x86@vger.kernel.org @@ -17493,7 +17508,7 @@ F: tools/testing/selftests/net/srv6* NETWORKING [TCP] M: Eric Dumazet <edumazet@google.com> M: Neal Cardwell <ncardwell@google.com> -R: Kuniyuki Iwashima <kuniyu@amazon.com> +R: Kuniyuki Iwashima <kuniyu@google.com> L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/net_cachelines/tcp_sock.rst @@ -17523,7 +17538,7 @@ F: net/tls/* NETWORKING [SOCKETS] M: Eric Dumazet <edumazet@google.com> -M: Kuniyuki Iwashima <kuniyu@amazon.com> +M: Kuniyuki Iwashima <kuniyu@google.com> M: Paolo Abeni <pabeni@redhat.com> M: Willem de Bruijn <willemb@google.com> S: Maintained @@ -17538,7 +17553,7 @@ F: net/core/scm.c F: net/socket.c NETWORKING [UNIX SOCKETS] -M: Kuniyuki Iwashima <kuniyu@amazon.com> +M: Kuniyuki Iwashima <kuniyu@google.com> S: Maintained F: include/net/af_unix.h F: include/net/netns/unix.h @@ -17706,7 +17721,7 @@ F: tools/include/nolibc/ F: tools/testing/selftests/nolibc/ NOVATEK NVT-TS I2C TOUCHSCREEN DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/input/touchscreen/novatek,nvt-ts.yaml @@ -19376,7 +19391,7 @@ F: crypto/pcrypt.c F: include/crypto/pcrypt.h PDS DSC VIRTIO DATA PATH ACCELERATOR -R: Shannon Nelson <shannon.nelson@amd.com> +R: Brett Creeley <brett.creeley@amd.com> F: drivers/vdpa/pds/ PECI HARDWARE MONITORING DRIVERS @@ -19398,7 +19413,6 @@ F: include/linux/peci-cpu.h F: include/linux/peci.h PENSANDO ETHERNET DRIVERS -M: Shannon Nelson <shannon.nelson@amd.com> M: Brett Creeley <brett.creeley@amd.com> L: netdev@vger.kernel.org S: Maintained @@ -22171,7 +22185,7 @@ R: Tejun Heo <tj@kernel.org> R: David Vernet <void@manifault.com> R: Andrea Righi <arighi@nvidia.com> R: Changwoo Min <changwoo@igalia.com> -L: linux-kernel@vger.kernel.org +L: sched-ext@lists.linux.dev S: Maintained W: https://github.com/sched-ext/scx T: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git @@ -22708,7 +22722,7 @@ K: fu[57]40 K: [^@]sifive SILEAD TOUCHSCREEN DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org L: platform-driver-x86@vger.kernel.org S: Maintained @@ -22741,7 +22755,7 @@ F: Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml F: drivers/i3c/master/svc-i3c-master.c SIMPLEFB FB DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-fbdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/display/simple-framebuffer.yaml @@ -22870,7 +22884,7 @@ F: Documentation/hwmon/emc2103.rst F: drivers/hwmon/emc2103.c SMSC SCH5627 HARDWARE 
MONITOR DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-hwmon@vger.kernel.org S: Supported F: Documentation/hwmon/sch5627.rst @@ -23525,7 +23539,7 @@ S: Supported F: Documentation/process/stable-kernel-rules.rst STAGING - ATOMISP DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> M: Mauro Carvalho Chehab <mchehab@kernel.org> R: Sakari Ailus <sakari.ailus@linux.intel.com> L: linux-media@vger.kernel.org @@ -23661,7 +23675,6 @@ F: include/dt-bindings/clock/starfive?jh71*.h STARFIVE JH71X0 PINCTRL DRIVERS M: Emil Renner Berthing <kernel@esmil.dk> -M: Jianlong Huang <jianlong.huang@starfivetech.com> M: Hal Feng <hal.feng@starfivetech.com> L: linux-gpio@vger.kernel.org S: Maintained @@ -23822,7 +23835,7 @@ F: arch/m68k/sun3*/ F: drivers/net/ethernet/i825xx/sun3* SUN4I LOW RES ADC ATTACHED TABLET KEYS DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml @@ -25028,8 +25041,11 @@ M: Hugh Dickins <hughd@google.com> R: Baolin Wang <baolin.wang@linux.alibaba.com> L: linux-mm@kvack.org S: Maintained +F: include/linux/memfd.h F: include/linux/shmem_fs.h +F: mm/memfd.c F: mm/shmem.c +F: mm/shmem_quota.c TOMOYO SECURITY MODULE M: Kentaro Takeda <takedakn@nttdata.co.jp> @@ -25590,7 +25606,7 @@ F: Documentation/hid/hiddev.rst F: drivers/hid/usbhid/ USB INTEL XHCI ROLE MUX DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-usb@vger.kernel.org S: Maintained F: drivers/usb/roles/intel-xhci-usb-role-switch.c @@ -25781,7 +25797,7 @@ F: Documentation/firmware-guide/acpi/intel-pmc-mux.rst F: drivers/usb/typec/mux/intel_pmc_mux.c USB TYPEC PI3USB30532 MUX DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-usb@vger.kernel.org S: Maintained F: drivers/usb/typec/mux/pi3usb30532.c @@ -25810,7 +25826,7 @@ F: drivers/usb/host/uhci* USB VIDEO CLASS M: Laurent Pinchart <laurent.pinchart@ideasonboard.com> -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-media@vger.kernel.org S: Maintained W: http://www.ideasonboard.org/uvc/ @@ -26341,7 +26357,7 @@ F: include/uapi/linux/virtio_snd.h F: sound/virtio/* VIRTUAL BOX GUEST DEVICE DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> M: Arnd Bergmann <arnd@arndb.de> M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> S: Maintained @@ -26350,7 +26366,7 @@ F: include/linux/vbox_utils.h F: include/uapi/linux/vbox*.h VIRTUAL BOX SHARED FOLDER VFS DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-fsdevel@vger.kernel.org S: Maintained F: fs/vboxsf/* @@ -26604,7 +26620,7 @@ F: drivers/mmc/host/wbsd.* WACOM PROTOCOL 4 SERIAL TABLETS M: Julian Squires <julian@cipht.net> -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained F: drivers/input/tablet/wacom_serial4.c @@ -26771,7 +26787,7 @@ F: include/linux/wwan.h F: include/uapi/linux/wwan.h X-POWERS AXP288 PMIC DRIVERS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> S: Maintained F: drivers/acpi/pmic/intel_pmic_xpower.c N: axp288 @@ -26863,14 +26879,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm F: arch/x86/mm/ X86 PLATFORM ANDROID TABLETS DSDT FIXUP DRIVER -M: Hans de Goede <hdegoede@redhat.com> 
+M: Hans de Goede <hansg@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git F: drivers/platform/x86/x86-android-tablets/ X86 PLATFORM DRIVERS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> L: platform-driver-x86@vger.kernel.org S: Maintained @@ -26967,6 +26983,7 @@ M: David S. Miller <davem@davemloft.net> M: Jakub Kicinski <kuba@kernel.org> M: Jesper Dangaard Brouer <hawk@kernel.org> M: John Fastabend <john.fastabend@gmail.com> +R: Stanislav Fomichev <sdf@fomichev.me> L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Supported @@ -26988,6 +27005,7 @@ M: Björn Töpel <bjorn@kernel.org> M: Magnus Karlsson <magnus.karlsson@intel.com> M: Maciej Fijalkowski <maciej.fijalkowski@intel.com> R: Jonathan Lemon <jonathan.lemon@gmail.com> +R: Stanislav Fomichev <sdf@fomichev.me> L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Maintained @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -1832,12 +1832,9 @@ rustfmtcheck: rustfmt # Misc # --------------------------------------------------------------------------- -# Run misc checks when ${KBUILD_EXTRA_WARN} contains 1 PHONY += misc-check -ifneq ($(findstring 1,$(KBUILD_EXTRA_WARN)),) misc-check: $(Q)$(srctree)/scripts/misc-check -endif all: misc-check diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 2676017f42f1..44e7aedac6e8 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -327,7 +327,7 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 005d9e4d187a..a31bbf5c8bbc 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -144,7 +144,7 @@ #define ARC_AUX_AGU_MOD2 0x5E2 #define ARC_AUX_AGU_MOD3 0x5E3 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <soc/arc/arc_aux.h> diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 592d7fffc223..e615c42b93ba 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_ATOMIC_H #define _ASM_ARC_ATOMIC_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/compiler.h> @@ -31,6 +31,6 @@ #include <asm/atomic64-arcv2.h> #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h index 9b5791b85471..73080a664369 100644 --- a/arch/arc/include/asm/atomic64-arcv2.h +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -137,12 +137,9 @@ ATOMIC64_OPS(xor, xor, xor) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -static inline s64 -arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) +static inline u64 __arch_cmpxchg64_relaxed(volatile void *ptr, u64 old, u64 new) { - s64 prev; - - smp_mb(); + u64 prev; __asm__ __volatile__( "1: llockd %0, [%1] \n" @@ -152,14 +149,12 @@ arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) " bnz 1b \n" "2: \n" : "=&r"(prev) - : "r"(ptr), 
"ir"(expected), "r"(new) - : "cc"); /* memory clobber comes from smp_mb() */ - - smp_mb(); + : "r"(ptr), "ir"(old), "r"(new) + : "memory", "cc"); return prev; } -#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg +#define arch_cmpxchg64_relaxed __arch_cmpxchg64_relaxed static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) { diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index f5a936496f06..5340c2871392 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -10,7 +10,7 @@ #error only <linux/bitops.h> can be included directly #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/compiler.h> @@ -192,6 +192,6 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x) #include <asm-generic/bitops/le.h> #include <asm-generic/bitops/ext2-atomic-setbit.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h index 4c453ba96c51..171c16021f70 100644 --- a/arch/arc/include/asm/bug.h +++ b/arch/arc/include/asm/bug.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_BUG_H #define _ASM_ARC_BUG_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/ptrace.h> @@ -29,6 +29,6 @@ void die(const char *str, struct pt_regs *regs, unsigned long address); #include <asm-generic/bug.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index f0f1fc5d62b6..040a97f4dd82 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -23,7 +23,7 @@ */ #define ARC_UNCACHED_ADDR_SPACE 0xc0000000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/build_bug.h> @@ -65,7 +65,7 @@ extern int ioc_enable; extern unsigned long perip_base, perip_end; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* Instruction cache related Auxiliary registers */ #define ARC_REG_IC_BCR 0x77 /* Build Config reg */ diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h index 06be89f6f2f0..03ffd005f3fa 100644 --- a/arch/arc/include/asm/current.h +++ b/arch/arc/include/asm/current.h @@ -9,7 +9,7 @@ #ifndef _ASM_ARC_CURRENT_H #define _ASM_ARC_CURRENT_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_ARC_CURR_IN_REG @@ -20,6 +20,6 @@ register struct task_struct *curr_arc asm("gp"); #include <asm-generic/current.h> #endif /* ! CONFIG_ARC_CURR_IN_REG */ -#endif /* ! __ASSEMBLY__ */ +#endif /* ! 
__ASSEMBLER__ */ #endif /* _ASM_ARC_CURRENT_H */ diff --git a/arch/arc/include/asm/dsp-impl.h b/arch/arc/include/asm/dsp-impl.h index cd5636dfeb6f..fd5fdaad90c1 100644 --- a/arch/arc/include/asm/dsp-impl.h +++ b/arch/arc/include/asm/dsp-impl.h @@ -11,7 +11,7 @@ #define DSP_CTRL_DISABLED_ALL 0 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* clobbers r5 register */ .macro DSP_EARLY_INIT diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h index f496dbc4640b..eeaaf4e4eabd 100644 --- a/arch/arc/include/asm/dsp.h +++ b/arch/arc/include/asm/dsp.h @@ -7,7 +7,7 @@ #ifndef __ASM_ARC_DSP_H #define __ASM_ARC_DSP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * DSP-related saved registers - need to be saved only when you are @@ -24,6 +24,6 @@ struct dsp_callee_regs { #endif }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_ARC_DSP_H */ diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h index a0d5ebe1bc3f..1524c5cf8b59 100644 --- a/arch/arc/include/asm/dwarf.h +++ b/arch/arc/include/asm/dwarf.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_DWARF_H #define _ASM_ARC_DWARF_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef ARC_DW2_UNWIND_AS_CFI @@ -38,6 +38,6 @@ #endif /* !ARC_DW2_UNWIND_AS_CFI */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_ARC_DWARF_H */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 38c35722cebf..f453af251a1a 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -13,7 +13,7 @@ #include <asm/processor.h> /* For VMALLOC_START */ #include <asm/mmu.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef CONFIG_ISA_ARCOMPACT #include <asm/entry-compact.h> /* ISA specific bits */ @@ -146,7 +146,7 @@ #endif /* CONFIG_ARC_CURR_IN_REG */ -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ extern void do_signal(struct pt_regs *); extern void do_notify_resume(struct pt_regs *); diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h index fb3c21f1a238..30aea562f8aa 100644 --- a/arch/arc/include/asm/irqflags-arcv2.h +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -50,7 +50,7 @@ #define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | __AD_ENB | \ (ARCV2_IRQ_DEF_PRIO << 1)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Save IRQ state and disable IRQs @@ -170,6 +170,6 @@ static inline void arc_softirq_clear(int irq) seti .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index 936a2f21f315..85c2f6bcde0c 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -40,7 +40,7 @@ #define ISA_INIT_STATUS_BITS STATUS_IE_MASK -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /****************************************************************** * IRQ Control Macros @@ -196,6 +196,6 @@ static inline int arch_irqs_disabled(void) flag \scratch .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h index a339223d9e05..66ead75784d9 100644 --- a/arch/arc/include/asm/jump_label.h +++ b/arch/arc/include/asm/jump_label.h @@ -2,7 +2,7 @@ #ifndef _ASM_ARC_JUMP_LABEL_H #define _ASM_ARC_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/stringify.h> #include <linux/types.h> @@ -68,5 +68,5 @@ struct jump_entry { jump_label_t key; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff 
--git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index 8a3fb71e9cfa..ba3cb65b5eaa 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -12,7 +12,7 @@ #define __ALIGN .align 4 #define __ALIGN_STR __stringify(__ALIGN) -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro ST2 e, o, off #ifdef CONFIG_ARC_HAS_LL64 @@ -61,7 +61,7 @@ CFI_ENDPROC ASM_NL \ .size name, .-name -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #ifdef CONFIG_ARC_HAS_ICCM #define __arcfp_code __section(".text.arcfp") @@ -75,6 +75,6 @@ #define __arcfp_data __section(".data") #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h index 41412642f279..5e5482026ac9 100644 --- a/arch/arc/include/asm/mmu-arcv2.h +++ b/arch/arc/include/asm/mmu-arcv2.h @@ -69,7 +69,7 @@ #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct mm_struct; extern int pae40_exist_but_not_enab(void); @@ -100,6 +100,6 @@ static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd) sr \reg, [ARC_REG_PID] .endm -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 4ae2db59d494..e3b35ceab582 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_MMU_H #define _ASM_ARC_MMU_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/threads.h> /* NR_CPUS */ diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index def0dfb95b43..9720fe6b2c24 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -19,7 +19,7 @@ #endif /* CONFIG_ARC_HAS_PAE40 */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) @@ -136,6 +136,6 @@ static inline unsigned long virt_to_pfn(const void *kaddr) #include <asm-generic/memory_model.h> /* page_to_pfn, pfn_to_page */ #include <asm-generic/getorder.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h index 8ebec1b21d24..4630c5acca05 100644 --- a/arch/arc/include/asm/pgtable-bits-arcv2.h +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -75,7 +75,7 @@ * This is to enable COW mechanism */ /* xwr */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) #define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) @@ -130,7 +130,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } @@ -142,6 +142,6 @@ PTE_BIT_FUNC(swp_clear_exclusive, &= ~(_PAGE_SWP_EXCLUSIVE)); #include <asm/hugepage.h> #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h index d1ce4b0f1071..c8f9273372c0 100644 --- a/arch/arc/include/asm/pgtable-levels.h +++ b/arch/arc/include/asm/pgtable-levels.h @@ -85,7 +85,7 @@ #define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #if CONFIG_PGTABLE_LEVELS > 3 
#include <asm-generic/pgtable-nop4d.h> @@ -181,6 +181,6 @@ #define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 4cf45a99fd79..bd580e2b62d7 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -19,7 +19,7 @@ */ #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern char empty_zero_page[PAGE_SIZE]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) @@ -29,6 +29,6 @@ extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); /* to cope with aliasing VIPT cache */ #define HAVE_ARCH_UNMAPPED_AREA -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index d606658e2fe7..7f7901ac6643 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -11,7 +11,7 @@ #ifndef __ASM_ARC_PROCESSOR_H #define __ASM_ARC_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/ptrace.h> #include <asm/dsp.h> @@ -66,7 +66,7 @@ extern void start_thread(struct pt_regs * regs, unsigned long pc, extern unsigned int __get_wchan(struct task_struct *p); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Default System Memory Map on ARC diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index cf79df0b2570..f6c052af8f4d 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -10,7 +10,7 @@ #include <uapi/asm/ptrace.h> #include <linux/compiler.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union { struct { @@ -172,6 +172,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, extern int syscall_trace_enter(struct pt_regs *); extern void syscall_trace_exit(struct pt_regs *); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_PTRACE_H */ diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h index 1f85de8288b1..5806106a65f9 100644 --- a/arch/arc/include/asm/switch_to.h +++ b/arch/arc/include/asm/switch_to.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_SWITCH_TO_H #define _ASM_ARC_SWITCH_TO_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/sched.h> #include <asm/dsp-impl.h> diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 12daaf3a61ea..255d2c774219 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -24,7 +24,7 @@ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define THREAD_SHIFT (PAGE_SHIFT << THREAD_SIZE_ORDER) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/thread_info.h> @@ -62,7 +62,7 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) return (struct thread_info *)(sp & ~(THREAD_SIZE - 1)); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * thread information flags diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h index 2a6eff57f6dd..3ae832db278c 100644 --- a/arch/arc/include/uapi/asm/ptrace.h +++ b/arch/arc/include/uapi/asm/ptrace.h @@ -14,7 +14,7 @@ #define PTRACE_GET_THREAD_AREA 25 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Userspace ABI: Register state needed by * -ptrace (gdbserver) @@ -53,6 +53,6 @@ struct user_regs_arcv2 { unsigned long r30, r58, r59; }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _UAPI__ASM_ARC_PTRACE_H */ 
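The ARC header hunks above all follow one pattern: the kernel's hand-rolled __ASSEMBLY__ guard becomes __ASSEMBLER__, a macro GCC and Clang predefine whenever they preprocess assembly, so the guard no longer depends on the build system injecting -D__ASSEMBLY__ into AFLAGS. A minimal sketch of the pattern (hypothetical header and names, not taken from the patch):

/* example.h -- hypothetical header, illustrating only the guard */
#ifndef _ASM_EXAMPLE_H
#define _ASM_EXAMPLE_H

#define EXAMPLE_CTRL_REG	0x5e0	/* visible to both C and .S files */

#ifndef __ASSEMBLER__			/* C-only side */

#include <linux/types.h>

static inline u32 example_ctrl_default(void)
{
	return EXAMPLE_CTRL_REG;
}

#else					/* assembler-only side */

.macro	EXAMPLE_EARLY_INIT
	nop
.endm

#endif /* __ASSEMBLER__ */
#endif /* _ASM_EXAMPLE_H */

Plain constants above the conditional stay visible to assembly sources; C declarations and assembler macros are each confined to their own side of the guard, exactly as in the dsp-impl.h and entry.h hunks above.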
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index d8969dab12d4..789cfb9ea14e 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -241,15 +241,6 @@ static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) return (e1->start > e2->start) - (e1->start < e2->start); } -static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) -{ - struct eh_frame_hdr_table_entry *e1 = p1; - struct eh_frame_hdr_table_entry *e2 = p2; - - swap(e1->start, e2->start); - swap(e1->fde, e2->fde); -} - static void init_unwind_hdr(struct unwind_table *table, void *(*alloc) (unsigned long)) { @@ -345,7 +336,7 @@ static void init_unwind_hdr(struct unwind_table *table, sort(header->table, n, sizeof(*header->table), - cmp_eh_frame_hdr_table_entries, swap_eh_frame_hdr_table_entries); + cmp_eh_frame_hdr_table_entries, NULL); table->hdrsz = hdrSize; smp_wmb(); diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 7f1c3b4e3e04..86378eec7757 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -301,7 +301,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(swp) __pte((swp).val) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_isset(pte, L_PTE_SWP_EXCLUSIVE); } diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index bd020fc28aa9..0720898f563e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -561,68 +561,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) vcpu_set_flag((v), e); \ } while (0) -#define __build_check_all_or_none(r, bits) \ - BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits)) - -#define __cpacr_to_cptr_clr(clr, set) \ - ({ \ - u64 cptr = 0; \ - \ - if ((set) & CPACR_EL1_FPEN) \ - cptr |= CPTR_EL2_TFP; \ - if ((set) & CPACR_EL1_ZEN) \ - cptr |= CPTR_EL2_TZ; \ - if ((set) & CPACR_EL1_SMEN) \ - cptr |= CPTR_EL2_TSM; \ - if ((clr) & CPACR_EL1_TTA) \ - cptr |= CPTR_EL2_TTA; \ - if ((clr) & CPTR_EL2_TAM) \ - cptr |= CPTR_EL2_TAM; \ - if ((clr) & CPTR_EL2_TCPAC) \ - cptr |= CPTR_EL2_TCPAC; \ - \ - cptr; \ - }) - -#define __cpacr_to_cptr_set(clr, set) \ - ({ \ - u64 cptr = 0; \ - \ - if ((clr) & CPACR_EL1_FPEN) \ - cptr |= CPTR_EL2_TFP; \ - if ((clr) & CPACR_EL1_ZEN) \ - cptr |= CPTR_EL2_TZ; \ - if ((clr) & CPACR_EL1_SMEN) \ - cptr |= CPTR_EL2_TSM; \ - if ((set) & CPACR_EL1_TTA) \ - cptr |= CPTR_EL2_TTA; \ - if ((set) & CPTR_EL2_TAM) \ - cptr |= CPTR_EL2_TAM; \ - if ((set) & CPTR_EL2_TCPAC) \ - cptr |= CPTR_EL2_TCPAC; \ - \ - cptr; \ - }) - -#define cpacr_clear_set(clr, set) \ - do { \ - BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \ - BUILD_BUG_ON((clr) & CPACR_EL1_E0POE); \ - __build_check_all_or_none((clr), CPACR_EL1_FPEN); \ - __build_check_all_or_none((set), CPACR_EL1_FPEN); \ - __build_check_all_or_none((clr), CPACR_EL1_ZEN); \ - __build_check_all_or_none((set), CPACR_EL1_ZEN); \ - __build_check_all_or_none((clr), CPACR_EL1_SMEN); \ - __build_check_all_or_none((set), CPACR_EL1_SMEN); \ - \ - if (has_vhe() || has_hvhe()) \ - sysreg_clear_set(cpacr_el1, clr, set); \ - else \ - sysreg_clear_set(cptr_el2, \ - __cpacr_to_cptr_clr(clr, set), \ - __cpacr_to_cptr_set(clr, set));\ - } while (0) - /* * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE * format if E2H isn't set. 
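The next file, kvm_host.h, is the heart of the arm64 KVM churn in this series: __vcpu_sys_reg() used to return a pointer and, for nested-virt vCPUs, wrote the sanitised value back into the context as a side effect of every read, while writes through the returned lvalue bypassed sanitisation entirely until the next read. The replacement splits the API into a pure read plus explicit __vcpu_assign_sys_reg()/__vcpu_rmw_sys_reg() writers that sanitise at write time, which is why the remainder of the diff mechanically converts plain assignments, |= and &= sites. A self-contained toy model of the new shape (simplified types and stand-in mask logic, not the kernel code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vcpu {
	uint64_t regs[4];
	bool has_nv;	/* stands in for vcpu_has_nv() plus the sanitised-range check */
};

/* Stand-in for kvm_vcpu_apply_reg_masks(): clear RES0 bits, here the top byte. */
static uint64_t apply_masks(const struct vcpu *v, int r, uint64_t val)
{
	(void)v; (void)r;
	return val & 0x00ffffffffffffffULL;
}

/* Pure read: the caller sees a sanitised value, but nothing is written back. */
static uint64_t vcpu_reg_read(const struct vcpu *v, int r)
{
	uint64_t val = v->regs[r];

	if (v->has_nv)
		val = apply_masks(v, r, val);
	return val;
}

/* Every write funnels through one of two macros, sanitising at write time. */
#define vcpu_reg_assign(v, r, val)				\
	do {							\
		uint64_t __v = (val);				\
		if ((v)->has_nv)				\
			__v = apply_masks((v), (r), __v);	\
		(v)->regs[(r)] = __v;				\
	} while (0)

#define vcpu_reg_rmw(v, r, op, val)				\
	do {							\
		uint64_t __v = (v)->regs[(r)];			\
		__v op (val);					\
		if ((v)->has_nv)				\
			__v = apply_masks((v), (r), __v);	\
		(v)->regs[(r)] = __v;				\
	} while (0)

int main(void)
{
	struct vcpu v = { .has_nv = true };

	vcpu_reg_assign(&v, 0, 0xff00000000000001ULL);	/* top byte masked on write */
	vcpu_reg_rmw(&v, 0, |=, 0x2);			/* explicit read-modify-write */
	printf("%#llx\n", (unsigned long long)vcpu_reg_read(&v, 0));
	return 0;
}

As in the patch, a no-op read-modify-write such as vcpu_reg_rmw(&v, r, |=, 0) forces sanitisation of a stale in-memory value, which is how the old (void)__vcpu_sys_reg(vcpu, sr) call sites in nested.c and sys_regs.c are converted below.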
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 6ce2c5173482..d27079968341 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1107,14 +1107,36 @@ static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); + +#define __vcpu_assign_sys_reg(v, r, val) \ + do { \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 __v = (val); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + \ + ctxt_sys_reg(ctxt, (r)) = __v; \ + } while (0) + +#define __vcpu_rmw_sys_reg(v, r, op, val) \ + do { \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 __v = ctxt_sys_reg(ctxt, (r)); \ + __v op (val); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + \ + ctxt_sys_reg(ctxt, (r)) = __v; \ + } while (0) + #define __vcpu_sys_reg(v,r) \ - (*({ \ + ({ \ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ - u64 *__r = __ctxt_sys_reg(ctxt, (r)); \ + u64 __v = ctxt_sys_reg(ctxt, (r)); \ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ - *__r = kvm_vcpu_apply_reg_masks((v), (r), *__r);\ - __r; \ - })) + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + __v; \ + }) u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); @@ -1267,9 +1289,8 @@ void kvm_arm_resume_guest(struct kvm *kvm); }) /* - * The couple of isb() below are there to guarantee the same behaviour - * on VHE as on !VHE, where the eret to EL1 acts as a context - * synchronization event. + * The isb() below is there to guarantee the same behaviour on VHE as on !VHE, + * where the eret to EL1 acts as a context synchronization event. */ #define kvm_call_hyp(f, ...) 
\ do { \ @@ -1287,7 +1308,6 @@ void kvm_arm_resume_guest(struct kvm *kvm); \ if (has_vhe()) { \ ret = f(__VA_ARGS__); \ - isb(); \ } else { \ ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \ } \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 88db8a0c0b37..192d86e1cc76 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -563,7 +563,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & PTE_SWP_EXCLUSIVE; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a5ca15daeb8a..5954cec19660 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -288,7 +288,9 @@ static void flush_gcs(void) if (!system_supports_gcs()) return; - gcs_free(current); + current->thread.gcspr_el0 = 0; + current->thread.gcs_base = 0; + current->thread.gcs_size = 0; current->thread.gcs_el0_mode = 0; write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1); write_sysreg_s(0, SYS_GCSPR_EL0); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index a360e52db02f..ee94b72bf8fb 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -141,7 +141,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) addr += n; if (regs_within_kernel_stack(regs, (unsigned long)addr)) - return *addr; + return READ_ONCE_NOCHECK(*addr); else return 0; } diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index fdbc8beec930..701ea10a63f1 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -108,16 +108,16 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: - __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl); break; case TIMER_PTIMER: - __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl); break; case TIMER_HVTIMER: - __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl); break; case TIMER_HPTIMER: - __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl); break; default: WARN_ON(1); @@ -130,16 +130,16 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: - __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval; + __vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval); break; case TIMER_PTIMER: - __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval; + __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval); break; case TIMER_HVTIMER: - __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval; + __vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval); break; case TIMER_HPTIMER: - __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval; + __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval); break; default: WARN_ON(1); @@ -1036,7 +1036,7 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) if (vcpu_has_nv(vcpu)) { struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset; - offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); + offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2); offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset; } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index de2b4e9c9f9f..38a91bb5d4c7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2764,7 +2764,8 @@ void 
kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, struct kvm_kernel_irq_routing_entry *new) { - if (new->type != KVM_IRQ_ROUTING_MSI) + if (old->type != KVM_IRQ_ROUTING_MSI || + new->type != KVM_IRQ_ROUTING_MSI) return true; return memcmp(&old->msi, &new->msi, sizeof(new->msi)); diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 0e4c805e7e89..1a7dab333f55 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -216,9 +216,9 @@ void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu) void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val) { if (val & OSLAR_EL1_OSLK) - __vcpu_sys_reg(vcpu, OSLSR_EL1) |= OSLSR_EL1_OSLK; + __vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, |=, OSLSR_EL1_OSLK); else - __vcpu_sys_reg(vcpu, OSLSR_EL1) &= ~OSLSR_EL1_OSLK; + __vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, &=, ~OSLSR_EL1_OSLK); preempt_disable(); kvm_arch_vcpu_put(vcpu); diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 7f6e43d25691..8f6c8f57c6b9 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -103,8 +103,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fp_state.sve_state = vcpu->arch.sve_state; fp_state.sve_vl = vcpu->arch.sve_max_vl; fp_state.sme_state = NULL; - fp_state.svcr = &__vcpu_sys_reg(vcpu, SVCR); - fp_state.fpmr = &__vcpu_sys_reg(vcpu, FPMR); + fp_state.svcr = __ctxt_sys_reg(&vcpu->arch.ctxt, SVCR); + fp_state.fpmr = __ctxt_sys_reg(&vcpu->arch.ctxt, FPMR); fp_state.fp_type = &vcpu->arch.fp_type; if (vcpu_has_sve(vcpu)) diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 424a5107cddb..6a2a899a344e 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -37,7 +37,7 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) if (unlikely(vcpu_has_nv(vcpu))) vcpu_write_sys_reg(vcpu, val, reg); else if (!__vcpu_write_sys_reg_to_cpu(val, reg)) - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); } static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, @@ -51,7 +51,7 @@ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, } else if (has_vhe()) { write_sysreg_el1(val, SYS_SPSR); } else { - __vcpu_sys_reg(vcpu, SPSR_EL1) = val; + __vcpu_assign_sys_reg(vcpu, SPSR_EL1, val); } } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index bb9f2eecfb67..2ad57b117385 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -45,7 +45,7 @@ static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) if (!vcpu_el1_is_32bit(vcpu)) return; - __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2); + __vcpu_assign_sys_reg(vcpu, FPEXC32_EL2, read_sysreg(fpexc32_el2)); } static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) @@ -65,6 +65,136 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) } } +static inline void __activate_cptr_traps_nvhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPTR_NVHE_EL2_RES1 | CPTR_EL2_TAM | CPTR_EL2_TTA; + + /* + * Always trap SME since it's not supported in KVM. + * TSM is RES1 if SME isn't implemented. 
+ */ + val |= CPTR_EL2_TSM; + + if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) + val |= CPTR_EL2_TZ; + + if (!guest_owns_fp_regs()) + val |= CPTR_EL2_TFP; + + write_sysreg(val, cptr_el2); +} + +static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu) +{ + /* + * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to + * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, + * except for some missing controls, such as TAM. + * In this case, CPTR_EL2.TAM has the same position with or without + * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM + * shift value for trapping the AMU accesses. + */ + u64 val = CPTR_EL2_TAM | CPACR_EL1_TTA; + u64 cptr; + + if (guest_owns_fp_regs()) { + val |= CPACR_EL1_FPEN; + if (vcpu_has_sve(vcpu)) + val |= CPACR_EL1_ZEN; + } + + if (!vcpu_has_nv(vcpu)) + goto write; + + /* + * The architecture is a bit crap (what a surprise): an EL2 guest + * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA, + * as they are RES0 in the guest's view. To work around it, trap the + * sucker using the very same bit it can't set... + */ + if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu)) + val |= CPTR_EL2_TCPAC; + + /* + * Layer the guest hypervisor's trap configuration on top of our own if + * we're in a nested context. + */ + if (is_hyp_ctxt(vcpu)) + goto write; + + cptr = vcpu_sanitised_cptr_el2(vcpu); + + /* + * Pay attention, there's some interesting detail here. + * + * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two + * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest): + * + * - CPTR_EL2.xEN = x0, traps are enabled + * - CPTR_EL2.xEN = x1, traps are disabled + * + * In other words, bit[0] determines if guest accesses trap or not. In + * the interest of simplicity, clear the entire field if the guest + * hypervisor has traps enabled to dispel any illusion of something more + * complicated taking place. 
+ */ + if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0))) + val &= ~CPACR_EL1_FPEN; + if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0))) + val &= ~CPACR_EL1_ZEN; + + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) + val |= cptr & CPACR_EL1_E0POE; + + val |= cptr & CPTR_EL2_TCPAC; + +write: + write_sysreg(val, cpacr_el1); +} + +static inline void __activate_cptr_traps(struct kvm_vcpu *vcpu) +{ + if (!guest_owns_fp_regs()) + __activate_traps_fpsimd32(vcpu); + + if (has_vhe() || has_hvhe()) + __activate_cptr_traps_vhe(vcpu); + else + __activate_cptr_traps_nvhe(vcpu); +} + +static inline void __deactivate_cptr_traps_nvhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPTR_NVHE_EL2_RES1; + + if (!cpus_have_final_cap(ARM64_SVE)) + val |= CPTR_EL2_TZ; + if (!cpus_have_final_cap(ARM64_SME)) + val |= CPTR_EL2_TSM; + + write_sysreg(val, cptr_el2); +} + +static inline void __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPACR_EL1_FPEN; + + if (cpus_have_final_cap(ARM64_SVE)) + val |= CPACR_EL1_ZEN; + if (cpus_have_final_cap(ARM64_SME)) + val |= CPACR_EL1_SMEN; + + write_sysreg(val, cpacr_el1); +} + +static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) +{ + if (has_vhe() || has_hvhe()) + __deactivate_cptr_traps_vhe(vcpu); + else + __deactivate_cptr_traps_nvhe(vcpu); +} + #define reg_to_fgt_masks(reg) \ ({ \ struct fgt_masks *m; \ @@ -456,7 +586,7 @@ static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu) */ if (vcpu_has_sve(vcpu)) { zcr_el1 = read_sysreg_el1(SYS_ZCR); - __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1; + __vcpu_assign_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu), zcr_el1); /* * The guest's state is always saved using the guest's max VL. @@ -486,11 +616,6 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) */ if (system_supports_sve()) { __hyp_sve_save_host(); - - /* Re-enable SVE traps if not supported for the guest vcpu. */ - if (!vcpu_has_sve(vcpu)) - cpacr_clear_set(CPACR_EL1_ZEN, 0); - } else { __fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs)); } @@ -541,10 +666,7 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Valid trap. Switch the context: */ /* First disable enough traps to allow us to update the registers */ - if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve())) - cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN); - else - cpacr_clear_set(0, CPACR_EL1_FPEN); + __deactivate_cptr_traps(vcpu); isb(); /* Write out the host state if it's in the registers */ @@ -566,6 +688,13 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED; + /* + * Re-enable traps necessary for the current state of the guest, e.g. + * those enabled by a guest hypervisor. The ERET to the guest will + * provide the necessary context synchronization. 
+ */ + __activate_cptr_traps(vcpu); + return true; } diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index b9cff893bbe0..4d0dbea4c56f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -307,11 +307,11 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq); vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq); - __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); - __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); + __vcpu_assign_sys_reg(vcpu, DACR32_EL2, read_sysreg(dacr32_el2)); + __vcpu_assign_sys_reg(vcpu, IFSR32_EL2, read_sysreg(ifsr32_el2)); if (has_vhe() || kvm_debug_regs_in_use(vcpu)) - __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); + __vcpu_assign_sys_reg(vcpu, DBGVCR32_EL2, read_sysreg(dbgvcr32_el2)); } static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 8e8848de4d47..3206b2c07f82 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -26,7 +26,7 @@ void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu) { - __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); + __vcpu_assign_sys_reg(vcpu, ZCR_EL1, read_sysreg_el1(SYS_ZCR)); /* * On saving/restoring guest sve state, always use the maximum VL for * the guest. The layout of the data when saving the sve state depends @@ -69,7 +69,10 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) if (!guest_owns_fp_regs()) return; - cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN); + /* + * Traps have been disabled by __deactivate_cptr_traps(), but there + * hasn't necessarily been a context synchronization event yet. + */ isb(); if (vcpu_has_sve(vcpu)) @@ -79,7 +82,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm)); if (has_fpmr) - __vcpu_sys_reg(vcpu, FPMR) = read_sysreg_s(SYS_FPMR); + __vcpu_assign_sys_reg(vcpu, FPMR, read_sysreg_s(SYS_FPMR)); if (system_supports_sve()) __hyp_sve_restore_host(); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 73affe1333a4..0e752b515d0f 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -47,65 +47,6 @@ struct fgt_masks hdfgwtr2_masks; extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); -static void __activate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */ - - if (!guest_owns_fp_regs()) - __activate_traps_fpsimd32(vcpu); - - if (has_hvhe()) { - val |= CPACR_EL1_TTA; - - if (guest_owns_fp_regs()) { - val |= CPACR_EL1_FPEN; - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } - - write_sysreg(val, cpacr_el1); - } else { - val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1; - - /* - * Always trap SME since it's not supported in KVM. - * TSM is RES1 if SME isn't implemented. 
- */ - val |= CPTR_EL2_TSM; - - if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) - val |= CPTR_EL2_TZ; - - if (!guest_owns_fp_regs()) - val |= CPTR_EL2_TFP; - - write_sysreg(val, cptr_el2); - } -} - -static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) -{ - if (has_hvhe()) { - u64 val = CPACR_EL1_FPEN; - - if (cpus_have_final_cap(ARM64_SVE)) - val |= CPACR_EL1_ZEN; - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN; - - write_sysreg(val, cpacr_el1); - } else { - u64 val = CPTR_NVHE_EL2_RES1; - - if (!cpus_have_final_cap(ARM64_SVE)) - val |= CPTR_EL2_TZ; - if (!cpus_have_final_cap(ARM64_SME)) - val |= CPTR_EL2_TSM; - - write_sysreg(val, cptr_el2); - } -} - static void __activate_traps(struct kvm_vcpu *vcpu) { ___activate_traps(vcpu, vcpu->arch.hcr_el2); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index c9b330dc2066..477f1580ffea 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -90,87 +90,6 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu) return hcr | (guest_hcr & ~NV_HCR_GUEST_EXCLUDE); } -static void __activate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 cptr; - - /* - * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to - * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, - * except for some missing controls, such as TAM. - * In this case, CPTR_EL2.TAM has the same position with or without - * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM - * shift value for trapping the AMU accesses. - */ - u64 val = CPACR_EL1_TTA | CPTR_EL2_TAM; - - if (guest_owns_fp_regs()) { - val |= CPACR_EL1_FPEN; - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } else { - __activate_traps_fpsimd32(vcpu); - } - - if (!vcpu_has_nv(vcpu)) - goto write; - - /* - * The architecture is a bit crap (what a surprise): an EL2 guest - * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA, - * as they are RES0 in the guest's view. To work around it, trap the - * sucker using the very same bit it can't set... - */ - if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu)) - val |= CPTR_EL2_TCPAC; - - /* - * Layer the guest hypervisor's trap configuration on top of our own if - * we're in a nested context. - */ - if (is_hyp_ctxt(vcpu)) - goto write; - - cptr = vcpu_sanitised_cptr_el2(vcpu); - - /* - * Pay attention, there's some interesting detail here. - * - * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two - * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest): - * - * - CPTR_EL2.xEN = x0, traps are enabled - * - CPTR_EL2.xEN = x1, traps are disabled - * - * In other words, bit[0] determines if guest accesses trap or not. In - * the interest of simplicity, clear the entire field if the guest - * hypervisor has traps enabled to dispel any illusion of something more - * complicated taking place. 
- */ - if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0))) - val &= ~CPACR_EL1_FPEN; - if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0))) - val &= ~CPACR_EL1_ZEN; - - if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) - val |= cptr & CPACR_EL1_E0POE; - - val |= cptr & CPTR_EL2_TCPAC; - -write: - write_sysreg(val, cpacr_el1); -} - -static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 val = CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN; - - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN_EL1EN; - - write_sysreg(val, cpacr_el1); -} - static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -223,9 +142,9 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) */ val = read_sysreg_el0(SYS_CNTP_CVAL); if (map.direct_ptimer == vcpu_ptimer(vcpu)) - __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val; + __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, val); if (map.direct_ptimer == vcpu_hptimer(vcpu)) - __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val; + __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, val); offset = read_sysreg_s(SYS_CNTPOFF_EL2); @@ -639,10 +558,10 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - sysreg_save_host_state_vhe(host_ctxt); - fpsimd_lazy_switch_to_guest(vcpu); + sysreg_save_host_state_vhe(host_ctxt); + /* * Note that ARM erratum 1165522 requires us to configure both stage 1 * and stage 2 translation for the guest context before we clear @@ -667,15 +586,23 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __deactivate_traps(vcpu); - fpsimd_lazy_switch_to_host(vcpu); - sysreg_restore_host_state_vhe(host_ctxt); + __debug_switch_to_host(vcpu); + + /* + * Ensure that all system register writes above have taken effect + * before returning to the host. In VHE mode, CPTR traps for + * FPSIMD/SVE/SME also apply to EL2, so FPSIMD/SVE/SME state must be + * manipulated after the ISB. + */ + isb(); + + fpsimd_lazy_switch_to_host(vcpu); + if (guest_owns_fp_regs()) __fpsimd_save_fpexc32(vcpu); - __debug_switch_to_host(vcpu); - return exit_code; } NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); @@ -705,12 +632,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) */ local_daif_restore(DAIF_PROCCTX_NOIRQ); - /* - * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. We rely on the isb() in kvm_call_hyp*() - * to make sure these changes take effect before running the host or - * additional guests. 
- */ return ret; } diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 3814b0b2c937..73e4bc7fde9e 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -18,17 +18,17 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) { /* These registers are common with EL1 */ - __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1); - __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1); - - __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR); - __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0); - __vcpu_sys_reg(vcpu, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1); - __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR); - __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR); - __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR); - __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR); - __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR); + __vcpu_assign_sys_reg(vcpu, PAR_EL1, read_sysreg(par_el1)); + __vcpu_assign_sys_reg(vcpu, TPIDR_EL1, read_sysreg(tpidr_el1)); + + __vcpu_assign_sys_reg(vcpu, ESR_EL2, read_sysreg_el1(SYS_ESR)); + __vcpu_assign_sys_reg(vcpu, AFSR0_EL2, read_sysreg_el1(SYS_AFSR0)); + __vcpu_assign_sys_reg(vcpu, AFSR1_EL2, read_sysreg_el1(SYS_AFSR1)); + __vcpu_assign_sys_reg(vcpu, FAR_EL2, read_sysreg_el1(SYS_FAR)); + __vcpu_assign_sys_reg(vcpu, MAIR_EL2, read_sysreg_el1(SYS_MAIR)); + __vcpu_assign_sys_reg(vcpu, VBAR_EL2, read_sysreg_el1(SYS_VBAR)); + __vcpu_assign_sys_reg(vcpu, CONTEXTIDR_EL2, read_sysreg_el1(SYS_CONTEXTIDR)); + __vcpu_assign_sys_reg(vcpu, AMAIR_EL2, read_sysreg_el1(SYS_AMAIR)); /* * In VHE mode those registers are compatible between EL1 and EL2, @@ -46,21 +46,21 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) * are always trapped, ensuring that the in-memory * copy is always up-to-date. A small blessing... 
*/ - __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR); - __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0); - __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1); - __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR); + __vcpu_assign_sys_reg(vcpu, SCTLR_EL2, read_sysreg_el1(SYS_SCTLR)); + __vcpu_assign_sys_reg(vcpu, TTBR0_EL2, read_sysreg_el1(SYS_TTBR0)); + __vcpu_assign_sys_reg(vcpu, TTBR1_EL2, read_sysreg_el1(SYS_TTBR1)); + __vcpu_assign_sys_reg(vcpu, TCR_EL2, read_sysreg_el1(SYS_TCR)); if (ctxt_has_tcrx(&vcpu->arch.ctxt)) { - __vcpu_sys_reg(vcpu, TCR2_EL2) = read_sysreg_el1(SYS_TCR2); + __vcpu_assign_sys_reg(vcpu, TCR2_EL2, read_sysreg_el1(SYS_TCR2)); if (ctxt_has_s1pie(&vcpu->arch.ctxt)) { - __vcpu_sys_reg(vcpu, PIRE0_EL2) = read_sysreg_el1(SYS_PIRE0); - __vcpu_sys_reg(vcpu, PIR_EL2) = read_sysreg_el1(SYS_PIR); + __vcpu_assign_sys_reg(vcpu, PIRE0_EL2, read_sysreg_el1(SYS_PIRE0)); + __vcpu_assign_sys_reg(vcpu, PIR_EL2, read_sysreg_el1(SYS_PIR)); } if (ctxt_has_s1poe(&vcpu->arch.ctxt)) - __vcpu_sys_reg(vcpu, POR_EL2) = read_sysreg_el1(SYS_POR); + __vcpu_assign_sys_reg(vcpu, POR_EL2, read_sysreg_el1(SYS_POR)); } /* @@ -70,13 +70,13 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) */ val = read_sysreg_el1(SYS_CNTKCTL); val &= CNTKCTL_VALID_BITS; - __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS; - __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val; + __vcpu_rmw_sys_reg(vcpu, CNTHCTL_EL2, &=, ~CNTKCTL_VALID_BITS); + __vcpu_rmw_sys_reg(vcpu, CNTHCTL_EL2, |=, val); } - __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1); - __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR); - __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR); + __vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1)); + __vcpu_assign_sys_reg(vcpu, ELR_EL2, read_sysreg_el1(SYS_ELR)); + __vcpu_assign_sys_reg(vcpu, SPSR_EL2, read_sysreg_el1(SYS_SPSR)); } static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 4a53e4147fb0..5b191f4dc566 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1757,7 +1757,7 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu) out: for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++) - (void)__vcpu_sys_reg(vcpu, sr); + __vcpu_rmw_sys_reg(vcpu, sr, |=, 0); return 0; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 25c29107f13f..b03dbda7f1ab 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -178,7 +178,7 @@ static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force) val |= lower_32_bits(val); } - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); /* Recreate the perf event to reflect the updated sample_period */ kvm_pmu_create_perf_event(pmc); @@ -204,7 +204,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) { kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, select_idx)); - __vcpu_sys_reg(vcpu, counter_index_to_reg(select_idx)) = val; + __vcpu_assign_sys_reg(vcpu, counter_index_to_reg(select_idx), val); kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); } @@ -239,7 +239,7 @@ static void kvm_pmu_stop_counter(struct kvm_pmc *pmc) reg = counter_index_to_reg(pmc->idx); - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); kvm_pmu_release_perf_event(pmc); } @@ -503,14 +503,14 @@ static void 
kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1; if (!kvm_pmc_is_64bit(pmc)) reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg; + __vcpu_assign_sys_reg(vcpu, counter_index_to_reg(i), reg); /* No overflow? move on */ if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg)) continue; /* Mark overflow */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, BIT(i)); if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(i + 1), @@ -556,7 +556,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, perf_event->attr.sample_period = period; perf_event->hw.sample_period = period; - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, BIT(idx)); if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(idx + 1), @@ -602,7 +602,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); /* The reset bits don't indicate any state, and shouldn't be saved. */ - __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P); + __vcpu_assign_sys_reg(vcpu, PMCR_EL0, (val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P))); if (val & ARMV8_PMU_PMCR_C) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); @@ -779,7 +779,7 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 reg; reg = counter_index_to_evtreg(pmc->idx); - __vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm); + __vcpu_assign_sys_reg(vcpu, reg, (data & kvm_pmu_evtyper_mask(vcpu->kvm))); kvm_pmu_create_perf_event(pmc); } @@ -914,9 +914,9 @@ void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu) { u64 mask = kvm_pmu_implemented_counter_mask(vcpu); - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask; - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask; - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask; + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, mask); + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, mask); + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, mask); kvm_pmu_reprogram_counter_mask(vcpu, mask); } @@ -1038,7 +1038,7 @@ static void kvm_arm_set_nr_counters(struct kvm *kvm, unsigned int nr) u64 val = __vcpu_sys_reg(vcpu, MDCR_EL2); val &= ~MDCR_EL2_HPMN; val |= FIELD_PREP(MDCR_EL2_HPMN, kvm->arch.nr_pmu_counters); - __vcpu_sys_reg(vcpu, MDCR_EL2) = val; + __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val); } } } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a6cf2888d150..76c2f0da821f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -228,7 +228,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) * to reverse-translate virtual EL2 system registers for a * non-VHE guest hypervisor. 
*/ - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); switch (reg) { case CNTHCTL_EL2: @@ -263,7 +263,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) return; memory_write: - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); } /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ @@ -605,7 +605,7 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, if ((val ^ rd->val) & ~OSLSR_EL1_OSLK) return -EINVAL; - __vcpu_sys_reg(vcpu, rd->reg) = val; + __vcpu_assign_sys_reg(vcpu, rd->reg, val); return 0; } @@ -791,7 +791,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) mask |= GENMASK(n - 1, 0); reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= mask; + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, mask); return __vcpu_sys_reg(vcpu, r->reg); } @@ -799,7 +799,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static u64 reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0); + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, GENMASK(31, 0)); return __vcpu_sys_reg(vcpu, r->reg); } @@ -811,7 +811,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) return 0; reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= kvm_pmu_evtyper_mask(vcpu->kvm); + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, kvm_pmu_evtyper_mask(vcpu->kvm)); return __vcpu_sys_reg(vcpu, r->reg); } @@ -819,7 +819,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= PMSELR_EL0_SEL_MASK; + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, PMSELR_EL0_SEL_MASK); return __vcpu_sys_reg(vcpu, r->reg); } @@ -835,7 +835,7 @@ static u64 reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * The value of PMCR.N field is included when the * vCPU register is read via kvm_vcpu_read_pmcr(). 
*/ - __vcpu_sys_reg(vcpu, r->reg) = pmcr; + __vcpu_assign_sys_reg(vcpu, r->reg, pmcr); return __vcpu_sys_reg(vcpu, r->reg); } @@ -907,7 +907,7 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (p->is_write) - __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; + __vcpu_assign_sys_reg(vcpu, PMSELR_EL0, p->regval); else /* return PMSELR.SEL field */ p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0) @@ -1076,7 +1076,7 @@ static int set_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 va { u64 mask = kvm_pmu_accessible_counter_mask(vcpu); - __vcpu_sys_reg(vcpu, r->reg) = val & mask; + __vcpu_assign_sys_reg(vcpu, r->reg, val & mask); kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); return 0; @@ -1103,10 +1103,10 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = p->regval & mask; if (r->Op2 & 0x1) /* accessing PMCNTENSET_EL0 */ - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, |=, val); else /* accessing PMCNTENCLR_EL0 */ - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, ~val); kvm_pmu_reprogram_counter_mask(vcpu, val); } else { @@ -1129,10 +1129,10 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (r->Op2 & 0x1) /* accessing PMINTENSET_EL1 */ - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, |=, val); else /* accessing PMINTENCLR_EL1 */ - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, ~val); } else { p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1); } @@ -1151,10 +1151,10 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { if (r->CRm & 0x2) /* accessing PMOVSSET_EL0 */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, (p->regval & mask)); else /* accessing PMOVSCLR_EL0 */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, ~(p->regval & mask)); } else { p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); } @@ -1185,8 +1185,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!vcpu_mode_priv(vcpu)) return undef_access(vcpu, p, r); - __vcpu_sys_reg(vcpu, PMUSERENR_EL0) = - p->regval & ARMV8_PMU_USERENR_MASK; + __vcpu_assign_sys_reg(vcpu, PMUSERENR_EL0, + (p->regval & ARMV8_PMU_USERENR_MASK)); } else { p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0) & ARMV8_PMU_USERENR_MASK; @@ -1237,7 +1237,7 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; - __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); return 0; @@ -2213,7 +2213,7 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) if (kvm_has_mte(vcpu->kvm)) clidr |= 2ULL << CLIDR_TTYPE_SHIFT(loc); - __vcpu_sys_reg(vcpu, r->reg) = clidr; + __vcpu_assign_sys_reg(vcpu, r->reg, clidr); return __vcpu_sys_reg(vcpu, r->reg); } @@ -2227,7 +2227,7 @@ static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc)) return -EINVAL; - __vcpu_sys_reg(vcpu, rd->reg) = val; + __vcpu_assign_sys_reg(vcpu, rd->reg, val); return 0; } @@ -2404,7 +2404,7 @@ static bool access_sp_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - 
__vcpu_sys_reg(vcpu, SP_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, SP_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, SP_EL1); @@ -2428,7 +2428,7 @@ static bool access_spsr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, SPSR_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, SPSR_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, SPSR_EL1); @@ -2440,7 +2440,7 @@ static bool access_cntkctl_el12(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, CNTKCTL_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, CNTKCTL_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, CNTKCTL_EL1); @@ -2454,7 +2454,9 @@ static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1)) val |= HCR_E2H; - return __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); + + return __vcpu_sys_reg(vcpu, r->reg); } static unsigned int __el2_visibility(const struct kvm_vcpu *vcpu, @@ -2625,7 +2627,7 @@ static bool access_mdcr(struct kvm_vcpu *vcpu, u64_replace_bits(val, hpmn, MDCR_EL2_HPMN); } - __vcpu_sys_reg(vcpu, MDCR_EL2) = val; + __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val); /* * Request a reload of the PMU to enable/disable the counters @@ -2754,7 +2756,7 @@ static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, static u64 reset_mdcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - __vcpu_sys_reg(vcpu, r->reg) = vcpu->kvm->arch.nr_pmu_counters; + __vcpu_assign_sys_reg(vcpu, r->reg, vcpu->kvm->arch.nr_pmu_counters); return vcpu->kvm->arch.nr_pmu_counters; } @@ -4790,7 +4792,7 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) r->reset(vcpu, r); if (r->reg >= __SANITISED_REG_START__ && r->reg < NR_SYS_REGS) - (void)__vcpu_sys_reg(vcpu, r->reg); + __vcpu_rmw_sys_reg(vcpu, r->reg, |=, 0); } set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags); @@ -5012,7 +5014,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, if (r->set_user) { ret = (r->set_user)(vcpu, r, val); } else { - __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); ret = 0; } diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index cc6338d38766..ef97d9fc67cc 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -137,7 +137,7 @@ static inline u64 reset_unknown(struct kvm_vcpu *vcpu, { BUG_ON(!r->reg); BUG_ON(r->reg >= NR_SYS_REGS); - __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; + __vcpu_assign_sys_reg(vcpu, r->reg, 0x1de7ec7edbadc0deULL); return __vcpu_sys_reg(vcpu, r->reg); } @@ -145,7 +145,7 @@ static inline u64 reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { BUG_ON(!r->reg); BUG_ON(r->reg >= NR_SYS_REGS); - __vcpu_sys_reg(vcpu, r->reg) = r->val; + __vcpu_assign_sys_reg(vcpu, r->reg, r->val); return __vcpu_sys_reg(vcpu, r->reg); } diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index 4f6954c30674..a50fb7e6841f 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -36,6 +36,11 @@ struct shadow_if { static DEFINE_PER_CPU(struct shadow_if, shadow_if); +static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx) +{ + return hweight16(shadow_if->lr_map & (BIT(idx) - 1)); +} + /* * Nesting GICv3 support * @@ -209,6 +214,29 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) return reg; } +static u64 translate_lr_pintid(struct kvm_vcpu *vcpu, 
u64 lr) +{ + struct vgic_irq *irq; + + if (!(lr & ICH_LR_HW)) + return lr; + + /* We have the HW bit set, check for validity of pINTID */ + irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); + /* If there was no real mapping, nuke the HW bit */ + if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI) + lr &= ~ICH_LR_HW; + + /* Translate the virtual mapping to the real one, even if invalid */ + if (irq) { + lr &= ~ICH_LR_PHYS_ID_MASK; + lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid); + vgic_put_irq(vcpu->kvm, irq); + } + + return lr; +} + /* * For LRs which have HW bit set such as timer interrupts, we modify them to * have the host hardware interrupt number instead of the virtual one programmed @@ -217,58 +245,37 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu, struct vgic_v3_cpu_if *s_cpu_if) { - unsigned long lr_map = 0; - int index = 0; + struct shadow_if *shadow_if; + + shadow_if = container_of(s_cpu_if, struct shadow_if, cpuif); + shadow_if->lr_map = 0; for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) { u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); - struct vgic_irq *irq; if (!(lr & ICH_LR_STATE)) - lr = 0; - - if (!(lr & ICH_LR_HW)) - goto next; - - /* We have the HW bit set, check for validity of pINTID */ - irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); - if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI ) { - /* There was no real mapping, so nuke the HW bit */ - lr &= ~ICH_LR_HW; - if (irq) - vgic_put_irq(vcpu->kvm, irq); - goto next; - } - - /* Translate the virtual mapping to the real one */ - lr &= ~ICH_LR_PHYS_ID_MASK; - lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid); + continue; - vgic_put_irq(vcpu->kvm, irq); + lr = translate_lr_pintid(vcpu, lr); -next: - s_cpu_if->vgic_lr[index] = lr; - if (lr) { - lr_map |= BIT(i); - index++; - } + s_cpu_if->vgic_lr[hweight16(shadow_if->lr_map)] = lr; + shadow_if->lr_map |= BIT(i); } - container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map; - s_cpu_if->used_lrs = index; + s_cpu_if->used_lrs = hweight16(shadow_if->lr_map); } void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) { struct shadow_if *shadow_if = get_shadow_if(); - int i, index = 0; + int i; for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); struct vgic_irq *irq; if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE)) - goto next; + continue; /* * If we had a HW lr programmed by the guest hypervisor, we @@ -277,15 +284,13 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) */ irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */ - goto next; + continue; - lr = __gic_v3_get_lr(index); + lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i)); if (!(lr & ICH_LR_STATE)) irq->active = false; vgic_put_irq(vcpu->kvm, irq); - next: - index++; } } @@ -356,25 +361,23 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu) val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); val &= ~ICH_HCR_EL2_EOIcount_MASK; val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK); - __vcpu_sys_reg(vcpu, ICH_HCR_EL2) = val; - __vcpu_sys_reg(vcpu, ICH_VMCR_EL2) = s_cpu_if->vgic_vmcr; + __vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val); + __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr); for (i = 0; i < 4; i++) { - __vcpu_sys_reg(vcpu, ICH_AP0RN(i)) = s_cpu_if->vgic_ap0r[i]; - __vcpu_sys_reg(vcpu, ICH_AP1RN(i)) = s_cpu_if->vgic_ap1r[i]; + __vcpu_assign_sys_reg(vcpu, 
ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]); + __vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]); } for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { val = __vcpu_sys_reg(vcpu, ICH_LRN(i)); val &= ~ICH_LR_STATE; - val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE; + val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE; - __vcpu_sys_reg(vcpu, ICH_LRN(i)) = val; - s_cpu_if->vgic_lr[i] = 0; + __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val); } - shadow_if->lr_map = 0; vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0; } diff --git a/arch/arm64/lib/crypto/poly1305-glue.c b/arch/arm64/lib/crypto/poly1305-glue.c index 6a661cf04821..c9a74766785b 100644 --- a/arch/arm64/lib/crypto/poly1305-glue.c +++ b/arch/arm64/lib/crypto/poly1305-glue.c @@ -38,14 +38,14 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, unsigned int todo = min_t(unsigned int, len, SZ_4K); kernel_neon_begin(); - poly1305_blocks_neon(state, src, todo, 1); + poly1305_blocks_neon(state, src, todo, padbit); kernel_neon_end(); len -= todo; src += todo; } while (len); } else - poly1305_blocks(state, src, len, 1); + poly1305_blocks(state, src, len, padbit); } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8fcf59ba39db..00ab1d648db6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1305,7 +1305,8 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr) next = addr; end = addr + PUD_SIZE; do { - pmd_free_pte_page(pmdp, next); + if (pmd_present(pmdp_get(pmdp))) + pmd_free_pte_page(pmdp, next); } while (pmdp++, next += PMD_SIZE, next != end); pud_clear(pudp); diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index a3b0e693a125..bbea4f36f9f2 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -325,4 +325,9 @@ #define A64_MRS_SP_EL0(Rt) \ aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_SP_EL0) +/* Barriers */ +#define A64_SB aarch64_insn_get_sb_value() +#define A64_DSB_NSH (aarch64_insn_get_dsb_base_value() | 0x7 << 8) +#define A64_ISB aarch64_insn_get_isb_value() + #endif /* _BPF_JIT_H */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index da8b89dd2910..b6c42b5c9668 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1630,17 +1630,14 @@ emit_cond_jmp: return ret; break; - /* speculation barrier */ + /* speculation barrier against v1 and v4 */ case BPF_ST | BPF_NOSPEC: - /* - * Nothing required here. - * - * In case of arm64, we rely on the firmware mitigation of - * Speculative Store Bypass as controlled via the ssbd kernel - * parameter. Whenever the mitigation is enabled, it works - * for all of the kernel code with no need to provide any - * additional instructions. - */ + if (alternative_has_cap_likely(ARM64_HAS_SB)) { + emit(A64_SB, ctx); + } else { + emit(A64_DSB_NSH, ctx); + emit(A64_ISB, ctx); + } break; /* ST: *(size *)(dst + off) = imm */ @@ -2911,6 +2908,17 @@ bool bpf_jit_supports_percpu_insn(void) return true; } +bool bpf_jit_bypass_spec_v4(void) +{ + /* In case of arm64, we rely on the firmware mitigation of Speculative + * Store Bypass as controlled via the ssbd kernel parameter. Whenever + * the mitigation is enabled, it works for all of the kernel code with + * no need to provide any additional instructions. Therefore, skip + * inserting nospec insns against Spectre v4. 
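 *
 * For context (a sketch, not the verifier's actual code): returning
 * true from this hook lets the BPF verifier elide v4-only nospec
 * instructions altogether instead of asking the JIT to emit them:
 *
 *	if (insn_is_nospec_v4(insn) && bpf_jit_bypass_spec_v4())
 *		continue;	// SSB already mitigated by firmware
 *	emit_nospec(insn);
 *
 * where insn_is_nospec_v4() is a hypothetical helper used only for
 * illustration.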
+ */ + return true; +} + bool bpf_jit_inlines_helper_call(s32 imm) { switch (imm) { diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index b8378431aeff..5a394be09c35 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -200,7 +200,7 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 9fbdfdbc539f..fbf24d1d1ca6 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -387,7 +387,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) (((type & 0x1f) << 1) | \ ((offset & 0x3ffff8) << 10) | ((offset & 0x7) << 7)) }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index a3f17914dbab..b30185302c07 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -301,7 +301,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) }) #define __swp_entry_to_pmd(x) ((pmd_t) { (x).val | _PAGE_HUGE }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index f5c596b211d4..d79fef609194 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -268,7 +268,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) (__pte((x).val)) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 040ac3bad713..14fee64d3e60 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -185,7 +185,7 @@ extern pgd_t kernel_pg_dir[128]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h index 73745dc0ec0e..858cbe936f5b 100644 --- a/arch/m68k/include/asm/sun3_pgtable.h +++ b/arch/m68k/include/asm/sun3_pgtable.h @@ -169,7 +169,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index b1bb2c65dd04..bae1abfa6f6b 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -398,7 +398,7 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 2 }) 
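The pte_swp_exclusive() hunks here and in the surrounding architectures all make the same int-to-bool conversion. Beyond style, bool is the safer return contract: converting a masked value to bool is a "!= 0" test, while narrowing it to int silently truncates a flag that sits above bit 31. A minimal sketch, with a made-up bit position and assuming 64-bit unsigned long with the usual truncating conversion:

	#define SWP_FLAG	(1UL << 40)	/* hypothetical high flag */

	static inline int as_int(unsigned long pteval)
	{
		return pteval & SWP_FLAG;	/* truncated: always 0 */
	}

	static inline bool as_bool(unsigned long pteval)
	{
		return pteval & SWP_FLAG;	/* != 0 test: correct */
	}

Whether any of the architectures touched here actually keeps the flag that high does not matter; bool makes the pattern immune either way.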
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 2 }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 4852b005a72d..ae73ecf4c41a 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -534,7 +534,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #endif #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte.pte_low & _PAGE_SWP_EXCLUSIVE; } @@ -551,7 +551,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return pte; } #else -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index e98578e27e26..844dce55569f 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -259,7 +259,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 71bfb8c8c482..5bd6463bd514 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -411,7 +411,7 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 80f5e2a28413..1a86a4370b29 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -425,7 +425,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts index c4e4d2a9b460..b7eac4e56019 100644 --- a/arch/powerpc/boot/dts/microwatt.dts +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -4,7 +4,7 @@ / { #size-cells = <0x02>; #address-cells = <0x02>; - model-name = "microwatt"; + model = "microwatt"; compatible = "microwatt-soc"; aliases { diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts index e09b37d7489d..a89cb3139ca8 100644 --- a/arch/powerpc/boot/dts/mpc8315erdb.dts +++ b/arch/powerpc/boot/dts/mpc8315erdb.dts @@ -6,6 +6,7 @@ */ /dts-v1/; +#include <dt-bindings/interrupt-controller/irq.h> / { compatible = "fsl,mpc8315erdb"; @@ -358,6 +359,15 @@ interrupt-parent = <&ipic>; fsl,mpc8313-wakeup-timer = <>m1>; }; + + gpio: gpio-controller@c00 { + compatible = "fsl,mpc8314-gpio"; + reg = <0xc00 0x100>; + interrupts = <74 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&ipic>; + gpio-controller; + 
#gpio-cells = <2>; + }; }; pci0: pci@e0008500 { diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 42c3af90d1f0..92d21c6faf1e 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -365,7 +365,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 6ed93e290c2f..a2ddcbb3fcb9 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -693,7 +693,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_EXCLUSIVE)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SWP_EXCLUSIVE)); } diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 8d1f0b7062eb..7d6b9e5b286e 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -286,7 +286,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 02897f4b0dbf..b891910fce8a 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -183,7 +183,7 @@ /* * Used to name C functions called from asm */ -#ifdef CONFIG_PPC_KERNEL_PCREL +#if defined(__powerpc64__) && defined(CONFIG_PPC_KERNEL_PCREL) #define CFUNC(name) name@notoc #else #define CFUNC(name) name diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h index 2c145da3b774..b5211e413829 100644 --- a/arch/powerpc/include/uapi/asm/ioctls.h +++ b/arch/powerpc/include/uapi/asm/ioctls.h @@ -23,10 +23,10 @@ #define TCSETSW _IOW('t', 21, struct termios) #define TCSETSF _IOW('t', 22, struct termios) -#define TCGETA _IOR('t', 23, struct termio) -#define TCSETA _IOW('t', 24, struct termio) -#define TCSETAW _IOW('t', 25, struct termio) -#define TCSETAF _IOW('t', 28, struct termio) +#define TCGETA 0x40147417 /* _IOR('t', 23, struct termio) */ +#define TCSETA 0x80147418 /* _IOW('t', 24, struct termio) */ +#define TCSETAW 0x80147419 /* _IOW('t', 25, struct termio) */ +#define TCSETAF 0x8014741c /* _IOW('t', 28, struct termio) */ #define TCSBRK _IO('t', 29) #define TCXONC _IO('t', 30) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 83fe99861eb1..ca7f7bb2b478 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1509,6 +1509,8 @@ int eeh_pe_configure(struct eeh_pe *pe) /* Invalid PE ? 
*/ if (!pe) return -ENODEV; + else + ret = eeh_ops->configure_bridge(pe); return ret; } diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index e8824f933326..8834dfe9d727 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -53,7 +53,7 @@ ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WAR ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) CC32FLAGS := -m32 -CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc +CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc -mpcrel ifdef CONFIG_CC_IS_CLANG # This flag is supported by clang for 64-bit but not 32-bit so it will cause # an unused command line flag warning for this file. diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 5daa77aee7f7..a25a6ffe7d7c 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -370,6 +370,23 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o return 0; } +bool bpf_jit_bypass_spec_v1(void) +{ +#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64) + return !(security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)); +#else + return true; +#endif +} + +bool bpf_jit_bypass_spec_v4(void) +{ + return !(security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_STF_BARRIER) && + stf_barrier_type_get() != STF_BARRIER_NONE); +} + /* * We spill into the redzone always, even if the bpf program has its own stackframe. * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local() @@ -397,6 +414,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code u32 *addrs, int pass, bool extra_pass) { enum stf_barrier_type stf_barrier = stf_barrier_type_get(); + bool sync_emitted, ori31_emitted; const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; int i, ret; @@ -789,30 +807,51 @@ emit_clear: /* * BPF_ST NOSPEC (speculation barrier) + * + * The following must act as a barrier against both Spectre v1 + * and v4 if we requested both mitigations. Therefore, also emit + * 'isync; sync' on E500 or 'ori31' on BOOK3S_64 in addition to + * the insns needed for a Spectre v4 barrier. + * + * If we requested only !bypass_spec_v1 OR only !bypass_spec_v4, + * we can skip the respective other barrier type as an + * optimization. 
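 *
 * Stripped of the emission details, the intended control flow of the
 * hunk below is roughly (sketch, helper names invented):
 *
 *	if (is_e500 && !bpf_jit_bypass_spec_v1()) {
 *		emit_isync(); emit_sync();	// v1 barrier
 *		sync_emitted = true;
 *	}
 *	if (!bpf_jit_bypass_spec_v4())
 *		emit_stf_barrier();		// may reuse sync / ori31
 *	if (is_book3s64 && !bpf_jit_bypass_spec_v1() && !ori31_emitted)
 *		emit_ori31();			// ori 31,31,0 as v1 barrier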
*/ case BPF_ST | BPF_NOSPEC: - if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) || - !security_ftr_enabled(SEC_FTR_STF_BARRIER)) - break; - - switch (stf_barrier) { - case STF_BARRIER_EIEIO: - EMIT(PPC_RAW_EIEIO() | 0x02000000); - break; - case STF_BARRIER_SYNC_ORI: + sync_emitted = false; + ori31_emitted = false; + if (IS_ENABLED(CONFIG_PPC_E500) && + !bpf_jit_bypass_spec_v1()) { + EMIT(PPC_RAW_ISYNC()); EMIT(PPC_RAW_SYNC()); - EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0)); - EMIT(PPC_RAW_ORI(_R31, _R31, 0)); - break; - case STF_BARRIER_FALLBACK: - ctx->seen |= SEEN_FUNC; - PPC_LI64(_R12, dereference_kernel_function_descriptor(bpf_stf_barrier)); - EMIT(PPC_RAW_MTCTR(_R12)); - EMIT(PPC_RAW_BCTRL()); - break; - case STF_BARRIER_NONE: - break; + sync_emitted = true; } + if (!bpf_jit_bypass_spec_v4()) { + switch (stf_barrier) { + case STF_BARRIER_EIEIO: + EMIT(PPC_RAW_EIEIO() | 0x02000000); + break; + case STF_BARRIER_SYNC_ORI: + if (!sync_emitted) + EMIT(PPC_RAW_SYNC()); + EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0)); + EMIT(PPC_RAW_ORI(_R31, _R31, 0)); + ori31_emitted = true; + break; + case STF_BARRIER_FALLBACK: + ctx->seen |= SEEN_FUNC; + PPC_LI64(_R12, dereference_kernel_function_descriptor(bpf_stf_barrier)); + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_BCTRL()); + break; + case STF_BARRIER_NONE: + break; + } + } + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && + !bpf_jit_bypass_spec_v1() && + !ori31_emitted) + EMIT(PPC_RAW_ORI(_R31, _R31, 0)); break; /* diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 0b6365d85d11..dc6f75d3ac6e 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -521,6 +521,15 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) return -EINVAL; } + /* + * Map complete page to the paste address. So the user + * space should pass 0ULL to the offset parameter. 
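 *
 * From userspace the contract looks like this (illustrative snippet;
 * vas_fd stands for an open VAS window file descriptor):
 *
 *	void *paste = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
 *			   MAP_SHARED, vas_fd, 0);
 *
 * Any non-zero offset now fails with -EINVAL.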
+ */ + if (vma->vm_pgoff) { + pr_debug("Page offset unsupported to map paste address\n"); + return -EINVAL; + } + /* Ensure instance has an open send window */ if (!txwin) { pr_err("No send window open?\n"); diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 4ac9808e55a4..2ea30b343354 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -48,11 +48,15 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf, static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) { struct memtrace_entry *ent = filp->private_data; + unsigned long ent_nrpages = ent->size >> PAGE_SHIFT; + unsigned long vma_nrpages = vma_pages(vma); - if (ent->size < vma->vm_end - vma->vm_start) + /* The requested page offset should be within object's page count */ + if (vma->vm_pgoff >= ent_nrpages) return -EINVAL; - if (vma->vm_pgoff << PAGE_SHIFT >= ent->size) + /* The requested mapping range should remain within the bounds */ + if (vma_nrpages > ent_nrpages - vma->vm_pgoff) return -EINVAL; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index a11816bbf9e7..438ce7df24c3 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -1028,7 +1028,7 @@ static inline pud_t pud_modify(pud_t pud, pgprot_t newprot) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 5fbf3f94f1e8..b17fad091bab 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -103,7 +103,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: - if (cp->a2 == 0 && cp->a3 == 0) + if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL) kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask); else kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask, @@ -111,7 +111,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: - if (cp->a2 == 0 && cp->a3 == 0) + if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL) kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, hbase, hmask, cp->a4); else @@ -127,9 +127,9 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: /* * Until nested virtualization is implemented, the - * SBI HFENCE calls should be treated as NOPs + * SBI HFENCE calls should return not supported + * hence fallthrough. 
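 *
 * (On the a2/a3 checks earlier in this hunk: per the SBI spec, an
 * address-range fence degenerates to a full-address-space fence when
 * start and size are both zero, or when size is all-ones
 * (2^XLEN - 1). Isolated, the predicate is simply:
 *
 *	static bool sbi_range_is_full(unsigned long start,
 *				      unsigned long size)
 *	{
 *		return (start == 0 && size == 0) || size == -1UL;
 *	}
 *
 * a sketch only; the kernel open-codes the test per call site.)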
*/ - break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1c661ac62ce8..6d8bc27a366e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -915,7 +915,7 @@ static inline int pmd_protnone(pmd_t pmd) } #endif -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h index 71b18741cc11..5f51af18997b 100644 --- a/arch/sh/include/asm/pgtable_32.h +++ b/arch/sh/include/asm/pgtable_32.h @@ -470,7 +470,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) /* In both cases, we borrow bit 6 to store the exclusive marker in swap PTEs. */ #define _PAGE_SWP_EXCLUSIVE _PAGE_USER -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte.pte_low & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 1454ebe91539..7c199c003ffe 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -348,7 +348,7 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & SRMMU_SWP_EXCLUSIVE; } diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 4af03e3c161b..669cd02469a1 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -1023,7 +1023,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c index c5e6545f6fcf..8e8a8bf518b6 100644 --- a/arch/um/drivers/ubd_user.c +++ b/arch/um/drivers/ubd_user.c @@ -41,7 +41,7 @@ int start_io_thread(struct os_helper_thread **td_out, int *fd_out) *fd_out = fds[1]; err = os_set_fd_block(*fd_out, 0); - err = os_set_fd_block(kernel_fd, 0); + err |= os_set_fd_block(kernel_fd, 0); if (err) { printk("start_io_thread - failed to set nonblocking I/O.\n"); goto out_close; diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index f292e0b4ff8b..9bbbddfe866b 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1625,35 +1625,19 @@ static void vector_eth_configure( device->dev = dev; - *vp = ((struct vector_private) - { - .list = LIST_HEAD_INIT(vp->list), - .dev = dev, - .unit = n, - .options = get_transport_options(def), - .rx_irq = 0, - .tx_irq = 0, - .parsed = def, - .max_packet = get_mtu(def) + ETH_HEADER_OTHER, - /* TODO - we need to calculate headroom so that ip header - * is 16 byte aligned all the time - */ - .headroom = get_headroom(def), - .form_header = NULL, - .verify_header = NULL, - .header_rxbuffer = NULL, - .header_txbuffer = NULL, - .header_size = 0, - .rx_header_size = 0, - .rexmit_scheduled = false, - .opened = false, - .transport_data = NULL, - .in_write_poll = false, - .coalesce = 2, - .req_size = 
get_req_size(def), - .in_error = false, - .bpf = NULL - }); + INIT_LIST_HEAD(&vp->list); + vp->dev = dev; + vp->unit = n; + vp->options = get_transport_options(def); + vp->parsed = def; + vp->max_packet = get_mtu(def) + ETH_HEADER_OTHER; + /* + * TODO - we need to calculate headroom so that ip header + * is 16 byte aligned all the time + */ + vp->headroom = get_headroom(def); + vp->coalesce = 2; + vp->req_size = get_req_size(def); dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST); INIT_WORK(&vp->reset_tx, vector_reset_tx); diff --git a/arch/um/drivers/vfio_kern.c b/arch/um/drivers/vfio_kern.c index b51fc9888ae1..13b971a2bd43 100644 --- a/arch/um/drivers/vfio_kern.c +++ b/arch/um/drivers/vfio_kern.c @@ -570,6 +570,17 @@ static void uml_vfio_release_device(struct uml_vfio_device *dev) kfree(dev); } +static struct uml_vfio_device *uml_vfio_find_device(const char *device) +{ + struct uml_vfio_device *dev; + + list_for_each_entry(dev, ¨_vfio_devices, list) { + if (!strcmp(dev->name, device)) + return dev; + } + return NULL; +} + static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *kp) { struct uml_vfio_device *dev; @@ -582,6 +593,9 @@ static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *k uml_vfio_container.fd = fd; } + if (uml_vfio_find_device(device)) + return -EEXIST; + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index ca2a519d53ab..24fdea6f88c3 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -314,7 +314,7 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_get_bits(pte, _PAGE_SWP_EXCLUSIVE); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 340e5468980e..71019b3b54ea 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -89,7 +89,7 @@ config X86 select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE - select ARCH_HAS_EXECMEM_ROX if X86_64 + select ARCH_HAS_EXECMEM_ROX if X86_64 && STRICT_MODULE_RWX select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 741b229f0718..c2fb729c270e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2826,7 +2826,7 @@ static void intel_pmu_read_event(struct perf_event *event) * If the PEBS counters snapshotting is enabled, * the topdown event is available in PEBS records. 
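 *
 * (Aside on the vector_kern.c rewrite above: assigning a compound
 * literal zero-fills every member not explicitly named, so it can
 * silently wipe fields initialized earlier, and the temporary may be
 * materialized on the stack before being copied. Illustration:
 *
 *	struct cfg { int a; int b; };
 *	c->b = 42;
 *	*c = (struct cfg){ .a = 1 };	// c->b is now 0
 *	c->a = 1;			// field-by-field keeps c->b == 42
 *
 * which is why the per-field form is preferred for large structs.)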
*/ - if (is_topdown_event(event) && !is_pebs_counter_event_group(event)) + if (is_topdown_count(event) && !is_pebs_counter_event_group(event)) static_call(intel_pmu_update_topdown_event)(event, NULL); else intel_pmu_drain_pebs_buffer(); diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index e988bac0a4a1..3c2de4ce3b10 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -5,12 +5,20 @@ #include <asm-generic/module.h> #include <asm/orc_types.h> +struct its_array { +#ifdef CONFIG_MITIGATION_ITS + void **pages; + int num; +#endif +}; + struct mod_arch_specific { #ifdef CONFIG_UNWINDER_ORC unsigned int num_orcs; int *orc_unwind_ip; struct orc_entry *orc_unwind; #endif + struct its_array its_pages; }; #endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 774430c3abff..97954c936c54 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1561,7 +1561,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 2f3820342598..d8525e6ef50a 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -80,6 +80,7 @@ #define TDVMCALL_STATUS_RETRY 0x0000000000000001ULL #define TDVMCALL_STATUS_INVALID_OPERAND 0x8000000000000000ULL #define TDVMCALL_STATUS_ALIGN_ERROR 0x8000000000000002ULL +#define TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED 0x8000000000000003ULL /* * Bitmasks of exposed registers (with VMM). diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index e770c4fc47f4..8727c7e21dd1 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -24,4 +24,26 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); +/* + * To prevent immediate repeat of single step trap on return from SIGTRAP + * handler if the trap flag (TF) is set without an external debugger attached, + * clear the software event flag in the augmented SS, ensuring no single-step + * trap is pending upon ERETU completion. + * + * Note, this function should be called in sigreturn() before the original + * state is restored to make sure the TF is read from the entry frame. + */ +static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs) +{ + /* + * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction + * is being single-stepped, do not clear the software event flag in the + * augmented SS, thus a debugger won't skip over the following instruction. 
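 *
 * The sigreturn call sites added below follow this rule; schematically:
 *
 *	SYSCALL_DEFINE0(rt_sigreturn)
 *	{
 *		struct pt_regs *regs = current_pt_regs();
 *
 *		prevent_single_step_upon_eretu(regs); // TF still from entry
 *		// ...access_ok() checks, restore_sigcontext(), etc...
 *	}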
+ */ +#ifdef CONFIG_X86_FRED + if (!(regs->flags & X86_EFLAGS_TF)) + regs->fred_ss.swevent = 0; +#endif +} + #endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 8b19294600c4..7ddef3a69866 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -106,7 +106,7 @@ void tdx_init(void); typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); -static inline u64 sc_retry(sc_func_t func, u64 fn, +static __always_inline u64 sc_retry(sc_func_t func, u64 fn, struct tdx_module_args *args) { int retry = RDRAND_RETRY_LOOPS; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ecfe7b497cad..ea1d984166cd 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -116,6 +116,24 @@ static struct module *its_mod; #endif static void *its_page; static unsigned int its_offset; +struct its_array its_pages; + +static void *__its_alloc(struct its_array *pages) +{ + void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); + if (!page) + return NULL; + + void *tmp = krealloc(pages->pages, (pages->num+1) * sizeof(void *), + GFP_KERNEL); + if (!tmp) + return NULL; + + pages->pages = tmp; + pages->pages[pages->num++] = page; + + return no_free_ptr(page); +} /* Initialize a thunk with the "jmp *reg; int3" instructions. */ static void *its_init_thunk(void *thunk, int reg) @@ -151,6 +169,21 @@ static void *its_init_thunk(void *thunk, int reg) return thunk + offset; } +static void its_pages_protect(struct its_array *pages) +{ + for (int i = 0; i < pages->num; i++) { + void *page = pages->pages[i]; + execmem_restore_rox(page, PAGE_SIZE); + } +} + +static void its_fini_core(void) +{ + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + its_pages_protect(&its_pages); + kfree(its_pages.pages); +} + #ifdef CONFIG_MODULES void its_init_mod(struct module *mod) { @@ -173,10 +206,8 @@ void its_fini_mod(struct module *mod) its_page = NULL; mutex_unlock(&text_mutex); - for (int i = 0; i < mod->its_num_pages; i++) { - void *page = mod->its_page_array[i]; - execmem_restore_rox(page, PAGE_SIZE); - } + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + its_pages_protect(&mod->arch.its_pages); } void its_free_mod(struct module *mod) @@ -184,37 +215,33 @@ void its_free_mod(struct module *mod) if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) return; - for (int i = 0; i < mod->its_num_pages; i++) { - void *page = mod->its_page_array[i]; + for (int i = 0; i < mod->arch.its_pages.num; i++) { + void *page = mod->arch.its_pages.pages[i]; execmem_free(page); } - kfree(mod->its_page_array); + kfree(mod->arch.its_pages.pages); } #endif /* CONFIG_MODULES */ static void *its_alloc(void) { - void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); - - if (!page) - return NULL; + struct its_array *pages = &its_pages; + void *page; #ifdef CONFIG_MODULES - if (its_mod) { - void *tmp = krealloc(its_mod->its_page_array, - (its_mod->its_num_pages+1) * sizeof(void *), - GFP_KERNEL); - if (!tmp) - return NULL; + if (its_mod) + pages = &its_mod->arch.its_pages; +#endif - its_mod->its_page_array = tmp; - its_mod->its_page_array[its_mod->its_num_pages++] = page; + page = __its_alloc(pages); + if (!page) + return NULL; - execmem_make_temp_rw(page, PAGE_SIZE); - } -#endif /* CONFIG_MODULES */ + execmem_make_temp_rw(page, PAGE_SIZE); + if (pages == &its_pages) + set_memory_x((unsigned long)page, 1); - return no_free_ptr(page); + return page; } static void *its_allocate_thunk(int reg) @@ -268,7 
+295,9 @@ u8 *its_static_thunk(int reg) return thunk; } -#endif +#else +static inline void its_fini_core(void) {} +#endif /* CONFIG_MITIGATION_ITS */ /* * Nomenclature for variable names to simplify and clarify this code and ease @@ -2338,6 +2367,8 @@ void __init alternative_instructions(void) apply_retpolines(__retpoline_sites, __retpoline_sites_end); apply_returns(__return_sites, __return_sites_end); + its_fini_core(); + /* * Adjust all CALL instructions to point to func()-10, including * those in .altinstr_replacement. @@ -3107,6 +3138,6 @@ void __ref smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, c */ void __ref smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate) { - __smp_text_poke_batch_add(addr, opcode, len, emulate); + smp_text_poke_batch_add(addr, opcode, len, emulate); smp_text_poke_batch_finish(); } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 93da466dfe2c..b2ad8d13211a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -31,7 +31,7 @@ #include "cpu.h" -u16 invlpgb_count_max __ro_after_init; +u16 invlpgb_count_max __ro_after_init = 1; static inline int rdmsrq_amd_safe(unsigned msr, u64 *p) { diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 7109cbfcad4f..187d527ef73b 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) struct rdt_hw_mon_domain *hw_dom; struct rdt_domain_hdr *hdr; struct rdt_mon_domain *d; + struct cacheinfo *ci; int err; lockdep_assert_held(&domain_list_lock); @@ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d = &hw_dom->d_resctrl; d->hdr.id = id; d->hdr.type = RESCTRL_MON_DOMAIN; - d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); - if (!d->ci) { + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) { pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); mon_domain_free(hw_dom); return; } + d->ci_id = ci->id; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); arch_mon_domain_online(r, d); diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 98123ff10506..42bbc42bd350 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn) struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); sigset_t set; + prevent_single_step_upon_eretu(regs); + if (!access_ok(frame, sizeof(*frame))) goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask) @@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn) struct rt_sigframe_ia32 __user *frame; sigset_t set; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); if (!access_ok(frame, sizeof(*frame))) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ee9453891901..d483b585c6c6 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(frame, sizeof(*frame))) goto badframe; @@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); if (!access_ok(frame, 
sizeof(*frame))) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 18266cc3d98c..b014e6d229f9 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -299,3 +299,27 @@ struct smp_ops smp_ops = { .send_call_func_single_ipi = native_send_call_func_single_ipi, }; EXPORT_SYMBOL_GPL(smp_ops); + +int arch_cpu_rescan_dead_smt_siblings(void) +{ + enum cpuhp_smt_control old = cpu_smt_control; + int ret; + + /* + * If SMT has been disabled and SMT siblings are in HLT, bring them back + * online and offline them again so that they end up in MWAIT proper. + * + * Called with hotplug enabled. + */ + if (old != CPU_SMT_DISABLED && old != CPU_SMT_FORCE_DISABLED) + return 0; + + ret = cpuhp_smt_enable(); + if (ret) + return ret; + + ret = cpuhp_smt_disable(old); + + return ret; +} +EXPORT_SYMBOL_GPL(arch_cpu_rescan_dead_smt_siblings); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fc78c2325fd2..58ede3fa6a75 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1244,6 +1244,10 @@ void play_dead_common(void) local_irq_disable(); } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. + */ void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); @@ -1290,50 +1294,6 @@ void __noreturn mwait_play_dead(unsigned int eax_hint) } /* - * We need to flush the caches before going to sleep, lest we have - * dirty data in our caches when we come back up. - */ -static inline void mwait_play_dead_cpuid_hint(void) -{ - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - return; - if (!this_cpu_has(X86_FEATURE_MWAIT)) - return; - if (!this_cpu_has(X86_FEATURE_CLFLUSH)) - return; - - eax = CPUID_LEAF_MWAIT; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - /* - * eax will be 0 if EDX enumeration is not valid. - * Initialized below to cstate, sub_cstate value when EDX is valid. - */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { - eax = 0; - } else { - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - } - - mwait_play_dead(eax); -} - -/* * Kick all "offline" CPUs out of mwait on kexec(). See comment in * mwait_play_dead(). */ @@ -1383,9 +1343,9 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - mwait_play_dead_cpuid_hint(); - if (cpuidle_play_dead()) - hlt_play_dead(); + /* Below returns only on error. */ + cpuidle_play_dead(); + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index cbc84c6abc2e..4e06e2e89a8f 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4896,12 +4896,16 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, { u64 error_code = PFERR_GUEST_FINAL_MASK; u8 level = PG_LEVEL_4K; + u64 direct_bits; u64 end; int r; if (!vcpu->kvm->arch.pre_fault_allowed) return -EOPNOTSUPP; + if (kvm_is_gfn_alias(vcpu->kvm, gpa_to_gfn(range->gpa))) + return -EINVAL; + /* * reload is efficient when called repeatedly, so we can do it on * every iteration. 
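For reference, the consumer of this path is the KVM_PRE_FAULT_MEMORY vcpu ioctl. A plausible VMM-side loop, assuming the documented behaviour that the kernel advances gpa/size on partial progress (illustrative, not taken from a specific VMM):

	struct kvm_pre_fault_memory range = { .gpa = gpa, .size = size };

	while (range.size) {
		if (ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range) < 0 &&
		    errno != EINTR)
			break;	/* real error */
	}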
@@ -4910,15 +4914,18 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, if (r) return r; + direct_bits = 0; if (kvm_arch_has_private_mem(vcpu->kvm) && kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa))) error_code |= PFERR_PRIVATE_ACCESS; + else + direct_bits = gfn_to_gpa(kvm_gfn_direct_bits(vcpu->kvm)); /* * Shadow paging uses GVA for kvm page fault, so restrict to * two-dimensional paging. */ - r = kvm_tdp_map_page(vcpu, range->gpa, error_code, &level); + r = kvm_tdp_map_page(vcpu, range->gpa | direct_bits, error_code, &level); if (r < 0) return r; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 5a69b657dae9..459c3b791fd4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2871,6 +2871,33 @@ void __init sev_set_cpu_caps(void) } } +static bool is_sev_snp_initialized(void) +{ + struct sev_user_data_snp_status *status; + struct sev_data_snp_addr buf; + bool initialized = false; + int ret, error = 0; + + status = snp_alloc_firmware_page(GFP_KERNEL | __GFP_ZERO); + if (!status) + return false; + + buf.address = __psp_pa(status); + ret = sev_do_cmd(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &error); + if (ret) { + pr_err("SEV: SNP_PLATFORM_STATUS failed ret=%d, fw_error=%d (%#x)\n", + ret, error, error); + goto out; + } + + initialized = !!status->state; + +out: + snp_free_firmware_page(status); + + return initialized; +} + void __init sev_hardware_setup(void) { unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count; @@ -2975,6 +3002,14 @@ void __init sev_hardware_setup(void) sev_snp_supported = sev_snp_enabled && cc_platform_has(CC_ATTR_HOST_SEV_SNP); out: + if (sev_enabled) { + init_args.probe = true; + if (sev_platform_init(&init_args)) + sev_supported = sev_es_supported = sev_snp_supported = false; + else if (sev_snp_supported) + sev_snp_supported = is_sev_snp_initialized(); + } + if (boot_cpu_has(X86_FEATURE_SEV)) pr_info("SEV %s (ASIDs %u - %u)\n", sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" : @@ -3001,15 +3036,6 @@ out: sev_supported_vmsa_features = 0; if (sev_es_debug_swap_enabled) sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP; - - if (!sev_enabled) - return; - - /* - * Do both SNP and SEV initialization at KVM module load. - */ - init_args.probe = true; - sev_platform_init(&init_args); } void sev_hardware_unsetup(void) diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index b952bc673271..1ad20c273f3b 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -1212,11 +1212,13 @@ static int tdx_map_gpa(struct kvm_vcpu *vcpu) /* * Converting TDVMCALL_MAP_GPA to KVM_HC_MAP_GPA_RANGE requires * userspace to enable KVM_CAP_EXIT_HYPERCALL with KVM_HC_MAP_GPA_RANGE - * bit set. If not, the error code is not defined in GHCI for TDX, use - * TDVMCALL_STATUS_INVALID_OPERAND for this case. + * bit set. This is a base call so it should always be supported, but + * KVM has no way to ensure that userspace implements the GHCI correctly. + * So if KVM_HC_MAP_GPA_RANGE does not cause a VMEXIT, return an error + * to the guest. 
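 *
 * The userspace opt-in is the standard exit-hypercall capability
 * (illustrative VMM snippet):
 *
 *	struct kvm_enable_cap cap = {
 *		.cap  = KVM_CAP_EXIT_HYPERCALL,
 *		.args = { 1ULL << KVM_HC_MAP_GPA_RANGE },
 *	};
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);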
*/ if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) { - ret = TDVMCALL_STATUS_INVALID_OPERAND; + ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; goto error; } @@ -1449,20 +1451,85 @@ error: return 1; } +static int tdx_complete_get_td_vm_call_info(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + + tdvmcall_set_return_code(vcpu, vcpu->run->tdx.get_tdvmcall_info.ret); + + /* + * For now, there is no TDVMCALL beyond GHCI base API supported by KVM + * directly without the support from userspace, just set the value + * returned from userspace. + */ + tdx->vp_enter_args.r11 = vcpu->run->tdx.get_tdvmcall_info.r11; + tdx->vp_enter_args.r12 = vcpu->run->tdx.get_tdvmcall_info.r12; + tdx->vp_enter_args.r13 = vcpu->run->tdx.get_tdvmcall_info.r13; + tdx->vp_enter_args.r14 = vcpu->run->tdx.get_tdvmcall_info.r14; + + return 1; +} + static int tdx_get_td_vm_call_info(struct kvm_vcpu *vcpu) { struct vcpu_tdx *tdx = to_tdx(vcpu); - if (tdx->vp_enter_args.r12) - tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); - else { + switch (tdx->vp_enter_args.r12) { + case 0: tdx->vp_enter_args.r11 = 0; + tdx->vp_enter_args.r12 = 0; tdx->vp_enter_args.r13 = 0; tdx->vp_enter_args.r14 = 0; + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS); + return 1; + case 1: + vcpu->run->tdx.get_tdvmcall_info.leaf = tdx->vp_enter_args.r12; + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_GET_TD_VM_CALL_INFO; + vcpu->run->tdx.get_tdvmcall_info.ret = TDVMCALL_STATUS_SUCCESS; + vcpu->run->tdx.get_tdvmcall_info.r11 = 0; + vcpu->run->tdx.get_tdvmcall_info.r12 = 0; + vcpu->run->tdx.get_tdvmcall_info.r13 = 0; + vcpu->run->tdx.get_tdvmcall_info.r14 = 0; + vcpu->arch.complete_userspace_io = tdx_complete_get_td_vm_call_info; + return 0; + default: + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; } +} + +static int tdx_complete_simple(struct kvm_vcpu *vcpu) +{ + tdvmcall_set_return_code(vcpu, vcpu->run->tdx.unknown.ret); return 1; } +static int tdx_get_quote(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + u64 gpa = tdx->vp_enter_args.r12; + u64 size = tdx->vp_enter_args.r13; + + /* The gpa of buffer must have shared bit set. 
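 * The buffer is filled in by the VMM, which can only access shared
 * (unencrypted) guest memory, so a private GPA here can never work;
 * reject it before exiting to userspace. The deferral itself uses the
 * usual idiom (sketch):
 *
 *	vcpu->run->exit_reason = KVM_EXIT_TDX;
 *	vcpu->arch.complete_userspace_io = tdx_complete_simple;
 *	return 0;	// 0 = exit to userspace; 1 = resume guest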
*/ + if (vt_is_tdx_private_gpa(vcpu->kvm, gpa)) { + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_GET_QUOTE; + vcpu->run->tdx.get_quote.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; + vcpu->run->tdx.get_quote.gpa = gpa & ~gfn_to_gpa(kvm_gfn_direct_bits(tdx->vcpu.kvm)); + vcpu->run->tdx.get_quote.size = size; + + vcpu->arch.complete_userspace_io = tdx_complete_simple; + + return 0; +} + static int handle_tdvmcall(struct kvm_vcpu *vcpu) { switch (tdvmcall_leaf(vcpu)) { @@ -1472,11 +1539,13 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) return tdx_report_fatal_error(vcpu); case TDVMCALL_GET_TD_VM_CALL_INFO: return tdx_get_td_vm_call_info(vcpu); + case TDVMCALL_GET_QUOTE: + return tdx_get_quote(vcpu); default: break; } - tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED); return 1; } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 607d6a2e66e2..8a34fff6ab2b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -30,7 +30,6 @@ #include <linux/initrd.h> #include <linux/cpumask.h> #include <linux/gfp.h> -#include <linux/execmem.h> #include <asm/asm.h> #include <asm/bios_ebda.h> @@ -749,8 +748,6 @@ void mark_rodata_ro(void) pr_info("Write protecting kernel text and read-only data: %luk\n", size >> 10); - execmem_cache_make_ro(); - kernel_set_to_readonly = 1; #ifdef CONFIG_CPA_DEBUG diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ee66fae9ebcc..fdb6cab524f0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -34,7 +34,6 @@ #include <linux/gfp.h> #include <linux/kcore.h> #include <linux/bootmem_info.h> -#include <linux/execmem.h> #include <asm/processor.h> #include <asm/bios_ebda.h> @@ -1392,8 +1391,6 @@ void mark_rodata_ro(void) (end - start) >> 10); set_memory_ro(start, (end - start) >> PAGE_SHIFT); - execmem_cache_make_ro(); - kernel_set_to_readonly = 1; /* diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 46edc11726b7..8834c76f91c9 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1257,6 +1257,9 @@ static int collapse_pmd_page(pmd_t *pmd, unsigned long addr, pgprot_t pgprot; int i = 0; + if (!cpu_feature_enabled(X86_FEATURE_PSE)) + return 0; + addr &= PMD_MASK; pte = pte_offset_kernel(pmd, addr); first = *pte; diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 190299834011..c0c40b67524e 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -98,6 +98,11 @@ void __init pti_check_boottime_disable(void) return; setup_force_cpu_cap(X86_FEATURE_PTI); + + if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { + pr_debug("PTI enabled, disabling INVLPGB\n"); + setup_clear_cpu_cap(X86_FEATURE_INVLPGB); + } } static int __init pti_parse_cmdline(char *arg) diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index a7c23f2a58c9..a2294c1649f6 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -192,7 +192,8 @@ out: int arch_resume_nosmt(void) { - int ret = 0; + int ret; + /* * We reached this while coming out of hibernation. This means * that SMT siblings are sleeping in hlt, as mwait is not safe @@ -206,18 +207,10 @@ int arch_resume_nosmt(void) * Called with hotplug disabled. 
*/ cpu_hotplug_enable(); - if (cpu_smt_control == CPU_SMT_DISABLED || - cpu_smt_control == CPU_SMT_FORCE_DISABLED) { - enum cpuhp_smt_control old = cpu_smt_control; - - ret = cpuhp_smt_enable(); - if (ret) - goto out; - ret = cpuhp_smt_disable(old); - if (ret) - goto out; - } -out: + + ret = arch_cpu_rescan_dead_smt_siblings(); + cpu_hotplug_disable(); + return ret; } diff --git a/arch/x86/um/ptrace.c b/arch/x86/um/ptrace.c index 3275870330fe..fae8aabad10f 100644 --- a/arch/x86/um/ptrace.c +++ b/arch/x86/um/ptrace.c @@ -161,7 +161,7 @@ static int fpregs_legacy_set(struct task_struct *target, from = kbuf; } - return um_fxsr_from_i387(fxsave, &buf); + return um_fxsr_from_i387(fxsave, from); } #endif diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 2457d13c3f9e..c7a9a087ccaf 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -75,8 +75,9 @@ static inline void seamcall_err_ret(u64 fn, u64 err, args->r9, args->r10, args->r11); } -static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func, - u64 fn, struct tdx_module_args *args) +static __always_inline int sc_retry_prerr(sc_func_t func, + sc_err_func_t err_func, + u64 fn, struct tdx_module_args *args) { u64 sret = sc_retry(func, fn, args); diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index cb1725c40e36..d62aa1c316fc 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -349,7 +349,7 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/block/blk-merge.c b/block/blk-merge.c index 3af1d284add5..70d704615be5 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -998,20 +998,20 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, if (!plug || rq_list_empty(&plug->mq_list)) return false; - rq_list_for_each(&plug->mq_list, rq) { - if (rq->q == q) { - if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == - BIO_MERGE_OK) - return true; - break; - } + rq = plug->mq_list.tail; + if (rq->q == q) + return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == + BIO_MERGE_OK; + else if (!plug->multiple_queues) + return false; - /* - * Only keep iterating plug list for merges if we have multiple - * queues - */ - if (!plug->multiple_queues) - break; + rq_list_for_each(&plug->mq_list, rq) { + if (rq->q != q) + continue; + if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == + BIO_MERGE_OK) + return true; + break; } return false; } diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 8f15d1aa6eb8..351d659280e1 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1225,6 +1225,7 @@ void blk_zone_write_plug_bio_endio(struct bio *bio) if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) { bio->bi_opf &= ~REQ_OP_MASK; bio->bi_opf |= REQ_OP_ZONE_APPEND; + bio_clear_flag(bio, BIO_EMULATES_ZONE_APPEND); } /* @@ -1306,7 +1307,6 @@ again: spin_unlock_irqrestore(&zwplug->lock, flags); bdev = bio->bi_bdev; - submit_bio_noacct_nocheck(bio); /* * blk-mq devices will reuse the extra reference on the request queue @@ -1314,8 +1314,12 @@ again: * path for BIO-based devices will not do that. So drop this extra * reference here. 
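 *
 * (Aside on the blk_attempt_plug_merge() change above: the rewrite
 * checks the tail of the plug list first, presumably because a bio
 * most often merges with the most recently plugged request, and only
 * walks the full list when several queues are plugged:
 *
 *	rq = plug->mq_list.tail;
 *	if (rq->q == q)
 *		return blk_attempt_bio_merge(...) == BIO_MERGE_OK;
 *	if (!plug->multiple_queues)
 *		return false;
 *	// slow path: scan for a request on this queue
 * )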
*/ - if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) + if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) { + bdev->bd_disk->fops->submit_bio(bio); blk_queue_exit(bdev->bd_disk->queue); + } else { + blk_mq_submit_bio(bio); + } put_zwplug: /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */ diff --git a/crypto/Kconfig b/crypto/Kconfig index e9fee7818e27..e1cfd0d4cc8f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -176,16 +176,33 @@ config CRYPTO_USER config CRYPTO_SELFTESTS bool "Enable cryptographic self-tests" - depends on DEBUG_KERNEL + depends on EXPERT help Enable the cryptographic self-tests. The cryptographic self-tests run at boot time, or at algorithm registration time if algorithms are dynamically loaded later. - This is primarily intended for developer use. It should not be - enabled in production kernels, unless you are trying to use these - tests to fulfill a FIPS testing requirement. + There are two main use cases for these tests: + + - Development and pre-release testing. In this case, also enable + CRYPTO_SELFTESTS_FULL to get the full set of tests. All crypto code + in the kernel is expected to pass the full set of tests. + + - Production kernels, to help prevent buggy drivers from being used + and/or meet FIPS 140-3 pre-operational testing requirements. In + this case, enable CRYPTO_SELFTESTS but not CRYPTO_SELFTESTS_FULL. + +config CRYPTO_SELFTESTS_FULL + bool "Enable the full set of cryptographic self-tests" + depends on CRYPTO_SELFTESTS + help + Enable the full set of cryptographic self-tests for each algorithm. + + The full set of tests should be enabled for development and + pre-release testing, but not in production kernels. + + All crypto code in the kernel is expected to pass the full tests. config CRYPTO_NULL tristate "Null algorithms" diff --git a/crypto/ahash.c b/crypto/ahash.c index e10bc2659ae4..bc84a07c924c 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -600,12 +600,14 @@ static void ahash_def_finup_done2(void *data, int err) static int ahash_def_finup_finish1(struct ahash_request *req, int err) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + if (err) goto out; req->base.complete = ahash_def_finup_done2; - err = crypto_ahash_final(req); + err = crypto_ahash_alg(tfm)->final(req); if (err == -EINPROGRESS || err == -EBUSY) return err; diff --git a/crypto/hkdf.c b/crypto/hkdf.c index f24c2a8d4df9..82d1b32ca6ce 100644 --- a/crypto/hkdf.c +++ b/crypto/hkdf.c @@ -566,7 +566,7 @@ static int __init crypto_hkdf_module_init(void) static void __exit crypto_hkdf_module_exit(void) {} -module_init(crypto_hkdf_module_init); +late_initcall(crypto_hkdf_module_init); module_exit(crypto_hkdf_module_exit); MODULE_LICENSE("GPL"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 72005074a5c2..32f753d6c430 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -45,6 +45,7 @@ static bool notests; module_param(notests, bool, 0644); MODULE_PARM_DESC(notests, "disable all crypto self-tests"); +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL static bool noslowtests; module_param(noslowtests, bool, 0644); MODULE_PARM_DESC(noslowtests, "disable slow crypto self-tests"); @@ -52,6 +53,10 @@ MODULE_PARM_DESC(noslowtests, "disable slow crypto self-tests"); static unsigned int fuzz_iterations = 100; module_param(fuzz_iterations, uint, 0644); MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations"); +#else +#define noslowtests 1 +#define fuzz_iterations 0 +#endif #ifndef CONFIG_CRYPTO_SELFTESTS @@ -319,9 +324,9 @@ struct testvec_config { /* * The following 
are the lists of testvec_configs to test for each algorithm - * type when the fast crypto self-tests are enabled. They aim to provide good - * test coverage, while keeping the test time much shorter than the full tests - * so that the fast tests can be used to fulfill FIPS 140 testing requirements. + * type when the "fast" crypto self-tests are enabled. They aim to provide good + * test coverage, while keeping the test time much shorter than the "full" tests + * so that the "fast" tests can be enabled in a wider range of circumstances. */ /* Configs for skciphers and aeads */ @@ -1183,14 +1188,18 @@ static void generate_random_testvec_config(struct rnd_state *rng, static void crypto_disable_simd_for_test(void) { +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL migrate_disable(); __this_cpu_write(crypto_simd_disabled_for_test, true); +#endif } static void crypto_reenable_simd_for_test(void) { +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL __this_cpu_write(crypto_simd_disabled_for_test, false); migrate_enable(); +#endif } /* diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c index dc3a072ce3b6..f28a060a8810 100644 --- a/drivers/accel/amdxdna/aie2_psp.c +++ b/drivers/accel/amdxdna/aie2_psp.c @@ -126,8 +126,8 @@ struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config * psp->ddev = ddev; memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs)); - psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN; - psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz, GFP_KERNEL); + psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN); + psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL); if (!psp->fw_buffer) { drm_err(ddev, "no memory for fw buffer"); return NULL; diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 6f8bbe1247a5..c9a0bcaba2e4 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -33,7 +33,7 @@ static DEFINE_MUTEX(isolated_cpus_lock); static DEFINE_MUTEX(round_robin_lock); -static unsigned long power_saving_mwait_eax; +static unsigned int power_saving_mwait_eax; static unsigned char tsc_detected_unstable; static unsigned char tsc_marked_unstable; diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c index c8f37f4e6626..fef6fb29ece4 100644 --- a/drivers/acpi/acpica/dsmethod.c +++ b/drivers/acpi/acpica/dsmethod.c @@ -483,6 +483,13 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread, return_ACPI_STATUS(AE_NULL_OBJECT); } + if (this_walk_state->num_operands < obj_desc->method.param_count) { + ACPI_ERROR((AE_INFO, "Missing argument for method [%4.4s]", + acpi_ut_get_node_name(method_node))); + + return_ACPI_STATUS(AE_AML_UNINITIALIZED_ARG); + } + /* Init for new method, possibly wait on method mutex */ status = diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index fea11a35eea3..9b041415a9d0 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -883,19 +883,16 @@ static int __init einj_init(void) } einj_dev = faux_device_create("acpi-einj", NULL, &einj_device_ops); - if (!einj_dev) - return -ENODEV; - einj_initialized = true; + if (einj_dev) + einj_initialized = true; return 0; } static void __exit einj_exit(void) { - if (einj_initialized) - faux_device_destroy(einj_dev); - + faux_device_destroy(einj_dev); } module_init(einj_init); diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index a9ae2fd62863..6b649031808f 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -476,7 
+476,7 @@ bool cppc_allow_fast_switch(void) struct cpc_desc *cpc_ptr; int cpu; - for_each_possible_cpu(cpu) { + for_each_present_cpu(cpu) { cpc_ptr = per_cpu(cpc_desc_ptr, cpu); desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF]; if (!CPC_IN_SYSTEM_MEMORY(desired_reg) && diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 6f4203716b53..75c7db8b156a 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -23,8 +23,10 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/list.h> +#include <linux/printk.h> #include <linux/spinlock.h> #include <linux/slab.h> +#include <linux/string.h> #include <linux/suspend.h> #include <linux/acpi.h> #include <linux/dmi.h> @@ -2031,6 +2033,21 @@ void __init acpi_ec_ecdt_probe(void) goto out; } + if (!strstarts(ecdt_ptr->id, "\\")) { + /* + * The ECDT table on some MSI notebooks contains invalid data, together + * with an empty ID string (""). + * + * Section 5.2.15 of the ACPI specification requires the ID string to be + * a "fully qualified reference to the (...) embedded controller device", + * so this string always has to start with a backslash. + * + * By verifying this we can avoid such faulty ECDT tables in a safe way. + */ + pr_err(FW_BUG "Ignoring ECDT due to invalid ID string \"%s\"\n", ecdt_ptr->id); + goto out; + } + ec = acpi_ec_alloc(); if (!ec) goto out; diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 00910ccd7eda..e2781864fdce 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -175,6 +175,12 @@ bool processor_physically_present(acpi_handle handle); static inline void acpi_early_processor_control_setup(void) {} #endif +#ifdef CONFIG_ACPI_PROCESSOR_CSTATE +void acpi_idle_rescan_dead_smt_siblings(void); +#else +static inline void acpi_idle_rescan_dead_smt_siblings(void) {} +#endif + /* -------------------------------------------------------------------------- Embedded Controller -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 3b281bc1e73c..65e779be64ff 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -279,6 +279,9 @@ static int __init acpi_processor_driver_init(void) * after acpi_cppc_processor_probe() has been called for all online CPUs */ acpi_processor_init_invariance_cppc(); + + acpi_idle_rescan_dead_smt_siblings(); + return 0; err: driver_unregister(&acpi_processor_driver); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index e2febca2ec13..2c2dc559e0f8 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -24,6 +24,8 @@ #include <acpi/processor.h> #include <linux/context_tracking.h> +#include "internal.h" + /* * Include the apic definitions for x86 to have the APIC timer related defines * available also for UP (on SMP it gets magically included via linux/smp.h). 
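The internal.h hunk above uses the kernel's standard pattern for config-gated interfaces: declare the real function when the Kconfig option is enabled, and provide an empty static inline stub otherwise, so callers such as acpi_processor_driver_init() need no #ifdef guards of their own. A minimal, self-contained sketch of that pattern (CONFIG_FOO_FEATURE and foo_feature_rescan() are illustrative names, not part of the patch):

#include <stdio.h>

/* Define this to emulate the Kconfig option being enabled. */
#define CONFIG_FOO_FEATURE 1

#ifdef CONFIG_FOO_FEATURE
static void foo_feature_rescan(void)
{
	printf("real implementation ran\n");
}
#else
/* The stub compiles away entirely, so call sites stay #ifdef-free. */
static inline void foo_feature_rescan(void) {}
#endif

int main(void)
{
	foo_feature_rescan(); /* safe to call whether or not the option is set */
	return 0;
}

The real implementation in processor_idle.c additionally checks that acpi_idle is the registered cpuidle driver before triggering the rescan, as the next hunk shows.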
@@ -55,6 +57,12 @@ struct cpuidle_driver acpi_idle_driver = { }; #ifdef CONFIG_ACPI_PROCESSOR_CSTATE +void acpi_idle_rescan_dead_smt_siblings(void) +{ + if (cpuidle_get_driver() == &acpi_idle_driver) + arch_cpu_rescan_dead_smt_siblings(); +} + static DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate); diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 7d59c6c9185f..b1ab192d7a08 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -667,6 +667,13 @@ static const struct dmi_system_id irq1_edge_low_force_override[] = { }, }, { + /* MACHENIKE L16P/L16P */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MACHENIKE"), + DMI_MATCH(DMI_BOARD_NAME, "L16P"), + }, + }, + { /* * TongFang GM5HG0A in case of the SKIKK Vanaheim relabel the * board-name is changed, so check OEM strings instead. Note diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 163ac909bd06..e5e5c2e81d09 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1410,8 +1410,15 @@ static bool ahci_broken_suspend(struct pci_dev *pdev) static bool ahci_broken_lpm(struct pci_dev *pdev) { + /* + * Platforms with LPM problems. + * If driver_data is NULL, there is no existing BIOS version with + * functioning LPM. + * If driver_data is non-NULL, then driver_data contains the DMI BIOS + * build date of the first BIOS version with functioning LPM (i.e. older + * BIOS versions have broken LPM). + */ static const struct dmi_system_id sysids[] = { - /* Various Lenovo 50 series have LPM issues with older BIOSen */ { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), @@ -1438,13 +1445,30 @@ static bool ahci_broken_lpm(struct pci_dev *pdev) DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W541"), }, + .driver_data = "20180409", /* 2.35 */ + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_VERSION, "ASUSPRO D840MB_M840SA"), + }, + /* 320 is broken, there is no known good version. */ + }, + { /* - * Note date based on release notes, 2.35 has been - * reported to be good, but I've been unable to get - * a hold of the reporter to get the DMI BIOS date. - * TODO: fix this. + * AMD 500 Series Chipset SATA Controller [1022:43eb] + * on this motherboard timeouts on ports 5 and 6 when + * LPM is enabled, at least with WDC WD20EFAX-68FB5N0 + * hard drives. LPM with the same drive works fine on + * all other ports on the same controller. */ - .driver_data = "20180310", /* 2.35 */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, + "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, + "ROG STRIX B550-F GAMING (WI-FI)"), + }, + /* 3621 is broken, there is no known good version. */ }, { } /* terminate list */ }; @@ -1455,6 +1479,9 @@ static bool ahci_broken_lpm(struct pci_dev *pdev) if (!dmi) return false; + if (!dmi->driver_data) + return true; + dmi_get_date(DMI_BIOS_DATE, &year, &month, &date); snprintf(buf, sizeof(buf), "%04d%02d%02d", year, month, date); diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index b7f0bf795521..f2140fc06ba0 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -514,15 +514,19 @@ unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, EXPORT_SYMBOL_GPL(ata_acpi_gtm_xfermask); /** - * ata_acpi_cbl_80wire - Check for 80 wire cable + * ata_acpi_cbl_pata_type - Return PATA cable type * @ap: Port to check - * @gtm: GTM data to use * - * Return 1 if the @gtm indicates the BIOS selected an 80wire mode. 
+ * Return ATA_CBL_PATA* according to the transfer mode selected by BIOS */ -int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm) +int ata_acpi_cbl_pata_type(struct ata_port *ap) { struct ata_device *dev; + int ret = ATA_CBL_PATA_UNK; + const struct ata_acpi_gtm *gtm = ata_acpi_init_gtm(ap); + + if (!gtm) + return ATA_CBL_PATA40; ata_for_each_dev(dev, &ap->link, ENABLED) { unsigned int xfer_mask, udma_mask; @@ -530,13 +534,17 @@ int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm) xfer_mask = ata_acpi_gtm_xfermask(dev, gtm); ata_unpack_xfermask(xfer_mask, NULL, NULL, &udma_mask); - if (udma_mask & ~ATA_UDMA_MASK_40C) - return 1; + ret = ATA_CBL_PATA40; + + if (udma_mask & ~ATA_UDMA_MASK_40C) { + ret = ATA_CBL_PATA80; + break; + } } - return 0; + return ret; } -EXPORT_SYMBOL_GPL(ata_acpi_cbl_80wire); +EXPORT_SYMBOL_GPL(ata_acpi_cbl_pata_type); static void ata_acpi_gtf_to_tf(struct ata_device *dev, const struct ata_acpi_gtf *gtf, diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c index b811efd2cc34..73e81e160c91 100644 --- a/drivers/ata/pata_cs5536.c +++ b/drivers/ata/pata_cs5536.c @@ -27,7 +27,7 @@ #include <scsi/scsi_host.h> #include <linux/dmi.h> -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86) && defined(CONFIG_X86_32) #include <asm/msr.h> static int use_msr; module_param_named(msr, use_msr, int, 0644); diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index fbf5f07ea357..f7a933eefe05 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -1298,7 +1298,7 @@ static int pata_macio_pci_attach(struct pci_dev *pdev, priv->dev = &pdev->dev; /* Get MMIO regions */ - if (pci_request_regions(pdev, "pata-macio")) { + if (pcim_request_all_regions(pdev, "pata-macio")) { dev_err(&pdev->dev, "Cannot obtain PCI resources\n"); return -EBUSY; diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 696b99720dcb..bb80e7800dcb 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -201,11 +201,9 @@ static int via_cable_detect(struct ata_port *ap) { two drives */ if (ata66 & (0x10100000 >> (16 * ap->port_no))) return ATA_CBL_PATA80; + /* Check with ACPI so we can spot BIOS reported SATA bridges */ - if (ata_acpi_init_gtm(ap) && - ata_acpi_cbl_80wire(ap, ata_acpi_init_gtm(ap))) - return ATA_CBL_PATA80; - return ATA_CBL_PATA40; + return ata_acpi_cbl_pata_type(ap); } static int via_pre_reset(struct ata_link *link, unsigned long deadline) @@ -368,7 +366,8 @@ static unsigned int via_mode_filter(struct ata_device *dev, unsigned int mask) } if (dev->class == ATA_DEV_ATAPI && - dmi_check_system(no_atapi_dma_dmi_table)) { + (dmi_check_system(no_atapi_dma_dmi_table) || + config->id == PCI_DEVICE_ID_VIA_6415)) { ata_dev_warn(dev, "controller locks up on ATAPI DMA, forcing PIO\n"); mask &= ATA_MASK_PIO; } diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index d4aa0f353b6c..eeae160c898d 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -288,7 +288,9 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) struct sk_buff *new_skb; int result = 0; - if (!skb->len) return 0; + if (skb->len < sizeof(struct atmtcp_hdr)) + goto done; + dev = vcc->dev_data; hdr = (struct atmtcp_hdr *) skb->data; if (hdr->length == ATMTCP_HDR_MAGIC) { diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 1206ab764ba9..f2e91b7d79f0 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -852,6 +852,8 @@ queue_skb(struct idt77252_dev *card, struct vc_map *vc, 
IDT77252_PRV_PADDR(skb) = dma_map_single(&card->pcidev->dev, skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&card->pcidev->dev, IDT77252_PRV_PADDR(skb))) + return -ENOMEM; error = -EINVAL; @@ -1857,6 +1859,8 @@ add_rx_skb(struct idt77252_dev *card, int queue, paddr = dma_map_single(&card->pcidev->dev, skb->data, skb_end_pointer(skb) - skb->data, DMA_FROM_DEVICE); + if (dma_mapping_error(&card->pcidev->dev, paddr)) + goto outpoolrm; IDT77252_PRV_PADDR(skb) = paddr; if (push_rx_skb(card, skb, queue)) { @@ -1871,6 +1875,7 @@ outunmap: dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb), skb_end_pointer(skb) - skb->data, DMA_FROM_DEVICE); +outpoolrm: handle = IDT77252_PRV_POOL(skb); card->sbpool[POOL_QUEUE(handle)].skb[POOL_INDEX(handle)] = NULL; diff --git a/drivers/base/faux.c b/drivers/base/faux.c index 9054d346bd7f..f5fbda0a9a44 100644 --- a/drivers/base/faux.c +++ b/drivers/base/faux.c @@ -86,6 +86,7 @@ static struct device_driver faux_driver = { .name = "faux_driver", .bus = &faux_bus_type, .probe_type = PROBE_FORCE_SYNCHRONOUS, + .suppress_bind_attrs = true, }; static void faux_device_release(struct device *dev) @@ -169,7 +170,7 @@ struct faux_device *faux_device_create_with_groups(const char *name, * successful is almost impossible to determine by the caller. */ if (!dev->driver) { - dev_err(dev, "probe did not succeed, tearing down the device\n"); + dev_dbg(dev, "probe did not succeed, tearing down the device\n"); faux_device_destroy(faux_dev); faux_dev = NULL; } diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 749ae1246f4c..d35caa3c69e1 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -80,6 +80,7 @@ enum { DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */ DEVFL_FREEING = (1<<7), /* set when device is being cleaned up */ DEVFL_FREED = (1<<8), /* device has been cleaned up */ + DEVFL_DEAD = (1<<9), /* device has timed out of aoe_deadsecs */ }; enum { diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 50cc90f6ab35..6298f8e271e3 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -754,7 +754,7 @@ rexmit_timer(struct timer_list *timer) utgts = count_targets(d, NULL); - if (d->flags & DEVFL_TKILL) { + if (d->flags & (DEVFL_TKILL | DEVFL_DEAD)) { spin_unlock_irqrestore(&d->lock, flags); return; } @@ -786,7 +786,8 @@ rexmit_timer(struct timer_list *timer) * to clean up. 
*/ list_splice(&flist, &d->factive[0]); - aoedev_downdev(d); + d->flags |= DEVFL_DEAD; + queue_work(aoe_wq, &d->work); goto out; } @@ -898,6 +899,9 @@ aoecmd_sleepwork(struct work_struct *work) { struct aoedev *d = container_of(work, struct aoedev, work); + if (d->flags & DEVFL_DEAD) + aoedev_downdev(d); + if (d->flags & DEVFL_GDALLOC) aoeblk_gdalloc(d); diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index bba05f0c5bbd..3a240755045b 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -198,9 +198,13 @@ aoedev_downdev(struct aoedev *d) { struct aoetgt *t, **tt, **te; struct list_head *head, *pos, *nx; + struct request *rq, *rqnext; int i; + unsigned long flags; - d->flags &= ~DEVFL_UP; + spin_lock_irqsave(&d->lock, flags); + d->flags &= ~(DEVFL_UP | DEVFL_DEAD); + spin_unlock_irqrestore(&d->lock, flags); /* clean out active and to-be-retransmitted buffers */ for (i = 0; i < NFACTIVE; i++) { @@ -223,6 +227,13 @@ aoedev_downdev(struct aoedev *d) /* clean out the in-process request (if any) */ aoe_failip(d); + /* clean out any queued block requests */ + list_for_each_entry_safe(rq, rqnext, &d->rq_list, queuelist) { + list_del_init(&rq->queuelist); + blk_mq_start_request(rq); + blk_mq_end_request(rq, BLK_STS_IOERR); + } + /* fast fail all pending I/O */ if (d->blkq) { /* UP is cleared, freeze+quiesce to ensure all are errored */ diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f8d136684109..500840e4a74e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1248,12 +1248,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_flags &= ~LOOP_SET_STATUS_CLEARABLE_FLAGS; lo->lo_flags |= (info->lo_flags & LOOP_SET_STATUS_SETTABLE_FLAGS); - if (size_changed) { - loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, - lo->lo_backing_file); - loop_set_size(lo, new_size); - } - /* update the direct I/O flag if lo_offset changed */ loop_update_dio(lo); @@ -1261,6 +1255,11 @@ out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue, memflags); if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); + if (!err && size_changed) { + loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, - lo->lo_backing_file); + loop_set_size(lo, new_size); + } out_unlock: mutex_unlock(&lo->lo_mutex); if (partscan) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c637ea010d34..d36f44f5ee80 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2825,6 +2825,9 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) if (copy_from_user(&info, argp, sizeof(info))) return -EFAULT; + if (info.queue_depth > UBLK_MAX_QUEUE_DEPTH || info.nr_hw_queues > UBLK_MAX_NR_QUEUES) + return -EINVAL; + if (capable(CAP_SYS_ADMIN)) info.flags &= ~UBLK_F_UNPRIVILEGED_DEV; else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV)) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 50fe17f1e1d1..e1c688dd2d45 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -396,8 +396,13 @@ static int btintel_pcie_submit_rx(struct btintel_pcie_data *data) static int btintel_pcie_start_rx(struct btintel_pcie_data *data) { int i, ret; + struct rxq *rxq = &data->rxq; + + /* Post (BTINTEL_PCIE_RX_DESCS_COUNT - 3) buffers to work around the + * hardware issue that leads to a race condition in the firmware.
+ */ - for (i = 0; i < BTINTEL_PCIE_RX_MAX_QUEUE; i++) { + for (i = 0; i < rxq->count - 3; i++) { ret = btintel_pcie_submit_rx(data); if (ret) return ret; @@ -1782,8 +1787,8 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) * + size of index * Number of queues(2) * type of index array(4) * + size of context information */ - total = (sizeof(struct tfd) + sizeof(struct urbd0) + sizeof(struct frbd) - + sizeof(struct urbd1)) * BTINTEL_DESCS_COUNT; + total = (sizeof(struct tfd) + sizeof(struct urbd0)) * BTINTEL_PCIE_TX_DESCS_COUNT; + total += (sizeof(struct frbd) + sizeof(struct urbd1)) * BTINTEL_PCIE_RX_DESCS_COUNT; /* Add the sum of size of index array and size of ci struct */ total += (sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 4) + sizeof(struct ctx_info); @@ -1808,36 +1813,36 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) data->dma_v_addr = v_addr; /* Setup descriptor count */ - data->txq.count = BTINTEL_DESCS_COUNT; - data->rxq.count = BTINTEL_DESCS_COUNT; + data->txq.count = BTINTEL_PCIE_TX_DESCS_COUNT; + data->rxq.count = BTINTEL_PCIE_RX_DESCS_COUNT; /* Setup tfds */ data->txq.tfds_p_addr = p_addr; data->txq.tfds = v_addr; - p_addr += (sizeof(struct tfd) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct tfd) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT); + v_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT); /* Setup urbd0 */ data->txq.urbd0s_p_addr = p_addr; data->txq.urbd0s = v_addr; - p_addr += (sizeof(struct urbd0) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct urbd0) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT); + v_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT); /* Setup FRBD*/ data->rxq.frbds_p_addr = p_addr; data->rxq.frbds = v_addr; - p_addr += (sizeof(struct frbd) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct frbd) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT); + v_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT); /* Setup urbd1 */ data->rxq.urbd1s_p_addr = p_addr; data->rxq.urbd1s = v_addr; - p_addr += (sizeof(struct urbd1) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct urbd1) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT); + v_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT); /* Setup data buffers for txq */ err = btintel_pcie_setup_txq_bufs(data, &data->txq); @@ -2028,6 +2033,28 @@ static void btintel_pcie_release_hdev(struct btintel_pcie_data *data) data->hdev = NULL; } +static void btintel_pcie_disable_interrupts(struct btintel_pcie_data *data) +{ + spin_lock(&data->irq_lock); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_FH_INT_MASK, data->fh_init_mask); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_HW_INT_MASK, data->hw_init_mask); + spin_unlock(&data->irq_lock); +} + +static void btintel_pcie_enable_interrupts(struct btintel_pcie_data *data) +{ + spin_lock(&data->irq_lock); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_FH_INT_MASK, ~data->fh_init_mask); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_HW_INT_MASK, ~data->hw_init_mask); + spin_unlock(&data->irq_lock); +} + +static void btintel_pcie_synchronize_irqs(struct btintel_pcie_data *data) +{ + for (int i = 0; i < data->alloc_vecs; i++) + synchronize_irq(data->msix_entries[i].vector); +} + static int btintel_pcie_setup_internal(struct hci_dev *hdev) { struct btintel_pcie_data *data = hci_get_drvdata(hdev); @@ -2147,6 +2174,8 @@ static int 
btintel_pcie_setup(struct hci_dev *hdev) bt_dev_err(hdev, "Firmware download retry count: %d", fw_dl_retry); btintel_pcie_dump_debug_registers(hdev); + btintel_pcie_disable_interrupts(data); + btintel_pcie_synchronize_irqs(data); err = btintel_pcie_reset_bt(data); if (err) { bt_dev_err(hdev, "Failed to do shr reset: %d", err); @@ -2154,6 +2183,7 @@ static int btintel_pcie_setup(struct hci_dev *hdev) } usleep_range(10000, 12000); btintel_pcie_reset_ia(data); + btintel_pcie_enable_interrupts(data); btintel_pcie_config_msix(data); err = btintel_pcie_enable_bt(data); if (err) { @@ -2286,6 +2316,12 @@ static void btintel_pcie_remove(struct pci_dev *pdev) data = pci_get_drvdata(pdev); + btintel_pcie_disable_interrupts(data); + + btintel_pcie_synchronize_irqs(data); + + flush_work(&data->rx_work); + btintel_pcie_reset_bt(data); for (int i = 0; i < data->alloc_vecs; i++) { struct msix_entry *msix_entry; @@ -2298,8 +2334,6 @@ static void btintel_pcie_remove(struct pci_dev *pdev) btintel_pcie_release_hdev(data); - flush_work(&data->rx_work); - destroy_workqueue(data->workqueue); btintel_pcie_free(data); diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index 21b964b15c1c..7dad4523236c 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -154,8 +154,11 @@ enum msix_mbox_int_causes { /* Default interrupt timeout in msec */ #define BTINTEL_DEFAULT_INTR_TIMEOUT_MS 3000 -/* The number of descriptors in TX/RX queues */ -#define BTINTEL_DESCS_COUNT 16 +/* The number of descriptors in TX queues */ +#define BTINTEL_PCIE_TX_DESCS_COUNT 32 + +/* The number of descriptors in RX queues */ +#define BTINTEL_PCIE_RX_DESCS_COUNT 64 /* Number of Queue for TX and RX * It indicates the index of the IA(Index Array) @@ -177,9 +180,6 @@ enum { /* Doorbell vector for TFD */ #define BTINTEL_PCIE_TX_DB_VEC 0 -/* Number of pending RX requests for downlink */ -#define BTINTEL_PCIE_RX_MAX_QUEUE 6 - /* Doorbell vector for FRBD */ #define BTINTEL_PCIE_RX_DB_VEC 513 diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 5fe5879881f5..3ec0be496820 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2392,10 +2392,17 @@ static int qca_serdev_probe(struct serdev_device *serdev) */ qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, "bluetooth"); - if (IS_ERR(qcadev->bt_power->pwrseq)) - return PTR_ERR(qcadev->bt_power->pwrseq); - break; + /* + * Some modules have BT_EN enabled via a hardware pull-up, + * meaning it is not defined in the DTS and is not controlled + * through the power sequence. In such cases, fall through + * to follow the legacy flow. + */ + if (IS_ERR(qcadev->bt_power->pwrseq)) + qcadev->bt_power->pwrseq = NULL; + else + break; } fallthrough; case QCA_WCN3950: diff --git a/drivers/cpufreq/rcpufreq_dt.rs b/drivers/cpufreq/rcpufreq_dt.rs index 94ed81644fe1..43c87d0259b6 100644 --- a/drivers/cpufreq/rcpufreq_dt.rs +++ b/drivers/cpufreq/rcpufreq_dt.rs @@ -26,9 +26,9 @@ fn find_supply_name_exact(dev: &Device, name: &str) -> Option<CString> { } /// Finds supply name for the CPU from DT. -fn find_supply_names(dev: &Device, cpu: u32) -> Option<KVec<CString>> { +fn find_supply_names(dev: &Device, cpu: cpu::CpuId) -> Option<KVec<CString>> { // Try "cpu0" for older DTs, fallback to "cpu". 
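The rcpufreq_dt.rs hunk above and below stops passing CPU numbers around as bare u32 values and adopts the cpu::CpuId newtype, so a CPU id can no longer be silently confused with some other integer. C code can approximate the same type safety with single-member structs; a rough, self-contained sketch under that assumption (cpu_id, node_id, and cpu_supply_name are illustrative, not kernel types):

#include <stdio.h>

/* Single-member structs give C a cheap "newtype": the compiler rejects
 * accidental mixing of logically different integer ids. */
struct cpu_id { unsigned int raw; };
struct node_id { unsigned int raw; };

static const char *cpu_supply_name(struct cpu_id cpu)
{
	/* Mirrors the fallback above: older DTs name the supply "cpu0",
	 * newer ones use "cpu". */
	return cpu.raw == 0 ? "cpu0" : "cpu";
}

int main(void)
{
	struct cpu_id cpu0 = { .raw = 0 };

	printf("%s\n", cpu_supply_name(cpu0));
	/* cpu_supply_name((struct node_id){ .raw = 0 }); would not compile */
	return 0;
}

Passing a struct node_id where a struct cpu_id is expected fails at compile time, which is exactly the class of mix-up the newtype change prevents.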
- let name = (cpu == 0) + let name = (cpu.as_u32() == 0) .then(|| find_supply_name_exact(dev, "cpu0")) .flatten() .or_else(|| find_supply_name_exact(dev, "cpu"))?; diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 890ecac04dac..2bcf9ceca997 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -1118,7 +1118,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, * Catch exporters making buffers inaccessible even when * attachments preventing that exist. */ - WARN_ON_ONCE(ret == EBUSY); + WARN_ON_ONCE(ret == -EBUSY); if (ret) return ERR_PTR(ret); } diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index 7eee3eb47a8e..c9d0c68d2fcb 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -264,8 +264,7 @@ static int begin_cpu_udmabuf(struct dma_buf *buf, ubuf->sg = NULL; } } else { - dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents, - direction); + dma_sync_sgtable_for_cpu(dev, ubuf->sg, direction); } return ret; @@ -280,7 +279,7 @@ static int end_cpu_udmabuf(struct dma_buf *buf, if (!ubuf->sg) return -EINVAL; - dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction); + dma_sync_sgtable_for_device(dev, ubuf->sg, direction); return 0; } diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 58b1482a0fbb..b681c0663203 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3879,6 +3879,7 @@ static int per_family_init(struct amd64_pvt *pvt) break; case 0x70 ... 0x7f: pvt->ctl_name = "F19h_M70h"; + pvt->max_mcs = 4; pvt->flags.zn_regs_v2 = 1; break; case 0x90 ... 0x9f: diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index 1930dc00c791..1cb5c67e78ae 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -125,7 +125,7 @@ #define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) #define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) -static const struct res_config { +static struct res_config { bool machine_check; /* The number of present memory controllers. 
*/ int num_imc; @@ -479,7 +479,7 @@ static u64 rpl_p_err_addr(u64 ecclog) return ECC_ERROR_LOG_ADDR45(ecclog); } -static const struct res_config ehl_cfg = { +static struct res_config ehl_cfg = { .num_imc = 1, .imc_base = 0x5000, .ibecc_base = 0xdc00, @@ -489,7 +489,7 @@ static const struct res_config ehl_cfg = { .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, }; -static const struct res_config icl_cfg = { +static struct res_config icl_cfg = { .num_imc = 1, .imc_base = 0x5000, .ibecc_base = 0xd800, @@ -499,7 +499,7 @@ static const struct res_config icl_cfg = { .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, }; -static const struct res_config tgl_cfg = { +static struct res_config tgl_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0x5000, @@ -513,7 +513,7 @@ static const struct res_config tgl_cfg = { .err_addr_to_imc_addr = tgl_err_addr_to_imc_addr, }; -static const struct res_config adl_cfg = { +static struct res_config adl_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -524,7 +524,7 @@ static const struct res_config adl_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct res_config adl_n_cfg = { +static struct res_config adl_n_cfg = { .machine_check = true, .num_imc = 1, .imc_base = 0xd800, @@ -535,7 +535,7 @@ static const struct res_config adl_n_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct res_config rpl_p_cfg = { +static struct res_config rpl_p_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -547,7 +547,7 @@ static const struct res_config rpl_p_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct res_config mtl_ps_cfg = { +static struct res_config mtl_ps_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -558,7 +558,7 @@ static const struct res_config mtl_ps_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct res_config mtl_p_cfg = { +static struct res_config mtl_p_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -569,7 +569,7 @@ static const struct res_config mtl_p_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct pci_device_id igen6_pci_tbl[] = { +static struct pci_device_id igen6_pci_tbl[] = { { PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg }, { PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg }, { PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg }, @@ -1350,9 +1350,11 @@ static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar) return -ENODEV; } - if (lmc < res_cfg->num_imc) + if (lmc < res_cfg->num_imc) { igen6_printk(KERN_WARNING, "Expected %d mcs, but only %d detected.", res_cfg->num_imc, lmc); + res_cfg->num_imc = lmc; + } return 0; diff --git a/drivers/gpio/gpio-loongson-64bit.c b/drivers/gpio/gpio-loongson-64bit.c index 26227669f026..70a01c5b8ad1 100644 --- a/drivers/gpio/gpio-loongson-64bit.c +++ b/drivers/gpio/gpio-loongson-64bit.c @@ -268,7 +268,7 @@ static const struct loongson_gpio_chip_data loongson_gpio_ls7a2000_data0 = { /* LS7A2000 ACPI GPIO */ static const struct loongson_gpio_chip_data loongson_gpio_ls7a2000_data1 = { .label = "ls7a2000_gpio", - .mode = BYTE_CTRL_MODE, + .mode = BIT_CTRL_MODE, .conf_offset = 0x4, .in_offset = 0x8, .out_offset = 0x0, diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index 10ea71273c89..9875e34bde72 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -190,7 +190,9 @@ static int mlxbf3_gpio_probe(struct 
platform_device *pdev) struct mlxbf3_gpio_context *gs; struct gpio_irq_chip *girq; struct gpio_chip *gc; + char *colon_ptr; int ret, irq; + long num; gs = devm_kzalloc(dev, sizeof(*gs), GFP_KERNEL); if (!gs) @@ -227,25 +229,39 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) gc->owner = THIS_MODULE; gc->add_pin_ranges = mlxbf3_gpio_add_pin_ranges; - irq = platform_get_irq(pdev, 0); - if (irq >= 0) { - girq = &gs->gc.irq; - gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip); - girq->default_type = IRQ_TYPE_NONE; - /* This will let us handle the parent IRQ in the driver */ - girq->num_parents = 0; - girq->parents = NULL; - girq->parent_handler = NULL; - girq->handler = handle_bad_irq; - - /* - * Directly request the irq here instead of passing - * a flow-handler because the irq is shared. - */ - ret = devm_request_irq(dev, irq, mlxbf3_gpio_irq_handler, - IRQF_SHARED, dev_name(dev), gs); - if (ret) - return dev_err_probe(dev, ret, "failed to request IRQ"); + colon_ptr = strchr(dev_name(dev), ':'); + if (!colon_ptr) { + dev_err(dev, "invalid device name format\n"); + return -EINVAL; + } + + ret = kstrtol(++colon_ptr, 16, &num); + if (ret) { + dev_err(dev, "invalid device instance\n"); + return ret; + } + + if (!num) { + irq = platform_get_irq(pdev, 0); + if (irq >= 0) { + girq = &gs->gc.irq; + gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip); + girq->default_type = IRQ_TYPE_NONE; + /* This will let us handle the parent IRQ in the driver */ + girq->num_parents = 0; + girq->parents = NULL; + girq->parent_handler = NULL; + girq->handler = handle_bad_irq; + + /* + * Directly request the irq here instead of passing + * a flow-handler because the irq is shared. + */ + ret = devm_request_irq(dev, irq, mlxbf3_gpio_irq_handler, + IRQF_SHARED, dev_name(dev), gs); + if (ret) + return dev_err_probe(dev, ret, "failed to request IRQ"); + } } platform_set_drvdata(pdev, gs); diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index b852e4997629..e80a96f39788 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -974,7 +974,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, int irq_base) IRQF_ONESHOT | IRQF_SHARED, dev_name(dev), chip); if (ret) - return dev_err_probe(dev, client->irq, "failed to request irq\n"); + return dev_err_probe(dev, ret, "failed to request irq\n"); return 0; } diff --git a/drivers/gpio/gpio-spacemit-k1.c b/drivers/gpio/gpio-spacemit-k1.c index f027066365ff..3cc75c701ec4 100644 --- a/drivers/gpio/gpio-spacemit-k1.c +++ b/drivers/gpio/gpio-spacemit-k1.c @@ -278,6 +278,7 @@ static const struct of_device_id spacemit_gpio_dt_ids[] = { { .compatible = "spacemit,k1-gpio" }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, spacemit_gpio_dt_ids); static struct platform_driver spacemit_gpio_driver = { .probe = spacemit_gpio_probe, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 8e626f50b362..f81608330a3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1902,7 +1902,7 @@ no_preempt: continue; } job = to_amdgpu_job(s_job); - if (preempted && (&job->hw_fence) == fence) + if (preempted && (&job->hw_fence.base) == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e1bab6a96cb6..78f8755996f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6019,16 +6019,12 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle) return ret; } -static int amdgpu_device_halt_activities(struct amdgpu_device *adev, - struct amdgpu_job *job, - struct amdgpu_reset_context *reset_context, - struct list_head *device_list, - struct amdgpu_hive_info *hive, - bool need_emergency_restart) +static int amdgpu_device_recovery_prepare(struct amdgpu_device *adev, + struct list_head *device_list, + struct amdgpu_hive_info *hive) { - struct list_head *device_list_handle = NULL; struct amdgpu_device *tmp_adev = NULL; - int i, r = 0; + int r; /* * Build list of devices to reset. @@ -6045,26 +6041,54 @@ static int amdgpu_device_halt_activities(struct amdgpu_device *adev, } if (!list_is_first(&adev->reset_list, device_list)) list_rotate_to_front(&adev->reset_list, device_list); - device_list_handle = device_list; } else { list_add_tail(&adev->reset_list, device_list); - device_list_handle = device_list; } if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) { - r = amdgpu_device_health_check(device_list_handle); + r = amdgpu_device_health_check(device_list); if (r) return r; } - /* We need to lock reset domain only once both for XGMI and single device */ - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, - reset_list); + return 0; +} + +static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev, + struct list_head *device_list) +{ + struct amdgpu_device *tmp_adev = NULL; + + if (list_empty(device_list)) + return; + tmp_adev = + list_first_entry(device_list, struct amdgpu_device, reset_list); amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); +} - /* block all schedulers and reset given job's ring */ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { +static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev, + struct list_head *device_list) +{ + struct amdgpu_device *tmp_adev = NULL; + if (list_empty(device_list)) + return; + tmp_adev = + list_first_entry(device_list, struct amdgpu_device, reset_list); + amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +} + +static int amdgpu_device_halt_activities( + struct amdgpu_device *adev, struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context, + struct list_head *device_list, struct amdgpu_hive_info *hive, + bool need_emergency_restart) +{ + struct amdgpu_device *tmp_adev = NULL; + int i, r = 0; + + /* block all schedulers and reset given job's ring */ + list_for_each_entry(tmp_adev, device_list, reset_list) { amdgpu_device_set_mp1_state(tmp_adev); /* @@ -6252,11 +6276,6 @@ static void amdgpu_device_gpu_resume(struct amdgpu_device *adev, amdgpu_ras_set_error_query_ready(tmp_adev, true); } - - tmp_adev = list_first_entry(device_list, struct amdgpu_device, - reset_list); - amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); - } @@ -6324,10 +6343,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, reset_context->hive = hive; INIT_LIST_HEAD(&device_list); + if (amdgpu_device_recovery_prepare(adev, &device_list, hive)) + goto end_reset; + + /* We need to lock reset domain only once both for XGMI and single device */ + amdgpu_device_recovery_get_reset_lock(adev, &device_list); + r = amdgpu_device_halt_activities(adev, job, reset_context, &device_list, hive, need_emergency_restart); if (r) - goto end_reset; + goto reset_unlock; if (need_emergency_restart) goto skip_sched_resume; @@ -6337,7 +6362,7 @@ int 
amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && dma_fence_is_signaled(&job->hw_fence)) { + if (job && dma_fence_is_signaled(&job->hw_fence.base)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; @@ -6345,13 +6370,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, r = amdgpu_device_asic_reset(adev, &device_list, reset_context); if (r) - goto end_reset; + goto reset_unlock; skip_hw_reset: r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled); if (r) - goto end_reset; + goto reset_unlock; skip_sched_resume: amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart); +reset_unlock: + amdgpu_device_recovery_put_reset_lock(adev, &device_list); end_reset: if (hive) { mutex_unlock(&hive->hive_lock); @@ -6763,6 +6790,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta memset(&reset_context, 0, sizeof(reset_context)); INIT_LIST_HEAD(&device_list); + amdgpu_device_recovery_prepare(adev, &device_list, hive); + amdgpu_device_recovery_get_reset_lock(adev, &device_list); r = amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list, hive, false); if (hive) { @@ -6880,8 +6909,8 @@ out: if (hive) { list_for_each_entry(tmp_adev, &device_list, reset_list) amdgpu_device_unset_mp1_state(tmp_adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); } + amdgpu_device_recovery_put_reset_lock(adev, &device_list); } if (hive) { @@ -6927,6 +6956,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev) amdgpu_device_sched_resume(&device_list, NULL, NULL); amdgpu_device_gpu_resume(adev, &device_list, false); + amdgpu_device_recovery_put_reset_lock(adev, &device_list); adev->pcie_reset_ctx.occurs_dpc = false; if (hive) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 8cecf25996ed..5fec808d7f54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -41,22 +41,6 @@ #include "amdgpu_trace.h" #include "amdgpu_reset.h" -/* - * Fences mark an event in the GPUs pipeline and are used - * for GPU/CPU synchronization. When the fence is written, - * it is expected that all buffers associated with that fence - * are no longer in use by the associated ring on the GPU and - * that the relevant GPU caches have been flushed. - */ - -struct amdgpu_fence { - struct dma_fence base; - - /* RB, DMA, etc. */ - struct amdgpu_ring *ring; - ktime_t start_timestamp; -}; - static struct kmem_cache *amdgpu_fence_slab; int amdgpu_fence_slab_init(void) @@ -151,12 +135,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); if (am_fence == NULL) return -ENOMEM; - fence = &am_fence->base; - am_fence->ring = ring; } else { /* take use of job-embedded fence */ - fence = &job->hw_fence; + am_fence = &job->hw_fence; } + fence = &am_fence->base; + am_fence->ring = ring; seq = ++ring->fence_drv.sync_seq; if (job && job->job_run_counter) { @@ -718,7 +702,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) * it right here or we won't be able to track them in fence_drv * and they will remain unsignaled during sa_bo free. 
*/ - job = container_of(old, struct amdgpu_job, hw_fence); + job = container_of(old, struct amdgpu_job, hw_fence.base); if (!job->base.s_fence && !dma_fence_is_signaled(old)) dma_fence_signal(old); RCU_INIT_POINTER(*ptr, NULL); @@ -780,7 +764,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); return (const char *)to_amdgpu_ring(job->base.sched)->name; } @@ -810,7 +794,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) */ static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); @@ -845,7 +829,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu) struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); /* free job if fence has a parent job */ - kfree(container_of(f, struct amdgpu_job, hw_fence)); + kfree(container_of(f, struct amdgpu_job, hw_fence.base)); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index acb21fc8b3ce..ddb9d3269357 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -272,8 +272,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) /* Check if any fences where initialized */ if (job->base.s_fence && job->base.s_fence->finished.ops) f = &job->base.s_fence->finished; - else if (job->hw_fence.ops) - f = &job->hw_fence; + else if (job->hw_fence.base.ops) + f = &job->hw_fence.base; else f = NULL; @@ -290,10 +290,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->explicit_sync); /* only put the hw fence if has embedded fence */ - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, @@ -322,10 +322,10 @@ void amdgpu_job_free(struct amdgpu_job *job) if (job->gang_submit != &job->base.s_fence->scheduled) dma_fence_put(job->gang_submit); - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index f2c049129661..931fed8892cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -48,7 +48,7 @@ struct amdgpu_job { struct drm_sched_job base; struct amdgpu_vm *vm; struct amdgpu_sync explicit_sync; - struct dma_fence hw_fence; + struct amdgpu_fence hw_fence; struct dma_fence *gang_submit; uint32_t preamble_status; uint32_t preemption_status; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e6f0b035e20b..c14f63cefe67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3522,8 +3522,12 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) uint8_t *ucode_array_start_addr; int err = 0; - err = 
amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_sos.bin", chip_name); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sos_kicker.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sos.bin", chip_name); if (err) goto out; @@ -3799,8 +3803,12 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) struct amdgpu_device *adev = psp->adev; int err; - err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_ta.bin", chip_name); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta_kicker.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta.bin", chip_name); if (err) return err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index b95b47110769..e1f25218943a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -127,6 +127,22 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; +/* + * Fences mark an event in the GPU's pipeline and are used + * for GPU/CPU synchronization. When the fence is written, + * it is expected that all buffers associated with that fence + * are no longer in use by the associated ring on the GPU and + * that the relevant GPU caches have been flushed. + */ + +struct amdgpu_fence { + struct dma_fence base; + + /* RB, DMA, etc. */ + struct amdgpu_ring *ring; + ktime_t start_timestamp; +}; + extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 6716ac281c49..9b54a1ece447 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -540,8 +540,10 @@ static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) case IP_VERSION(4, 4, 2): case IP_VERSION(4, 4, 4): case IP_VERSION(4, 4, 5): - /* For SDMA 4.x, use the existing DPM interface for backward compatibility */ - r = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); + /* For SDMA 4.x, use the existing DPM interface for backward compatibility; + * the logical instance ID needs to be converted to a physical instance ID before reset. + */ + r = amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, instance_id)); break; case IP_VERSION(5, 0, 0): case IP_VERSION(5, 0, 1): @@ -568,7 +570,7 @@ static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) /** * amdgpu_sdma_reset_engine - Reset a specific SDMA engine * @adev: Pointer to the AMDGPU device * @instance_id: Logical ID of the SDMA engine instance to reset * * Returns: 0 on success, or a negative error code on failure. 
*/ @@ -601,7 +603,7 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) /* Perform the SDMA reset for the specified instance */ ret = amdgpu_sdma_soft_reset(adev, instance_id); if (ret) { - dev_err(adev->dev, "Failed to reset SDMA instance %u\n", instance_id); + dev_err(adev->dev, "Failed to reset SDMA logical instance %u\n", instance_id); goto exit; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 2505c46a9c3d..eaddc441c51a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -30,6 +30,10 @@ #define AMDGPU_UCODE_NAME_MAX (128) +static const struct kicker_device kicker_device_list[] = { + {0x744B, 0x00}, +}; + static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr) { DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes)); @@ -1387,6 +1391,19 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl return NULL; } +bool amdgpu_is_kicker_fw(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) { + if (adev->pdev->device == kicker_device_list[i].device && + adev->pdev->revision == kicker_device_list[i].revision) + return true; + } + + return false; +} + void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len) { int maj, min, rev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 9e89c3487be5..6349aad6da35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -605,6 +605,11 @@ struct amdgpu_firmware { uint32_t pldm_version; }; +struct kicker_device{ + unsigned short device; + u8 revision; +}; + void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr); @@ -632,5 +637,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id); void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len); +bool amdgpu_is_kicker_fw(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index afd6d59164bf..ec9b84f92d46 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -85,6 +85,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); @@ -759,6 +760,10 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, AMDGPU_UCODE_REQUIRED, "amdgpu/gc_11_0_0_rlc_1.bin"); + else if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc_kicker.bin", ucode_prefix); else err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, AMDGPU_UCODE_REQUIRED, diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index cfa91d709d49..cc626036ed9c 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -32,6 +32,7 @@ #include "gc/gc_11_0_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); @@ -51,8 +52,12 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_imu.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index df612fd9cc50..ead616c11705 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -42,7 +42,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 9c169112a5e7..cef68df4c663 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -490,7 +490,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 doorbell_offset, doorbell; - u32 rb_cntl, ib_cntl; + u32 rb_cntl, ib_cntl, sdma_cntl; int i; for_each_inst(i, inst_mask) { @@ -502,6 +502,9 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); + sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL); + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0); + WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl); if (sdma[i]->use_doorbell) { doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); @@ -995,6 +998,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, /* set utc l1 enable flag always to 1 */ temp = RREG32_SDMA(i, regSDMA_CNTL); temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1); + WREG32_SDMA(i, regSDMA_CNTL, temp); if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) { /* enable context empty interrupt during initialization */ @@ -1670,7 +1674,7 @@ static bool sdma_v4_4_2_page_ring_is_guilty(struct amdgpu_ring *ring) static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - u32 id = GET_INST(SDMA0, ring->me); + u32 id = ring->me; int r; if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) @@ -1686,7 +1690,7 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) { struct 
amdgpu_device *adev = ring->adev; - u32 instance_id = GET_INST(SDMA0, ring->me); + u32 instance_id = ring->me; u32 inst_mask; uint64_t rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 9505ae96fbec..1813c3ed0aa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1399,6 +1399,7 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block) return r; for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index a6e612b4a892..23f97da62808 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1318,6 +1318,7 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) } for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 338cf43c45fe..cdefd7fcb0da 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -669,6 +669,9 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + /* resetting ring, fw should not check RB ring */ + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + /* Pause dpg */ vcn_v5_0_1_pause_dpg_mode(vinst, &state); @@ -681,7 +684,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); @@ -692,6 +695,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + /* resetting done, fw can check RB ring */ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 8fa6489b6f5d..505036968a77 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -240,7 +240,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, packet->bitfields2.engine_sel = engine_sel__mes_map_queues__compute_vi; - packet->bitfields2.gws_control_queue = q->gws ? 1 : 0; + packet->bitfields2.gws_control_queue = q->properties.is_gws ? 
1 : 0; packet->bitfields2.extended_engine_sel = extended_engine_sel__mes_map_queues__legacy_engine_sel; packet->bitfields2.queue_type = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index baa2374acdeb..4ec73f33535e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -510,6 +510,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.capability |= HSA_CAP_AQL_QUEUE_DOUBLE_MAP; + if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0) && + (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; + sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute", dev->node_props.max_engine_clk_fcompute); @@ -2008,8 +2012,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) if (!amdgpu_sriov_vf(dev->gpu->adev)) dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; - if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) - dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; } else { dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d3100f641ac6..bc4cd11bfc79 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4718,9 +4718,23 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, return 1; } +/* Rescale from [min..max] to [0..AMDGPU_MAX_BL_LEVEL] */ +static inline u32 scale_input_to_fw(int min, int max, u64 input) +{ + return DIV_ROUND_CLOSEST_ULL(input * AMDGPU_MAX_BL_LEVEL, max - min); +} + +/* Rescale from [0..AMDGPU_MAX_BL_LEVEL] to [min..max] */ +static inline u32 scale_fw_to_input(int min, int max, u64 input) +{ + return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), AMDGPU_MAX_BL_LEVEL); +} + static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps, - uint32_t *brightness) + unsigned int min, unsigned int max, + uint32_t *user_brightness) { + u32 brightness = scale_input_to_fw(min, max, *user_brightness); u8 prev_signal = 0, prev_lum = 0; int i = 0; @@ -4731,7 +4745,7 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap return; /* choose start to run less interpolation steps */ - if (caps->luminance_data[caps->data_points/2].input_signal > *brightness) + if (caps->luminance_data[caps->data_points/2].input_signal > brightness) i = caps->data_points/2; do { u8 signal = caps->luminance_data[i].input_signal; @@ -4742,17 +4756,18 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap * brightness < signal: interpolate between previous and current luminance numerator * brightness > signal: find next data point */ - if (*brightness > signal) { + if (brightness > signal) { prev_signal = signal; prev_lum = lum; i++; continue; } - if (*brightness < signal) + if (brightness < signal) lum = prev_lum + DIV_ROUND_CLOSEST((lum - prev_lum) * - (*brightness - prev_signal), + (brightness - prev_signal), signal - prev_signal); - *brightness = DIV_ROUND_CLOSEST(lum * *brightness, 101); + *user_brightness = scale_fw_to_input(min, max, + DIV_ROUND_CLOSEST(lum * brightness, 101)); return; } while (i < caps->data_points); } @@ -4765,11 +4780,10 @@ static u32 
convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c if (!get_brightness_range(caps, &min, &max)) return brightness; - convert_custom_brightness(caps, &brightness); + convert_custom_brightness(caps, min, max, &brightness); - // Rescale 0..255 to min..max - return min + DIV_ROUND_CLOSEST((max - min) * brightness, - AMDGPU_MAX_BL_LEVEL); + // Rescale 0..max to min..max + return min + DIV_ROUND_CLOSEST_ULL((u64)(max - min) * brightness, max); } static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *caps, @@ -4782,8 +4796,8 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap if (brightness < min) return 0; - // Rescale min..max to 0..255 - return DIV_ROUND_CLOSEST(AMDGPU_MAX_BL_LEVEL * (brightness - min), + // Rescale min..max to 0..max + return DIV_ROUND_CLOSEST_ULL((u64)max * (brightness - min), max - min); } @@ -4908,7 +4922,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) struct drm_device *drm = aconnector->base.dev; struct amdgpu_display_manager *dm = &drm_to_adev(drm)->dm; struct backlight_properties props = { 0 }; - struct amdgpu_dm_backlight_caps caps = { 0 }; + struct amdgpu_dm_backlight_caps *caps; char bl_name[16]; int min, max; @@ -4922,22 +4936,21 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) return; } - amdgpu_acpi_get_backlight_caps(&caps); - if (caps.caps_valid && get_brightness_range(&caps, &min, &max)) { + caps = &dm->backlight_caps[aconnector->bl_idx]; + if (get_brightness_range(caps, &min, &max)) { if (power_supply_is_system_supplied() > 0) - props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps.ac_level, 100); + props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->ac_level, 100); else - props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps.dc_level, 100); + props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->dc_level, 100); /* min is zero, so max needs to be adjusted */ props.max_brightness = max - min; drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max, - caps.ac_level, caps.dc_level); + caps->ac_level, caps->dc_level); } else - props.brightness = AMDGPU_MAX_BL_LEVEL; + props.brightness = props.max_brightness = AMDGPU_MAX_BL_LEVEL; - if (caps.data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) + if (caps->data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) drm_info(drm, "Using custom brightness curve\n"); - props.max_brightness = AMDGPU_MAX_BL_LEVEL; props.type = BACKLIGHT_RAW; snprintf(bl_name, sizeof(bl_name), "amdgpu_bl%d", diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 56d011a1323c..b34b5b52236d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -241,6 +241,7 @@ static bool create_links( DC_LOG_DC("BIOS object table - end"); /* Create a link for each usb4 dpia port */ + dc->lowest_dpia_link_index = MAX_LINKS; for (i = 0; i < dc->res_pool->usb4_dpia_count; i++) { struct link_init_data link_init_params = {0}; struct dc_link *link; @@ -253,6 +254,9 @@ static bool create_links( link = dc->link_srv->create_link(&link_init_params); if (link) { + if (dc->lowest_dpia_link_index > dc->link_count) + dc->lowest_dpia_link_index = dc->link_count; + dc->links[dc->link_count] = link; link->dc = dc; ++dc->link_count; @@ -6376,6 +6380,35 @@ unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context) else return 0; } +/** + 
*********************************************************************************************** + * dc_get_host_router_index: Get index of host router from a dpia link + * + * This function returns the host router index of the target link, if the target link is a DPIA link. + * + * @param [in] link: target link + * @param [out] host_router_index: host router index of the target link + * + * @return: true if the host router index is found and valid. + * + *********************************************************************************************** + */ +bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index) +{ + struct dc *dc = link->ctx->dc; + + if (link->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) + return false; + + if (link->link_index < dc->lowest_dpia_link_index) + return false; + + *host_router_index = (link->link_index - dc->lowest_dpia_link_index) / dc->caps.num_of_dpias_per_host_router; + if (*host_router_index < dc->caps.num_of_host_routers) + return true; + else + return false; +} bool dc_is_cursor_limit_pending(struct dc *dc) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 1d917be36fc4..f41073c0147e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -66,7 +66,8 @@ struct dmub_notification; #define MAX_STREAMS 6 #define MIN_VIEWPORT_SIZE 12 #define MAX_NUM_EDP 2 -#define MAX_HOST_ROUTERS_NUM 2 +#define MAX_HOST_ROUTERS_NUM 3 +#define MAX_DPIA_PER_HOST_ROUTER 2 /* Display Core Interfaces */ struct dc_versions { @@ -305,6 +306,8 @@ struct dc_caps { /* Conservative limit for DCC cases which require ODM4:1 to support*/ uint32_t dcc_plane_width_limit; struct dc_scl_caps scl_caps; + uint8_t num_of_host_routers; + uint8_t num_of_dpias_per_host_router; }; struct dc_bug_wa { @@ -1603,6 +1606,7 @@ struct dc { uint8_t link_count; struct dc_link *links[MAX_LINKS]; + uint8_t lowest_dpia_link_index; struct link_service *link_srv; struct dc_state *current_state; @@ -2595,6 +2599,8 @@ struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context); +bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index); + /* DSC Interfaces */ #include "dc_dsc.h" diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 0bad8304ccf6..d346f8ae1634 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -1172,8 +1172,8 @@ struct dc_lttpr_caps { union dp_128b_132b_supported_lttpr_link_rates supported_128b_132b_rates; union dp_alpm_lttpr_cap alpm; uint8_t aux_rd_interval[MAX_REPEATER_CNT - 1]; - uint8_t lttpr_ieee_oui[3]; - uint8_t lttpr_device_id[6]; + uint8_t lttpr_ieee_oui[3]; // Always read from closest LTTPR to host + uint8_t lttpr_device_id[6]; // Always read from closest LTTPR to host }; struct dc_dongle_dfp_cap_ext { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index d47cacfdb695..2aa6d44bb359 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -788,6 +788,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->pixel_format = dml2_420_10; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case
SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: plane->pixel_format = dml2_444_64; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index c4dad7164d31..5b62cd19d979 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -4685,7 +4685,10 @@ static void calculate_tdlut_setting( //the tdlut is fetched during the 2 row times of prefetch. if (p->setup_for_tdlut) { *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); - *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024) + *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + else + *p->tdlut_opt_time = 0; *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0); } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 5de775fd8fce..208630754c8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -953,6 +953,7 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p out->SourcePixelFormat[location] = dml_420_10; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: out->SourcePixelFormat[location] = dml_444_64; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index e8730cc40edb..38e17b1796e1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1225,7 +1225,7 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) return; if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { - if (!link->skip_implict_edp_power_control) + if (!link->skip_implict_edp_power_control && hws) hws->funcs.edp_backlight_control(link, false); link->dc->hwss.set_abm_immediate_disable(pipe_ctx); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index c814d957305a..a267f574b619 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1047,6 +1047,15 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (dc->caps.sequential_ono) { update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->stream_res.dsc->inst] = false; update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->stream_res.dsc->inst] = false; + + /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */ + if (!pipe_ctx->top_pipe && pipe_ctx->plane_res.hubp && + pipe_ctx->plane_res.hubp->inst != pipe_ctx->stream_res.dsc->inst) { + for (j = 0; j < dc->res_pool->pipe_count; ++j) { + update_state->pg_pipe_res_update[PG_HUBP][j] = false; + 
update_state->pg_pipe_res_update[PG_DPP][j] = false; + } + } } } @@ -1193,6 +1202,25 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context, update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true; if (dc->caps.sequential_ono) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + + if (new_pipe->stream_res.dsc && !new_pipe->top_pipe && + update_state->pg_pipe_res_update[PG_DSC][new_pipe->stream_res.dsc->inst]) { + update_state->pg_pipe_res_update[PG_HUBP][new_pipe->stream_res.dsc->inst] = true; + update_state->pg_pipe_res_update[PG_DPP][new_pipe->stream_res.dsc->inst] = true; + + /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */ + if (new_pipe->plane_res.hubp && + new_pipe->plane_res.hubp->inst != new_pipe->stream_res.dsc->inst) { + for (j = 0; j < dc->res_pool->pipe_count; ++j) { + update_state->pg_pipe_res_update[PG_HUBP][j] = true; + update_state->pg_pipe_res_update[PG_DPP][j] = true; + } + } + } + } + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { if (update_state->pg_pipe_res_update[PG_HUBP][i] && update_state->pg_pipe_res_update[PG_DPP][i]) { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index a5127c2d47ef..0f965380a9b4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -385,9 +385,15 @@ bool dp_is_128b_132b_signal(struct pipe_ctx *pipe_ctx) bool dp_is_lttpr_present(struct dc_link *link) { /* Some sink devices report invalid LTTPR revision, so don't validate against that cap */ - return (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) != 0 && + uint32_t lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + bool is_lttpr_present = (lttpr_count > 0 && link->dpcd_caps.lttpr_caps.max_lane_count > 0 && link->dpcd_caps.lttpr_caps.max_lane_count <= 4); + + if (lttpr_count > 0 && !is_lttpr_present) + DC_LOG_ERROR("LTTPR count is nonzero but invalid lane count reported. Assuming no LTTPR present.\n"); + + return is_lttpr_present; } /* in DP compliance test, DPR-120 may have @@ -1551,6 +1557,8 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) uint8_t lttpr_dpcd_data[10] = {0}; enum dc_status status; bool is_lttpr_present; + uint32_t lttpr_count; + uint32_t closest_lttpr_offset; /* Logic to determine LTTPR support*/ bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware; @@ -1602,20 +1610,22 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) lttpr_dpcd_data[DP_LTTPR_ALPM_CAPABILITIES - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + /* If this chip cap is set, at least one retimer must exist in the chain * Override count to 1 if we receive a known bad count (0 or an invalid value) */ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && - (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) { + lttpr_count == 0) { /* If you see this message consistently, either the host platform has FIXED_VS flag * incorrectly configured or the sink device is returning an invalid count. 
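* As background for the count parsing above (an illustrative sketch, not
* DC's actual helper): the raw DPCD PHY_REPEATER_CNT field is one-hot
* encoded, which is why dp_parse_lttpr_repeater_count() is needed at all.
* Assuming the standard DP encoding (0x80 = 1 repeater, 0x40 = 2,
* ... 0x01 = 8), a minimal decode looks like:
*
*   static unsigned int parse_repeater_count(unsigned char raw)
*   {
*           // reject 0 and any value that is not a power of two
*           if (raw == 0 || (raw & (raw - 1)))
*                   return 0;
*           return 8 - __builtin_ctz(raw);
*   }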
*/ DC_LOG_ERROR("lttpr_caps phy_repeater_cnt is 0x%x, forcing it to 0x80.", link->dpcd_caps.lttpr_caps.phy_repeater_cnt); link->dpcd_caps.lttpr_caps.phy_repeater_cnt = 0x80; + lttpr_count = 1; DC_LOG_DC("lttpr_caps forced phy_repeater_cnt = %d\n", link->dpcd_caps.lttpr_caps.phy_repeater_cnt); } - /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */ is_lttpr_present = dp_is_lttpr_present(link); DC_LOG_DC("is_lttpr_present = %d\n", is_lttpr_present); @@ -1623,11 +1633,25 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) if (is_lttpr_present) { CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: "); - core_link_read_dpcd(link, DP_LTTPR_IEEE_OUI, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui)); - CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), "LTTPR IEEE OUI: "); + // Identify closest LTTPR to determine if workarounds required for known embedded LTTPR + closest_lttpr_offset = dp_get_closest_lttpr_offset(lttpr_count); - core_link_read_dpcd(link, DP_LTTPR_DEVICE_ID, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id)); - CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), "LTTPR Device ID: "); + core_link_read_dpcd(link, (DP_LTTPR_IEEE_OUI + closest_lttpr_offset), + link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui)); + core_link_read_dpcd(link, (DP_LTTPR_DEVICE_ID + closest_lttpr_offset), + link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id)); + + if (lttpr_count > 1) { + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), + "Closest LTTPR To Host's IEEE OUI: "); + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), + "Closest LTTPR To Host's LTTPR Device ID: "); + } else { + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), + "LTTPR IEEE OUI: "); + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), + "LTTPR Device ID: "); + } } return status; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 7e0af5297dc4..51ca0b2959fc 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1954,6 +1954,9 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; dc->config.disable_hbr_audio_dp2 = true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index d96bc6cb73ad..8383e2e59be5 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -1885,6 +1885,9 @@ static bool dcn314_resource_construct( dc->caps.max_disp_clock_khz_at_vmin = 650000; + dc->caps.num_of_host_routers = 2; + 
dc->caps.num_of_dpias_per_host_router = 2; + /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 72c6cf047db0..e01aa2f2e13e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1894,6 +1894,9 @@ static bool dcn35_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. * It's expected for future ASIC to have equal or higher value, in order to diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 989a270f7dea..4ebe4e00a4f8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -1866,6 +1866,9 @@ static bool dcn351_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. * It's expected for future ASIC to have equal or higher value, in order to diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c index 48e1f234185f..db36b8f9ce65 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c @@ -1867,6 +1867,9 @@ static bool dcn36_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; +
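/*
 * Worked example for the two caps above (illustrative values, not from
 * the patch): with num_of_host_routers = 2 and
 * num_of_dpias_per_host_router = 2, and a hypothetical
 * lowest_dpia_link_index of 1, dc_get_host_router_index() computes
 * (link_index - 1) / 2, so links 1 and 2 land on host router 0 and
 * links 3 and 4 on host router 1; a result >= num_of_host_routers fails
 * the bounds check and the function returns false.
 */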
/* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. * It's expected for future ASIC to have equal or higher value, in order to diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index a7167668d189..1c7235935d14 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -58,6 +58,7 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_0.bin"); +MODULE_FIRMWARE("amdgpu/smu_13_0_0_kicker.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_7.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_10.bin"); @@ -92,7 +93,7 @@ const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16}; int smu_v13_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - char ucode_prefix[15]; + char ucode_prefix[30]; int err = 0; const struct smc_firmware_header_v1_0 *hdr; const struct common_firmware_header *header; @@ -103,8 +104,13 @@ int smu_v13_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); + + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 34547edf1ee3..87f2e5ee8790 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -159,7 +159,7 @@ bool malidp_format_mod_supported(struct drm_device *drm, } if (!fourcc_mod_is_vendor(modifier, ARM)) { - DRM_ERROR("Unknown modifier (not Arm)\n"); + DRM_DEBUG_KMS("Unknown modifier (not Arm)\n"); return false; } diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 1de832964e92..031980d8f3ab 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -29,7 +29,6 @@ */ #include <linux/delay.h> -#include <linux/export.h> #include <linux/pci.h> #include <drm/drm_atomic.h> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 76a3a3e517d8..71e2e6b9d713 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -35,6 +35,7 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) { struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job); + struct drm_gpu_scheduler *sched = sched_job->sched; struct etnaviv_gpu *gpu = submit->gpu; u32 dma_addr, primid = 0; int change; @@ -89,7 +90,9 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job return DRM_GPU_SCHED_STAT_NOMINAL; out_no_timeout: - list_add(&sched_job->list, &sched_job->sched->pending_list); + spin_lock(&sched->job_list_lock); + list_add(&sched_job->list, &sched->pending_list); + spin_unlock(&sched->job_list_lock); return DRM_GPU_SCHED_STAT_NOMINAL; } diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 346737f15fa9..21c1e10caf68 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1056,7 +1056,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, BXT_MIPI_TRANS_VACTIVE(port)); adjusted_mode->crtc_vtotal = intel_de_read(display, -
BXT_MIPI_TRANS_VTOTAL(port)) + 1; hactive = adjusted_mode->crtc_hdisplay; hfp = intel_de_read(display, MIPI_HFP_COUNT(display, port)); @@ -1260,7 +1260,7 @@ static void set_dsi_timings(struct intel_encoder *encoder, intel_de_write(display, BXT_MIPI_TRANS_VACTIVE(port), adjusted_mode->crtc_vdisplay); intel_de_write(display, BXT_MIPI_TRANS_VTOTAL(port), - adjusted_mode->crtc_vtotal); + adjusted_mode->crtc_vtotal - 1); } intel_de_write(display, MIPI_HACTIVE_AREA_COUNT(display, port), diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index e5a188ce3185..990bfaba3ce4 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -112,7 +112,7 @@ static u32 config_mask(const u64 config) { unsigned int bit = config_bit(config); - if (__builtin_constant_p(config)) + if (__builtin_constant_p(bit)) BUILD_BUG_ON(bit > BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1); @@ -121,7 +121,7 @@ static u32 config_mask(const u64 config) BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1); - return BIT(config_bit(config)); + return BIT(bit); } static bool is_engine_event(struct perf_event *event) diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 47136bbbe8c6..ab08d690d882 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -109,7 +109,7 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, venc_freq /= 2; dev_dbg(priv->dev, - "vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", + "phy:%lluHz vclk=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", phy_freq, vclk_freq, venc_freq, hdmi_freq, priv->venc.hdmi_use_enci); diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 3325580d885d..dfe0c28a0f05 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -110,10 +110,7 @@ #define HDMI_PLL_LOCK BIT(31) #define HDMI_PLL_LOCK_G12A (3 << 30) -#define PIXEL_FREQ_1000_1001(_freq) \ - DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) -#define PHY_FREQ_1000_1001(_freq) \ - (PIXEL_FREQ_1000_1001(DIV_ROUND_DOWN_ULL(_freq, 10ULL)) * 10) +#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) /* VID PLL Dividers */ enum { @@ -772,6 +769,36 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, pll_freq); } +static bool meson_vclk_freqs_are_matching_param(unsigned int idx, + unsigned long long phy_freq, + unsigned long long vclk_freq) +{ + DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n", + idx, params[idx].vclk_freq, + FREQ_1000_1001(params[idx].vclk_freq)); + DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", + idx, params[idx].phy_freq, + FREQ_1000_1001(params[idx].phy_freq)); + + /* Match strict frequency */ + if (phy_freq == params[idx].phy_freq && + vclk_freq == params[idx].vclk_freq) + return true; + + /* Match 1000/1001 variant: vclk deviation has to be less than 1kHz + * (drm EDID is defined in 1kHz steps, so everything smaller must be + * rounding error) and the PHY freq deviation has to be less than + * 10kHz (as the TMDS clock is 10 times the pixel clock, so anything + * smaller must be rounding error as well). 
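+ * A worked example of the scale involved (illustrative numbers, not
+ * from the patch): a 4k60 entry with vclk_freq = 594000000 Hz has the
+ * 1000/1001 variant 594000000 * 1000 / 1001 ~= 593406593 Hz, and its
+ * phy_freq = 5940000000 Hz maps to ~5934065934 Hz. A mode whose
+ * EDID-rounded vclk comes out at 593407000 Hz is off by 407 Hz
+ * (< 1000) and its 5934070000 Hz PHY clock is off by 4066 Hz
+ * (< 10000), so both checks below accept it as the 1000/1001 variant.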
+ */ + if (abs(vclk_freq - FREQ_1000_1001(params[idx].vclk_freq)) < 1000 && + abs(phy_freq - FREQ_1000_1001(params[idx].phy_freq)) < 10000) + return true; + + /* no match */ + return false; +} + enum drm_mode_status meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned long long phy_freq, @@ -790,19 +817,7 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d pixel_freq = %lluHz alt = %lluHz\n", - i, params[i].pixel_freq, - PIXEL_FREQ_1000_1001(params[i].pixel_freq)); - DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", - i, params[i].phy_freq, - PHY_FREQ_1000_1001(params[i].phy_freq)); - /* Match strict frequency */ - if (phy_freq == params[i].phy_freq && - vclk_freq == params[i].vclk_freq) - return MODE_OK; - /* Match 1000/1001 variant */ - if (phy_freq == PHY_FREQ_1000_1001(params[i].phy_freq) && - vclk_freq == PIXEL_FREQ_1000_1001(params[i].vclk_freq)) + if (meson_vclk_freqs_are_matching_param(i, phy_freq, vclk_freq)) return MODE_OK; } @@ -1075,10 +1090,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, } for (freq = 0 ; params[freq].pixel_freq ; ++freq) { - if ((phy_freq == params[freq].phy_freq || - phy_freq == PHY_FREQ_1000_1001(params[freq].phy_freq)) && - (vclk_freq == params[freq].vclk_freq || - vclk_freq == PIXEL_FREQ_1000_1001(params[freq].vclk_freq))) { + if (meson_vclk_freqs_are_matching_param(freq, phy_freq, + vclk_freq)) { if (vclk_freq != params[freq].vclk_freq) vic_alternate_clock = true; else diff --git a/drivers/gpu/drm/mgag200/mgag200_ddc.c b/drivers/gpu/drm/mgag200/mgag200_ddc.c index 6d81ea8931e8..c31673eaa554 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ddc.c +++ b/drivers/gpu/drm/mgag200/mgag200_ddc.c @@ -26,7 +26,6 @@ * Authors: Dave Airlie <airlied@redhat.com> */ -#include <linux/export.h> #include <linux/i2c-algo-bit.h> #include <linux/i2c.h> #include <linux/pci.h> diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c index 39641551eeb6..4280f71e472a 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c @@ -71,10 +71,6 @@ static int a2xx_gpummu_unmap(struct msm_mmu *mmu, uint64_t iova, size_t len) return 0; } -static void a2xx_gpummu_resume_translation(struct msm_mmu *mmu) -{ -} - static void a2xx_gpummu_destroy(struct msm_mmu *mmu) { struct a2xx_gpummu *gpummu = to_a2xx_gpummu(mmu); @@ -90,7 +86,6 @@ static const struct msm_mmu_funcs funcs = { .map = a2xx_gpummu_map, .unmap = a2xx_gpummu_unmap, .destroy = a2xx_gpummu_destroy, - .resume_translation = a2xx_gpummu_resume_translation, }; struct msm_mmu *a2xx_gpummu_new(struct device *dev, struct msm_gpu *gpu) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 650e5bac225f..60aef0796236 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -131,6 +131,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) { ring->cur_ctx_seqno = 0; a5xx_submit_in_rb(gpu, submit); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index bf3758f010f4..491fde0083a2 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -130,6 +130,20 @@ static void 
a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno - 1); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BOTH); + + /* Reset state used to synchronize BR and BV */ + OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1); + OUT_RING(ring, + CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS | + CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE | + CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER | + CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BR); } if (!sysprof) { @@ -212,6 +226,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + a6xx_set_pagetable(a6xx_gpu, ring, submit); get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0), @@ -335,6 +351,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + /* * Toggle concurrent binning for pagetable switch and set the thread to * BR since only it can execute the pagetable switch packets. diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index f5e1490d07c1..16e7ac444efd 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -137,9 +137,8 @@ err_disable_rpm: return NULL; } -static int find_chipid(struct device *dev, uint32_t *chipid) +static int find_chipid(struct device_node *node, uint32_t *chipid) { - struct device_node *node = dev->of_node; const char *compat; int ret; @@ -173,15 +172,36 @@ static int find_chipid(struct device *dev, uint32_t *chipid) /* and if that fails, fall back to legacy "qcom,chipid" property: */ ret = of_property_read_u32(node, "qcom,chipid", chipid); if (ret) { - DRM_DEV_ERROR(dev, "could not parse qcom,chipid: %d\n", ret); + DRM_ERROR("%pOF: could not parse qcom,chipid: %d\n", + node, ret); return ret; } - dev_warn(dev, "Using legacy qcom,chipid binding!\n"); + pr_warn("%pOF: Using legacy qcom,chipid binding!\n", node); return 0; } +bool adreno_has_gpu(struct device_node *node) +{ + const struct adreno_info *info; + uint32_t chip_id; + int ret; + + ret = find_chipid(node, &chip_id); + if (ret) + return false; + + info = adreno_info(chip_id); + if (!info) { + pr_warn("%pOF: Unknown GPU revision: %"ADRENO_CHIPID_FMT"\n", + node, ADRENO_CHIPID_ARGS(chip_id)); + return false; + } + + return true; +} + static int adreno_bind(struct device *dev, struct device *master, void *data) { static struct adreno_platform_config config = {}; @@ -191,19 +211,18 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) struct msm_gpu *gpu; int ret; - ret = find_chipid(dev, &config.chip_id); - if (ret) + ret = find_chipid(dev->of_node, &config.chip_id); + /* We shouldn't have gotten this far if we can't parse the chip_id */ + if (WARN_ON(ret)) return ret; dev->platform_data = &config; priv->gpu_pdev = to_platform_device(dev); info = adreno_info(config.chip_id); - if (!info) { - dev_warn(drm->dev, "Unknown GPU revision: %"ADRENO_CHIPID_FMT"\n", - ADRENO_CHIPID_ARGS(config.chip_id)); + /* We shouldn't have gotten this far if we don't recognize the GPU: */ + if (WARN_ON(!info)) return -ENXIO; - } config.info = info; diff --git 
a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 2348ffb35f7e..86bff915c3e7 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -259,24 +259,54 @@ u64 adreno_private_address_space_size(struct msm_gpu *gpu) return BIT(ttbr1_cfg->ias) - ADRENO_VM_START; } +void adreno_check_and_reenable_stall(struct adreno_gpu *adreno_gpu) +{ + struct msm_gpu *gpu = &adreno_gpu->base; + struct msm_drm_private *priv = gpu->dev->dev_private; + unsigned long flags; + + /* + * Wait until the cooldown period has passed and we would actually + * collect a crashdump to re-enable stall-on-fault. + */ + spin_lock_irqsave(&priv->fault_stall_lock, flags); + if (!priv->stall_enabled && + ktime_after(ktime_get(), priv->stall_reenable_time) && + !READ_ONCE(gpu->crashstate)) { + priv->stall_enabled = true; + + gpu->aspace->mmu->funcs->set_stall(gpu->aspace->mmu, true); + } + spin_unlock_irqrestore(&priv->fault_stall_lock, flags); +} + #define ARM_SMMU_FSR_TF BIT(1) #define ARM_SMMU_FSR_PF BIT(3) #define ARM_SMMU_FSR_EF BIT(4) +#define ARM_SMMU_FSR_SS BIT(30) int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, struct adreno_smmu_fault_info *info, const char *block, u32 scratch[4]) { + struct msm_drm_private *priv = gpu->dev->dev_private; const char *type = "UNKNOWN"; - bool do_devcoredump = info && !READ_ONCE(gpu->crashstate); + bool do_devcoredump = info && (info->fsr & ARM_SMMU_FSR_SS) && + !READ_ONCE(gpu->crashstate); + unsigned long irq_flags; /* - * If we aren't going to be resuming later from fault_worker, then do - * it now. + * In case there is a subsequent storm of pagefaults, disable + * stall-on-fault for at least half a second. */ - if (!do_devcoredump) { - gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu); + spin_lock_irqsave(&priv->fault_stall_lock, irq_flags); + if (priv->stall_enabled) { + priv->stall_enabled = false; + + gpu->aspace->mmu->funcs->set_stall(gpu->aspace->mmu, false); } + priv->stall_reenable_time = ktime_add_ms(ktime_get(), 500); + spin_unlock_irqrestore(&priv->fault_stall_lock, irq_flags); /* * Print a default message if we couldn't get the data from the @@ -304,16 +334,18 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, scratch[0], scratch[1], scratch[2], scratch[3]); if (do_devcoredump) { + struct msm_gpu_fault_info fault_info = {}; + /* Turn off the hangcheck timer to keep it from bothering us */ timer_delete(&gpu->hangcheck_timer); - gpu->fault_info.ttbr0 = info->ttbr0; - gpu->fault_info.iova = iova; - gpu->fault_info.flags = flags; - gpu->fault_info.type = type; - gpu->fault_info.block = block; + fault_info.ttbr0 = info->ttbr0; + fault_info.iova = iova; + fault_info.flags = flags; + fault_info.type = type; + fault_info.block = block; - kthread_queue_work(gpu->worker, &gpu->fault_work); + msm_gpu_fault_crashstate_capture(gpu, &fault_info); } return 0; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index a8f4bf416e64..bc063594a359 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -636,6 +636,8 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, struct adreno_smmu_fault_info *info, const char *block, u32 scratch[4]); +void adreno_check_and_reenable_stall(struct adreno_gpu *gpu); + int adreno_read_speedbin(struct device *dev, u32 *speedbin); /* diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index 8a618841e3ea..1c468ca5d692 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -94,17 +94,21 @@ static void drm_mode_to_intf_timing_params( timing->vsync_polarity = 0; } - /* for DP/EDP, Shift timings to align it to bottom right */ - if (phys_enc->hw_intf->cap->type == INTF_DP) { + timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent); + timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent); + + /* + * For DP/EDP, shift timings to align them to the bottom right. + * wide_bus_en is set for everything except SDM845, where the + * porch changes cause DisplayPort failures and HDMI tearing. + */ + if (phys_enc->hw_intf->cap->type == INTF_DP && timing->wide_bus_en) { timing->h_back_porch += timing->h_front_porch; timing->h_front_porch = 0; timing->v_back_porch += timing->v_front_porch; timing->v_front_porch = 0; } - timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent); - timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent); - /* * for DP, divide the horizontal parameters by 2 when * widebus is enabled diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 386c4669c831..a48e6db4f156 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -128,6 +128,11 @@ static const struct msm_dp_desc msm_dp_desc_sa8775p[] = { {} }; +static const struct msm_dp_desc msm_dp_desc_sdm845[] = { + { .io_start = 0x0ae90000, .id = MSM_DP_CONTROLLER_0 }, + {} +}; + static const struct msm_dp_desc msm_dp_desc_sc7180[] = { { .io_start = 0x0ae90000, .id = MSM_DP_CONTROLLER_0, .wide_bus_supported = true }, {} @@ -180,7 +185,7 @@ static const struct of_device_id msm_dp_dt_match[] = { { .compatible = "qcom,sc8180x-edp", .data = &msm_dp_desc_sc8180x }, { .compatible = "qcom,sc8280xp-dp", .data = &msm_dp_desc_sc8280xp }, { .compatible = "qcom,sc8280xp-edp", .data = &msm_dp_desc_sc8280xp }, - { .compatible = "qcom,sdm845-dp", .data = &msm_dp_desc_sc7180 }, + { .compatible = "qcom,sdm845-dp", .data = &msm_dp_desc_sdm845 }, { .compatible = "qcom,sm8350-dp", .data = &msm_dp_desc_sc7180 }, { .compatible = "qcom,sm8650-dp", .data = &msm_dp_desc_sm8650 }, { .compatible = "qcom,x1e80100-dp", .data = &msm_dp_desc_x1e80100 }, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c index 9812b4d69197..af2e30f3f842 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c @@ -704,6 +704,13 @@ static int dsi_pll_10nm_init(struct msm_dsi_phy *phy) /* TODO: Remove this when we have proper display handover support */ msm_dsi_phy_pll_save_state(phy); + /* + * Also store the proper vco_current_rate, because its value will be used in + * dsi_10nm_pll_restore_state(). + */ + if (!dsi_pll_10nm_vco_recalc_rate(&pll_10nm->clk_hw, VCO_REF_CLK_RATE)) + pll_10nm->vco_current_rate = pll_10nm->phy->cfg->min_pll_rate; + return 0; } diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 7ab607252d18..6af72162cda4 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -208,6 +208,35 @@ DEFINE_DEBUGFS_ATTRIBUTE(shrink_fops, shrink_get, shrink_set, "0x%08llx\n"); +/* + * Return the number of microseconds to wait until stall-on-fault is + * re-enabled.
If 0 then it is already enabled or will be re-enabled on the + * next submit (unless there's a leftover devcoredump). This is useful for + * kernel tests that intentionally produce a fault and check the devcoredump, so + * they can wait until the cooldown period is over. + */ + +static int +stall_reenable_time_get(void *data, u64 *val) +{ + struct msm_drm_private *priv = data; + unsigned long irq_flags; + + spin_lock_irqsave(&priv->fault_stall_lock, irq_flags); + + if (priv->stall_enabled) + *val = 0; + else + *val = max(ktime_us_delta(priv->stall_reenable_time, ktime_get()), 0); + + spin_unlock_irqrestore(&priv->fault_stall_lock, irq_flags); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(stall_reenable_time_fops, + stall_reenable_time_get, NULL, + "%lld\n"); static int msm_gem_show(struct seq_file *m, void *arg) { @@ -319,6 +348,9 @@ static void msm_debugfs_gpu_init(struct drm_minor *minor) debugfs_create_bool("disable_err_irq", 0600, minor->debugfs_root, &priv->disable_err_irq); + debugfs_create_file("stall_reenable_time_us", 0400, minor->debugfs_root, + priv, &stall_reenable_time_fops); + gpu_devfreq = debugfs_create_dir("devfreq", minor->debugfs_root); debugfs_create_bool("idle_clamp",0600, gpu_devfreq, diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index f316e6776f67..d007687c2446 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -245,6 +245,10 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) drm_gem_lru_init(&priv->lru.willneed, &priv->lru.lock); drm_gem_lru_init(&priv->lru.dontneed, &priv->lru.lock); + /* Initialize stall-on-fault */ + spin_lock_init(&priv->fault_stall_lock); + priv->stall_enabled = true; + /* Teach lockdep about lock ordering wrt. shrinker: */ fs_reclaim_acquire(GFP_KERNEL); might_lock(&priv->lru.lock); @@ -926,7 +930,7 @@ static const struct drm_driver msm_driver = { * is no external component that we need to add since LVDS is within MDP4 * itself. */ -static int add_components_mdp(struct device *master_dev, +static int add_mdp_components(struct device *master_dev, struct component_match **matchptr) { struct device_node *np = master_dev->of_node; @@ -1030,7 +1034,7 @@ static int add_gpu_components(struct device *dev, if (!np) return 0; - if (of_device_is_available(np)) + if (of_device_is_available(np) && adreno_has_gpu(np)) drm_of_component_match_add(dev, matchptr, component_compare_of, np); of_node_put(np); @@ -1071,7 +1075,7 @@ int msm_drv_probe(struct device *master_dev, /* Add mdp components if we have KMS. */ if (kms_init) { - ret = add_components_mdp(master_dev, &match); + ret = add_mdp_components(master_dev, &match); if (ret) return ret; } diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index a65077855201..c8afb1ea6040 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -222,6 +222,29 @@ struct msm_drm_private { * the sw hangcheck mechanism. */ bool disable_err_irq; + + /** + * @fault_stall_lock: + * + * Serialize changes to stall-on-fault state. + */ + spinlock_t fault_stall_lock; + + /** + * @stall_reenable_time: + * + * If stall_enabled is false, when to reenable stall-on-fault. + * Protected by @fault_stall_lock. + */ + ktime_t stall_reenable_time; + + /** + * @stall_enabled: + * + * Whether stall-on-fault is currently enabled. Protected by + * @fault_stall_lock.
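+ * A minimal sketch of the re-enable side of this state machine
+ * (condensed from adreno_check_and_reenable_stall() above; the
+ * locking is elided here):
+ *
+ *   if (!priv->stall_enabled &&
+ *       ktime_after(ktime_get(), priv->stall_reenable_time) &&
+ *       !READ_ONCE(gpu->crashstate)) {
+ *           priv->stall_enabled = true;
+ *           // and mmu->funcs->set_stall(mmu, true)
+ *   }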
+ */ + bool stall_enabled; }; const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format, uint64_t modifier); diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 3e9aa2cc38ef..d4f71bb54e84 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -85,6 +85,15 @@ void __msm_gem_submit_destroy(struct kref *kref) container_of(kref, struct msm_gem_submit, ref); unsigned i; + /* + * In error paths, we could unref the submit without calling + * drm_sched_entity_push_job(), so msm_job_free() will never + * get called. Since drm_sched_job_cleanup() will NULL out + * s_fence, we can use that to detect this case. + */ + if (submit->base.s_fence) + drm_sched_job_cleanup(&submit->base); + if (submit->fence_id) { spin_lock(&submit->queue->idr_lock); idr_remove(&submit->queue->fence_idr, submit->fence_id); @@ -649,6 +658,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_ringbuffer *ring; struct msm_submit_post_dep *post_deps = NULL; struct drm_syncobj **syncobjs_to_reset = NULL; + struct sync_file *sync_file = NULL; int out_fence_fd = -1; unsigned i; int ret; @@ -858,7 +868,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, } if (ret == 0 && args->flags & MSM_SUBMIT_FENCE_FD_OUT) { - struct sync_file *sync_file = sync_file_create(submit->user_fence); + sync_file = sync_file_create(submit->user_fence); if (!sync_file) { ret = -ENOMEM; } else { @@ -892,8 +902,11 @@ out: out_unlock: mutex_unlock(&queue->lock); out_post_unlock: - if (ret && (out_fence_fd >= 0)) + if (ret && (out_fence_fd >= 0)) { put_unused_fd(out_fence_fd); + if (sync_file) + fput(sync_file->file); + } if (!IS_ERR_OR_NULL(submit)) { msm_gem_submit_put(submit); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 197871fdf508..3947f7ba1421 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -257,7 +257,8 @@ out: } static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, - struct msm_gem_submit *submit, char *comm, char *cmd) + struct msm_gem_submit *submit, struct msm_gpu_fault_info *fault_info, + char *comm, char *cmd) { struct msm_gpu_state *state; @@ -276,7 +277,8 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, /* Fill in the additional crash state information */ state->comm = kstrdup(comm, GFP_KERNEL); state->cmd = kstrdup(cmd, GFP_KERNEL); - state->fault_info = gpu->fault_info; + if (fault_info) + state->fault_info = *fault_info; if (submit) { int i; @@ -308,7 +310,8 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, } #else static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, - struct msm_gem_submit *submit, char *comm, char *cmd) + struct msm_gem_submit *submit, struct msm_gpu_fault_info *fault_info, + char *comm, char *cmd) { } #endif @@ -405,7 +408,7 @@ static void recover_worker(struct kthread_work *work) /* Record the crash state */ pm_runtime_get_sync(&gpu->pdev->dev); - msm_gpu_crashstate_capture(gpu, submit, comm, cmd); + msm_gpu_crashstate_capture(gpu, submit, NULL, comm, cmd); kfree(cmd); kfree(comm); @@ -459,9 +462,8 @@ out_unlock: msm_gpu_retire(gpu); } -static void fault_worker(struct kthread_work *work) +void msm_gpu_fault_crashstate_capture(struct msm_gpu *gpu, struct msm_gpu_fault_info *fault_info) { - struct msm_gpu *gpu = container_of(work, struct msm_gpu, fault_work); struct msm_gem_submit *submit; struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); char *comm = NULL, *cmd 
= NULL; @@ -484,16 +486,13 @@ static void fault_worker(struct kthread_work *work) /* Record the crash state */ pm_runtime_get_sync(&gpu->pdev->dev); - msm_gpu_crashstate_capture(gpu, submit, comm, cmd); + msm_gpu_crashstate_capture(gpu, submit, fault_info, comm, cmd); pm_runtime_put_sync(&gpu->pdev->dev); kfree(cmd); kfree(comm); resume_smmu: - memset(&gpu->fault_info, 0, sizeof(gpu->fault_info)); - gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu); - mutex_unlock(&gpu->lock); } @@ -882,7 +881,6 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, init_waitqueue_head(&gpu->retire_event); kthread_init_work(&gpu->retire_work, retire_worker); kthread_init_work(&gpu->recover_work, recover_worker); - kthread_init_work(&gpu->fault_work, fault_worker); priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD; diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index e25009150579..5bf7cd985b9c 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -253,12 +253,6 @@ struct msm_gpu { #define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3 struct timer_list hangcheck_timer; - /* Fault info for most recent iova fault: */ - struct msm_gpu_fault_info fault_info; - - /* work for handling GPU ioval faults: */ - struct kthread_work fault_work; - /* work for handling GPU recovery: */ struct kthread_work recover_work; @@ -668,6 +662,7 @@ msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *ta void msm_gpu_cleanup(struct msm_gpu *gpu); struct msm_gpu *adreno_load_gpu(struct drm_device *dev); +bool adreno_has_gpu(struct device_node *node); void __init adreno_register(void); void __exit adreno_unregister(void); @@ -705,6 +700,8 @@ static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu) mutex_unlock(&gpu->lock); } +void msm_gpu_fault_crashstate_capture(struct msm_gpu *gpu, struct msm_gpu_fault_info *fault_info); + /* * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can * support expanded privileges diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index fd73dcd3f30e..739ce2c283a4 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -345,7 +345,6 @@ static int msm_gpu_fault_handler(struct iommu_domain *domain, struct device *dev unsigned long iova, int flags, void *arg) { struct msm_iommu *iommu = arg; - struct msm_mmu *mmu = &iommu->base; struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(iommu->base.dev); struct adreno_smmu_fault_info info, *ptr = NULL; @@ -359,9 +358,6 @@ static int msm_gpu_fault_handler(struct iommu_domain *domain, struct device *dev pr_warn_ratelimited("*** fault: iova=%16lx, flags=%d\n", iova, flags); - if (mmu->funcs->resume_translation) - mmu->funcs->resume_translation(mmu); - return 0; } @@ -376,12 +372,12 @@ static int msm_disp_fault_handler(struct iommu_domain *domain, struct device *de return -ENOSYS; } -static void msm_iommu_resume_translation(struct msm_mmu *mmu) +static void msm_iommu_set_stall(struct msm_mmu *mmu, bool enable) { struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(mmu->dev); - if (adreno_smmu->resume_translation) - adreno_smmu->resume_translation(adreno_smmu->cookie, true); + if (adreno_smmu->set_stall) + adreno_smmu->set_stall(adreno_smmu->cookie, enable); } static void msm_iommu_detach(struct msm_mmu *mmu) @@ -431,7 +427,7 @@ static const struct msm_mmu_funcs funcs = { .map = msm_iommu_map, .unmap = msm_iommu_unmap, .destroy = msm_iommu_destroy, - .resume_translation = 
msm_iommu_resume_translation, + .set_stall = msm_iommu_set_stall, }; struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index daf91529e02b..0c694907140d 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -15,7 +15,7 @@ struct msm_mmu_funcs { size_t len, int prot); int (*unmap)(struct msm_mmu *mmu, uint64_t iova, size_t len); void (*destroy)(struct msm_mmu *mmu); - void (*resume_translation)(struct msm_mmu *mmu); + void (*set_stall)(struct msm_mmu *mmu, bool enable); }; enum msm_mmu_type { diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index 5a6ae9fc3194..462713401622 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -2255,7 +2255,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <reg32 offset="0" name="0"> <bitfield name="CLEAR_ON_CHIP_TS" pos="0" type="boolean"/> <bitfield name="CLEAR_RESOURCE_TABLE" pos="1" type="boolean"/> - <bitfield name="CLEAR_GLOBAL_LOCAL_TS" pos="2" type="boolean"/> + <bitfield name="CLEAR_BV_BR_COUNTER" pos="2" type="boolean"/> + <bitfield name="RESET_GLOBAL_LOCAL_TS" pos="3" type="boolean"/> </reg32> </domain> diff --git a/drivers/gpu/drm/msm/registers/gen_header.py b/drivers/gpu/drm/msm/registers/gen_header.py index 3926485bb197..a409404627c7 100644 --- a/drivers/gpu/drm/msm/registers/gen_header.py +++ b/drivers/gpu/drm/msm/registers/gen_header.py @@ -11,6 +11,7 @@ import collections import argparse import time import datetime +import re class Error(Exception): def __init__(self, message): @@ -877,13 +878,14 @@ The rules-ng-ng source files this header was generated from are: """) maxlen = 0 for filepath in p.xml_files: - maxlen = max(maxlen, len(filepath)) + new_filepath = re.sub("^.+drivers","drivers",filepath) + maxlen = max(maxlen, len(new_filepath)) for filepath in p.xml_files: - pad = " " * (maxlen - len(filepath)) + pad = " " * (maxlen - len(new_filepath)) filesize = str(os.path.getsize(filepath)) filesize = " " * (7 - len(filesize)) + filesize filetime = time.ctime(os.path.getmtime(filepath)) - print("- " + filepath + pad + " (" + filesize + " bytes, from " + filetime + ")") + print("- " + new_filepath + pad + " (" + filesize + " bytes, from <stripped>)") if p.copyright_year: current_year = str(datetime.date.today().year) print() diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index d47442125fa1..9aae26eb7d8f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -42,7 +42,7 @@ #include "nouveau_acpi.h" static struct ida bl_ida; -#define BL_NAME_SIZE 15 // 12 for name + 2 for digits + 1 for '\0' +#define BL_NAME_SIZE 24 // 12 for name + 11 for digits + 1 for '\0' static bool nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE], diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c index 5acb98d137bd..9d06ff722fea 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c @@ -637,12 +637,18 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, if (payload_size > max_payload_size) { const u32 fn = rpc->function; u32 remain_payload_size = payload_size; + void *next; - /* Adjust length, and send initial RPC. 
*/ - rpc->length = sizeof(*rpc) + max_payload_size; - msg->checksum = rpc->length; + /* Send initial RPC. */ + next = r535_gsp_rpc_get(gsp, fn, max_payload_size); + if (IS_ERR(next)) { + repv = next; + goto done; + } - repv = r535_gsp_rpc_send(gsp, payload, NVKM_GSP_RPC_REPLY_NOWAIT, 0); + memcpy(next, payload, max_payload_size); + + repv = r535_gsp_rpc_send(gsp, next, NVKM_GSP_RPC_REPLY_NOWAIT, 0); if (IS_ERR(repv)) goto done; @@ -653,7 +659,6 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, while (remain_payload_size) { u32 size = min(remain_payload_size, max_payload_size); - void *next; next = r535_gsp_rpc_get(gsp, NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD, size); if (IS_ERR(next)) { @@ -674,6 +679,8 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, /* Wait for reply. */ repv = r535_gsp_rpc_handle_reply(gsp, fn, policy, payload_size + sizeof(*rpc)); + if (!IS_ERR(repv)) + kvfree(msg); } else { repv = r535_gsp_rpc_send(gsp, payload, policy, gsp_rpc_len); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c index 52f2e5f14517..f25ea610cd99 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c @@ -121,7 +121,7 @@ r535_mmu_vaspace_new(struct nvkm_vmm *vmm, u32 handle, bool external) page_shift -= desc->bits; ctrl->levels[i].physAddress = pd->pt[0]->addr; - ctrl->levels[i].size = (1 << desc->bits) * desc->size; + ctrl->levels[i].size = BIT_ULL(desc->bits) * desc->size; ctrl->levels[i].aperture = 1; ctrl->levels[i].pageShift = page_shift; diff --git a/drivers/gpu/drm/sitronix/Kconfig b/drivers/gpu/drm/sitronix/Kconfig index c069d0d41775..741d1bb4b83f 100644 --- a/drivers/gpu/drm/sitronix/Kconfig +++ b/drivers/gpu/drm/sitronix/Kconfig @@ -5,6 +5,7 @@ config DRM_ST7571_I2C select DRM_GEM_SHMEM_HELPER select DRM_KMS_HELPER select REGMAP_I2C + select VIDEOMODE_HELPERS help DRM driver for Sitronix ST7571 panels controlled over I2C. 
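The r535_gsp_rpc_push() rework above makes oversized GSP RPCs uniform: the first chunk is now staged through r535_gsp_rpc_get() exactly like every continuation record, rather than being sent in place out of the caller's buffer, and the reply path now frees the message on success. A self-contained sketch of the underlying send-in-chunks pattern (the names and callback signature here are assumptions for illustration, not the nouveau API):

#include <stddef.h>

typedef int (*send_fn)(unsigned int fn, const void *buf, size_t len);

/* Send a payload as one initial message plus continuation records. */
static int send_chunked(unsigned int fn, unsigned int cont_fn,
                        const void *payload, size_t size,
                        size_t max_chunk, send_fn send)
{
        const unsigned char *p = payload;
        size_t n = size < max_chunk ? size : max_chunk;
        int ret;

        ret = send(fn, p, n);   /* first chunk carries the real function id */
        if (ret)
                return ret;

        for (p += n, size -= n; size; p += n, size -= n) {
                n = size < max_chunk ? size : max_chunk;
                ret = send(cont_fn, p, n);      /* continuation record */
                if (ret)
                        return ret;
        }

        return 0;
}

The design point the patch addresses is that the first chunk previously bypassed the staging path the continuation records used, so the two code paths could disagree about buffer ownership; routing everything through the same get/send pair removes that asymmetry.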
diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index dd2006d51c7a..eec43d1a5595 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -974,7 +974,7 @@ static void ssd130x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array) static void ssd132x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array) { - unsigned int columns = DIV_ROUND_UP(ssd130x->height, SSD132X_SEGMENT_WIDTH); + unsigned int columns = DIV_ROUND_UP(ssd130x->width, SSD132X_SEGMENT_WIDTH); unsigned int height = ssd130x->height; memset(data_array, 0, columns * height); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 35f131a46d07..42df9d3567e7 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -199,7 +199,6 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) struct v3d_dev *v3d = job->v3d; struct v3d_file_priv *file = job->file->driver_priv; struct v3d_stats *global_stats = &v3d->queue[queue].stats; - struct v3d_stats *local_stats = &file->stats[queue]; u64 now = local_clock(); unsigned long flags; @@ -209,7 +208,12 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) else preempt_disable(); - v3d_stats_update(local_stats, now); + /* Don't update the local stats if the file context has already closed */ + if (file) + v3d_stats_update(&file->stats[queue], now); + else + drm_dbg(&v3d->drm, "The file descriptor was closed before job completion\n"); + v3d_stats_update(global_stats, now); if (IS_ENABLED(CONFIG_LOCKDEP)) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index a29a6ef266f9..163d092bd973 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -560,12 +560,6 @@ static int vc4_hdmi_connector_init(struct drm_device *dev, if (ret) return ret; - ret = drm_connector_hdmi_audio_init(connector, dev->dev, - &vc4_hdmi_audio_funcs, - 8, false, -1); - if (ret) - return ret; - drm_connector_helper_add(connector, &vc4_hdmi_connector_helper_funcs); /* @@ -2291,6 +2285,12 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) return ret; } + ret = drm_connector_hdmi_audio_init(&vc4_hdmi->connector, dev, + &vc4_hdmi_audio_funcs, 8, false, + -1); + if (ret) + return ret; + dai_link->cpus = &vc4_hdmi->audio.cpu; dai_link->codecs = &vc4_hdmi->audio.codec; dai_link->platforms = &vc4_hdmi->audio.platform; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0e5d243c9451..6c4cb9576fb6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -118,7 +118,7 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } - xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF); xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 084cbdeba8ea..e1362e608146 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -138,6 +138,14 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) int pending_seqno; /* + * we can get here before the CTs are even initialized if we're wedging + * very early, in which case there are not going to be any pending + * fences so we can bail immediately. + */ + if (!xe_guc_ct_initialized(>->uc.guc.ct)) + return; + + /* * CT channel is already disabled at this point. 
No new TLB requests can * appear. */ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 2447de0ebedf..d0ac48d8f4f7 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -514,6 +514,9 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct) */ void xe_guc_ct_stop(struct xe_guc_ct *ct) { + if (!xe_guc_ct_initialized(ct)) + return; + xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); stop_g2h_handler(ct); } @@ -760,7 +763,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u16 seqno; int ret; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); xe_gt_assert(gt, !g2h_len || !g2h_fence); xe_gt_assert(gt, !num_g2h || !g2h_fence); xe_gt_assert(gt, !g2h_len || num_g2h); @@ -1344,7 +1347,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) u32 action; u32 *hxg; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); lockdep_assert_held(&ct->fast_lock); if (ct->state == XE_GUC_CT_STATE_DISABLED) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 82c4ae458dda..582aac106469 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -22,6 +22,11 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_pr void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb); +static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct) +{ + return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED; +} + static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) { return ct->state == XE_GUC_CT_STATE_ENABLED; diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 18c623992035..3beaaa7b25c1 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -1068,7 +1068,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) goto out; } - memset(pc->bo->vmap.vaddr, 0, size); + xe_map_memset(xe, &pc->bo->vmap, 0, 0, size); slpc_shared_data_write(pc, header.size, size); earlier = ktime_get(); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 6d84a52b660a..9567f6700cf2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1762,6 +1762,9 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) { int ret; + if (!guc->submission_state.initialized) + return 0; + /* * Using an atomic here rather than submission_state.lock as this * function can be called while holding the CT lock (engine reset diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 63d74e27f54c..bf7c3981897d 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -941,11 +941,18 @@ static void xe_lrc_finish(struct xe_lrc *lrc) * store it in the PPHSWP. 
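The xe hunks above (xe_gt_tlb_invalidation.c, xe_guc_ct.c, xe_guc_submit.c) share one idea: teardown and reset paths that can run very early, before the CT channel or submission state exists, now bail out through a small state accessor instead of asserting or touching uninitialized state. A hedged sketch of that guard pattern follows; the names are simplified placeholders, not the real driver structures:

#include <stdbool.h>

enum ct_state { CT_NOT_INITIALIZED, CT_ENABLED, CT_DISABLED, CT_STOPPED };

struct ct { enum ct_state state; };

static bool ct_initialized(const struct ct *ct)
{
        return ct->state != CT_NOT_INITIALIZED;
}

/* Reset can be reached before init when the device wedges very early;
 * with nothing set up there is nothing to stop, so return quietly. */
static void ct_stop(struct ct *ct)
{
        if (!ct_initialized(ct))
                return;
        ct->state = CT_STOPPED;
        /* ... cancel pending fences, stop the G2H handler, etc. ... */
}

Centralizing the check in one accessor also lets the hot paths keep their asserts (xe_gt_assert(gt, xe_guc_ct_initialized(ct))) while the cold teardown paths merely return.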
*/ #define CONTEXT_ACTIVE 1ULL -static void xe_lrc_setup_utilization(struct xe_lrc *lrc) +static int xe_lrc_setup_utilization(struct xe_lrc *lrc) { - u32 *cmd; + u32 *cmd, *buf = NULL; - cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + if (lrc->bb_per_ctx_bo->vmap.is_iomem) { + buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + cmd = buf; + } else { + cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + } *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; *cmd++ = ENGINE_ID(0).addr; @@ -966,9 +973,16 @@ static void xe_lrc_setup_utilization(struct xe_lrc *lrc) *cmd++ = MI_BATCH_BUFFER_END; + if (buf) { + xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0, + buf, (cmd - buf) * sizeof(*cmd)); + kfree(buf); + } + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + return 0; } #define PVC_CTX_ASID (0x2e + 1) @@ -1125,7 +1139,9 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_start_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - xe_lrc_setup_utilization(lrc); + err = xe_lrc_setup_utilization(lrc); + if (err) + goto err_lrc_finish; return 0; diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 6345896585de..f0b167b3fb6a 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -764,7 +764,7 @@ static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, return false; } - if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) { + if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) { drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); return false; } diff --git a/drivers/hwmon/ftsteutates.c b/drivers/hwmon/ftsteutates.c index a3a07662e491..8aeec16a7a90 100644 --- a/drivers/hwmon/ftsteutates.c +++ b/drivers/hwmon/ftsteutates.c @@ -423,13 +423,16 @@ static int fts_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, break; case hwmon_pwm: switch (attr) { - case hwmon_pwm_auto_channels_temp: - if (data->fan_source[channel] == FTS_FAN_SOURCE_INVALID) + case hwmon_pwm_auto_channels_temp: { + u8 fan_source = data->fan_source[channel]; + + if (fan_source == FTS_FAN_SOURCE_INVALID || fan_source >= BITS_PER_LONG) *val = 0; else - *val = BIT(data->fan_source[channel]); + *val = BIT(fan_source); return 0; + } default: break; } diff --git a/drivers/hwmon/ltc4282.c b/drivers/hwmon/ltc4282.c index 7f38d2696239..f607fe8f7937 100644 --- a/drivers/hwmon/ltc4282.c +++ b/drivers/hwmon/ltc4282.c @@ -1512,13 +1512,6 @@ static int ltc4282_setup(struct ltc4282_state *st, struct device *dev) } if (device_property_read_bool(dev, "adi,fault-log-enable")) { - ret = regmap_set_bits(st->map, LTC4282_ADC_CTRL, - LTC4282_FAULT_LOG_EN_MASK); - if (ret) - return ret; - } - - if (device_property_read_bool(dev, "adi,fault-log-enable")) { ret = regmap_set_bits(st->map, LTC4282_ADC_CTRL, LTC4282_FAULT_LOG_EN_MASK); if (ret) return ret; diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index 9486db249c64..b3694a4209b9 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -459,12 +459,10 @@ static ssize_t occ_show_power_1(struct device *dev, return sysfs_emit(buf, "%llu\n", val); } -static u64 occ_get_powr_avg(u64 *accum, u32 *samples) +static u64 occ_get_powr_avg(u64 accum, u32 samples) { - u64 divisor = get_unaligned_be32(samples); - - return (divisor == 0) ? 
0 : - div64_u64(get_unaligned_be64(accum) * 1000000ULL, divisor); + return (samples == 0) ? 0 : + mul_u64_u32_div(accum, 1000000UL, samples); } static ssize_t occ_show_power_2(struct device *dev, @@ -489,8 +487,8 @@ static ssize_t occ_show_power_2(struct device *dev, get_unaligned_be32(&power->sensor_id), power->function_id, power->apss_channel); case 1: - val = occ_get_powr_avg(&power->accumulator, - &power->update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->accumulator), + get_unaligned_be32(&power->update_tag)); break; case 2: val = (u64)get_unaligned_be32(&power->update_tag) * @@ -527,8 +525,8 @@ static ssize_t occ_show_power_a0(struct device *dev, return sysfs_emit(buf, "%u_system\n", get_unaligned_be32(&power->sensor_id)); case 1: - val = occ_get_powr_avg(&power->system.accumulator, - &power->system.update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->system.accumulator), + get_unaligned_be32(&power->system.update_tag)); break; case 2: val = (u64)get_unaligned_be32(&power->system.update_tag) * @@ -541,8 +539,8 @@ static ssize_t occ_show_power_a0(struct device *dev, return sysfs_emit(buf, "%u_proc\n", get_unaligned_be32(&power->sensor_id)); case 5: - val = occ_get_powr_avg(&power->proc.accumulator, - &power->proc.update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->proc.accumulator), + get_unaligned_be32(&power->proc.update_tag)); break; case 6: val = (u64)get_unaligned_be32(&power->proc.update_tag) * @@ -555,8 +553,8 @@ static ssize_t occ_show_power_a0(struct device *dev, return sysfs_emit(buf, "%u_vdd\n", get_unaligned_be32(&power->sensor_id)); case 9: - val = occ_get_powr_avg(&power->vdd.accumulator, - &power->vdd.update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->vdd.accumulator), + get_unaligned_be32(&power->vdd.update_tag)); break; case 10: val = (u64)get_unaligned_be32(&power->vdd.update_tag) * @@ -569,8 +567,8 @@ static ssize_t occ_show_power_a0(struct device *dev, return sysfs_emit(buf, "%u_vdn\n", get_unaligned_be32(&power->sensor_id)); case 13: - val = occ_get_powr_avg(&power->vdn.accumulator, - &power->vdn.update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->vdn.accumulator), + get_unaligned_be32(&power->vdn.update_tag)); break; case 14: val = (u64)get_unaligned_be32(&power->vdn.update_tag) * @@ -747,29 +745,30 @@ static ssize_t occ_show_extended(struct device *dev, } /* - * Some helper macros to make it easier to define an occ_attribute. Since these - * are dynamically allocated, we shouldn't use the existing kernel macros which + * A helper to make it easier to define an occ_attribute. Since these + * are dynamically allocated, we cannot use the existing kernel macros which * stringify the name argument. */ -#define ATTR_OCC(_name, _mode, _show, _store) { \ - .attr = { \ - .name = _name, \ - .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ - }, \ - .show = _show, \ - .store = _store, \ -} - -#define SENSOR_ATTR_OCC(_name, _mode, _show, _store, _nr, _index) { \ - .dev_attr = ATTR_OCC(_name, _mode, _show, _store), \ - .index = _index, \ - .nr = _nr, \ +static void occ_init_attribute(struct occ_attribute *attr, int mode, + ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf), + ssize_t (*store)(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count), + int nr, int index, const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + vsnprintf(attr->name, sizeof(attr->name), fmt, args); + va_end(args); + + attr->sensor.dev_attr.attr.name = attr->name; + attr->sensor.dev_attr.attr.mode = mode; + attr->sensor.dev_attr.show = show; + attr->sensor.dev_attr.store = store; + attr->sensor.index = index; + attr->sensor.nr = nr; } -#define OCC_INIT_ATTR(_name, _mode, _show, _store, _nr, _index) \ - ((struct sensor_device_attribute_2) \ - SENSOR_ATTR_OCC(_name, _mode, _show, _store, _nr, _index)) - /* * Allocate and instatiate sensor_device_attribute_2s. It's most efficient to * use our own instead of the built-in hwmon attribute types. @@ -855,14 +854,15 @@ static int occ_setup_sensor_attrs(struct occ *occ) sensors->extended.num_sensors = 0; } - occ->attrs = devm_kzalloc(dev, sizeof(*occ->attrs) * num_attrs, + occ->attrs = devm_kcalloc(dev, num_attrs, sizeof(*occ->attrs), GFP_KERNEL); if (!occ->attrs) return -ENOMEM; /* null-terminated list */ - occ->group.attrs = devm_kzalloc(dev, sizeof(*occ->group.attrs) * - num_attrs + 1, GFP_KERNEL); + occ->group.attrs = devm_kcalloc(dev, num_attrs + 1, + sizeof(*occ->group.attrs), + GFP_KERNEL); if (!occ->group.attrs) return -ENOMEM; @@ -872,43 +872,33 @@ static int occ_setup_sensor_attrs(struct occ *occ) s = i + 1; temp = ((struct temp_sensor_2 *)sensors->temp.data) + i; - snprintf(attr->name, sizeof(attr->name), "temp%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_temp, NULL, - 0, i); + occ_init_attribute(attr, 0444, show_temp, NULL, + 0, i, "temp%d_label", s); attr++; if (sensors->temp.version == 2 && temp->fru_type == OCC_FRU_TYPE_VRM) { - snprintf(attr->name, sizeof(attr->name), - "temp%d_alarm", s); + occ_init_attribute(attr, 0444, show_temp, NULL, + 1, i, "temp%d_alarm", s); } else { - snprintf(attr->name, sizeof(attr->name), - "temp%d_input", s); + occ_init_attribute(attr, 0444, show_temp, NULL, + 1, i, "temp%d_input", s); } - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_temp, NULL, - 1, i); attr++; if (sensors->temp.version > 1) { - snprintf(attr->name, sizeof(attr->name), - "temp%d_fru_type", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_temp, NULL, 2, i); + occ_init_attribute(attr, 0444, show_temp, NULL, + 2, i, "temp%d_fru_type", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "temp%d_fault", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_temp, NULL, 3, i); + occ_init_attribute(attr, 0444, show_temp, NULL, + 3, i, "temp%d_fault", s); attr++; if (sensors->temp.version == 0x10) { - snprintf(attr->name, sizeof(attr->name), - "temp%d_max", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_temp, NULL, - 4, i); + occ_init_attribute(attr, 0444, show_temp, NULL, + 4, i, "temp%d_max", s); attr++; } } @@ -917,14 +907,12 @@ static int occ_setup_sensor_attrs(struct occ *occ) for (i = 0; i < sensors->freq.num_sensors; ++i) { s = i + 1; - snprintf(attr->name, sizeof(attr->name), "freq%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_freq, NULL, - 0, i); + occ_init_attribute(attr, 0444, show_freq, NULL, + 0, i, "freq%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), "freq%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_freq, NULL, - 1, i); + occ_init_attribute(attr, 0444, show_freq, NULL, + 1, i, "freq%d_input", s); attr++; } @@ -940,32 +928,24 @@ static int occ_setup_sensor_attrs(struct occ *occ) s = (i * 4) + 1; for (j = 0; j < 4; ++j) { - snprintf(attr->name, sizeof(attr->name), - "power%d_label", s); - attr->sensor = 
OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, - nr++, i); + occ_init_attribute(attr, 0444, show_power, + NULL, nr++, i, + "power%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_average", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, - nr++, i); + occ_init_attribute(attr, 0444, show_power, + NULL, nr++, i, + "power%d_average", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_average_interval", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, - nr++, i); + occ_init_attribute(attr, 0444, show_power, + NULL, nr++, i, + "power%d_average_interval", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, - nr++, i); + occ_init_attribute(attr, 0444, show_power, + NULL, nr++, i, + "power%d_input", s); attr++; s++; @@ -977,28 +957,20 @@ static int occ_setup_sensor_attrs(struct occ *occ) for (i = 0; i < sensors->power.num_sensors; ++i) { s = i + 1; - snprintf(attr->name, sizeof(attr->name), - "power%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, 0, i); + occ_init_attribute(attr, 0444, show_power, NULL, + 0, i, "power%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_average", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, 1, i); + occ_init_attribute(attr, 0444, show_power, NULL, + 1, i, "power%d_average", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_average_interval", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, 2, i); + occ_init_attribute(attr, 0444, show_power, NULL, + 2, i, "power%d_average_interval", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, 3, i); + occ_init_attribute(attr, 0444, show_power, NULL, + 3, i, "power%d_input", s); attr++; } @@ -1006,56 +978,43 @@ static int occ_setup_sensor_attrs(struct occ *occ) } if (sensors->caps.num_sensors >= 1) { - snprintf(attr->name, sizeof(attr->name), "power%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 0, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 0, 0, "power%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_cap", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 1, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 1, 0, "power%d_cap", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 2, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 2, 0, "power%d_input", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_cap_not_redundant", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 3, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 3, 0, "power%d_cap_not_redundant", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_cap_max", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 4, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 4, 0, "power%d_cap_max", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_cap_min", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 5, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 5, 0, "power%d_cap_min", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_cap_user", 
- s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0644, show_caps, - occ_store_caps_user, 6, 0); + occ_init_attribute(attr, 0644, show_caps, occ_store_caps_user, + 6, 0, "power%d_cap_user", s); attr++; if (sensors->caps.version > 1) { - snprintf(attr->name, sizeof(attr->name), - "power%d_cap_user_source", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_caps, NULL, 7, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 7, 0, "power%d_cap_user_source", s); attr++; if (sensors->caps.version > 2) { - snprintf(attr->name, sizeof(attr->name), - "power%d_cap_min_soft", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_caps, NULL, - 8, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 8, 0, + "power%d_cap_min_soft", s); attr++; } } @@ -1064,19 +1023,16 @@ static int occ_setup_sensor_attrs(struct occ *occ) for (i = 0; i < sensors->extended.num_sensors; ++i) { s = i + 1; - snprintf(attr->name, sizeof(attr->name), "extn%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - occ_show_extended, NULL, 0, i); + occ_init_attribute(attr, 0444, occ_show_extended, NULL, + 0, i, "extn%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), "extn%d_flags", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - occ_show_extended, NULL, 1, i); + occ_init_attribute(attr, 0444, occ_show_extended, NULL, + 1, i, "extn%d_flags", s); attr++; - snprintf(attr->name, sizeof(attr->name), "extn%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - occ_show_extended, NULL, 2, i); + occ_init_attribute(attr, 0444, occ_show_extended, NULL, + 2, i, "extn%d_input", s); attr++; } diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index eddf25b90ca8..6544d27e4419 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -619,8 +619,8 @@ static u32 bit_func(struct i2c_adapter *adap) /* -----exported algorithm data: ------------------------------------- */ const struct i2c_algorithm i2c_bit_algo = { - .master_xfer = bit_xfer, - .master_xfer_atomic = bit_xfer_atomic, + .xfer = bit_xfer, + .xfer_atomic = bit_xfer_atomic, .functionality = bit_func, }; EXPORT_SYMBOL(i2c_bit_algo); diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index 384af88e58ad..74b66aec33d4 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -361,8 +361,8 @@ static u32 pca_func(struct i2c_adapter *adap) } static const struct i2c_algorithm pca_algo = { - .master_xfer = pca_xfer, - .functionality = pca_func, + .xfer = pca_xfer, + .functionality = pca_func, }; static unsigned int pca_probe_chip(struct i2c_adapter *adap) diff --git a/drivers/i2c/algos/i2c-algo-pcf.c b/drivers/i2c/algos/i2c-algo-pcf.c index 740066ceaea3..fd563e845d4b 100644 --- a/drivers/i2c/algos/i2c-algo-pcf.c +++ b/drivers/i2c/algos/i2c-algo-pcf.c @@ -389,8 +389,8 @@ static u32 pcf_func(struct i2c_adapter *adap) /* exported algorithm data: */ static const struct i2c_algorithm pcf_algo = { - .master_xfer = pcf_xfer, - .functionality = pcf_func, + .xfer = pcf_xfer, + .functionality = pcf_func, }; /* diff --git a/drivers/i2c/busses/i2c-amd-mp2-plat.c b/drivers/i2c/busses/i2c-amd-mp2-plat.c index d9dd0e475d1a..188e24cc4d35 100644 --- a/drivers/i2c/busses/i2c-amd-mp2-plat.c +++ b/drivers/i2c/busses/i2c-amd-mp2-plat.c @@ -179,7 +179,7 @@ static u32 i2c_amd_func(struct i2c_adapter *a) } static const struct i2c_algorithm i2c_amd_algorithm = { - .master_xfer = i2c_amd_xfer, + .xfer = i2c_amd_xfer, .functionality = 
i2c_amd_func, }; diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 1550d3d552ae..a26b74c71206 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -814,11 +814,11 @@ static int aspeed_i2c_unreg_slave(struct i2c_client *client) #endif /* CONFIG_I2C_SLAVE */ static const struct i2c_algorithm aspeed_i2c_algo = { - .master_xfer = aspeed_i2c_master_xfer, - .functionality = aspeed_i2c_functionality, + .xfer = aspeed_i2c_master_xfer, + .functionality = aspeed_i2c_functionality, #if IS_ENABLED(CONFIG_I2C_SLAVE) - .reg_slave = aspeed_i2c_reg_slave, - .unreg_slave = aspeed_i2c_unreg_slave, + .reg_slave = aspeed_i2c_reg_slave, + .unreg_slave = aspeed_i2c_unreg_slave, #endif /* CONFIG_I2C_SLAVE */ }; diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c index 374fc50bb205..59795c1c24ff 100644 --- a/drivers/i2c/busses/i2c-at91-master.c +++ b/drivers/i2c/busses/i2c-at91-master.c @@ -739,8 +739,8 @@ static u32 at91_twi_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm at91_twi_algorithm = { - .master_xfer = at91_twi_xfer, - .functionality = at91_twi_func, + .xfer = at91_twi_xfer, + .functionality = at91_twi_func, }; static int at91_twi_configure_dma(struct at91_twi_dev *dev, u32 phy_addr) diff --git a/drivers/i2c/busses/i2c-axxia.c b/drivers/i2c/busses/i2c-axxia.c index 50030256cd85..0555eeb6903a 100644 --- a/drivers/i2c/busses/i2c-axxia.c +++ b/drivers/i2c/busses/i2c-axxia.c @@ -706,7 +706,7 @@ static int axxia_i2c_unreg_slave(struct i2c_client *slave) } static const struct i2c_algorithm axxia_i2c_algo = { - .master_xfer = axxia_i2c_xfer, + .xfer = axxia_i2c_xfer, .functionality = axxia_i2c_func, .reg_slave = axxia_i2c_reg_slave, .unreg_slave = axxia_i2c_unreg_slave, diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index 63bc3c8f49d3..e418a4f23f15 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -1041,7 +1041,7 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave) } static struct i2c_algorithm bcm_iproc_algo = { - .master_xfer = bcm_iproc_i2c_xfer, + .xfer = bcm_iproc_i2c_xfer, .functionality = bcm_iproc_i2c_functionality, .reg_slave = bcm_iproc_i2c_reg_slave, .unreg_slave = bcm_iproc_i2c_unreg_slave, diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 8df63aaf2a80..697d095afbe4 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -1231,12 +1231,12 @@ static int cdns_unreg_slave(struct i2c_client *slave) #endif static const struct i2c_algorithm cdns_i2c_algo = { - .master_xfer = cdns_i2c_master_xfer, - .master_xfer_atomic = cdns_i2c_master_xfer_atomic, - .functionality = cdns_i2c_func, + .xfer = cdns_i2c_master_xfer, + .xfer_atomic = cdns_i2c_master_xfer_atomic, + .functionality = cdns_i2c_func, #if IS_ENABLED(CONFIG_I2C_SLAVE) - .reg_slave = cdns_reg_slave, - .unreg_slave = cdns_unreg_slave, + .reg_slave = cdns_reg_slave, + .unreg_slave = cdns_unreg_slave, #endif }; diff --git a/drivers/i2c/busses/i2c-cgbc.c b/drivers/i2c/busses/i2c-cgbc.c index f054d167ac47..25a74fa51aa0 100644 --- a/drivers/i2c/busses/i2c-cgbc.c +++ b/drivers/i2c/busses/i2c-cgbc.c @@ -331,8 +331,8 @@ static u32 cgbc_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm cgbc_i2c_algorithm = { - .master_xfer = cgbc_i2c_xfer, - .functionality = cgbc_i2c_func, + .xfer = cgbc_i2c_xfer, + .functionality = cgbc_i2c_func, }; static struct 
i2c_algo_cgbc_data cgbc_i2c_algo_data[] = { diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index efdaddf99f9e..27ea3c130a16 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -690,7 +690,7 @@ static u32 pch_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm pch_algorithm = { - .master_xfer = pch_i2c_xfer, + .xfer = pch_i2c_xfer, .functionality = pch_i2c_func }; diff --git a/drivers/i2c/busses/i2c-emev2.c b/drivers/i2c/busses/i2c-emev2.c index 2512cef8e2a2..ece019b3d066 100644 --- a/drivers/i2c/busses/i2c-emev2.c +++ b/drivers/i2c/busses/i2c-emev2.c @@ -351,10 +351,10 @@ static int em_i2c_unreg_slave(struct i2c_client *slave) } static const struct i2c_algorithm em_i2c_algo = { - .master_xfer = em_i2c_xfer, + .xfer = em_i2c_xfer, .functionality = em_i2c_func, - .reg_slave = em_i2c_reg_slave, - .unreg_slave = em_i2c_unreg_slave, + .reg_slave = em_i2c_reg_slave, + .unreg_slave = em_i2c_unreg_slave, }; static int em_i2c_probe(struct platform_device *pdev) diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c index 02f24479aa07..9c1c5f3c09f6 100644 --- a/drivers/i2c/busses/i2c-exynos5.c +++ b/drivers/i2c/busses/i2c-exynos5.c @@ -879,9 +879,9 @@ static u32 exynos5_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm exynos5_i2c_algorithm = { - .master_xfer = exynos5_i2c_xfer, - .master_xfer_atomic = exynos5_i2c_xfer_atomic, - .functionality = exynos5_i2c_func, + .xfer = exynos5_i2c_xfer, + .xfer_atomic = exynos5_i2c_xfer_atomic, + .functionality = exynos5_i2c_func, }; static int exynos5_i2c_probe(struct platform_device *pdev) diff --git a/drivers/i2c/busses/i2c-gxp.c b/drivers/i2c/busses/i2c-gxp.c index 0fc39caa6c87..2d117e7e3cb6 100644 --- a/drivers/i2c/busses/i2c-gxp.c +++ b/drivers/i2c/busses/i2c-gxp.c @@ -184,11 +184,11 @@ static int gxp_i2c_unreg_slave(struct i2c_client *slave) #endif static const struct i2c_algorithm gxp_i2c_algo = { - .master_xfer = gxp_i2c_master_xfer, + .xfer = gxp_i2c_master_xfer, .functionality = gxp_i2c_func, #if IS_ENABLED(CONFIG_I2C_SLAVE) - .reg_slave = gxp_i2c_reg_slave, - .unreg_slave = gxp_i2c_unreg_slave, + .reg_slave = gxp_i2c_reg_slave, + .unreg_slave = gxp_i2c_unreg_slave, #endif }; diff --git a/drivers/i2c/busses/i2c-img-scb.c b/drivers/i2c/busses/i2c-img-scb.c index 3278707bb885..a454f9f25146 100644 --- a/drivers/i2c/busses/i2c-img-scb.c +++ b/drivers/i2c/busses/i2c-img-scb.c @@ -1143,7 +1143,7 @@ static u32 img_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm img_i2c_algo = { - .master_xfer = img_i2c_xfer, + .xfer = img_i2c_xfer, .functionality = img_i2c_func, }; diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 342d47e67586..064bc83840a6 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -1268,10 +1268,10 @@ static u32 lpi2c_imx_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm lpi2c_imx_algo = { - .master_xfer = lpi2c_imx_xfer, - .functionality = lpi2c_imx_func, - .reg_target = lpi2c_imx_register_target, - .unreg_target = lpi2c_imx_unregister_target, + .xfer = lpi2c_imx_xfer, + .functionality = lpi2c_imx_func, + .reg_target = lpi2c_imx_register_target, + .unreg_target = lpi2c_imx_unregister_target, }; static const struct of_device_id lpi2c_imx_of_match[] = { diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index de01dfecb16e..e5732b0557fb 100644 --- a/drivers/i2c/busses/i2c-imx.c 
+++ b/drivers/i2c/busses/i2c-imx.c @@ -1692,11 +1692,11 @@ static u32 i2c_imx_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm i2c_imx_algo = { - .master_xfer = i2c_imx_xfer, - .master_xfer_atomic = i2c_imx_xfer_atomic, + .xfer = i2c_imx_xfer, + .xfer_atomic = i2c_imx_xfer_atomic, .functionality = i2c_imx_func, - .reg_slave = i2c_imx_reg_slave, - .unreg_slave = i2c_imx_unreg_slave, + .reg_slave = i2c_imx_reg_slave, + .unreg_slave = i2c_imx_unreg_slave, }; static int i2c_imx_probe(struct platform_device *pdev) diff --git a/drivers/i2c/busses/i2c-k1.c b/drivers/i2c/busses/i2c-k1.c index 5965b4cf6220..b68a21fff0b5 100644 --- a/drivers/i2c/busses/i2c-k1.c +++ b/drivers/i2c/busses/i2c-k1.c @@ -477,7 +477,7 @@ static int spacemit_i2c_xfer(struct i2c_adapter *adapt, struct i2c_msg *msgs, in ret = spacemit_i2c_wait_bus_idle(i2c); if (!ret) - spacemit_i2c_xfer_msg(i2c); + ret = spacemit_i2c_xfer_msg(i2c); else if (ret < 0) dev_dbg(i2c->dev, "i2c transfer error: %d\n", ret); else diff --git a/drivers/i2c/busses/i2c-keba.c b/drivers/i2c/busses/i2c-keba.c index 7b9ed2592f5b..9420c8b342b5 100644 --- a/drivers/i2c/busses/i2c-keba.c +++ b/drivers/i2c/busses/i2c-keba.c @@ -500,7 +500,7 @@ static u32 ki2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm ki2c_algo = { - .master_xfer = ki2c_xfer, + .xfer = ki2c_xfer, .functionality = ki2c_func, }; diff --git a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c index 5ef136c3ecb1..bc0f1a0c8ee1 100644 --- a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c +++ b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c @@ -1048,7 +1048,7 @@ static u32 pci1xxxx_i2c_get_funcs(struct i2c_adapter *adap) } static const struct i2c_algorithm pci1xxxx_i2c_algo = { - .master_xfer = pci1xxxx_i2c_xfer, + .xfer = pci1xxxx_i2c_xfer, .functionality = pci1xxxx_i2c_get_funcs, }; diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c index e1d69537353b..0d9032953e48 100644 --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c @@ -448,8 +448,8 @@ static u32 meson_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm meson_i2c_algorithm = { - .master_xfer = meson_i2c_xfer, - .master_xfer_atomic = meson_i2c_xfer_atomic, + .xfer = meson_i2c_xfer, + .xfer_atomic = meson_i2c_xfer_atomic, .functionality = meson_i2c_func, }; diff --git a/drivers/i2c/busses/i2c-microchip-corei2c.c b/drivers/i2c/busses/i2c-microchip-corei2c.c index 492bf4c34722..f173bda1c98c 100644 --- a/drivers/i2c/busses/i2c-microchip-corei2c.c +++ b/drivers/i2c/busses/i2c-microchip-corei2c.c @@ -526,7 +526,7 @@ static int mchp_corei2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned } static const struct i2c_algorithm mchp_corei2c_algo = { - .master_xfer = mchp_corei2c_xfer, + .xfer = mchp_corei2c_xfer, .functionality = mchp_corei2c_func, .smbus_xfer = mchp_corei2c_smbus_xfer, }; diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 5bd342047d59..ab456c3717db 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -1342,7 +1342,7 @@ static u32 mtk_i2c_functionality(struct i2c_adapter *adap) } static const struct i2c_algorithm mtk_i2c_algorithm = { - .master_xfer = mtk_i2c_transfer, + .xfer = mtk_i2c_transfer, .functionality = mtk_i2c_functionality, }; diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index ad62d56b2186..08c9091a1e35 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -687,7 +687,7 @@ 
static irqreturn_t mxs_i2c_isr(int this_irq, void *dev_id) } static const struct i2c_algorithm mxs_i2c_algo = { - .master_xfer = mxs_i2c_xfer, + .xfer = mxs_i2c_xfer, .functionality = mxs_i2c_func, }; diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index d2877e4cc28d..19b648fc094d 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -996,8 +996,8 @@ static unsigned int nmk_i2c_functionality(struct i2c_adapter *adap) } static const struct i2c_algorithm nmk_i2c_algo = { - .master_xfer = nmk_i2c_xfer, - .functionality = nmk_i2c_functionality + .xfer = nmk_i2c_xfer, + .functionality = nmk_i2c_functionality }; static void nmk_i2c_of_probe(struct device_node *np, diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 892e2d2988a7..8b7e15240fb0 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2470,11 +2470,11 @@ static const struct i2c_adapter_quirks npcm_i2c_quirks = { }; static const struct i2c_algorithm npcm_i2c_algo = { - .master_xfer = npcm_i2c_master_xfer, + .xfer = npcm_i2c_master_xfer, .functionality = npcm_i2c_functionality, #if IS_ENABLED(CONFIG_I2C_SLAVE) - .reg_slave = npcm_i2c_reg_slave, - .unreg_slave = npcm_i2c_unreg_slave, + .reg_slave = npcm_i2c_reg_slave, + .unreg_slave = npcm_i2c_unreg_slave, #endif }; diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 876791d20ed5..f1cc26ac5b80 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1201,9 +1201,9 @@ omap_i2c_isr_thread(int this_irq, void *dev_id) } static const struct i2c_algorithm omap_i2c_algo = { - .master_xfer = omap_i2c_xfer_irq, - .master_xfer_atomic = omap_i2c_xfer_polling, - .functionality = omap_i2c_func, + .xfer = omap_i2c_xfer_irq, + .xfer_atomic = omap_i2c_xfer_polling, + .functionality = omap_i2c_func, }; static const struct i2c_adapter_quirks omap_i2c_quirks = { diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index 9a1af5bbd604..8daa0008bd05 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -580,7 +580,7 @@ static u32 i2c_pnx_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm pnx_algorithm = { - .master_xfer = i2c_pnx_xfer, + .xfer = i2c_pnx_xfer, .functionality = i2c_pnx_func, }; diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index 4415a29f749b..968a8b8794da 100644 --- a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c @@ -1154,11 +1154,11 @@ static u32 i2c_pxa_functionality(struct i2c_adapter *adap) } static const struct i2c_algorithm i2c_pxa_algorithm = { - .master_xfer = i2c_pxa_xfer, - .functionality = i2c_pxa_functionality, + .xfer = i2c_pxa_xfer, + .functionality = i2c_pxa_functionality, #ifdef CONFIG_I2C_PXA_SLAVE - .reg_slave = i2c_pxa_slave_reg, - .unreg_slave = i2c_pxa_slave_unreg, + .reg_slave = i2c_pxa_slave_reg, + .unreg_slave = i2c_pxa_slave_unreg, #endif }; @@ -1244,11 +1244,11 @@ static int i2c_pxa_pio_xfer(struct i2c_adapter *adap, } static const struct i2c_algorithm i2c_pxa_pio_algorithm = { - .master_xfer = i2c_pxa_pio_xfer, - .functionality = i2c_pxa_functionality, + .xfer = i2c_pxa_pio_xfer, + .functionality = i2c_pxa_functionality, #ifdef CONFIG_I2C_PXA_SLAVE - .reg_slave = i2c_pxa_slave_reg, - .unreg_slave = i2c_pxa_slave_unreg, + .reg_slave = i2c_pxa_slave_reg, + .unreg_slave = i2c_pxa_slave_unreg, #endif }; diff --git a/drivers/i2c/busses/i2c-qcom-cci.c 
b/drivers/i2c/busses/i2c-qcom-cci.c index 05b73326afd4..a3afa11a71a1 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c +++ b/drivers/i2c/busses/i2c-qcom-cci.c @@ -462,8 +462,8 @@ static u32 cci_func(struct i2c_adapter *adap) } static const struct i2c_algorithm cci_algo = { - .master_xfer = cci_xfer, - .functionality = cci_func, + .xfer = cci_xfer, + .functionality = cci_func, }; static int cci_enable_clocks(struct cci *cci) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index ccea575fb783..13889f52b6f7 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -727,8 +727,8 @@ static u32 geni_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm geni_i2c_algo = { - .master_xfer = geni_i2c_xfer, - .functionality = geni_i2c_func, + .xfer = geni_i2c_xfer, + .functionality = geni_i2c_func, }; #ifdef CONFIG_ACPI diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index 3a36d682ed57..6059f585843e 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -1634,13 +1634,13 @@ static u32 qup_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm qup_i2c_algo = { - .master_xfer = qup_i2c_xfer, - .functionality = qup_i2c_func, + .xfer = qup_i2c_xfer, + .functionality = qup_i2c_func, }; static const struct i2c_algorithm qup_i2c_algo_v2 = { - .master_xfer = qup_i2c_xfer_v2, - .functionality = qup_i2c_func, + .xfer = qup_i2c_xfer_v2, + .functionality = qup_i2c_func, }; /* diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 5693a38da7b5..d51884ab99f4 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -1084,11 +1084,11 @@ static u32 rcar_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm rcar_i2c_algo = { - .master_xfer = rcar_i2c_master_xfer, - .master_xfer_atomic = rcar_i2c_master_xfer_atomic, - .functionality = rcar_i2c_func, - .reg_slave = rcar_reg_slave, - .unreg_slave = rcar_unreg_slave, + .xfer = rcar_i2c_master_xfer, + .xfer_atomic = rcar_i2c_master_xfer_atomic, + .functionality = rcar_i2c_func, + .reg_slave = rcar_reg_slave, + .unreg_slave = rcar_unreg_slave, }; static const struct i2c_adapter_quirks rcar_i2c_quirks = { diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index 0f3cf500df68..f4fa4703acbd 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -800,9 +800,9 @@ static u32 s3c24xx_i2c_func(struct i2c_adapter *adap) /* i2c bus registration info */ static const struct i2c_algorithm s3c24xx_i2c_algorithm = { - .master_xfer = s3c24xx_i2c_xfer, - .master_xfer_atomic = s3c24xx_i2c_xfer_atomic, - .functionality = s3c24xx_i2c_func, + .xfer = s3c24xx_i2c_xfer, + .xfer_atomic = s3c24xx_i2c_xfer_atomic, + .functionality = s3c24xx_i2c_func, }; /* diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c index 620f12596763..43f33988b98f 100644 --- a/drivers/i2c/busses/i2c-sh7760.c +++ b/drivers/i2c/busses/i2c-sh7760.c @@ -379,8 +379,8 @@ static u32 sh7760_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm sh7760_i2c_algo = { - .master_xfer = sh7760_i2c_master_xfer, - .functionality = sh7760_i2c_func, + .xfer = sh7760_i2c_master_xfer, + .functionality = sh7760_i2c_func, }; /* calculate CCR register setting for a desired scl clock. 
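The long run of i2c hunks here is a single mechanical conversion: the callbacks in struct i2c_algorithm lose their controller-centric names, with .master_xfer becoming .xfer and .master_xfer_atomic becoming .xfer_atomic, while .functionality, .smbus_xfer and the slave/target hooks are untouched. Schematically, a converted bus driver now registers its algorithm like this (a kernel-style fragment, not buildable on its own; the foo_* names are placeholders and the declarations are assumed to come from <linux/i2c.h>):

static int foo_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
{
        /* drive the controller through msgs[0..num-1], then return the
         * number of messages transferred, or a negative errno */
        return num;
}

static u32 foo_func(struct i2c_adapter *adap)
{
        return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
}

static const struct i2c_algorithm foo_algo = {
        .xfer           = foo_xfer,     /* formerly .master_xfer */
        .functionality  = foo_func,
};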
SCL clock is diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index adfcee6c9fdc..dae8967f8749 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -740,8 +740,8 @@ static u32 sh_mobile_i2c_func(struct i2c_adapter *adapter) static const struct i2c_algorithm sh_mobile_i2c_algorithm = { .functionality = sh_mobile_i2c_func, - .master_xfer = sh_mobile_i2c_xfer, - .master_xfer_atomic = sh_mobile_i2c_xfer_atomic, + .xfer = sh_mobile_i2c_xfer, + .xfer_atomic = sh_mobile_i2c_xfer_atomic, }; static const struct i2c_adapter_quirks sh_mobile_i2c_quirks = { diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 973a3a8c6d4a..e4aaeb2262d0 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -2151,8 +2151,8 @@ static u32 stm32f7_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm stm32f7_i2c_algo = { - .master_xfer = stm32f7_i2c_xfer, - .master_xfer_atomic = stm32f7_i2c_xfer_atomic, + .xfer = stm32f7_i2c_xfer, + .xfer_atomic = stm32f7_i2c_xfer_atomic, .smbus_xfer = stm32f7_i2c_smbus_xfer, .functionality = stm32f7_i2c_func, .reg_slave = stm32f7_i2c_reg_slave, diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c index 31f8d08e32a4..1230f51e1624 100644 --- a/drivers/i2c/busses/i2c-synquacer.c +++ b/drivers/i2c/busses/i2c-synquacer.c @@ -520,8 +520,8 @@ static u32 synquacer_i2c_functionality(struct i2c_adapter *adap) } static const struct i2c_algorithm synquacer_i2c_algo = { - .master_xfer = synquacer_i2c_xfer, - .functionality = synquacer_i2c_functionality, + .xfer = synquacer_i2c_xfer, + .functionality = synquacer_i2c_functionality, }; static const struct i2c_adapter synquacer_i2c_ops = { diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 049b4d154c23..0862b98007f5 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1440,9 +1440,9 @@ static u32 tegra_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm tegra_i2c_algo = { - .master_xfer = tegra_i2c_xfer, - .master_xfer_atomic = tegra_i2c_xfer_atomic, - .functionality = tegra_i2c_func, + .xfer = tegra_i2c_xfer, + .xfer_atomic = tegra_i2c_xfer_atomic, + .functionality = tegra_i2c_func, }; /* payload size is only 12 bit */ diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 6bc1575cea6c..607026c921d6 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -1398,8 +1398,8 @@ static u32 xiic_func(struct i2c_adapter *adap) } static const struct i2c_algorithm xiic_algorithm = { - .master_xfer = xiic_xfer, - .master_xfer_atomic = xiic_xfer_atomic, + .xfer = xiic_xfer, + .xfer_atomic = xiic_xfer_atomic, .functionality = xiic_func, }; diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c index 4d5e49b6321b..ddb1c3e8bc9d 100644 --- a/drivers/i2c/busses/i2c-xlp9xx.c +++ b/drivers/i2c/busses/i2c-xlp9xx.c @@ -452,7 +452,7 @@ static u32 xlp9xx_i2c_functionality(struct i2c_adapter *adapter) } static const struct i2c_algorithm xlp9xx_i2c_algo = { - .master_xfer = xlp9xx_i2c_xfer, + .xfer = xlp9xx_i2c_xfer, .functionality = xlp9xx_i2c_functionality, }; diff --git a/drivers/i2c/i2c-atr.c b/drivers/i2c/i2c-atr.c index be7d6d41e0b2..dd194476b118 100644 --- a/drivers/i2c/i2c-atr.c +++ b/drivers/i2c/i2c-atr.c @@ -738,7 +738,7 @@ struct i2c_atr *i2c_atr_new(struct i2c_adapter *parent, struct device *dev, atr->flags = flags; if 
(parent->algo->master_xfer) - atr->algo.master_xfer = i2c_atr_master_xfer; + atr->algo.xfer = i2c_atr_master_xfer; if (parent->algo->smbus_xfer) atr->algo.smbus_xfer = i2c_atr_smbus_xfer; atr->algo.functionality = i2c_atr_functionality; diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index fda72e8be885..4d8690981a55 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -293,12 +293,12 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc, */ if (parent->algo->master_xfer) { if (muxc->mux_locked) - priv->algo.master_xfer = i2c_mux_master_xfer; + priv->algo.xfer = i2c_mux_master_xfer; else - priv->algo.master_xfer = __i2c_mux_master_xfer; + priv->algo.xfer = __i2c_mux_master_xfer; } if (parent->algo->master_xfer_atomic) - priv->algo.master_xfer_atomic = priv->algo.master_xfer; + priv->algo.xfer_atomic = priv->algo.master_xfer; if (parent->algo->smbus_xfer) { if (muxc->mux_locked) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index 77a740561fd7..f2a1f4744978 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -95,9 +95,9 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne priv->cur_chan = new_chan; /* Now fill out current adapter structure. cur_chan must be up to date */ - priv->algo.master_xfer = i2c_demux_master_xfer; + priv->algo.xfer = i2c_demux_master_xfer; if (adap->algo->master_xfer_atomic) - priv->algo.master_xfer_atomic = i2c_demux_master_xfer; + priv->algo.xfer_atomic = i2c_demux_master_xfer; priv->algo.functionality = i2c_demux_functionality; snprintf(priv->cur_adap.name, sizeof(priv->cur_adap.name), diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 8ccb483204fa..73747d20df85 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -152,8 +152,8 @@ static __always_inline int __intel_idle(struct cpuidle_device *dev, int index, bool irqoff) { struct cpuidle_state *state = &drv->states[index]; - unsigned long eax = flg2MWAIT(state->flags); - unsigned long ecx = 1*irqoff; /* break on interrupt flag */ + unsigned int eax = flg2MWAIT(state->flags); + unsigned int ecx = 1*irqoff; /* break on interrupt flag */ mwait_idle_with_hints(eax, ecx); @@ -226,9 +226,9 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev, static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - unsigned long ecx = 1; /* break on interrupt flag */ struct cpuidle_state *state = &drv->states[index]; - unsigned long eax = flg2MWAIT(state->flags); + unsigned int eax = flg2MWAIT(state->flags); + unsigned int ecx = 1; /* break on interrupt flag */ if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) fpu_idle_fpregs(); @@ -2507,6 +2507,8 @@ static int __init intel_idle_init(void) pr_debug("Local APIC timer is reliable in %s\n", boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); + arch_cpu_rescan_dead_smt_siblings(); + return 0; hp_setup_fail: @@ -2518,7 +2520,7 @@ init_driver_fail: return retval; } -device_initcall(intel_idle_init); +subsys_initcall_sync(intel_idle_init); /* * We are not really modular, but we used to support that. 
Meaning we also diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 61897d50162d..e58fe9d8b9e7 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -559,11 +559,11 @@ static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova, { unsigned int pd_index = iova_pd_index(iova); struct tegra_smmu *smmu = as->smmu; - struct tegra_pd *pd = as->pd; + u32 *pd = &as->pd->val[pd_index]; unsigned long offset = pd_index * sizeof(*pd); /* Set the page directory entry first */ - pd->val[pd_index] = value; + *pd = value; /* The flush the page directory entry from caches */ dma_sync_single_range_for_device(smmu->dev, as->pd_dma, offset, diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c index 268cc18b781f..258b8e9a2d57 100644 --- a/drivers/irqchip/irq-ath79-misc.c +++ b/drivers/irqchip/irq-ath79-misc.c @@ -15,6 +15,8 @@ #include <linux/of_address.h> #include <linux/of_irq.h> +#include <asm/time.h> + #define AR71XX_RESET_REG_MISC_INT_STATUS 0 #define AR71XX_RESET_REG_MISC_INT_ENABLE 4 @@ -177,21 +179,3 @@ static int __init ar7240_misc_intc_of_init( IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc", ar7240_misc_intc_of_init); - -void __init ath79_misc_irq_init(void __iomem *regs, int irq, - int irq_base, bool is_ar71xx) -{ - struct irq_domain *domain; - - if (is_ar71xx) - ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; - else - ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; - - domain = irq_domain_create_legacy(NULL, ATH79_MISC_IRQ_COUNT, - irq_base, 0, &misc_irq_domain_ops, regs); - if (!domain) - panic("Failed to create MISC irqdomain"); - - ath79_misc_intc_domain_init(domain, irq); -} diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index d4697e79d5a3..b2d10063d35f 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -5,7 +5,6 @@ config BCACHE select BLOCK_HOLDER_DEPRECATED if SYSFS select CRC64 select CLOSURES - select MIN_HEAP help Allows a block device to be used as cache for other devices; uses a btree for indexing and the layout is optimized for SSDs. diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 8998e61efa40..48ce750bf70a 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -164,61 +164,40 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b) * prio is worth 1/8th of what INITIAL_PRIO is worth. 
*/ -static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b) -{ - unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; - - return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); -} - -static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args) -{ - struct bucket **lhs = (struct bucket **)l; - struct bucket **rhs = (struct bucket **)r; - struct cache *ca = args; - - return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs); -} - -static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args) -{ - struct bucket **lhs = (struct bucket **)l; - struct bucket **rhs = (struct bucket **)r; - struct cache *ca = args; +#define bucket_prio(b) \ +({ \ + unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \ + \ + (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \ +}) - return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs); -} +#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r)) +#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r)) static void invalidate_buckets_lru(struct cache *ca) { struct bucket *b; - const struct min_heap_callbacks bucket_max_cmp_callback = { - .less = new_bucket_max_cmp, - .swp = NULL, - }; - const struct min_heap_callbacks bucket_min_cmp_callback = { - .less = new_bucket_min_cmp, - .swp = NULL, - }; + ssize_t i; - ca->heap.nr = 0; + ca->heap.used = 0; for_each_bucket(b, ca) { if (!bch_can_invalidate_bucket(ca, b)) continue; - if (!min_heap_full(&ca->heap)) - min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca); - else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) { + if (!heap_full(&ca->heap)) + heap_add(&ca->heap, b, bucket_max_cmp); + else if (bucket_max_cmp(b, heap_peek(&ca->heap))) { ca->heap.data[0] = b; - min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca); + heap_sift(&ca->heap, 0, bucket_max_cmp); } } - min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca); + for (i = ca->heap.used / 2 - 1; i >= 0; --i) + heap_sift(&ca->heap, i, bucket_min_cmp); while (!fifo_full(&ca->free_inc)) { - if (!ca->heap.nr) { + if (!heap_pop(&ca->heap, b, bucket_min_cmp)) { /* * We don't want to be calling invalidate_buckets() * multiple times when it can't do anything @@ -227,8 +206,6 @@ static void invalidate_buckets_lru(struct cache *ca) wake_up_gc(ca->set); return; } - b = min_heap_peek(&ca->heap)[0]; - min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca); bch_invalidate_one_bucket(ca, b); } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 785b0d9008fa..1d33e40d26ea 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -458,7 +458,7 @@ struct cache { /* Allocation stuff: */ struct bucket *buckets; - DEFINE_MIN_HEAP(struct bucket *, cache_heap) heap; + DECLARE_HEAP(struct bucket *, heap); /* * If nonzero, we know we aren't going to find any buckets to invalidate diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 68258a16e125..463eb13bd0b2 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -54,11 +54,9 @@ void bch_dump_bucket(struct btree_keys *b) int __bch_count_data(struct btree_keys *b) { unsigned int ret = 0; - struct btree_iter iter; + struct btree_iter_stack iter; struct bkey *k; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - if (b->ops->is_extents) for_each_key(b, k, &iter) ret += KEY_SIZE(k); @@ -69,11 +67,9 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) 
{ va_list args; struct bkey *k, *p = NULL; - struct btree_iter iter; + struct btree_iter_stack iter; const char *err; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - for_each_key(b, k, &iter) { if (b->ops->is_extents) { err = "Keys out of order"; @@ -114,9 +110,9 @@ bug: static void bch_btree_iter_next_check(struct btree_iter *iter) { - struct bkey *k = iter->heap.data->k, *next = bkey_next(k); + struct bkey *k = iter->data->k, *next = bkey_next(k); - if (next < iter->heap.data->end && + if (next < iter->data->end && bkey_cmp(k, iter->b->ops->is_extents ? &START_KEY(next) : next) > 0) { bch_dump_bucket(iter->b); @@ -883,14 +879,12 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, unsigned int status = BTREE_INSERT_STATUS_NO_INSERT; struct bset *i = bset_tree_last(b)->data; struct bkey *m, *prev = NULL; - struct btree_iter iter; + struct btree_iter_stack iter; struct bkey preceding_key_on_stack = ZERO_KEY; struct bkey *preceding_key_p = &preceding_key_on_stack; BUG_ON(b->ops->is_extents && !KEY_SIZE(k)); - min_heap_init(&iter.heap, NULL, MAX_BSETS); - /* * If k has preceding key, preceding_key_p will be set to address * of k's preceding key; otherwise preceding_key_p will be set @@ -901,9 +895,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, else preceding_key(k, &preceding_key_p); - m = bch_btree_iter_init(b, &iter, preceding_key_p); + m = bch_btree_iter_stack_init(b, &iter, preceding_key_p); - if (b->ops->insert_fixup(b, k, &iter, replace_key)) + if (b->ops->insert_fixup(b, k, &iter.iter, replace_key)) return status; status = BTREE_INSERT_STATUS_INSERT; @@ -1083,94 +1077,79 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, /* Btree iterator */ -typedef bool (new_btree_iter_cmp_fn)(const void *, const void *, void *); +typedef bool (btree_iter_cmp_fn)(struct btree_iter_set, + struct btree_iter_set); -static inline bool new_btree_iter_cmp(const void *l, const void *r, void __always_unused *args) +static inline bool btree_iter_cmp(struct btree_iter_set l, + struct btree_iter_set r) { - const struct btree_iter_set *_l = l; - const struct btree_iter_set *_r = r; - - return bkey_cmp(_l->k, _r->k) <= 0; + return bkey_cmp(l.k, r.k) > 0; } static inline bool btree_iter_end(struct btree_iter *iter) { - return !iter->heap.nr; + return !iter->used; } void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, struct bkey *end) { - const struct min_heap_callbacks callbacks = { - .less = new_btree_iter_cmp, - .swp = NULL, - }; - if (k != end) - BUG_ON(!min_heap_push(&iter->heap, - &((struct btree_iter_set) { k, end }), - &callbacks, - NULL)); + BUG_ON(!heap_add(iter, + ((struct btree_iter_set) { k, end }), + btree_iter_cmp)); } -static struct bkey *__bch_btree_iter_init(struct btree_keys *b, - struct btree_iter *iter, - struct bkey *search, - struct bset_tree *start) +static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b, + struct btree_iter_stack *iter, + struct bkey *search, + struct bset_tree *start) { struct bkey *ret = NULL; - iter->heap.size = ARRAY_SIZE(iter->heap.preallocated); - iter->heap.nr = 0; + iter->iter.size = ARRAY_SIZE(iter->stack_data); + iter->iter.used = 0; #ifdef CONFIG_BCACHE_DEBUG - iter->b = b; + iter->iter.b = b; #endif for (; start <= bset_tree_last(b); start++) { ret = bch_bset_search(b, start, search); - bch_btree_iter_push(iter, ret, bset_bkey_last(start->data)); + bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data)); } return ret; } -struct bkey 
*bch_btree_iter_init(struct btree_keys *b, - struct btree_iter *iter, +struct bkey *bch_btree_iter_stack_init(struct btree_keys *b, + struct btree_iter_stack *iter, struct bkey *search) { - return __bch_btree_iter_init(b, iter, search, b->set); + return __bch_btree_iter_stack_init(b, iter, search, b->set); } static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, - new_btree_iter_cmp_fn *cmp) + btree_iter_cmp_fn *cmp) { struct btree_iter_set b __maybe_unused; struct bkey *ret = NULL; - const struct min_heap_callbacks callbacks = { - .less = cmp, - .swp = NULL, - }; if (!btree_iter_end(iter)) { bch_btree_iter_next_check(iter); - ret = iter->heap.data->k; - iter->heap.data->k = bkey_next(iter->heap.data->k); + ret = iter->data->k; + iter->data->k = bkey_next(iter->data->k); - if (iter->heap.data->k > iter->heap.data->end) { + if (iter->data->k > iter->data->end) { WARN_ONCE(1, "bset was corrupt!\n"); - iter->heap.data->k = iter->heap.data->end; + iter->data->k = iter->data->end; } - if (iter->heap.data->k == iter->heap.data->end) { - if (iter->heap.nr) { - b = min_heap_peek(&iter->heap)[0]; - min_heap_pop(&iter->heap, &callbacks, NULL); - } - } + if (iter->data->k == iter->data->end) + heap_pop(iter, b, cmp); else - min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); + heap_sift(iter, 0, cmp); } return ret; @@ -1178,7 +1157,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, struct bkey *bch_btree_iter_next(struct btree_iter *iter) { - return __bch_btree_iter_next(iter, new_btree_iter_cmp); + return __bch_btree_iter_next(iter, btree_iter_cmp); } @@ -1216,18 +1195,16 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out, struct btree_iter *iter, bool fixup, bool remove_stale) { + int i; struct bkey *k, *last = NULL; BKEY_PADDED(k) tmp; bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale ? 
bch_ptr_bad : bch_ptr_invalid; - const struct min_heap_callbacks callbacks = { - .less = b->ops->sort_cmp, - .swp = NULL, - }; /* Heapify the iterator, using our comparison function */ - min_heapify_all(&iter->heap, &callbacks, NULL); + for (i = iter->used / 2 - 1; i >= 0; --i) + heap_sift(iter, i, b->ops->sort_cmp); while (!btree_iter_end(iter)) { if (b->ops->sort_fixup && fixup) @@ -1316,11 +1293,10 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, struct bset_sort_state *state) { size_t order = b->page_order, keys = 0; - struct btree_iter iter; + struct btree_iter_stack iter; int oldsize = bch_count_data(b); - min_heap_init(&iter.heap, NULL, MAX_BSETS); - __bch_btree_iter_init(b, &iter, NULL, &b->set[start]); + __bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]); if (start) { unsigned int i; @@ -1331,7 +1307,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, order = get_order(__set_bytes(b->set->data, keys)); } - __btree_sort(b, &iter, start, order, false, state); + __btree_sort(b, &iter.iter, start, order, false, state); EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize); } @@ -1347,13 +1323,11 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new, struct bset_sort_state *state) { uint64_t start_time = local_clock(); - struct btree_iter iter; - - min_heap_init(&iter.heap, NULL, MAX_BSETS); + struct btree_iter_stack iter; - bch_btree_iter_init(b, &iter, NULL); + bch_btree_iter_stack_init(b, &iter, NULL); - btree_mergesort(b, new->set->data, &iter, false, true); + btree_mergesort(b, new->set->data, &iter.iter, false, true); bch_time_stats_update(&state->time, start_time); diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index f79441acd4c1..011f6062c4c0 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -187,9 +187,8 @@ struct bset_tree { }; struct btree_keys_ops { - bool (*sort_cmp)(const void *l, - const void *r, - void *args); + bool (*sort_cmp)(struct btree_iter_set l, + struct btree_iter_set r); struct bkey *(*sort_fixup)(struct btree_iter *iter, struct bkey *tmp); bool (*insert_fixup)(struct btree_keys *b, @@ -313,17 +312,23 @@ enum { BTREE_INSERT_STATUS_FRONT_MERGE, }; -struct btree_iter_set { - struct bkey *k, *end; -}; - /* Btree key iteration */ struct btree_iter { + size_t size, used; #ifdef CONFIG_BCACHE_DEBUG struct btree_keys *b; #endif - MIN_HEAP_PREALLOCATED(struct btree_iter_set, btree_iter_heap, MAX_BSETS) heap; + struct btree_iter_set { + struct bkey *k, *end; + } data[]; +}; + +/* Fixed-size btree_iter that can be allocated on the stack */ + +struct btree_iter_stack { + struct btree_iter iter; + struct btree_iter_set stack_data[MAX_BSETS]; }; typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k); @@ -335,9 +340,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, struct bkey *end); -struct bkey *bch_btree_iter_init(struct btree_keys *b, - struct btree_iter *iter, - struct bkey *search); +struct bkey *bch_btree_iter_stack_init(struct btree_keys *b, + struct btree_iter_stack *iter, + struct bkey *search); struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, const struct bkey *search); @@ -352,13 +357,14 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b, return search ? 
__bch_bset_search(b, t, search) : t->data->start; } -#define for_each_key_filter(b, k, iter, filter) \ - for (bch_btree_iter_init((b), (iter), NULL); \ - ((k) = bch_btree_iter_next_filter((iter), (b), filter));) +#define for_each_key_filter(b, k, stack_iter, filter) \ + for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \ + ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \ + filter));) -#define for_each_key(b, k, iter) \ - for (bch_btree_iter_init((b), (iter), NULL); \ - ((k) = bch_btree_iter_next(iter));) +#define for_each_key(b, k, stack_iter) \ + for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \ + ((k) = bch_btree_iter_next(&((stack_iter)->iter)));) /* Sorting */ diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 1d0100677357..210b59007d98 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -148,19 +148,19 @@ void bch_btree_node_read_done(struct btree *b) { const char *err = "bad btree header"; struct bset *i = btree_bset_first(b); - struct btree_iter iter; + struct btree_iter *iter; /* * c->fill_iter can allocate an iterator with more memory space * than static MAX_BSETS. * See the comment arount cache_set->fill_iter. */ - iter.heap.data = mempool_alloc(&b->c->fill_iter, GFP_NOIO); - iter.heap.size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; - iter.heap.nr = 0; + iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO); + iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; + iter->used = 0; #ifdef CONFIG_BCACHE_DEBUG - iter.b = &b->keys; + iter->b = &b->keys; #endif if (!i->seq) @@ -198,7 +198,7 @@ void bch_btree_node_read_done(struct btree *b) if (i != b->keys.set[0].data && !i->keys) goto err; - bch_btree_iter_push(&iter, i->start, bset_bkey_last(i)); + bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); b->written += set_blocks(i, block_bytes(b->c->cache)); } @@ -210,7 +210,7 @@ void bch_btree_node_read_done(struct btree *b) if (i->seq == b->keys.set[0].data->seq) goto err; - bch_btree_sort_and_fix_extents(&b->keys, &iter, &b->c->sort); + bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort); i = b->keys.set[0].data; err = "short btree key"; @@ -222,7 +222,7 @@ void bch_btree_node_read_done(struct btree *b) bch_bset_init_next(&b->keys, write_block(b), bset_magic(&b->c->cache->sb)); out: - mempool_free(iter.heap.data, &b->c->fill_iter); + mempool_free(iter, &b->c->fill_iter); return; err: set_btree_node_io_error(b); @@ -1306,11 +1306,9 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) uint8_t stale = 0; unsigned int keys = 0, good_keys = 0; struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; struct bset_tree *t; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - gc->nodes++; for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) { @@ -1569,11 +1567,9 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op, static unsigned int btree_gc_count_keys(struct btree *b) { struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; unsigned int ret = 0; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) ret += bkey_u64s(k); @@ -1612,18 +1608,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, int ret = 0; bool should_rewrite; struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; struct gc_merge_info r[GC_MERGE_NODES]; struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - 
bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done); + bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done); for (i = r; i < r + ARRAY_SIZE(r); i++) i->b = ERR_PTR(-EINTR); while (1) { - k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); + k = bch_btree_iter_next_filter(&iter.iter, &b->keys, + bch_ptr_bad); if (k) { r->b = bch_btree_node_get(b->c, op, k, b->level - 1, true, b); @@ -1918,9 +1914,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) { int ret = 0; struct bkey *k, *p = NULL; - struct btree_iter iter; - - min_heap_init(&iter.heap, NULL, MAX_BSETS); + struct btree_iter_stack iter; for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) bch_initial_mark_key(b->c, b->level, k); @@ -1928,10 +1922,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) bch_initial_mark_key(b->c, b->level + 1, &b->key); if (b->level) { - bch_btree_iter_init(&b->keys, &iter, NULL); + bch_btree_iter_stack_init(&b->keys, &iter, NULL); do { - k = bch_btree_iter_next_filter(&iter, &b->keys, + k = bch_btree_iter_next_filter(&iter.iter, &b->keys, bch_ptr_bad); if (k) { btree_node_prefetch(b, k); @@ -1959,7 +1953,7 @@ static int bch_btree_check_thread(void *arg) struct btree_check_info *info = arg; struct btree_check_state *check_state = info->state; struct cache_set *c = check_state->c; - struct btree_iter iter; + struct btree_iter_stack iter; struct bkey *k, *p; int cur_idx, prev_idx, skip_nr; @@ -1967,11 +1961,9 @@ static int bch_btree_check_thread(void *arg) cur_idx = prev_idx = 0; ret = 0; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - /* root node keys are checked before thread created */ - bch_btree_iter_init(&c->root->keys, &iter, NULL); - k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); + bch_btree_iter_stack_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); BUG_ON(!k); p = k; @@ -1989,7 +1981,7 @@ static int bch_btree_check_thread(void *arg) skip_nr = cur_idx - prev_idx; while (skip_nr) { - k = bch_btree_iter_next_filter(&iter, + k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); if (k) @@ -2062,11 +2054,9 @@ int bch_btree_check(struct cache_set *c) int ret = 0; int i; struct bkey *k = NULL; - struct btree_iter iter; + struct btree_iter_stack iter; struct btree_check_state check_state; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - /* check and mark root node keys */ for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid) bch_initial_mark_key(c, c->root->level, k); @@ -2560,12 +2550,11 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op, if (b->level) { struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - bch_btree_iter_init(&b->keys, &iter, from); + bch_btree_iter_stack_init(&b->keys, &iter, from); - while ((k = bch_btree_iter_next_filter(&iter, &b->keys, + while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys, bch_ptr_bad))) { ret = bcache_btree(map_nodes_recurse, k, b, op, from, fn, flags); @@ -2594,12 +2583,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, { int ret = MAP_CONTINUE; struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - bch_btree_iter_init(&b->keys, &iter, from); + bch_btree_iter_stack_init(&b->keys, &iter, from); - while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { + while ((k = 
bch_btree_iter_next_filter(&iter.iter, &b->keys, + bch_ptr_bad))) { ret = !b->level ? fn(op, b, k) : bcache_btree(map_keys_recurse, k, diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 4b84fda1530a..d626ffcbecb9 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -33,16 +33,15 @@ static void sort_key_next(struct btree_iter *iter, i->k = bkey_next(i->k); if (i->k == i->end) - *i = iter->heap.data[--iter->heap.nr]; + *i = iter->data[--iter->used]; } -static bool new_bch_key_sort_cmp(const void *l, const void *r, void *args) +static bool bch_key_sort_cmp(struct btree_iter_set l, + struct btree_iter_set r) { - struct btree_iter_set *_l = (struct btree_iter_set *)l; - struct btree_iter_set *_r = (struct btree_iter_set *)r; - int64_t c = bkey_cmp(_l->k, _r->k); + int64_t c = bkey_cmp(l.k, r.k); - return !(c ? c > 0 : _l->k < _r->k); + return c ? c > 0 : l.k < r.k; } static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) @@ -239,7 +238,7 @@ static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk, } const struct btree_keys_ops bch_btree_keys_ops = { - .sort_cmp = new_bch_key_sort_cmp, + .sort_cmp = bch_key_sort_cmp, .insert_fixup = bch_btree_ptr_insert_fixup, .key_invalid = bch_btree_ptr_invalid, .key_bad = bch_btree_ptr_bad, @@ -256,28 +255,22 @@ const struct btree_keys_ops bch_btree_keys_ops = { * Necessary for btree_sort_fixup() - if there are multiple keys that compare * equal in different sets, we have to process them newest to oldest. */ - -static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_unused *args) +static bool bch_extent_sort_cmp(struct btree_iter_set l, + struct btree_iter_set r) { - struct btree_iter_set *_l = (struct btree_iter_set *)l; - struct btree_iter_set *_r = (struct btree_iter_set *)r; - int64_t c = bkey_cmp(&START_KEY(_l->k), &START_KEY(_r->k)); + int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); - return !(c ? c > 0 : _l->k < _r->k); + return c ? 
c > 0 : l.k < r.k; } static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, struct bkey *tmp) { - const struct min_heap_callbacks callbacks = { - .less = new_bch_extent_sort_cmp, - .swp = NULL, - }; - while (iter->heap.nr > 1) { - struct btree_iter_set *top = iter->heap.data, *i = top + 1; - - if (iter->heap.nr > 2 && - !new_bch_extent_sort_cmp(&i[0], &i[1], NULL)) + while (iter->used > 1) { + struct btree_iter_set *top = iter->data, *i = top + 1; + + if (iter->used > 2 && + bch_extent_sort_cmp(i[0], i[1])) i++; if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0) @@ -285,7 +278,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, if (!KEY_SIZE(i->k)) { sort_key_next(iter, i); - min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); + heap_sift(iter, i - top, bch_extent_sort_cmp); continue; } @@ -295,7 +288,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, else bch_cut_front(top->k, i->k); - min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); + heap_sift(iter, i - top, bch_extent_sort_cmp); } else { /* can't happen because of comparison func */ BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k))); @@ -305,7 +298,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, bch_cut_back(&START_KEY(i->k), tmp); bch_cut_front(i->k, top->k); - min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); + heap_sift(iter, 0, bch_extent_sort_cmp); return tmp; } else { @@ -625,7 +618,7 @@ static bool bch_extent_merge(struct btree_keys *bk, } const struct btree_keys_ops bch_extent_keys_ops = { - .sort_cmp = new_bch_extent_sort_cmp, + .sort_cmp = bch_extent_sort_cmp, .sort_fixup = bch_extent_sort_fixup, .insert_fixup = bch_extent_insert_fixup, .key_invalid = bch_extent_invalid, diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 45ca134cbf02..26a6a535ec32 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -182,19 +182,16 @@ err: if (!IS_ERR_OR_NULL(w->private)) closure_sync(&cl); } -static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *args) +static bool bucket_cmp(struct bucket *l, struct bucket *r) { - struct bucket **_l = (struct bucket **)l; - struct bucket **_r = (struct bucket **)r; - - return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r); + return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); } static unsigned int bucket_heap_top(struct cache *ca) { struct bucket *b; - return (b = min_heap_peek(&ca->heap)[0]) ? GC_SECTORS_USED(b) : 0; + return (b = heap_peek(&ca->heap)) ? 
GC_SECTORS_USED(b) : 0; } void bch_moving_gc(struct cache_set *c) @@ -202,10 +199,6 @@ void bch_moving_gc(struct cache_set *c) struct cache *ca = c->cache; struct bucket *b; unsigned long sectors_to_move, reserve_sectors; - const struct min_heap_callbacks callbacks = { - .less = new_bucket_cmp, - .swp = NULL, - }; if (!c->copy_gc_enabled) return; @@ -216,7 +209,7 @@ void bch_moving_gc(struct cache_set *c) reserve_sectors = ca->sb.bucket_size * fifo_used(&ca->free[RESERVE_MOVINGGC]); - ca->heap.nr = 0; + ca->heap.used = 0; for_each_bucket(b, ca) { if (GC_MARK(b) == GC_MARK_METADATA || @@ -225,31 +218,25 @@ void bch_moving_gc(struct cache_set *c) atomic_read(&b->pin)) continue; - if (!min_heap_full(&ca->heap)) { + if (!heap_full(&ca->heap)) { sectors_to_move += GC_SECTORS_USED(b); - min_heap_push(&ca->heap, &b, &callbacks, NULL); - } else if (!new_bucket_cmp(&b, min_heap_peek(&ca->heap), ca)) { + heap_add(&ca->heap, b, bucket_cmp); + } else if (bucket_cmp(b, heap_peek(&ca->heap))) { sectors_to_move -= bucket_heap_top(ca); sectors_to_move += GC_SECTORS_USED(b); ca->heap.data[0] = b; - min_heap_sift_down(&ca->heap, 0, &callbacks, NULL); + heap_sift(&ca->heap, 0, bucket_cmp); } } while (sectors_to_move > reserve_sectors) { - if (ca->heap.nr) { - b = min_heap_peek(&ca->heap)[0]; - min_heap_pop(&ca->heap, &callbacks, NULL); - } + heap_pop(&ca->heap, b, bucket_cmp); sectors_to_move -= GC_SECTORS_USED(b); } - while (ca->heap.nr) { - b = min_heap_peek(&ca->heap)[0]; - min_heap_pop(&ca->heap, &callbacks, NULL); + while (heap_pop(&ca->heap, b, bucket_cmp)) SET_GC_MOVE(b, 1); - } mutex_unlock(&c->bucket_lock); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 1efb768b2890..2ea490b9d370 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1912,7 +1912,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) INIT_LIST_HEAD(&c->btree_cache_freed); INIT_LIST_HEAD(&c->data_buckets); - iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * + iter_size = sizeof(struct btree_iter) + + ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * sizeof(struct btree_iter_set); c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index e8f696cb58c0..826b14cae4e5 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -660,9 +660,7 @@ static unsigned int bch_root_usage(struct cache_set *c) unsigned int bytes = 0; struct bkey *k; struct btree *b; - struct btree_iter iter; - - min_heap_init(&iter.heap, NULL, MAX_BSETS); + struct btree_iter_stack iter; goto lock_root; diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 539454d8e2d0..f61ab1bada6c 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -9,7 +9,6 @@ #include <linux/kernel.h> #include <linux/sched/clock.h> #include <linux/llist.h> -#include <linux/min_heap.h> #include <linux/ratelimit.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> @@ -31,10 +30,16 @@ struct closure; #endif +#define DECLARE_HEAP(type, name) \ + struct { \ + size_t size, used; \ + type *data; \ + } name + #define init_heap(heap, _size, gfp) \ ({ \ size_t _bytes; \ - (heap)->nr = 0; \ + (heap)->used = 0; \ (heap)->size = (_size); \ _bytes = (heap)->size * sizeof(*(heap)->data); \ (heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \ @@ -47,6 +52,64 @@ do { \ (heap)->data = NULL; \ } while (0) +#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j]) + +#define heap_sift(h, i, cmp) 
\ +do { \ + size_t _r, _j = i; \ + \ + for (; _j * 2 + 1 < (h)->used; _j = _r) { \ + _r = _j * 2 + 1; \ + if (_r + 1 < (h)->used && \ + cmp((h)->data[_r], (h)->data[_r + 1])) \ + _r++; \ + \ + if (cmp((h)->data[_r], (h)->data[_j])) \ + break; \ + heap_swap(h, _r, _j); \ + } \ +} while (0) + +#define heap_sift_down(h, i, cmp) \ +do { \ + while (i) { \ + size_t p = (i - 1) / 2; \ + if (cmp((h)->data[i], (h)->data[p])) \ + break; \ + heap_swap(h, i, p); \ + i = p; \ + } \ +} while (0) + +#define heap_add(h, d, cmp) \ +({ \ + bool _r = !heap_full(h); \ + if (_r) { \ + size_t _i = (h)->used++; \ + (h)->data[_i] = d; \ + \ + heap_sift_down(h, _i, cmp); \ + heap_sift(h, _i, cmp); \ + } \ + _r; \ +}) + +#define heap_pop(h, d, cmp) \ +({ \ + bool _r = (h)->used; \ + if (_r) { \ + (d) = (h)->data[0]; \ + (h)->used--; \ + heap_swap(h, 0, (h)->used); \ + heap_sift(h, 0, cmp); \ + } \ + _r; \ +}) + +#define heap_peek(h) ((h)->used ? (h)->data[0] : NULL) + +#define heap_full(h) ((h)->used == (h)->size) + #define DECLARE_FIFO(type, name) \ struct { \ size_t front, back, size, mask; \ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 453efbbdc8ee..302e75f1fc4b 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -908,16 +908,15 @@ static int bch_dirty_init_thread(void *arg) struct dirty_init_thrd_info *info = arg; struct bch_dirty_init_state *state = info->state; struct cache_set *c = state->c; - struct btree_iter iter; + struct btree_iter_stack iter; struct bkey *k, *p; int cur_idx, prev_idx, skip_nr; k = p = NULL; prev_idx = 0; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - bch_btree_iter_init(&c->root->keys, &iter, NULL); - k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); + bch_btree_iter_stack_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); BUG_ON(!k); p = k; @@ -931,7 +930,7 @@ static int bch_dirty_init_thread(void *arg) skip_nr = cur_idx - prev_idx; while (skip_nr) { - k = bch_btree_iter_next_filter(&iter, + k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); if (k) @@ -980,13 +979,11 @@ void bch_sectors_dirty_init(struct bcache_device *d) int i; struct btree *b = NULL; struct bkey *k = NULL; - struct btree_iter iter; + struct btree_iter_stack iter; struct sectors_dirty_init op; struct cache_set *c = d->c; struct bch_dirty_init_state state; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - retry_lock: b = c->root; rw_lock(0, b, b->level); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9dfdb63220d7..17157c4216a5 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -517,7 +517,10 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, { struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; SHASH_DESC_ON_STACK(desc, lmk->hash_tfm); - struct md5_state md5state; + union { + struct md5_state md5state; + u8 state[CRYPTO_MD5_STATESIZE]; + } u; __le32 buf[4]; int i, r; @@ -548,13 +551,13 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, return r; /* No MD5 padding here */ - r = crypto_shash_export(desc, &md5state); + r = crypto_shash_export(desc, &u.md5state); if (r) return r; for (i = 0; i < MD5_HASH_WORDS; i++) - __cpu_to_le32s(&md5state.hash[i]); - memcpy(iv, &md5state.hash, cc->iv_size); + __cpu_to_le32s(&u.md5state.hash[i]); + memcpy(iv, &u.md5state.hash, cc->iv_size); return 0; } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index d296770478b2..e8c0a8c6fb51 100644 --- 
a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2407,7 +2407,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) */ sb_retrieve_failed_devices(sb, failed_devices); rdev_for_each(r, mddev) { - if (test_bit(Journal, &rdev->flags) || + if (test_bit(Journal, &r->flags) || !r->sb_page) continue; sb2 = page_address(r->sb_page); diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 391d81ad960c..8dc4f5c493fc 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -559,7 +559,7 @@ static int mtdchar_blkpg_ioctl(struct mtd_info *mtd, /* Sanitize user input */ p.devname[BLKPG_DEVNAMELTH - 1] = '\0'; - return mtd_add_partition(mtd, p.devname, p.start, p.length, NULL); + return mtd_add_partition(mtd, p.devname, p.start, p.length); case BLKPG_DEL_PARTITION: diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 429d8c16baf0..5ba9a741f5ac 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -68,13 +68,7 @@ static struct class mtd_class = { .pm = MTD_CLS_PM_OPS, }; -static struct class mtd_master_class = { - .name = "mtd_master", - .pm = MTD_CLS_PM_OPS, -}; - static DEFINE_IDR(mtd_idr); -static DEFINE_IDR(mtd_master_idr); /* These are exported solely for the purpose of mtd_blkdevs.c. You should not use them for _anything_ else */ @@ -89,9 +83,8 @@ EXPORT_SYMBOL_GPL(__mtd_next_device); static LIST_HEAD(mtd_notifiers); -#define MTD_MASTER_DEVS 255 + #define MTD_DEVT(index) MKDEV(MTD_CHAR_MAJOR, (index)*2) -static dev_t mtd_master_devt; /* REVISIT once MTD uses the driver model better, whoever allocates * the mtd_info will probably want to use the release() hook... @@ -111,17 +104,6 @@ static void mtd_release(struct device *dev) device_destroy(&mtd_class, index + 1); } -static void mtd_master_release(struct device *dev) -{ - struct mtd_info *mtd = dev_get_drvdata(dev); - - idr_remove(&mtd_master_idr, mtd->index); - of_node_put(mtd_get_of_node(mtd)); - - if (mtd_is_partition(mtd)) - release_mtd_partition(mtd); -} - static void mtd_device_release(struct kref *kref) { struct mtd_info *mtd = container_of(kref, struct mtd_info, refcnt); @@ -385,11 +367,6 @@ static const struct device_type mtd_devtype = { .release = mtd_release, }; -static const struct device_type mtd_master_devtype = { - .name = "mtd_master", - .release = mtd_master_release, -}; - static bool mtd_expert_analysis_mode; #ifdef CONFIG_DEBUG_FS @@ -657,13 +634,13 @@ exit_parent: /** * add_mtd_device - register an MTD device * @mtd: pointer to new MTD device info structure - * @partitioned: create partitioned device * * Add a device to the list of MTD devices present in the system, and * notify each currently active MTD 'user' of its arrival. Returns * zero on success or non-zero on failure. 
*/ -int add_mtd_device(struct mtd_info *mtd, bool partitioned) + +int add_mtd_device(struct mtd_info *mtd) { struct device_node *np = mtd_get_of_node(mtd); struct mtd_info *master = mtd_get_master(mtd); @@ -710,17 +687,10 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) ofidx = -1; if (np) ofidx = of_alias_get_id(np, "mtd"); - if (partitioned) { - if (ofidx >= 0) - i = idr_alloc(&mtd_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL); - else - i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); - } else { - if (ofidx >= 0) - i = idr_alloc(&mtd_master_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL); - else - i = idr_alloc(&mtd_master_idr, mtd, 0, 0, GFP_KERNEL); - } + if (ofidx >= 0) + i = idr_alloc(&mtd_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL); + else + i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); if (i < 0) { error = i; goto fail_locked; @@ -768,18 +738,10 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) /* Caller should have set dev.parent to match the * physical device, if appropriate. */ - if (partitioned) { - mtd->dev.type = &mtd_devtype; - mtd->dev.class = &mtd_class; - mtd->dev.devt = MTD_DEVT(i); - dev_set_name(&mtd->dev, "mtd%d", i); - error = dev_set_name(&mtd->dev, "mtd%d", i); - } else { - mtd->dev.type = &mtd_master_devtype; - mtd->dev.class = &mtd_master_class; - mtd->dev.devt = MKDEV(MAJOR(mtd_master_devt), i); - error = dev_set_name(&mtd->dev, "mtd_master%d", i); - } + mtd->dev.type = &mtd_devtype; + mtd->dev.class = &mtd_class; + mtd->dev.devt = MTD_DEVT(i); + error = dev_set_name(&mtd->dev, "mtd%d", i); if (error) goto fail_devname; dev_set_drvdata(&mtd->dev, mtd); @@ -787,7 +749,6 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) of_node_get(mtd_get_of_node(mtd)); error = device_register(&mtd->dev); if (error) { - pr_err("mtd: %s device_register fail %d\n", mtd->name, error); put_device(&mtd->dev); goto fail_added; } @@ -799,13 +760,10 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) mtd_debugfs_populate(mtd); - if (partitioned) { - device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL, - "mtd%dro", i); - } + device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL, + "mtd%dro", i); - pr_debug("mtd: Giving out %spartitioned device %d to %s\n", - partitioned ? 
"" : "un-", i, mtd->name); + pr_debug("mtd: Giving out device %d to %s\n", i, mtd->name); /* No need to get a refcount on the module containing the notifier, since we hold the mtd_table_mutex */ list_for_each_entry(not, &mtd_notifiers, list) @@ -813,16 +771,13 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) mutex_unlock(&mtd_table_mutex); - if (partitioned) { - if (of_property_read_bool(mtd_get_of_node(mtd), "linux,rootfs")) { - if (IS_BUILTIN(CONFIG_MTD)) { - pr_info("mtd: setting mtd%d (%s) as root device\n", - mtd->index, mtd->name); - ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, mtd->index); - } else { - pr_warn("mtd: can't set mtd%d (%s) as root device - mtd must be builtin\n", - mtd->index, mtd->name); - } + if (of_property_read_bool(mtd_get_of_node(mtd), "linux,rootfs")) { + if (IS_BUILTIN(CONFIG_MTD)) { + pr_info("mtd: setting mtd%d (%s) as root device\n", mtd->index, mtd->name); + ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, mtd->index); + } else { + pr_warn("mtd: can't set mtd%d (%s) as root device - mtd must be builtin\n", + mtd->index, mtd->name); } } @@ -838,10 +793,7 @@ fail_nvmem_add: fail_added: of_node_put(mtd_get_of_node(mtd)); fail_devname: - if (partitioned) - idr_remove(&mtd_idr, i); - else - idr_remove(&mtd_master_idr, i); + idr_remove(&mtd_idr, i); fail_locked: mutex_unlock(&mtd_table_mutex); return error; @@ -859,14 +811,12 @@ fail_locked: int del_mtd_device(struct mtd_info *mtd) { - struct mtd_notifier *not; - struct idr *idr; int ret; + struct mtd_notifier *not; mutex_lock(&mtd_table_mutex); - idr = mtd->dev.class == &mtd_class ? &mtd_idr : &mtd_master_idr; - if (idr_find(idr, mtd->index) != mtd) { + if (idr_find(&mtd_idr, mtd->index) != mtd) { ret = -ENODEV; goto out_error; } @@ -1106,7 +1056,6 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types, const struct mtd_partition *parts, int nr_parts) { - struct mtd_info *parent; int ret, err; mtd_set_dev_defaults(mtd); @@ -1115,30 +1064,25 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types, if (ret) goto out; - ret = add_mtd_device(mtd, false); - if (ret) - goto out; - if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) { - ret = mtd_add_partition(mtd, mtd->name, 0, MTDPART_SIZ_FULL, &parent); + ret = add_mtd_device(mtd); if (ret) goto out; - - } else { - parent = mtd; } /* Prefer parsed partitions over driver-provided fallback */ - ret = parse_mtd_partitions(parent, types, parser_data); + ret = parse_mtd_partitions(mtd, types, parser_data); if (ret == -EPROBE_DEFER) goto out; if (ret > 0) ret = 0; else if (nr_parts) - ret = add_mtd_partitions(parent, parts, nr_parts); - else if (!IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) - ret = mtd_add_partition(parent, mtd->name, 0, MTDPART_SIZ_FULL, NULL); + ret = add_mtd_partitions(mtd, parts, nr_parts); + else if (!device_is_registered(&mtd->dev)) + ret = add_mtd_device(mtd); + else + ret = 0; if (ret) goto out; @@ -1158,14 +1102,13 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types, register_reboot_notifier(&mtd->reboot_notifier); } - return 0; out: - nvmem_unregister(mtd->otp_user_nvmem); - nvmem_unregister(mtd->otp_factory_nvmem); - - del_mtd_partitions(mtd); + if (ret) { + nvmem_unregister(mtd->otp_user_nvmem); + nvmem_unregister(mtd->otp_factory_nvmem); + } - if (device_is_registered(&mtd->dev)) { + if (ret && device_is_registered(&mtd->dev)) { err = del_mtd_device(mtd); if (err) pr_err("Error when deleting MTD device (%d)\n", err); @@ -1324,7 +1267,8 @@ int __get_mtd_device(struct mtd_info *mtd) 
mtd = mtd->parent; } - kref_get(&master->refcnt); + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) + kref_get(&master->refcnt); return 0; } @@ -1418,7 +1362,8 @@ void __put_mtd_device(struct mtd_info *mtd) mtd = parent; } - kref_put(&master->refcnt, mtd_device_release); + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) + kref_put(&master->refcnt, mtd_device_release); module_put(master->owner); @@ -2585,16 +2530,6 @@ static int __init init_mtd(void) if (ret) goto err_reg; - ret = class_register(&mtd_master_class); - if (ret) - goto err_reg2; - - ret = alloc_chrdev_region(&mtd_master_devt, 0, MTD_MASTER_DEVS, "mtd_master"); - if (ret < 0) { - pr_err("unable to allocate char dev region\n"); - goto err_chrdev; - } - mtd_bdi = mtd_bdi_init("mtd"); if (IS_ERR(mtd_bdi)) { ret = PTR_ERR(mtd_bdi); @@ -2619,10 +2554,6 @@ out_procfs: bdi_unregister(mtd_bdi); bdi_put(mtd_bdi); err_bdi: - unregister_chrdev_region(mtd_master_devt, MTD_MASTER_DEVS); -err_chrdev: - class_unregister(&mtd_master_class); -err_reg2: class_unregister(&mtd_class); err_reg: pr_err("Error registering mtd class or bdi: %d\n", ret); @@ -2636,12 +2567,9 @@ static void __exit cleanup_mtd(void) if (proc_mtd) remove_proc_entry("mtd", NULL); class_unregister(&mtd_class); - class_unregister(&mtd_master_class); - unregister_chrdev_region(mtd_master_devt, MTD_MASTER_DEVS); bdi_unregister(mtd_bdi); bdi_put(mtd_bdi); idr_destroy(&mtd_idr); - idr_destroy(&mtd_master_idr); } module_init(init_mtd); diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h index 2258d31c5aa6..b014861a06a6 100644 --- a/drivers/mtd/mtdcore.h +++ b/drivers/mtd/mtdcore.h @@ -8,7 +8,7 @@ extern struct mutex mtd_table_mutex; extern struct backing_dev_info *mtd_bdi; struct mtd_info *__mtd_next_device(int i); -int __must_check add_mtd_device(struct mtd_info *mtd, bool partitioned); +int __must_check add_mtd_device(struct mtd_info *mtd); int del_mtd_device(struct mtd_info *mtd); int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); int del_mtd_partitions(struct mtd_info *); diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 5a3db36d734e..994e8c51e674 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -86,7 +86,8 @@ static struct mtd_info *allocate_partition(struct mtd_info *parent, * parent conditional on that option. Note, this is a way to * distinguish between the parent and its partitions in sysfs. */ - child->dev.parent = &parent->dev; + child->dev.parent = IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd_is_partition(parent) ? + &parent->dev : parent->dev.parent; child->dev.of_node = part->of_node; child->parent = parent; child->part.offset = part->offset; @@ -242,7 +243,7 @@ static int mtd_add_partition_attrs(struct mtd_info *new) } int mtd_add_partition(struct mtd_info *parent, const char *name, - long long offset, long long length, struct mtd_info **out) + long long offset, long long length) { struct mtd_info *master = mtd_get_master(parent); u64 parent_size = mtd_is_partition(parent) ? 
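The __get_mtd_device()/__put_mtd_device() hunks above restore the old rule that the master's refcount is only taken when CONFIG_MTD_PARTITIONED_MASTER is enabled. What makes this safe is that both paths gate on the same IS_ENABLED() check: a conditional get paired with an unconditional put (or vice versa) would underflow the refcount in one configuration and leak the device in the other. A minimal stand-alone sketch of that invariant, using illustrative names rather than the real MTD API:

#include <assert.h>
#include <stdbool.h>

/* Stand-in for IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER). */
static const bool partitioned_master = true;

struct master {
	int refcnt;			/* stand-in for the master device kref */
};

static void master_get(struct master *m)
{
	if (partitioned_master)
		m->refcnt++;
}

static void master_put(struct master *m)
{
	if (partitioned_master) {	/* must mirror master_get() exactly */
		assert(m->refcnt > 0);
		m->refcnt--;
	}
}

int main(void)
{
	struct master m = { .refcnt = 1 };

	master_get(&m);
	master_put(&m);
	assert(m.refcnt == 1);		/* balanced in either configuration */
	return 0;
}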
@@ -275,15 +276,12 @@ int mtd_add_partition(struct mtd_info *parent, const char *name, list_add_tail(&child->part.node, &parent->partitions); mutex_unlock(&master->master.partitions_lock); - ret = add_mtd_device(child, true); + ret = add_mtd_device(child); if (ret) goto err_remove_part; mtd_add_partition_attrs(child); - if (out) - *out = child; - return 0; err_remove_part: @@ -415,7 +413,7 @@ int add_mtd_partitions(struct mtd_info *parent, list_add_tail(&child->part.node, &parent->partitions); mutex_unlock(&master->master.partitions_lock); - ret = add_mtd_device(child, true); + ret = add_mtd_device(child); if (ret) { mutex_lock(&master->master.partitions_lock); list_del(&child->part.node); @@ -592,6 +590,9 @@ static int mtd_part_of_parse(struct mtd_info *master, int ret, err = 0; dev = &master->dev; + /* Use parent device (controller) if the top level MTD is not registered */ + if (!IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) && !mtd_is_partition(master)) + dev = master->dev.parent; np = mtd_get_of_node(master); if (mtd_is_partition(master)) @@ -710,7 +711,6 @@ int parse_mtd_partitions(struct mtd_info *master, const char *const *types, if (ret < 0 && !err) err = ret; } - return err; } diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 7099db7a62be..c411fe9be3ef 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -1585,6 +1585,7 @@ static void spinand_cleanup(struct spinand_device *spinand) { struct nand_device *nand = spinand_to_nand(spinand); + nanddev_ecc_engine_cleanup(nand); nanddev_cleanup(nand); spinand_manufacturer_cleanup(spinand); kfree(spinand->databuf); diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index 19f8dd4a6370..b7a28f001a38 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -25,7 +25,7 @@ static SPINAND_OP_VARIANTS(read_cache_octal_variants, SPINAND_PAGE_READ_FROM_CACHE_1S_1D_8D_OP(0, 2, NULL, 0, 105 * HZ_PER_MHZ), - SPINAND_PAGE_READ_FROM_CACHE_1S_8S_8S_OP(0, 16, NULL, 0, 86 * HZ_PER_MHZ), + SPINAND_PAGE_READ_FROM_CACHE_1S_8S_8S_OP(0, 16, NULL, 0, 162 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1S_8S_OP(0, 1, NULL, 0, 133 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(0, 1, NULL, 0)); @@ -42,11 +42,11 @@ static SPINAND_OP_VARIANTS(update_cache_octal_variants, static SPINAND_OP_VARIANTS(read_cache_dual_quad_dtr_variants, SPINAND_PAGE_READ_FROM_CACHE_1S_4D_4D_OP(0, 8, NULL, 0, 80 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1D_4D_OP(0, 2, NULL, 0, 80 * HZ_PER_MHZ), - SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(0, 2, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(0, 2, NULL, 0, 104 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_1S_2D_2D_OP(0, 4, NULL, 0, 80 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1D_2D_OP(0, 2, NULL, 0, 80 * HZ_PER_MHZ), - SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(0, 1, NULL, 0, 104 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_1S_1D_1D_OP(0, 2, NULL, 0, 80 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(0, 1, NULL, 0), @@ -289,7 +289,7 @@ static const struct spinand_info winbond_spinand_table[] = { SPINAND_ECCINFO(&w35n01jw_ooblayout, NULL)), SPINAND_INFO("W35N02JW", /* 1.8V */ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xdf, 0x22), - NAND_MEMORG(1, 4096, 128, 64, 512, 10, 2, 1, 
1), + NAND_MEMORG(1, 4096, 128, 64, 512, 10, 1, 2, 1), NAND_ECCREQ(1, 512), SPINAND_INFO_OP_VARIANTS(&read_cache_octal_variants, &write_cache_octal_variants, @@ -298,7 +298,7 @@ static const struct spinand_info winbond_spinand_table[] = { SPINAND_ECCINFO(&w35n01jw_ooblayout, NULL)), SPINAND_INFO("W35N04JW", /* 1.8V */ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xdf, 0x23), - NAND_MEMORG(1, 4096, 128, 64, 512, 10, 4, 1, 1), + NAND_MEMORG(1, 4096, 128, 64, 512, 10, 1, 4, 1), NAND_ECCREQ(1, 512), SPINAND_INFO_OP_VARIANTS(&read_cache_octal_variants, &write_cache_octal_variants, diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index e5c162f8c589..8edaa339d590 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -411,10 +411,11 @@ static int tcan4x5x_can_probe(struct spi_device *spi) priv = cdev_to_priv(mcan_class); priv->power = devm_regulator_get_optional(&spi->dev, "vsup"); - if (PTR_ERR(priv->power) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; - goto out_m_can_class_free_dev; - } else { + if (IS_ERR(priv->power)) { + if (PTR_ERR(priv->power) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto out_m_can_class_free_dev; + } priv->power = NULL; } diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 862bdccb7439..dc2f4adac9bc 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2034,9 +2034,6 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, b53_get_vlan_entry(dev, pvid, vl); vl->members &= ~BIT(port); - if (vl->members == BIT(cpu_port)) - vl->members &= ~BIT(cpu_port); - vl->untag = vl->members; b53_set_vlan_entry(dev, pvid, vl); } @@ -2115,8 +2112,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) } b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(cpu_port); - vl->untag |= BIT(port) | BIT(cpu_port); + vl->members |= BIT(port); b53_set_vlan_entry(dev, pvid, vl); } } diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index a7ec609d64de..06dea3a13e77 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1065,23 +1065,18 @@ static void airoha_qdma_cleanup_tx_queue(struct airoha_queue *q) static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) { + int size, index, num_desc = HW_DSCP_NUM; struct airoha_eth *eth = qdma->eth; int id = qdma - &eth->qdma[0]; + u32 status, buf_size; dma_addr_t dma_addr; const char *name; - int size, index; - u32 status; - - size = HW_DSCP_NUM * sizeof(struct airoha_qdma_fwd_desc); - if (!dmam_alloc_coherent(eth->dev, size, &dma_addr, GFP_KERNEL)) - return -ENOMEM; - - airoha_qdma_wr(qdma, REG_FWD_DSCP_BASE, dma_addr); name = devm_kasprintf(eth->dev, GFP_KERNEL, "qdma%d-buf", id); if (!name) return -ENOMEM; + buf_size = id ?
AIROHA_MAX_PACKET_SIZE / 2 : AIROHA_MAX_PACKET_SIZE; index = of_property_match_string(eth->dev->of_node, "memory-region-names", name); if (index >= 0) { @@ -1099,8 +1094,12 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) rmem = of_reserved_mem_lookup(np); of_node_put(np); dma_addr = rmem->base; + /* Compute the number of hw descriptors according to the + * reserved memory size and the payload buffer size + */ + num_desc = div_u64(rmem->size, buf_size); } else { - size = AIROHA_MAX_PACKET_SIZE * HW_DSCP_NUM; + size = buf_size * num_desc; if (!dmam_alloc_coherent(eth->dev, size, &dma_addr, GFP_KERNEL)) return -ENOMEM; @@ -1108,15 +1107,21 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) airoha_qdma_wr(qdma, REG_FWD_BUF_BASE, dma_addr); + size = num_desc * sizeof(struct airoha_qdma_fwd_desc); + if (!dmam_alloc_coherent(eth->dev, size, &dma_addr, GFP_KERNEL)) + return -ENOMEM; + + airoha_qdma_wr(qdma, REG_FWD_DSCP_BASE, dma_addr); + /* QDMA0: 2KB. QDMA1: 1KB */ airoha_qdma_rmw(qdma, REG_HW_FWD_DSCP_CFG, HW_FWD_DSCP_PAYLOAD_SIZE_MASK, - FIELD_PREP(HW_FWD_DSCP_PAYLOAD_SIZE_MASK, 0)); + FIELD_PREP(HW_FWD_DSCP_PAYLOAD_SIZE_MASK, !!id)); airoha_qdma_rmw(qdma, REG_FWD_DSCP_LOW_THR, FWD_DSCP_LOW_THR_MASK, FIELD_PREP(FWD_DSCP_LOW_THR_MASK, 128)); airoha_qdma_rmw(qdma, REG_LMGR_INIT_CFG, LMGR_INIT_START | LMGR_SRAM_MODE_MASK | HW_FWD_DESC_NUM_MASK, - FIELD_PREP(HW_FWD_DESC_NUM_MASK, HW_DSCP_NUM) | + FIELD_PREP(HW_FWD_DESC_NUM_MASK, num_desc) | LMGR_INIT_START | LMGR_SRAM_MODE_MASK); return read_poll_timeout(airoha_qdma_rr, status, diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 9067d2fc7706..0e217acfc5ef 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -809,8 +809,10 @@ airoha_ppe_foe_flow_l2_entry_update(struct airoha_ppe *ppe, int idle; hwe = airoha_ppe_foe_get_entry(ppe, iter->hash); - ib1 = READ_ONCE(hwe->ib1); + if (!hwe) + continue; + ib1 = READ_ONCE(hwe->ib1); state = FIELD_GET(AIROHA_FOE_IB1_BIND_STATE, ib1); if (state != AIROHA_FOE_STATE_BIND) { iter->hash = 0xffff; diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index 04187eb40ec6..150c85995cc1 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -614,8 +614,9 @@ RX19_DONE_INT_MASK | RX18_DONE_INT_MASK | \ RX17_DONE_INT_MASK | RX16_DONE_INT_MASK) -#define RX_DONE_INT_MASK (RX_DONE_HIGH_INT_MASK | RX_DONE_LOW_INT_MASK) #define RX_DONE_HIGH_OFFSET fls(RX_DONE_HIGH_INT_MASK) +#define RX_DONE_INT_MASK \ + ((RX_DONE_HIGH_INT_MASK << RX_DONE_HIGH_OFFSET) | RX_DONE_LOW_INT_MASK) #define INT_RX2_MASK(_n) \ ((RX_NO_CPU_DSCP_HIGH_INT_MASK & (_n)) | \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 869580b6f70d..ae89a981e052 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2989,6 +2989,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, { struct bnxt_napi *bnapi = cpr->bnapi; u32 raw_cons = cpr->cp_raw_cons; + bool flush_xdp = false; u32 cons; int rx_pkts = 0; u8 event = 0; @@ -3042,6 +3043,8 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, else rc = bnxt_force_rx_discard(bp, cpr, &raw_cons, &event); + if (event & BNXT_REDIRECT_EVENT) + flush_xdp = true; if (likely(rc >= 0)) rx_pkts += rc; /* Increment rx_pkts when rc is -ENOMEM to 
count towards @@ -3066,7 +3069,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } } - if (event & BNXT_REDIRECT_EVENT) { + if (flush_xdp) { xdp_do_flush(); event &= ~BNXT_REDIRECT_EVENT; } @@ -10780,6 +10783,72 @@ void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, bp->num_rss_ctx--; } +static bool bnxt_vnic_has_rx_ring(struct bnxt *bp, struct bnxt_vnic_info *vnic, + int rxr_id) +{ + u16 tbl_size = bnxt_get_rxfh_indir_size(bp->dev); + int i, vnic_rx; + + /* Ntuple VNIC always has all the rx rings. Any change of ring id + * must be updated because a future filter may use it. + */ + if (vnic->flags & BNXT_VNIC_NTUPLE_FLAG) + return true; + + for (i = 0; i < tbl_size; i++) { + if (vnic->flags & BNXT_VNIC_RSSCTX_FLAG) + vnic_rx = ethtool_rxfh_context_indir(vnic->rss_ctx)[i]; + else + vnic_rx = bp->rss_indir_tbl[i]; + + if (rxr_id == vnic_rx) + return true; + } + + return false; +} + +static int bnxt_set_vnic_mru_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic, + u16 mru, int rxr_id) +{ + int rc; + + if (!bnxt_vnic_has_rx_ring(bp, vnic, rxr_id)) + return 0; + + if (mru) { + rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); + if (rc) { + netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", + vnic->vnic_id, rc); + return rc; + } + } + vnic->mru = mru; + bnxt_hwrm_vnic_update(bp, vnic, + VNIC_UPDATE_REQ_ENABLES_MRU_VALID); + + return 0; +} + +static int bnxt_set_rss_ctx_vnic_mru(struct bnxt *bp, u16 mru, int rxr_id) +{ + struct ethtool_rxfh_context *ctx; + unsigned long context; + int rc; + + xa_for_each(&bp->dev->ethtool->rss_ctx, context, ctx) { + struct bnxt_rss_ctx *rss_ctx = ethtool_rxfh_context_priv(ctx); + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + + rc = bnxt_set_vnic_mru_p5(bp, vnic, mru, rxr_id); + if (rc) + return rc; + } + + return 0; +} + static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp) { bool set_tpa = !!(bp->flags & BNXT_FLAG_TPA); @@ -15927,6 +15996,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) struct bnxt_vnic_info *vnic; struct bnxt_napi *bnapi; int i, rc; + u16 mru; rxr = &bp->rx_ring[idx]; clone = qmem; @@ -15977,21 +16047,15 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); + mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; for (i = 0; i < bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; - rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); - if (rc) { - netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", - vnic->vnic_id, rc); + rc = bnxt_set_vnic_mru_p5(bp, vnic, mru, idx); + if (rc) return rc; - } - vnic->mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; - bnxt_hwrm_vnic_update(bp, vnic, - VNIC_UPDATE_REQ_ENABLES_MRU_VALID); } - - return 0; + return bnxt_set_rss_ctx_vnic_mru(bp, mru, idx); err_reset: netdev_err(bp->dev, "Unexpected HWRM error during queue start rc: %d\n", @@ -16013,10 +16077,10 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) for (i = 0; i < bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; - vnic->mru = 0; - bnxt_hwrm_vnic_update(bp, vnic, - VNIC_UPDATE_REQ_ENABLES_MRU_VALID); + + bnxt_set_vnic_mru_p5(bp, vnic, 0, idx); } + bnxt_set_rss_ctx_vnic_mru(bp, 0, idx); /* Make sure NAPI sees that the VNIC is disabled */ synchronize_net(); rxr = &bp->rx_ring[idx]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 84c4812414fd..2450a369b792 100644 --- 
a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -231,10 +231,9 @@ void bnxt_ulp_stop(struct bnxt *bp) return; mutex_lock(&edev->en_dev_lock); - if (!bnxt_ulp_registered(edev)) { - mutex_unlock(&edev->en_dev_lock); - return; - } + if (!bnxt_ulp_registered(edev) || + (edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) + goto ulp_stop_exit; edev->flags |= BNXT_EN_FLAG_ULP_STOPPED; if (aux_priv) { @@ -250,6 +249,7 @@ void bnxt_ulp_stop(struct bnxt *bp) adrv->suspend(adev, pm); } } +ulp_stop_exit: mutex_unlock(&edev->en_dev_lock); } @@ -258,19 +258,13 @@ void bnxt_ulp_start(struct bnxt *bp, int err) struct bnxt_aux_priv *aux_priv = bp->aux_priv; struct bnxt_en_dev *edev = bp->edev; - if (!edev) - return; - - edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED; - - if (err) + if (!edev || err) return; mutex_lock(&edev->en_dev_lock); - if (!bnxt_ulp_registered(edev)) { - mutex_unlock(&edev->en_dev_lock); - return; - } + if (!bnxt_ulp_registered(edev) || + !(edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) + goto ulp_start_exit; if (edev->ulp_tbl->msix_requested) bnxt_fill_msix_vecs(bp, edev->msix_entries); @@ -287,6 +281,8 @@ void bnxt_ulp_start(struct bnxt *bp, int err) adrv->resume(adev); } } +ulp_start_exit: + edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED; mutex_unlock(&edev->en_dev_lock); } diff --git a/drivers/net/ethernet/faraday/Kconfig b/drivers/net/ethernet/faraday/Kconfig index c699bd6bcbb9..474073c7f94d 100644 --- a/drivers/net/ethernet/faraday/Kconfig +++ b/drivers/net/ethernet/faraday/Kconfig @@ -31,6 +31,7 @@ config FTGMAC100 depends on ARM || COMPILE_TEST depends on !64BIT || BROKEN select PHYLIB + select FIXED_PHY select MDIO_ASPEED if MACH_ASPEED_G6 select CRC32 help diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index e917132d3714..54b0f0a5a6bb 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 config FSL_ENETC_CORE tristate + select NXP_NETC_LIB if NXP_NTMP help This module supports common functionality between the PF and VF drivers for the NXP ENETC controller. @@ -22,6 +23,9 @@ config NXP_NETC_LIB Switch, such as NETC Table Management Protocol (NTMP) 2.0, common tc flower and debugfs interfaces and so on. 
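The bnxt_ulp.c hunks above make the stop/start pair idempotent: bnxt_ulp_stop() bails out early when BNXT_EN_FLAG_ULP_STOPPED is already set, and bnxt_ulp_start() only resumes the auxiliary driver when a stop actually happened, clearing the flag on exit. A rough stand-alone sketch of that flag guard, with invented names in place of the driver's structures:

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

struct edev {
	pthread_mutex_t lock;		/* stand-in for en_dev_lock */
	bool stopped;			/* stand-in for BNXT_EN_FLAG_ULP_STOPPED */
	int suspends;			/* net count of aux-driver suspends */
};

static void ulp_stop(struct edev *e)
{
	pthread_mutex_lock(&e->lock);
	if (!e->stopped) {		/* a second stop is a no-op */
		e->stopped = true;
		e->suspends++;		/* suspend the aux device once */
	}
	pthread_mutex_unlock(&e->lock);
}

static void ulp_start(struct edev *e)
{
	pthread_mutex_lock(&e->lock);
	if (e->stopped)			/* only resume if a stop happened */
		e->suspends--;
	e->stopped = false;		/* cleared on every exit path */
	pthread_mutex_unlock(&e->lock);
}

int main(void)
{
	struct edev e = { PTHREAD_MUTEX_INITIALIZER, false, 0 };

	ulp_stop(&e);
	ulp_stop(&e);			/* nested stop must not double-suspend */
	ulp_start(&e);
	assert(e.suspends == 0 && !e.stopped);
	return 0;
}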
+config NXP_NTMP + bool + config FSL_ENETC tristate "ENETC PF driver" depends on PCI_MSI @@ -45,7 +49,7 @@ config NXP_ENETC4 select FSL_ENETC_CORE select FSL_ENETC_MDIO select NXP_ENETC_PF_COMMON - select NXP_NETC_LIB + select NXP_NTMP select PHYLINK select DIMLIB help diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 4098f01479bc..53e8d18c7a34 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -507,7 +507,7 @@ static inline u64 _enetc_rd_reg64(void __iomem *reg) tmp = ioread32(reg + 4); } while (high != tmp); - return le64_to_cpu((__le64)high << 32 | low); + return (u64)high << 32 | low; } #endif diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 3f089c3d47b2..d8595e84326d 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -477,10 +477,6 @@ static void e1000_down_and_stop(struct e1000_adapter *adapter) cancel_delayed_work_sync(&adapter->phy_info_task); cancel_delayed_work_sync(&adapter->fifo_stall_task); - - /* Only kill reset task if adapter is not resetting */ - if (!test_bit(__E1000_RESETTING, &adapter->flags)) - cancel_work_sync(&adapter->reset_task); } void e1000_down(struct e1000_adapter *adapter) @@ -1266,6 +1262,10 @@ static void e1000_remove(struct pci_dev *pdev) unregister_netdev(netdev); + /* Only kill reset task if adapter is not resetting */ + if (!test_bit(__E1000_RESETTING, &adapter->flags)) + cancel_work_sync(&adapter->reset_task); + e1000_phy_hw_reset(hw); kfree(adapter->tx_ring); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index a96f4cfa6e17..7719e15813ee 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3534,9 +3534,6 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: - case e1000_pch_mtp: - case e1000_pch_lnp: - case e1000_pch_ptp: case e1000_pch_nvp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { /* Stable 24MHz frequency */ @@ -3552,6 +3549,17 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) adapter->cc.shift = shift; } break; + case e1000_pch_mtp: + case e1000_pch_lnp: + case e1000_pch_ptp: + /* System firmware can misreport this value, so set it to a + * stable 38400KHz frequency. 
+ */ + incperiod = INCPERIOD_38400KHZ; + incvalue = INCVALUE_38400KHZ; + shift = INCVALUE_SHIFT_38400KHZ; + adapter->cc.shift = shift; + break; case e1000_82574: case e1000_82583: /* Stable 25MHz frequency */ diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 89d57dd911dc..ea3c3eb2ef20 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -295,15 +295,17 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: - case e1000_pch_mtp: - case e1000_pch_lnp: - case e1000_pch_ptp: case e1000_pch_nvp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ; else adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ; break; + case e1000_pch_mtp: + case e1000_pch_lnp: + case e1000_pch_ptp: + adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ; + break; case e1000_82574: case e1000_82583: adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 1120f8e4bb67..88e6bef69342 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1546,8 +1546,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) * @vf: pointer to the VF structure * @flr: VFLR was issued or not * - * Returns true if the VF is in reset, resets successfully, or resets - * are disabled and false otherwise. + * Return: True if reset was performed successfully or if resets are disabled. + * False if reset is already in progress. **/ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) { @@ -1566,7 +1566,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) /* If VF is being reset already we don't need to continue. */ if (test_and_set_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) - return true; + return false; i40e_trigger_vf_reset(vf, flr); @@ -4328,7 +4328,10 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) reg = rd32(hw, I40E_GLGEN_VFLRSTAT(reg_idx)); if (reg & BIT(bit_idx)) /* i40e_reset_vf will clear the bit in GLGEN_VFLRSTAT */ - i40e_reset_vf(vf, true); + if (!i40e_reset_vf(vf, true)) { + /* At least one VF did not finish resetting, retry next time */ + set_bit(__I40E_VFLR_EVENT_PENDING, pf->state); + } } return 0; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2c0bb41809a4..81d7249d1149 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3209,6 +3209,17 @@ static void iavf_reset_task(struct work_struct *work) } continue_reset: + /* If we are still early in the state machine, just restart. */ + if (adapter->state <= __IAVF_INIT_FAILED) { + iavf_shutdown_adminq(hw); + iavf_change_state(adapter, __IAVF_STARTUP); + iavf_startup(adapter); + queue_delayed_work(adapter->wq, &adapter->watchdog_task, + msecs_to_jiffies(30)); + netdev_unlock(netdev); + return; + } + /* We don't use netif_running() because it may be true prior to * ndo_open() returning, so we can't assume it means all our open * tasks have finished, since we're not holding the rtnl_lock here. 
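The iavf_virtchnl.c change below closes a hole in iavf_poll_virtchnl_msg(): while polling the admin queue for one specific reply, an unsolicited VIRTCHNL_OP_EVENT used to be skipped, so a VIRTCHNL_EVENT_RESET_IMPENDING indication from the PF could be lost; now it aborts the poll with -EIO and schedules the reset. The general shape of the pattern, as a rough self-contained sketch with invented message types rather than the virtchnl API:

#include <stdbool.h>
#include <stdio.h>

enum op { OP_EVENT = 1, OP_GET_STATS = 2 };
enum ev { EV_NONE = 0, EV_RESET_IMPENDING = 1 };

struct msg {
	enum op op;
	enum ev ev;
};

/* Stubbed mailbox: yields one unsolicited event, then the awaited reply. */
static bool mbox_recv(struct msg *m)
{
	static const struct msg queue[] = {
		{ OP_EVENT, EV_RESET_IMPENDING },
		{ OP_GET_STATS, EV_NONE },
	};
	static unsigned int next;

	if (next >= sizeof(queue) / sizeof(queue[0]))
		return false;
	*m = queue[next++];
	return true;
}

/* Poll for one opcode, but never swallow a reset indication. */
static int poll_for(enum op wanted)
{
	struct msg m;

	while (mbox_recv(&m)) {
		if (m.op == OP_EVENT) {
			if (m.ev == EV_RESET_IMPENDING) {
				puts("reset indicated; abort and reschedule");
				return -1;	/* caller kicks off the reset */
			}
			continue;		/* unrelated event: keep polling */
		}
		if (m.op == wanted)
			return 0;		/* got the reply we waited for */
	}
	return -1;				/* queue drained: treat as timeout */
}

int main(void)
{
	int ret = poll_for(OP_GET_STATS);

	printf("poll_for() = %d\n", ret);	/* -1 here: reset preempted the poll */
	return 0;
}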
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index a6f0e5990be2..07f0d0a0f1e2 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -79,6 +79,23 @@ iavf_poll_virtchnl_msg(struct iavf_hw *hw, struct iavf_arq_event_info *event, return iavf_status_to_errno(status); received_op = (enum virtchnl_ops)le32_to_cpu(event->desc.cookie_high); + + if (received_op == VIRTCHNL_OP_EVENT) { + struct iavf_adapter *adapter = hw->back; + struct virtchnl_pf_event *vpe = + (struct virtchnl_pf_event *)event->msg_buf; + + if (vpe->event != VIRTCHNL_EVENT_RESET_IMPENDING) + continue; + + dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n"); + if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) + iavf_schedule_reset(adapter, + IAVF_FLAG_RESET_PENDING); + + return -EIO; + } + if (op_to_poll == received_op) break; } diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 2bc5c7f59844..1f7834c03550 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -378,6 +378,50 @@ ice_arfs_is_perfect_flow_set(struct ice_hw *hw, __be16 l3_proto, u8 l4_proto) } /** + * ice_arfs_cmp - Check if aRFS filter matches this flow. + * @fltr_info: filter info of the saved ARFS entry. + * @fk: flow dissector keys. + * @n_proto: One of htons(ETH_P_IP) or htons(ETH_P_IPV6). + * @ip_proto: One of IPPROTO_TCP or IPPROTO_UDP. + * + * Since this function assumes limited values for n_proto and ip_proto, it + * is meant to be called only from ice_rx_flow_steer(). + * + * Return: + * * true - fltr_info refers to the same flow as fk. + * * false - fltr_info and fk refer to different flows. + */ +static bool +ice_arfs_cmp(const struct ice_fdir_fltr *fltr_info, const struct flow_keys *fk, + __be16 n_proto, u8 ip_proto) +{ + /* Determine if the filter is for IPv4 or IPv6 based on flow_type, + * which is one of ICE_FLTR_PTYPE_NONF_IPV{4,6}_{TCP,UDP}. + */ + bool is_v4 = fltr_info->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP || + fltr_info->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP; + + /* Following checks are arranged in the quickest and most discriminative + * fields first for early failure. 
+ */ + if (is_v4) + return n_proto == htons(ETH_P_IP) && + fltr_info->ip.v4.src_port == fk->ports.src && + fltr_info->ip.v4.dst_port == fk->ports.dst && + fltr_info->ip.v4.src_ip == fk->addrs.v4addrs.src && + fltr_info->ip.v4.dst_ip == fk->addrs.v4addrs.dst && + fltr_info->ip.v4.proto == ip_proto; + + return fltr_info->ip.v6.src_port == fk->ports.src && + fltr_info->ip.v6.dst_port == fk->ports.dst && + fltr_info->ip.v6.proto == ip_proto && + !memcmp(&fltr_info->ip.v6.src_ip, &fk->addrs.v6addrs.src, + sizeof(struct in6_addr)) && + !memcmp(&fltr_info->ip.v6.dst_ip, &fk->addrs.v6addrs.dst, + sizeof(struct in6_addr)); +} + +/** * ice_rx_flow_steer - steer the Rx flow to where application is being run * @netdev: ptr to the netdev being adjusted * @skb: buffer with required header information @@ -448,6 +492,10 @@ ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb, continue; fltr_info = &arfs_entry->fltr_info; + + if (!ice_arfs_cmp(fltr_info, &fk, n_proto, ip_proto)) + continue; + ret = fltr_info->fltr_id; if (fltr_info->q_index == rxq_idx || diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 6aae03771746..2e4f0969035f 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -508,10 +508,14 @@ err_create_repr: */ int ice_eswitch_attach_vf(struct ice_pf *pf, struct ice_vf *vf) { - struct ice_repr *repr = ice_repr_create_vf(vf); struct devlink *devlink = priv_to_devlink(pf); + struct ice_repr *repr; int err; + if (!ice_is_eswitch_mode_switchdev(pf)) + return 0; + + repr = ice_repr_create_vf(vf); if (IS_ERR(repr)) return PTR_ERR(repr); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index b79a148ed0f2..55cad824c5b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2299,6 +2299,7 @@ static int ice_capture_crosststamp(ktime_t *device, ts = ((u64)ts_hi << 32) | ts_lo; system->cycles = ts; system->cs_id = CSID_X86_ART; + system->use_nsecs = true; /* Read Device source clock time */ ts_lo = rd32(hw, cfg->dev_time_l[tmr_idx]); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 6f572589f1e5..6b5c9536d26d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1822,7 +1822,7 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable) req->chan_cnt = IEEE_8021QAZ_MAX_TCS; req->bpid_per_chan = 1; } else { - req->chan_cnt = 1; + req->chan_cnt = pfvf->hw.rx_chan_cnt; req->bpid_per_chan = 0; } @@ -1847,7 +1847,7 @@ int otx2_nix_cpt_config_bp(struct otx2_nic *pfvf, bool enable) req->chan_cnt = IEEE_8021QAZ_MAX_TCS; req->bpid_per_chan = 1; } else { - req->chan_cnt = 1; + req->chan_cnt = pfvf->hw.rx_chan_cnt; req->bpid_per_chan = 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index ea078c9f5d15..3cb8d3bf9044 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -43,7 +43,6 @@ #include "en/fs_ethtool.h" #define LANES_UNKNOWN 0 -#define MAX_LANES 8 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) @@ -1098,10 +1097,8 @@ static void get_link_properties(struct net_device *netdev, speed = info->speed; lanes = info->lanes; duplex = 
DUPLEX_FULL; - } else if (data_rate_oper) { + } else if (data_rate_oper) speed = 100 * data_rate_oper; - lanes = MAX_LANES; - } out: link_ksettings->base.duplex = duplex; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f1d908f61134..fef418e1ed1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2028,9 +2028,8 @@ err_out: return err; } -static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +static bool mlx5_flow_has_geneve_opt(struct mlx5_flow_spec *spec) { - struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); @@ -2069,7 +2068,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, } complete_all(&flow->del_hw_done); - if (mlx5_flow_has_geneve_opt(flow)) + if (mlx5_flow_has_geneve_opt(&attr->parse_attr->spec)) mlx5_geneve_tlv_option_del(priv->mdev->geneve); if (flow->decap_route) @@ -2574,12 +2573,13 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level); if (err) { - kvfree(tmp_spec); NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); - return err; + } else { + err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); } - err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); + if (mlx5_flow_has_geneve_opt(tmp_spec)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); kvfree(tmp_spec); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 7fb8a3381f84..4917d185d0c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1295,12 +1295,15 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs, - enabled_events); - if (ret) - goto ec_vf_err; - } + } + + /* Enable ECVF vports */ + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs, + enabled_events); + if (ret) + goto ec_vf_err; } /* Enable VF vports */ @@ -1331,9 +1334,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs); + if (mlx5_ecpf_vport_exists(esw->dev)) { - if (mlx5_core_ec_sriov_enabled(esw->dev)) - mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 23a7e8e7adfa..a8046200d376 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2228,6 +2228,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, struct mlx5_flow_handle *rule; struct match_list *iter; bool take_write = false; + bool try_again = false; struct fs_fte *fte; u64 version = 0; int err; @@ -2292,6 +2293,7 @@ skip_search: nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); if (!g->node.active) { + try_again = true; 
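+ /* group is being deleted; note it so the caller retries + * with -EAGAIN rather than seeing -ENOENT + */ 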
up_write_ref_node(&g->node, false); continue; } @@ -2313,7 +2315,8 @@ skip_search: tree_put_node(&fte->node, false); return rule; } - rule = ERR_PTR(-ENOENT); + err = try_again ? -EAGAIN : -ENOENT; + rule = ERR_PTR(err); out: kmem_cache_free(steering->ftes_cache, fte); return rule; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 972e8e9df585..9bc9bd83c232 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -291,7 +291,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function) static int alloc_system_page(struct mlx5_core_dev *dev, u32 function) { struct device *device = mlx5_core_dma_dev(dev); - int nid = dev_to_node(device); + int nid = dev->priv.numa_node; struct page *page; u64 zero_addr = 1; u64 addr; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index fb62f3bc4bd4..447ea3f8722c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -1370,8 +1370,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, struct mlx5hws_cmd_set_fte_attr fte_attr = {0}; struct mlx5hws_cmd_forward_tbl *fw_island; struct mlx5hws_action *action; - u32 i /*, packet_reformat_id*/; - int ret; + int ret, last_dest_idx = -1; + u32 i; if (num_dest <= 1) { mlx5hws_err(ctx, "Action must have multiple dests\n"); @@ -1401,11 +1401,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, dest_list[i].destination_id = dests[i].dest->dest_obj.obj_id; fte_attr.action_flags |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; fte_attr.ignore_flow_level = ignore_flow_level; - /* ToDo: In SW steering we have a handling of 'go to WIRE' - * destination here by upper layer setting 'is_wire_ft' flag - * if the destination is wire. - * This is because uplink should be last dest in the list. 
- */ + if (dests[i].is_wire_ft) + last_dest_idx = i; break; case MLX5HWS_ACTION_TYP_VPORT: dest_list[i].destination_type = MLX5_FLOW_DESTINATION_TYPE_VPORT; @@ -1429,6 +1426,9 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, } } + if (last_dest_idx != -1) + swap(dest_list[last_dest_idx], dest_list[num_dest - 1]); + fte_attr.dests_num = num_dest; fte_attr.dests = dest_list; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c index 70768953a4f6..ca7501c57468 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c @@ -1070,7 +1070,7 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; struct mlx5hws_bwc_complex_rule_hash_node *node, *old_node; struct rhashtable *refcount_hash; - int i; + int ret, i; bwc_rule->complex_hash_node = NULL; @@ -1078,7 +1078,11 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(!node)) return -ENOMEM; - node->tag = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL); + ret = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL); + if (ret < 0) + goto err_free_node; + node->tag = ret; + refcount_set(&node->refcount, 1); /* Clear match buffer - turn off all the unrelated fields @@ -1094,6 +1098,11 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, old_node = rhashtable_lookup_get_insert_fast(refcount_hash, &node->hash_node, hws_refcount_hash); + if (IS_ERR(old_node)) { + ret = PTR_ERR(old_node); + goto err_free_ida; + } + if (old_node) { /* Rule with the same tag already exists - update refcount */ refcount_inc(&old_node->refcount); @@ -1112,6 +1121,12 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, bwc_rule->complex_hash_node = node; return 0; + +err_free_ida: + ida_free(&bwc_matcher->complex->metadata_ida, node->tag); +err_free_node: + kfree(node); + return ret; } static void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c index 5cc0dc002ac1..d45e1145d197 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c @@ -785,6 +785,9 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IP_PROTOCOL_O, outer_headers.ip_protocol, eth_l3_outer.protocol_next_header); + HWS_SET_HDR(fc, match_param, IP_VERSION_O, + outer_headers.ip_version, + eth_l3_outer.ip_version); HWS_SET_HDR(fc, match_param, IP_TTL_O, outer_headers.ttl_hoplimit, eth_l3_outer.time_to_live_hop_limit); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index 9d1c0e4b224a..bf4643d0ce17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -966,6 +966,9 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, switch (attr->type) { case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: dest_action = mlx5_fs_get_dest_action_ft(fs_ctx, dst); + if (dst->dest_attr.ft->flags & + MLX5_FLOW_TABLE_UPLINK_VPORT) + dest_actions[num_dest_actions].is_wire_ft = true; break; case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: dest_action = 
mlx5_fs_get_dest_action_table_num(fs_ctx, @@ -1357,6 +1360,7 @@ mlx5_cmd_hws_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, pkt_reformat->fs_hws_action.pr_data = pr_data; } + mutex_init(&pkt_reformat->fs_hws_action.lock); pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_HWS; pkt_reformat->fs_hws_action.hws_action = hws_action; return 0; @@ -1503,7 +1507,6 @@ static int mlx5_cmd_hws_modify_header_alloc(struct mlx5_flow_root_namespace *ns, err = -ENOMEM; goto release_mh; } - mutex_init(&modify_hdr->fs_hws_action.lock); modify_hdr->fs_hws_action.mh_data = mh_data; modify_hdr->fs_hws_action.fs_pool = pool; modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index 9bbadc4d8a0b..d8ac6c196211 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -213,6 +213,7 @@ struct mlx5hws_action_dest_attr { struct mlx5hws_action *dest; /* Optional reformat action */ struct mlx5hws_action *reformat; + bool is_wire_ft; }; /** diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index fb2e5b844c15..d76d7a945899 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -447,8 +447,10 @@ static int mlxbf_gige_probe(struct platform_device *pdev) priv->llu_plu_irq = platform_get_irq(pdev, MLXBF_GIGE_LLU_PLU_INTR_IDX); phy_irq = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(&pdev->dev), "phy", 0); - if (phy_irq < 0) { - dev_err(&pdev->dev, "Error getting PHY irq. Use polling instead"); + if (phy_irq == -EPROBE_DEFER) { + err = -EPROBE_DEFER; + goto out; + } else if (phy_irq < 0) { phy_irq = PHY_POLL; } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index e2368075ab8c..4521d0483d18 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -127,11 +127,8 @@ static int fbnic_mbx_map_msg(struct fbnic_dev *fbd, int mbx_idx, return -EBUSY; addr = dma_map_single(fbd->dev, msg, PAGE_SIZE, direction); - if (dma_mapping_error(fbd->dev, addr)) { - free_page((unsigned long)msg); - + if (dma_mapping_error(fbd->dev, addr)) return -ENOSPC; - } mbx->buf_info[tail].msg = msg; mbx->buf_info[tail].addr = addr; diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.h b/drivers/net/ethernet/microchip/lan743x_ptp.h index e8d073bfa2ca..f33dc83c5700 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.h +++ b/drivers/net/ethernet/microchip/lan743x_ptp.h @@ -18,9 +18,9 @@ */ #define LAN743X_PTP_N_EVENT_CHAN 2 #define LAN743X_PTP_N_PEROUT LAN743X_PTP_N_EVENT_CHAN -#define LAN743X_PTP_N_EXTTS 4 -#define LAN743X_PTP_N_PPS 0 #define PCI11X1X_PTP_IO_MAX_CHANNELS 8 +#define LAN743X_PTP_N_EXTTS PCI11X1X_PTP_IO_MAX_CHANNELS +#define LAN743X_PTP_N_PPS 0 #define PTP_CMD_CTL_TIMEOUT_CNT 50 struct lan743x_adapter; diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 3504507477c6..52cf7112762c 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -31,6 +31,9 @@ static void mana_gd_init_pf_regs(struct pci_dev *pdev) gc->db_page_base = gc->bar0_va + mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + gc->phys_db_page_base = gc->bar0_pa + + 
mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF); sriov_base_va = gc->bar0_va + sriov_base_off; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index daf1e82cb76b..0e60a6bef99a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -516,9 +516,9 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds, unsigned long start_time; unsigned long max_wait; unsigned long duration; - int done = 0; bool fw_up; int opcode; + bool done; int err; /* Wait for dev cmd to complete, retrying if we get EAGAIN, @@ -526,6 +526,7 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds, */ max_wait = jiffies + (max_seconds * HZ); try_again: + done = false; opcode = idev->opcode; start_time = jiffies; for (fw_up = ionic_is_fw_running(idev); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 2ac59564ded1..d10b58ebf603 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -321,7 +321,7 @@ static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame, len, DMA_TO_DEVICE); } else /* XDP_REDIRECT */ { dma_addr = ionic_tx_map_single(q, frame->data, len); - if (!dma_addr) + if (dma_addr == DMA_MAPPING_ERROR) return -EIO; } @@ -357,7 +357,7 @@ static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame, } else { dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag)); - if (dma_mapping_error(q->dev, dma_addr)) { + if (dma_addr == DMA_MAPPING_ERROR) { ionic_tx_desc_unmap_bufs(q, desc_info); return -EIO; } @@ -1083,7 +1083,7 @@ static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, net_warn_ratelimited("%s: DMA single map failed on %s!\n", dev_name(dev), q->name); q_to_tx_stats(q)->dma_map_err++; - return 0; + return DMA_MAPPING_ERROR; } return dma_addr; } @@ -1100,7 +1100,7 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q, net_warn_ratelimited("%s: DMA frag map failed on %s!\n", dev_name(dev), q->name); q_to_tx_stats(q)->dma_map_err++; - return 0; + return DMA_MAPPING_ERROR; } return dma_addr; } @@ -1116,7 +1116,7 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb, int frag_idx; dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); - if (!dma_addr) + if (dma_addr == DMA_MAPPING_ERROR) return -EIO; buf_info->dma_addr = dma_addr; buf_info->len = skb_headlen(skb); @@ -1126,7 +1126,7 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb, nfrags = skb_shinfo(skb)->nr_frags; for (frag_idx = 0; frag_idx < nfrags; frag_idx++, frag++) { dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag)); - if (!dma_addr) + if (dma_addr == DMA_MAPPING_ERROR) goto dma_fail; buf_info->dma_addr = dma_addr; buf_info->len = skb_frag_size(frag); diff --git a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c index f55eed092f25..7d78f072b0a1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c @@ -242,7 +242,7 @@ static int qed_mfw_get_tlv_group(u8 tlv_type, u8 *tlv_group) } /* Returns size of the data buffer or, -1 in case TLV data is not available. 
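* The TLV value parsers below are marked noinline_for_stack so that * their stack frames are not inlined into one oversized caller frame. 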
*/ -static int +static noinline_for_stack int qed_mfw_get_gen_tlv_value(struct qed_drv_tlv_hdr *p_tlv, struct qed_mfw_tlv_generic *p_drv_buf, struct qed_tlv_parsed_buf *p_buf) @@ -304,7 +304,7 @@ qed_mfw_get_gen_tlv_value(struct qed_drv_tlv_hdr *p_tlv, return -1; } -static int +static noinline_for_stack int qed_mfw_get_eth_tlv_value(struct qed_drv_tlv_hdr *p_tlv, struct qed_mfw_tlv_eth *p_drv_buf, struct qed_tlv_parsed_buf *p_buf) @@ -438,7 +438,7 @@ qed_mfw_get_tlv_time_value(struct qed_mfw_tlv_time *p_time, return QED_MFW_TLV_TIME_SIZE; } -static int +static noinline_for_stack int qed_mfw_get_fcoe_tlv_value(struct qed_drv_tlv_hdr *p_tlv, struct qed_mfw_tlv_fcoe *p_drv_buf, struct qed_tlv_parsed_buf *p_buf) @@ -1073,7 +1073,7 @@ qed_mfw_get_fcoe_tlv_value(struct qed_drv_tlv_hdr *p_tlv, return -1; } -static int +static noinline_for_stack int qed_mfw_get_iscsi_tlv_value(struct qed_drv_tlv_hdr *p_tlv, struct qed_mfw_tlv_iscsi *p_drv_buf, struct qed_tlv_parsed_buf *p_buf) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 5b8fdb882172..12f25cec6255 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -98,20 +98,11 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn, { struct cppi5_host_desc_t *first_desc, *next_desc; dma_addr_t buf_dma, next_desc_dma; - struct prueth_swdata *swdata; - struct page *page; u32 buf_dma_len; first_desc = desc; next_desc = first_desc; - swdata = cppi5_hdesc_get_swdata(desc); - if (swdata->type == PRUETH_SWDATA_PAGE) { - page = swdata->data.page; - page_pool_recycle_direct(page->pp, swdata->data.page); - goto free_desc; - } - cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); @@ -135,7 +126,6 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn, k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); } -free_desc: k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); } EXPORT_SYMBOL_GPL(prueth_xmit_free); @@ -612,13 +602,8 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); cppi5_hdesc_attach_buf(first_desc, buf_dma, xdpf->len, buf_dma, xdpf->len); swdata = cppi5_hdesc_get_swdata(first_desc); - if (page) { - swdata->type = PRUETH_SWDATA_PAGE; - swdata->data.page = page; - } else { - swdata->type = PRUETH_SWDATA_XDPF; - swdata->data.xdpf = xdpf; - } + swdata->type = PRUETH_SWDATA_XDPF; + swdata->data.xdpf = xdpf; /* Report BQL before sending the packet */ netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 7f2e6cddfeb1..c57cc4f27249 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -2623,7 +2623,7 @@ static int wx_alloc_page_pool(struct wx_ring *rx_ring) struct page_pool_params pp_params = { .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, .order = 0, - .pool_size = rx_ring->size, + .pool_size = rx_ring->count, .nid = dev_to_node(rx_ring->dev), .dev = rx_ring->dev, .dma_dir = DMA_FROM_DEVICE, diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3d315e30ee47..7edbe76b5455 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -247,15 +247,39 @@ static sci_t make_sci(const u8 *addr, __be16 port) return sci; } -static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present) +static sci_t macsec_active_sci(struct macsec_secy 
*secy) { - sci_t sci; + struct macsec_rx_sc *rx_sc = rcu_dereference_bh(secy->rx_sc); + + /* Case single RX SC */ + if (rx_sc && !rcu_dereference_bh(rx_sc->next)) + return (rx_sc->active) ? rx_sc->sci : 0; + /* Case no RX SC or multiple */ + else + return 0; +} + +static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present, + struct macsec_rxh_data *rxd) +{ + struct macsec_dev *macsec; + sci_t sci = 0; - if (sci_present) + /* SC = 1 */ + if (sci_present) { memcpy(&sci, hdr->secure_channel_id, sizeof(hdr->secure_channel_id)); - else + /* SC = 0; ES = 0 */ + } else if ((!(hdr->tci_an & (MACSEC_TCI_ES | MACSEC_TCI_SC))) && + (list_is_singular(&rxd->secys))) { + /* Only one SECY should exist in this scenario */ + macsec = list_first_or_null_rcu(&rxd->secys, struct macsec_dev, + secys); + if (macsec) + return macsec_active_sci(&macsec->secy); + } else { sci = make_sci(hdr->eth.h_source, MACSEC_PORT_ES); + } return sci; } @@ -1109,7 +1133,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) struct macsec_rxh_data *rxd; struct macsec_dev *macsec; unsigned int len; - sci_t sci; + sci_t sci = 0; u32 hdr_pn; bool cbit; struct pcpu_rx_sc_stats *rxsc_stats; @@ -1156,11 +1180,14 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) macsec_skb_cb(skb)->has_sci = !!(hdr->tci_an & MACSEC_TCI_SC); macsec_skb_cb(skb)->assoc_num = hdr->tci_an & MACSEC_AN_MASK; - sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci); rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); + sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci, rxd); + if (!sci) + goto drop_nosc; + list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct macsec_rx_sc *sc = find_rx_sc(&macsec->secy, sci); @@ -1283,6 +1310,7 @@ drop: macsec_rxsa_put(rx_sa); drop_nosa: macsec_rxsc_put(rx_sc); +drop_nosc: rcu_read_unlock(); drop_direct: kfree_skb(skb); diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 4289ccd3e41b..176935a8645f 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1252,7 +1252,6 @@ static int sysdata_append_release(struct netconsole_target *nt, int offset) */ static int prepare_extradata(struct netconsole_target *nt) { - u32 fields = SYSDATA_CPU_NR | SYSDATA_TASKNAME; int extradata_len; /* userdata was appended when configfs write helper was called * by usermode. */ extradata_len = nt->userdata_length; - if (!(nt->sysdata_fields & fields)) + if (!nt->sysdata_fields) goto out; if (nt->sysdata_fields & SYSDATA_CPU_NR) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index af545d42961c..fa5fbd97ad69 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -371,7 +371,8 @@ static int nsim_poll(struct napi_struct *napi, int budget) int done; done = nsim_rcv(rq, budget); - napi_complete(napi); + if (done < budget) + napi_complete_done(napi, done); return done; } diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index a6bcb0fee863..fda2e27c1810 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -445,6 +445,9 @@ int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->read) retval = bus->read(bus, addr, regnum); else @@ -474,6 +477,9 @@ int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= 
PHY_MAX_ADDR) + return -ENXIO; + if (bus->write) err = bus->write(bus, addr, regnum, val); else @@ -535,6 +541,9 @@ int __mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->read_c45) retval = bus->read_c45(bus, addr, devad, regnum); else @@ -566,6 +575,9 @@ int __mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->write_c45) err = bus->write_c45(bus, addr, devad, regnum, val); else diff --git a/drivers/net/phy/phy_caps.c b/drivers/net/phy/phy_caps.c index 703321689726..38417e288611 100644 --- a/drivers/net/phy/phy_caps.c +++ b/drivers/net/phy/phy_caps.c @@ -188,6 +188,9 @@ phy_caps_lookup_by_linkmode_rev(const unsigned long *linkmodes, bool fdx_only) * When @exact is not set, we return either an exact match, or matching capabilities * at lower speed, or the lowest matching speed, or NULL. * + * Non-exact matches will try to return an exact speed and duplex match, but may + * return matching capabilities with same speed but a different duplex. + * * Returns: a matched link_capabilities according to the above process, NULL * otherwise. */ @@ -195,7 +198,7 @@ const struct link_capabilities * phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported, bool exact) { - const struct link_capabilities *lcap, *last = NULL; + const struct link_capabilities *lcap, *match = NULL, *last = NULL; for_each_link_caps_desc_speed(lcap) { if (linkmode_intersects(lcap->linkmodes, supported)) { @@ -204,16 +207,19 @@ phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported, if (lcap->speed == speed && lcap->duplex == duplex) { return lcap; } else if (!exact) { - if (lcap->speed <= speed) - return lcap; + if (!match && lcap->speed <= speed) + match = lcap; + + if (lcap->speed < speed) + break; } } } - if (!exact) - return last; + if (!match && !exact) + match = last; - return NULL; + return match; } EXPORT_SYMBOL_GPL(phy_caps_lookup); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index b586b1c13a47..f5647ee0adde 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1426,6 +1426,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x22de, 0x9051, 2)}, /* Hucom Wireless HM-211S/K */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ {QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */ + {QMI_QUIRK_SET_DTR(0x1e0e, 0x9071, 3)}, /* SIMCom 8230C ++ */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0195, 4)}, /* Quectel EG95 */ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d6589b24c68d..44cba7acfe7d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10054,6 +10054,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041) }, { USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff) }, { USB_DEVICE(VENDOR_ID_TPLINK, 0x0601) }, + { USB_DEVICE(VENDOR_ID_TPLINK, 0x0602) }, { USB_DEVICE(VENDOR_ID_DLINK, 0xb301) }, { USB_DEVICE(VENDOR_ID_DELL, 0xb097) }, { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, diff --git a/drivers/net/veth.c b/drivers/net/veth.c index e58a0f1b5c5b..a3046142cb8e 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -909,7 +909,7 @@ static int 
veth_xdp_rcv(struct veth_rq *rq, int budget, /* NAPI functions as RCU section */ peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held()); - peer_txq = netdev_get_tx_queue(peer_dev, queue_idx); + peer_txq = peer_dev ? netdev_get_tx_queue(peer_dev, queue_idx) : NULL; for (i = 0; i < budget; i++) { void *ptr = __ptr_ring_consume(&rq->xdp_ring); @@ -959,7 +959,7 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, rq->stats.vs.xdp_packets += done; u64_stats_update_end(&rq->stats.syncp); - if (unlikely(netif_tx_queue_stopped(peer_txq))) + if (peer_txq && unlikely(netif_tx_queue_stopped(peer_txq))) netif_tx_wake_queue(peer_txq); return done; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 8c7ffea0fa44..07fe05384cdf 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4,6 +4,7 @@ * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include "mac.h" @@ -1022,6 +1023,26 @@ static inline int ath10k_vdev_setup_sync(struct ath10k *ar) return ar->last_wmi_vdev_start_status; } +static inline int ath10k_vdev_delete_sync(struct ath10k *ar) +{ + unsigned long time_left; + + lockdep_assert_held(&ar->conf_mutex); + + if (!test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) + return 0; + + if (test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) + return -ESHUTDOWN; + + time_left = wait_for_completion_timeout(&ar->vdev_delete_done, + ATH10K_VDEV_DELETE_TIMEOUT_HZ); + if (time_left == 0) + return -ETIMEDOUT; + + return 0; +} + static int ath10k_monitor_vdev_start(struct ath10k *ar, int vdev_id) { struct cfg80211_chan_def *chandef = NULL; @@ -5900,7 +5921,6 @@ static void ath10k_remove_interface(struct ieee80211_hw *hw, struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_peer *peer; - unsigned long time_left; int ret; int i; @@ -5940,13 +5960,10 @@ static void ath10k_remove_interface(struct ieee80211_hw *hw, ath10k_warn(ar, "failed to delete WMI vdev %i: %d\n", arvif->vdev_id, ret); - if (test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) { - time_left = wait_for_completion_timeout(&ar->vdev_delete_done, - ATH10K_VDEV_DELETE_TIMEOUT_HZ); - if (time_left == 0) { - ath10k_warn(ar, "Timeout in receiving vdev delete response\n"); - goto out; - } + ret = ath10k_vdev_delete_sync(ar); + if (ret) { + ath10k_warn(ar, "Error in receiving vdev delete response: %d\n", ret); + goto out; } /* Some firmware revisions don't notify host about self-peer removal diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index b2bf9d72b92f..d51f2e5a79a4 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -938,7 +938,9 @@ static int ath10k_snoc_hif_start(struct ath10k *ar) dev_set_threaded(ar->napi_dev, true); ath10k_core_napi_enable(ar); - ath10k_snoc_irq_enable(ar); + /* IRQs are left enabled when we restart due to a firmware crash */ + if (!test_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags)) + ath10k_snoc_irq_enable(ar); ath10k_snoc_rx_post(ar); clear_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags); diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 2e9f8a5e61e4..22a101136135 100644 --- 
a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -990,6 +990,7 @@ void ath11k_fw_stats_init(struct ath11k *ar) INIT_LIST_HEAD(&ar->fw_stats.bcn); init_completion(&ar->fw_stats_complete); + init_completion(&ar->fw_stats_done); } void ath11k_fw_stats_free(struct ath11k_fw_stats *stats) @@ -2134,6 +2135,20 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab) { int ret; + switch (ath11k_crypto_mode) { + case ATH11K_CRYPT_MODE_SW: + set_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); + set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); + break; + case ATH11K_CRYPT_MODE_HW: + clear_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); + clear_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); + break; + default: + ath11k_info(ab, "invalid crypto_mode: %d\n", ath11k_crypto_mode); + return -EINVAL; + } + ret = ath11k_core_start_firmware(ab, ab->fw_mode); if (ret) { ath11k_err(ab, "failed to start firmware: %d\n", ret); @@ -2152,20 +2167,6 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab) goto err_firmware_stop; } - switch (ath11k_crypto_mode) { - case ATH11K_CRYPT_MODE_SW: - set_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); - set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); - break; - case ATH11K_CRYPT_MODE_HW: - clear_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); - clear_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); - break; - default: - ath11k_info(ab, "invalid crypto_mode: %d\n", ath11k_crypto_mode); - return -EINVAL; - } - if (ath11k_frame_mode == ATH11K_HW_TXRX_RAW) set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 339d4fca1ed5..6b2f207975e3 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -600,6 +600,8 @@ struct ath11k_fw_stats { struct list_head pdevs; struct list_head vdevs; struct list_head bcn; + u32 num_vdev_recvd; + u32 num_bcn_recvd; }; struct ath11k_dbg_htt_stats { @@ -784,7 +786,7 @@ struct ath11k { u8 alpha2[REG_ALPHA2_LEN + 1]; struct ath11k_fw_stats fw_stats; struct completion fw_stats_complete; - bool fw_stats_done; + struct completion fw_stats_done; /* protected by conf_mutex */ bool ps_state_enable; diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index bf192529e3fe..5d46f8e4c231 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2020 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include <linux/vmalloc.h> @@ -93,57 +93,14 @@ void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, spin_unlock_bh(&dbr_data->lock); } -static void ath11k_debugfs_fw_stats_reset(struct ath11k *ar) -{ - spin_lock_bh(&ar->data_lock); - ar->fw_stats_done = false; - ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); - ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); - spin_unlock_bh(&ar->data_lock); -} - void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *stats) { struct ath11k_base *ab = ar->ab; - struct ath11k_pdev *pdev; - bool is_end; - static unsigned int num_vdev, num_bcn; - size_t total_vdevs_started = 0; - int i; - - /* WMI_REQUEST_PDEV_STAT request has been already processed */ - - if (stats->stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { - ar->fw_stats_done = true; - return; - } - - if (stats->stats_id == WMI_REQUEST_VDEV_STAT) { - if (list_empty(&stats->vdevs)) { - ath11k_warn(ab, "empty vdev stats"); - return; - } - /* FW sends all the active VDEV stats irrespective of PDEV, - * hence limit until the count of all VDEVs started - */ - for (i = 0; i < ab->num_radios; i++) { - pdev = rcu_dereference(ab->pdevs_active[i]); - if (pdev && pdev->ar) - total_vdevs_started += ar->num_started_vdevs; - } - - is_end = ((++num_vdev) == total_vdevs_started); - - list_splice_tail_init(&stats->vdevs, - &ar->fw_stats.vdevs); - - if (is_end) { - ar->fw_stats_done = true; - num_vdev = 0; - } - return; - } + bool is_end = true; + /* WMI_REQUEST_PDEV_STAT, WMI_REQUEST_RSSI_PER_CHAIN_STAT and + * WMI_REQUEST_VDEV_STAT requests have been already processed. + */ if (stats->stats_id == WMI_REQUEST_BCN_STAT) { if (list_empty(&stats->bcn)) { ath11k_warn(ab, "empty bcn stats"); @@ -152,97 +109,18 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * /* Mark end until we reached the count of all started VDEVs * within the PDEV */ - is_end = ((++num_bcn) == ar->num_started_vdevs); + if (ar->num_started_vdevs) + is_end = ((++ar->fw_stats.num_bcn_recvd) == + ar->num_started_vdevs); list_splice_tail_init(&stats->bcn, &ar->fw_stats.bcn); - if (is_end) { - ar->fw_stats_done = true; - num_bcn = 0; - } + if (is_end) + complete(&ar->fw_stats_done); } } -static int ath11k_debugfs_fw_stats_request(struct ath11k *ar, - struct stats_request_params *req_param) -{ - struct ath11k_base *ab = ar->ab; - unsigned long timeout, time_left; - int ret; - - lockdep_assert_held(&ar->conf_mutex); - - /* FW stats can get split when exceeding the stats data buffer limit. 
- * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - timeout = jiffies + secs_to_jiffies(3); - - ath11k_debugfs_fw_stats_reset(ar); - - reinit_completion(&ar->fw_stats_complete); - - ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); - - if (ret) { - ath11k_warn(ab, "could not request fw stats (%d)\n", - ret); - return ret; - } - - time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - - if (!time_left) - return -ETIMEDOUT; - - for (;;) { - if (time_after(jiffies, timeout)) - break; - - spin_lock_bh(&ar->data_lock); - if (ar->fw_stats_done) { - spin_unlock_bh(&ar->data_lock); - break; - } - spin_unlock_bh(&ar->data_lock); - } - return 0; -} - -int ath11k_debugfs_get_fw_stats(struct ath11k *ar, u32 pdev_id, - u32 vdev_id, u32 stats_id) -{ - struct ath11k_base *ab = ar->ab; - struct stats_request_params req_param; - int ret; - - mutex_lock(&ar->conf_mutex); - - if (ar->state != ATH11K_STATE_ON) { - ret = -ENETDOWN; - goto err_unlock; - } - - req_param.pdev_id = pdev_id; - req_param.vdev_id = vdev_id; - req_param.stats_id = stats_id; - - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); - if (ret) - ath11k_warn(ab, "failed to request fw stats: %d\n", ret); - - ath11k_dbg(ab, ATH11K_DBG_WMI, - "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", - pdev_id, vdev_id, stats_id); - -err_unlock: - mutex_unlock(&ar->conf_mutex); - - return ret; -} - static int ath11k_open_pdev_stats(struct inode *inode, struct file *file) { struct ath11k *ar = inode->i_private; @@ -268,7 +146,7 @@ static int ath11k_open_pdev_stats(struct inode *inode, struct file *file) req_param.vdev_id = 0; req_param.stats_id = WMI_REQUEST_PDEV_STAT; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ab, "failed to request fw pdev stats: %d\n", ret); goto err_free; @@ -339,7 +217,7 @@ static int ath11k_open_vdev_stats(struct inode *inode, struct file *file) req_param.vdev_id = 0; req_param.stats_id = WMI_REQUEST_VDEV_STAT; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ar->ab, "failed to request fw vdev stats: %d\n", ret); goto err_free; @@ -415,7 +293,7 @@ static int ath11k_open_bcn_stats(struct inode *inode, struct file *file) continue; req_param.vdev_id = arvif->vdev_id; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ar->ab, "failed to request fw bcn stats: %d\n", ret); goto err_free; diff --git a/drivers/net/wireless/ath/ath11k/debugfs.h b/drivers/net/wireless/ath/ath11k/debugfs.h index a39e458637b0..ed7fec177588 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.h +++ b/drivers/net/wireless/ath/ath11k/debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2022, 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef _ATH11K_DEBUGFS_H_ @@ -273,8 +273,6 @@ void ath11k_debugfs_unregister(struct ath11k *ar); void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *stats); void ath11k_debugfs_fw_stats_init(struct ath11k *ar); -int ath11k_debugfs_get_fw_stats(struct ath11k *ar, u32 pdev_id, - u32 vdev_id, u32 stats_id); static inline bool ath11k_debugfs_is_pktlog_lite_mode_enabled(struct ath11k *ar) { @@ -381,12 +379,6 @@ static inline int ath11k_debugfs_rx_filter(struct ath11k *ar) return 0; } -static inline int ath11k_debugfs_get_fw_stats(struct ath11k *ar, - u32 pdev_id, u32 vdev_id, u32 stats_id) -{ - return 0; -} - static inline void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, enum wmi_direct_buffer_module id, diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 08d7b136851f..13301ca317a5 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -8997,6 +8997,81 @@ static void ath11k_mac_put_chain_rssi(struct station_info *sinfo, } } +static void ath11k_mac_fw_stats_reset(struct ath11k *ar) +{ + spin_lock_bh(&ar->data_lock); + ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); + ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; + spin_unlock_bh(&ar->data_lock); +} + +int ath11k_mac_fw_stats_request(struct ath11k *ar, + struct stats_request_params *req_param) +{ + struct ath11k_base *ab = ar->ab; + unsigned long time_left; + int ret; + + lockdep_assert_held(&ar->conf_mutex); + + ath11k_mac_fw_stats_reset(ar); + + reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); + + ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); + + if (ret) { + ath11k_warn(ab, "could not request fw stats (%d)\n", + ret); + return ret; + } + + time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); + if (!time_left) + return -ETIMEDOUT; + + /* FW stats can get split when exceeding the stats data buffer limit. 
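+ * Each split portion is delivered as a separate 'update stats' event. 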
+ * In that case, since there is no end marking for the back-to-back + * received 'update stats' event, we keep a 3 second timeout in case + * fw_stats_done is not marked yet + */ + time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); + if (!time_left) + return -ETIMEDOUT; + + return 0; +} + +static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id, + u32 vdev_id, u32 stats_id) +{ + struct ath11k_base *ab = ar->ab; + struct stats_request_params req_param; + int ret; + + lockdep_assert_held(&ar->conf_mutex); + + if (ar->state != ATH11K_STATE_ON) + return -ENETDOWN; + + req_param.pdev_id = pdev_id; + req_param.vdev_id = vdev_id; + req_param.stats_id = stats_id; + + ret = ath11k_mac_fw_stats_request(ar, &req_param); + if (ret) + ath11k_warn(ab, "failed to request fw stats: %d\n", ret); + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", + pdev_id, vdev_id, stats_id); + + return ret; +} + static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -9031,11 +9106,12 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, ath11k_mac_put_chain_rssi(sinfo, arsta, "ppdu", false); + mutex_lock(&ar->conf_mutex); if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA && ar->ab->hw_params.supports_rssi_stats && - !ath11k_debugfs_get_fw_stats(ar, ar->pdev->pdev_id, 0, - WMI_REQUEST_RSSI_PER_CHAIN_STAT)) { + !ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_RSSI_PER_CHAIN_STAT)) { ath11k_mac_put_chain_rssi(sinfo, arsta, "fw stats", true); } @@ -9043,9 +9119,10 @@ if (!signal && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA && ar->ab->hw_params.supports_rssi_stats && - !(ath11k_debugfs_get_fw_stats(ar, ar->pdev->pdev_id, 0, - WMI_REQUEST_VDEV_STAT))) + !(ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_VDEV_STAT))) signal = arsta->rssi_beacon; + mutex_unlock(&ar->conf_mutex); ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "sta statistics db2dbm %u rssi comb %d rssi beacon %d\n", @@ -9380,38 +9457,6 @@ exit: return ret; } -static int ath11k_fw_stats_request(struct ath11k *ar, - struct stats_request_params *req_param) -{ - struct ath11k_base *ab = ar->ab; - unsigned long time_left; - int ret; - - lockdep_assert_held(&ar->conf_mutex); - - spin_lock_bh(&ar->data_lock); - ar->fw_stats_done = false; - ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); - spin_unlock_bh(&ar->data_lock); - - reinit_completion(&ar->fw_stats_complete); - - ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); - if (ret) { - ath11k_warn(ab, "could not request fw stats (%d)\n", - ret); - return ret; - } - - time_left = wait_for_completion_timeout(&ar->fw_stats_complete, - 1 * HZ); - - if (!time_left) - return -ETIMEDOUT; - - return 0; -} - static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, unsigned int link_id, @@ -9419,7 +9464,6 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, { struct ath11k *ar = hw->priv; struct ath11k_base *ab = ar->ab; - struct stats_request_params req_param = {0}; struct ath11k_fw_stats_pdev *pdev; int ret; @@ -9431,9 +9475,6 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, */ mutex_lock(&ar->conf_mutex); - if (ar->state != ATH11K_STATE_ON) - goto err_fallback; - /* Firmware doesn't provide Tx power during CAC hence no need to fetch * the stats. 
*/ @@ -9442,10 +9483,8 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, return -EAGAIN; } - req_param.pdev_id = ar->pdev->pdev_id; - req_param.stats_id = WMI_REQUEST_PDEV_STAT; - - ret = ath11k_fw_stats_request(ar, &req_param); + ret = ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_PDEV_STAT); if (ret) { ath11k_warn(ab, "failed to request fw pdev stats: %d\n", ret); goto err_fallback; diff --git a/drivers/net/wireless/ath/ath11k/mac.h b/drivers/net/wireless/ath/ath11k/mac.h index f5800fbecff8..5e61eea1bb03 100644 --- a/drivers/net/wireless/ath/ath11k/mac.h +++ b/drivers/net/wireless/ath/ath11k/mac.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH11K_MAC_H @@ -179,4 +179,6 @@ int ath11k_mac_vif_set_keepalive(struct ath11k_vif *arvif, void ath11k_mac_fill_reg_tpc_info(struct ath11k *ar, struct ieee80211_vif *vif, struct ieee80211_chanctx_conf *ctx); +int ath11k_mac_fw_stats_request(struct ath11k *ar, + struct stats_request_params *req_param); #endif diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index d7f852bebf4a..56af2e9634f4 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8158,6 +8158,11 @@ static void ath11k_peer_assoc_conf_event(struct ath11k_base *ab, struct sk_buff static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *skb) { struct ath11k_fw_stats stats = {}; + size_t total_vdevs_started = 0; + struct ath11k_pdev *pdev; + bool is_end = true; + int i; + struct ath11k *ar; int ret; @@ -8184,25 +8189,57 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk spin_lock_bh(&ar->data_lock); - /* WMI_REQUEST_PDEV_STAT can be requested via .get_txpower mac ops or via + /* WMI_REQUEST_PDEV_STAT, WMI_REQUEST_VDEV_STAT and + * WMI_REQUEST_RSSI_PER_CHAIN_STAT can be requested via mac ops or via * debugfs fw stats. Therefore, processing it separately. */ if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs); - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); + goto complete; + } + + if (stats.stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { + complete(&ar->fw_stats_done); goto complete; } - /* WMI_REQUEST_VDEV_STAT, WMI_REQUEST_BCN_STAT and WMI_REQUEST_RSSI_PER_CHAIN_STAT - * are currently requested only via debugfs fw stats. Hence, processing these - * in debugfs context + if (stats.stats_id == WMI_REQUEST_VDEV_STAT) { + if (list_empty(&stats.vdevs)) { + ath11k_warn(ab, "empty vdev stats"); + goto complete; + } + /* FW sends all the active VDEV stats irrespective of PDEV, + * hence limit until the count of all VDEVs started + */ + for (i = 0; i < ab->num_radios; i++) { + pdev = rcu_dereference(ab->pdevs_active[i]); + if (pdev && pdev->ar) + total_vdevs_started += ar->num_started_vdevs; + } + + if (total_vdevs_started) + is_end = ((++ar->fw_stats.num_vdev_recvd) == + total_vdevs_started); + + list_splice_tail_init(&stats.vdevs, + &ar->fw_stats.vdevs); + + if (is_end) + complete(&ar->fw_stats_done); + + goto complete; + } + + /* WMI_REQUEST_BCN_STAT is currently requested only via debugfs fw stats. 
+ * Hence, processing it in debugfs context */ ath11k_debugfs_fw_stats_process(ar, &stats); complete: complete(&ar->fw_stats_complete); - rcu_read_unlock(); spin_unlock_bh(&ar->data_lock); + rcu_read_unlock(); /* Since the stats's pdev, vdev and beacon list are spliced and reinitialised * at this point, no need to free the individual list. diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 31d851d8e688..89ae80934b30 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1216,6 +1216,7 @@ void ath12k_fw_stats_init(struct ath12k *ar) INIT_LIST_HEAD(&ar->fw_stats.pdevs); INIT_LIST_HEAD(&ar->fw_stats.bcn); init_completion(&ar->fw_stats_complete); + init_completion(&ar->fw_stats_done); } void ath12k_fw_stats_free(struct ath12k_fw_stats *stats) @@ -1228,8 +1229,9 @@ void ath12k_fw_stats_free(struct ath12k_fw_stats *stats) void ath12k_fw_stats_reset(struct ath12k *ar) { spin_lock_bh(&ar->data_lock); - ar->fw_stats.fw_stats_done = false; ath12k_fw_stats_free(&ar->fw_stats); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; spin_unlock_bh(&ar->data_lock); } @@ -2129,7 +2131,8 @@ int ath12k_core_init(struct ath12k_base *ab) if (!ag) { mutex_unlock(&ath12k_hw_group_mutex); ath12k_warn(ab, "unable to get hw group\n"); - return -ENODEV; + ret = -ENODEV; + goto err_unregister_notifier; } mutex_unlock(&ath12k_hw_group_mutex); @@ -2144,7 +2147,7 @@ int ath12k_core_init(struct ath12k_base *ab) if (ret) { mutex_unlock(&ag->mutex); ath12k_warn(ab, "unable to create hw group\n"); - goto err; + goto err_destroy_hw_group; } } @@ -2152,9 +2155,12 @@ int ath12k_core_init(struct ath12k_base *ab) return 0; -err: +err_destroy_hw_group: ath12k_core_hw_group_destroy(ab->ag); ath12k_core_hw_group_unassign(ab); +err_unregister_notifier: + ath12k_core_panic_notifier_unregister(ab); + return ret; } diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 941db6e49d6e..7bcd9c70309f 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -601,6 +601,12 @@ struct ath12k_sta { #define ATH12K_NUM_CHANS 101 #define ATH12K_MAX_5GHZ_CHAN 173 +static inline bool ath12k_is_2ghz_channel_freq(u32 freq) +{ + return freq >= ATH12K_MIN_2GHZ_FREQ && + freq <= ATH12K_MAX_2GHZ_FREQ; +} + enum ath12k_hw_state { ATH12K_HW_STATE_OFF, ATH12K_HW_STATE_ON, @@ -626,7 +632,8 @@ struct ath12k_fw_stats { struct list_head pdevs; struct list_head vdevs; struct list_head bcn; - bool fw_stats_done; + u32 num_vdev_recvd; + u32 num_bcn_recvd; }; struct ath12k_dbg_htt_stats { @@ -806,6 +813,7 @@ struct ath12k { bool regdom_set_by_user; struct completion fw_stats_complete; + struct completion fw_stats_done; struct completion mlo_setup_done; u32 mlo_setup_status; diff --git a/drivers/net/wireless/ath/ath12k/debugfs.c b/drivers/net/wireless/ath/ath12k/debugfs.c index dd624d73b8b2..23da93afaa5c 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs.c +++ b/drivers/net/wireless/ath/ath12k/debugfs.c @@ -1251,64 +1251,6 @@ void ath12k_debugfs_soc_destroy(struct ath12k_base *ab) */ } -void -ath12k_debugfs_fw_stats_process(struct ath12k *ar, - struct ath12k_fw_stats *stats) -{ - struct ath12k_base *ab = ar->ab; - struct ath12k_pdev *pdev; - bool is_end; - static unsigned int num_vdev, num_bcn; - size_t total_vdevs_started = 0; - int i; - - if (stats->stats_id == WMI_REQUEST_VDEV_STAT) { - if (list_empty(&stats->vdevs)) { - ath12k_warn(ab, "empty vdev stats"); - return; - 
} - /* FW sends all the active VDEV stats irrespective of PDEV, - * hence limit until the count of all VDEVs started - */ - rcu_read_lock(); - for (i = 0; i < ab->num_radios; i++) { - pdev = rcu_dereference(ab->pdevs_active[i]); - if (pdev && pdev->ar) - total_vdevs_started += pdev->ar->num_started_vdevs; - } - rcu_read_unlock(); - - is_end = ((++num_vdev) == total_vdevs_started); - - list_splice_tail_init(&stats->vdevs, - &ar->fw_stats.vdevs); - - if (is_end) { - ar->fw_stats.fw_stats_done = true; - num_vdev = 0; - } - return; - } - if (stats->stats_id == WMI_REQUEST_BCN_STAT) { - if (list_empty(&stats->bcn)) { - ath12k_warn(ab, "empty beacon stats"); - return; - } - /* Mark end until we reached the count of all started VDEVs - * within the PDEV - */ - is_end = ((++num_bcn) == ar->num_started_vdevs); - - list_splice_tail_init(&stats->bcn, - &ar->fw_stats.bcn); - - if (is_end) { - ar->fw_stats.fw_stats_done = true; - num_bcn = 0; - } - } -} - static int ath12k_open_vdev_stats(struct inode *inode, struct file *file) { struct ath12k *ar = inode->i_private; diff --git a/drivers/net/wireless/ath/ath12k/debugfs.h b/drivers/net/wireless/ath/ath12k/debugfs.h index ebef7dace344..21641a8a0346 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs.h +++ b/drivers/net/wireless/ath/ath12k/debugfs.h @@ -12,8 +12,6 @@ void ath12k_debugfs_soc_create(struct ath12k_base *ab); void ath12k_debugfs_soc_destroy(struct ath12k_base *ab); void ath12k_debugfs_register(struct ath12k *ar); void ath12k_debugfs_unregister(struct ath12k *ar); -void ath12k_debugfs_fw_stats_process(struct ath12k *ar, - struct ath12k_fw_stats *stats); void ath12k_debugfs_op_vif_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void ath12k_debugfs_pdev_create(struct ath12k_base *ab); @@ -126,11 +124,6 @@ static inline void ath12k_debugfs_unregister(struct ath12k *ar) { } -static inline void ath12k_debugfs_fw_stats_process(struct ath12k *ar, - struct ath12k_fw_stats *stats) -{ -} - static inline bool ath12k_debugfs_is_extd_rx_stats_enabled(struct ath12k *ar) { return false; diff --git a/drivers/net/wireless/ath/ath12k/hal.h b/drivers/net/wireless/ath/ath12k/hal.h index 0ee9c6b26dab..c1750b5dc03c 100644 --- a/drivers/net/wireless/ath/ath12k/hal.h +++ b/drivers/net/wireless/ath/ath12k/hal.h @@ -585,7 +585,8 @@ enum hal_reo_cmd_type { * or cache was blocked * @HAL_REO_CMD_FAILED: Command execution failed, could be due to * invalid queue desc - * @HAL_REO_CMD_RESOURCE_BLOCKED: + * @HAL_REO_CMD_RESOURCE_BLOCKED: Command could not be executed because + * one or more descriptors were blocked * @HAL_REO_CMD_DRAIN: */ enum hal_reo_cmd_status { diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c index 7e2cf0fb2085..8254dc10b53b 100644 --- a/drivers/net/wireless/ath/ath12k/hw.c +++ b/drivers/net/wireless/ath/ath12k/hw.c @@ -951,6 +951,8 @@ static const struct ath12k_hw_regs qcn9274_v1_regs = { .hal_umac_ce0_dest_reg_base = 0x01b81000, .hal_umac_ce1_src_reg_base = 0x01b82000, .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e38338, }; static const struct ath12k_hw_regs qcn9274_v2_regs = { @@ -1042,6 +1044,8 @@ static const struct ath12k_hw_regs qcn9274_v2_regs = { .hal_umac_ce0_dest_reg_base = 0x01b81000, .hal_umac_ce1_src_reg_base = 0x01b82000, .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e38338, }; static const struct ath12k_hw_regs ipq5332_regs = { @@ -1215,6 +1219,8 @@ static const struct ath12k_hw_regs wcn7850_regs = { .hal_umac_ce0_dest_reg_base = 
0x01b81000, .hal_umac_ce1_src_reg_base = 0x01b82000, .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e40304, }; static const struct ath12k_hw_hal_params ath12k_hw_hal_params_qcn9274 = { diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h index 0fbc17649df4..0a75bc5abfa2 100644 --- a/drivers/net/wireless/ath/ath12k/hw.h +++ b/drivers/net/wireless/ath/ath12k/hw.h @@ -375,6 +375,8 @@ struct ath12k_hw_regs { u32 hal_reo_cmd_ring_base; u32 hal_reo_status_ring_base; + + u32 gcc_gcc_pcie_hot_rst; }; static inline const char *ath12k_bd_ie_type_str(enum ath12k_bd_ie_type type) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 88b59f3ff87a..59ec422992d3 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4360,7 +4360,7 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, { struct ath12k_base *ab = ar->ab; struct ath12k_hw *ah = ath12k_ar_to_ah(ar); - unsigned long timeout, time_left; + unsigned long time_left; int ret; guard(mutex)(&ah->hw_mutex); @@ -4368,19 +4368,13 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, if (ah->state != ATH12K_HW_STATE_ON) return -ENETDOWN; - /* FW stats can get split when exceeding the stats data buffer limit. - * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - timeout = jiffies + msecs_to_jiffies(3 * 1000); ath12k_fw_stats_reset(ar); reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); ret = ath12k_wmi_send_stats_request_cmd(ar, param->stats_id, param->vdev_id, param->pdev_id); - if (ret) { ath12k_warn(ab, "failed to request fw stats: %d\n", ret); return ret; @@ -4391,7 +4385,6 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, param->pdev_id, param->vdev_id, param->stats_id); time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - if (!time_left) { ath12k_warn(ab, "time out while waiting for get fw stats\n"); return -ETIMEDOUT; @@ -4400,20 +4393,15 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, /* Firmware sends WMI_UPDATE_STATS_EVENTID back-to-back * when stats data buffer limit is reached. fw_stats_complete * is completed once host receives first event from firmware, but + * there could still be more events to follow. Below we wait until + * the firmware has finished sending all the events. 
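+ * Only the first event completes fw_stats_complete; the longer wait + * on fw_stats_done covers the remaining events. 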
*/ - for (;;) { - if (time_after(jiffies, timeout)) - break; - spin_lock_bh(&ar->data_lock); - if (ar->fw_stats.fw_stats_done) { - spin_unlock_bh(&ar->data_lock); - break; - } - spin_unlock_bh(&ar->data_lock); + time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); + if (!time_left) { + ath12k_warn(ab, "time out while waiting for fw stats done\n"); + return -ETIMEDOUT; } + return 0; } @@ -5890,6 +5878,327 @@ exit: return ret; } +static bool ath12k_mac_is_freq_on_mac(struct ath12k_hw_mode_freq_range_arg *freq_range, + u32 freq, u8 mac_id) +{ + return (freq >= freq_range[mac_id].low_2ghz_freq && + freq <= freq_range[mac_id].high_2ghz_freq) || + (freq >= freq_range[mac_id].low_5ghz_freq && + freq <= freq_range[mac_id].high_5ghz_freq); +} + +static bool +ath12k_mac_2_freq_same_mac_in_freq_range(struct ath12k_base *ab, + struct ath12k_hw_mode_freq_range_arg *freq_range, + u32 freq_link1, u32 freq_link2) +{ + u8 i; + + for (i = 0; i < MAX_RADIOS; i++) { + if (ath12k_mac_is_freq_on_mac(freq_range, freq_link1, i) && + ath12k_mac_is_freq_on_mac(freq_range, freq_link2, i)) + return true; + } + + return false; +} + +static bool ath12k_mac_is_hw_dbs_capable(struct ath12k_base *ab) +{ + return test_bit(WMI_TLV_SERVICE_DUAL_BAND_SIMULTANEOUS_SUPPORT, + ab->wmi_ab.svc_map) && + ab->wmi_ab.hw_mode_info.support_dbs; +} + +static bool ath12k_mac_2_freq_same_mac_in_dbs(struct ath12k_base *ab, + u32 freq_link1, u32 freq_link2) +{ + struct ath12k_hw_mode_freq_range_arg *freq_range; + + if (!ath12k_mac_is_hw_dbs_capable(ab)) + return true; + + freq_range = ab->wmi_ab.hw_mode_info.freq_range_caps[ATH12K_HW_MODE_DBS]; + return ath12k_mac_2_freq_same_mac_in_freq_range(ab, freq_range, + freq_link1, freq_link2); +} + +static bool ath12k_mac_is_hw_sbs_capable(struct ath12k_base *ab) +{ + return test_bit(WMI_TLV_SERVICE_DUAL_BAND_SIMULTANEOUS_SUPPORT, + ab->wmi_ab.svc_map) && + ab->wmi_ab.hw_mode_info.support_sbs; +} + +static bool ath12k_mac_2_freq_same_mac_in_sbs(struct ath12k_base *ab, + u32 freq_link1, u32 freq_link2) +{ + struct ath12k_hw_mode_info *info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *sbs_uppr_share; + struct ath12k_hw_mode_freq_range_arg *sbs_low_share; + struct ath12k_hw_mode_freq_range_arg *sbs_range; + + if (!ath12k_mac_is_hw_sbs_capable(ab)) + return true; + + if (ab->wmi_ab.sbs_lower_band_end_freq) { + sbs_uppr_share = info->freq_range_caps[ATH12K_HW_MODE_SBS_UPPER_SHARE]; + sbs_low_share = info->freq_range_caps[ATH12K_HW_MODE_SBS_LOWER_SHARE]; + + return ath12k_mac_2_freq_same_mac_in_freq_range(ab, sbs_low_share, + freq_link1, freq_link2) || + ath12k_mac_2_freq_same_mac_in_freq_range(ab, sbs_uppr_share, + freq_link1, freq_link2); + } + + sbs_range = info->freq_range_caps[ATH12K_HW_MODE_SBS]; + return ath12k_mac_2_freq_same_mac_in_freq_range(ab, sbs_range, + freq_link1, freq_link2); +} + +static bool ath12k_mac_freqs_on_same_mac(struct ath12k_base *ab, + u32 freq_link1, u32 freq_link2) +{ + return ath12k_mac_2_freq_same_mac_in_dbs(ab, freq_link1, freq_link2) && + ath12k_mac_2_freq_same_mac_in_sbs(ab, freq_link1, freq_link2); +} + +static int ath12k_mac_mlo_sta_set_link_active(struct ath12k_base *ab, + enum wmi_mlo_link_force_reason reason, + enum wmi_mlo_link_force_mode mode, + u8 *mlo_vdev_id_lst, + u8 num_mlo_vdev, + u8 *mlo_inactive_vdev_lst, + u8 num_mlo_inactive_vdev) +{ + struct wmi_mlo_link_set_active_arg param = {0}; + u32 entry_idx, entry_offset, vdev_idx; + u8 vdev_id; + + param.reason = reason; + param.force_mode = mode; + + for (vdev_idx = 0; 
vdev_idx < num_mlo_vdev; vdev_idx++) {
+		vdev_id = mlo_vdev_id_lst[vdev_idx];
+		entry_idx = vdev_id / 32;
+		entry_offset = vdev_id % 32;
+		if (entry_idx >= WMI_MLO_LINK_NUM_SZ) {
+			ath12k_warn(ab, "Invalid entry_idx %d num_mlo_vdev %d vdev %d",
+				    entry_idx, num_mlo_vdev, vdev_id);
+			return -EINVAL;
+		}
+		param.vdev_bitmap[entry_idx] |= 1 << entry_offset;
+		/* update entry number if entry index changed */
+		if (param.num_vdev_bitmap < entry_idx + 1)
+			param.num_vdev_bitmap = entry_idx + 1;
+	}
+
+	ath12k_dbg(ab, ATH12K_DBG_MAC,
+		   "num_vdev_bitmap %d vdev_bitmap[0] = 0x%x, vdev_bitmap[1] = 0x%x",
+		   param.num_vdev_bitmap, param.vdev_bitmap[0], param.vdev_bitmap[1]);
+
+	if (mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE) {
+		for (vdev_idx = 0; vdev_idx < num_mlo_inactive_vdev; vdev_idx++) {
+			vdev_id = mlo_inactive_vdev_lst[vdev_idx];
+			entry_idx = vdev_id / 32;
+			entry_offset = vdev_id % 32;
+			if (entry_idx >= WMI_MLO_LINK_NUM_SZ) {
+				ath12k_warn(ab, "Invalid entry_idx %d num_mlo_vdev %d vdev %d",
+					    entry_idx, num_mlo_inactive_vdev, vdev_id);
+				return -EINVAL;
+			}
+			param.inactive_vdev_bitmap[entry_idx] |= 1 << entry_offset;
+			/* update entry number if entry index changed */
+			if (param.num_inactive_vdev_bitmap < entry_idx + 1)
+				param.num_inactive_vdev_bitmap = entry_idx + 1;
+		}
+
+		ath12k_dbg(ab, ATH12K_DBG_MAC,
+			   "num_vdev_bitmap %d inactive_vdev_bitmap[0] = 0x%x, inactive_vdev_bitmap[1] = 0x%x",
+			   param.num_inactive_vdev_bitmap,
+			   param.inactive_vdev_bitmap[0],
+			   param.inactive_vdev_bitmap[1]);
+	}
+
+	if (mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_LINK_NUM ||
+	    mode == WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM) {
+		param.num_link_entry = 1;
+		param.link_num[0].num_of_link = num_mlo_vdev - 1;
+	}
+
+	return ath12k_wmi_send_mlo_link_set_active_cmd(ab, &param);
+}
+
+static int ath12k_mac_mlo_sta_update_link_active(struct ath12k_base *ab,
+						 struct ieee80211_hw *hw,
+						 struct ath12k_vif *ahvif)
+{
+	u8 mlo_vdev_id_lst[IEEE80211_MLD_MAX_NUM_LINKS] = {0};
+	u32 mlo_freq_list[IEEE80211_MLD_MAX_NUM_LINKS] = {0};
+	unsigned long links = ahvif->links_map;
+	enum wmi_mlo_link_force_reason reason;
+	struct ieee80211_chanctx_conf *conf;
+	enum wmi_mlo_link_force_mode mode;
+	struct ieee80211_bss_conf *info;
+	struct ath12k_link_vif *arvif;
+	u8 num_mlo_vdev = 0;
+	u8 link_id;
+
+	for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) {
+		arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]);
+		/* make sure vdev is created on this device */
+		if (!arvif || !arvif->is_created || arvif->ar->ab != ab)
+			continue;
+
+		info = ath12k_mac_get_link_bss_conf(arvif);
+		conf = wiphy_dereference(hw->wiphy, info->chanctx_conf);
+		mlo_freq_list[num_mlo_vdev] = conf->def.chan->center_freq;
+
+		mlo_vdev_id_lst[num_mlo_vdev] = arvif->vdev_id;
+		num_mlo_vdev++;
+	}
+
+	/* It is not allowed to activate more links than a single device
+	 * supports. Something has gone wrong if we reach here.
+	 */
+	if (num_mlo_vdev > ATH12K_NUM_MAX_ACTIVE_LINKS_PER_DEVICE) {
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+
+	/* if 2 links are established and both link channels fall on the
+	 * same hardware MAC, send command to firmware to deactivate one
+	 * of them.
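+	 *
+	 * For example (hypothetical channels): links on 5180 MHz and
+	 * 5200 MHz would normally resolve to the same 5 GHz MAC, so one
+	 * of them is forced inactive, whereas 2437 MHz plus 5180 MHz can
+	 * stay active together as a DBS pair.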
+ */ + if (num_mlo_vdev == 2 && + ath12k_mac_freqs_on_same_mac(ab, mlo_freq_list[0], + mlo_freq_list[1])) { + mode = WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM; + reason = WMI_MLO_LINK_FORCE_REASON_NEW_CONNECT; + return ath12k_mac_mlo_sta_set_link_active(ab, reason, mode, + mlo_vdev_id_lst, num_mlo_vdev, + NULL, 0); + } + + return 0; +} + +static bool ath12k_mac_are_sbs_chan(struct ath12k_base *ab, u32 freq_1, u32 freq_2) +{ + if (!ath12k_mac_is_hw_sbs_capable(ab)) + return false; + + if (ath12k_is_2ghz_channel_freq(freq_1) || + ath12k_is_2ghz_channel_freq(freq_2)) + return false; + + return !ath12k_mac_2_freq_same_mac_in_sbs(ab, freq_1, freq_2); +} + +static bool ath12k_mac_are_dbs_chan(struct ath12k_base *ab, u32 freq_1, u32 freq_2) +{ + if (!ath12k_mac_is_hw_dbs_capable(ab)) + return false; + + return !ath12k_mac_2_freq_same_mac_in_dbs(ab, freq_1, freq_2); +} + +static int ath12k_mac_select_links(struct ath12k_base *ab, + struct ieee80211_vif *vif, + struct ieee80211_hw *hw, + u16 *selected_links) +{ + unsigned long useful_links = ieee80211_vif_usable_links(vif); + struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); + u8 num_useful_links = hweight_long(useful_links); + struct ieee80211_chanctx_conf *chanctx; + struct ath12k_link_vif *assoc_arvif; + u32 assoc_link_freq, partner_freq; + u16 sbs_links = 0, dbs_links = 0; + struct ieee80211_bss_conf *info; + struct ieee80211_channel *chan; + struct ieee80211_sta *sta; + struct ath12k_sta *ahsta; + u8 link_id; + + /* activate all useful links if less than max supported */ + if (num_useful_links <= ATH12K_NUM_MAX_ACTIVE_LINKS_PER_DEVICE) { + *selected_links = useful_links; + return 0; + } + + /* only in station mode we can get here, so it's safe + * to use ap_addr + */ + rcu_read_lock(); + sta = ieee80211_find_sta(vif, vif->cfg.ap_addr); + if (!sta) { + rcu_read_unlock(); + ath12k_warn(ab, "failed to find sta with addr %pM\n", vif->cfg.ap_addr); + return -EINVAL; + } + + ahsta = ath12k_sta_to_ahsta(sta); + assoc_arvif = wiphy_dereference(hw->wiphy, ahvif->link[ahsta->assoc_link_id]); + info = ath12k_mac_get_link_bss_conf(assoc_arvif); + chanctx = rcu_dereference(info->chanctx_conf); + assoc_link_freq = chanctx->def.chan->center_freq; + rcu_read_unlock(); + ath12k_dbg(ab, ATH12K_DBG_MAC, "assoc link %u freq %u\n", + assoc_arvif->link_id, assoc_link_freq); + + /* assoc link is already activated and has to be kept active, + * only need to select a partner link from others. 
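+	 *
+	 * The loop below buckets the remaining candidates into SBS and
+	 * DBS masks; the preference applied afterwards is SBS first,
+	 * then DBS, then simply the first remaining usable link. E.g.
+	 * (hypothetical value) sbs_links == 0x4 makes __ffs() return 2,
+	 * so link 2 becomes the partner link.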
+	 */
+	useful_links &= ~BIT(assoc_arvif->link_id);
+	for_each_set_bit(link_id, &useful_links, IEEE80211_MLD_MAX_NUM_LINKS) {
+		info = wiphy_dereference(hw->wiphy, vif->link_conf[link_id]);
+		if (!info) {
+			ath12k_warn(ab, "failed to get link info for link: %u\n",
+				    link_id);
+			return -ENOLINK;
+		}
+
+		chan = info->chanreq.oper.chan;
+		if (!chan) {
+			ath12k_warn(ab, "failed to get chan for link: %u\n", link_id);
+			return -EINVAL;
+		}
+
+		partner_freq = chan->center_freq;
+		if (ath12k_mac_are_sbs_chan(ab, assoc_link_freq, partner_freq)) {
+			sbs_links |= BIT(link_id);
+			ath12k_dbg(ab, ATH12K_DBG_MAC, "new SBS link %u freq %u\n",
+				   link_id, partner_freq);
+			continue;
+		}
+
+		if (ath12k_mac_are_dbs_chan(ab, assoc_link_freq, partner_freq)) {
+			dbs_links |= BIT(link_id);
+			ath12k_dbg(ab, ATH12K_DBG_MAC, "new DBS link %u freq %u\n",
+				   link_id, partner_freq);
+			continue;
+		}
+
+		ath12k_dbg(ab, ATH12K_DBG_MAC, "non DBS/SBS link %u freq %u\n",
+			   link_id, partner_freq);
+	}
+
+	/* choose the first candidate no matter how many are in the list */
+	if (sbs_links)
+		link_id = __ffs(sbs_links);
+	else if (dbs_links)
+		link_id = __ffs(dbs_links);
+	else
+		link_id = ffs(useful_links) - 1;
+
+	ath12k_dbg(ab, ATH12K_DBG_MAC, "select partner link %u\n", link_id);
+
+	*selected_links = BIT(assoc_arvif->link_id) | BIT(link_id);
+
+	return 0;
+}
+
 static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw,
 				   struct ieee80211_vif *vif,
 				   struct ieee80211_sta *sta,
@@ -5899,10 +6208,13 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw,
 	struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif);
 	struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta);
 	struct ath12k_hw *ah = ath12k_hw_to_ah(hw);
+	struct ath12k_base *prev_ab = NULL, *ab;
 	struct ath12k_link_vif *arvif;
 	struct ath12k_link_sta *arsta;
 	unsigned long valid_links;
-	u8 link_id = 0;
+	u16 selected_links = 0;
+	u8 link_id = 0, i;
+	struct ath12k *ar;
 	int ret;

 	lockdep_assert_wiphy(hw->wiphy);

@@ -5972,8 +6284,24 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw,
 	 * about to move to the associated state.
 	 */
 	if (ieee80211_vif_is_mld(vif) && vif->type == NL80211_IFTYPE_STATION &&
-	    old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC)
-		ieee80211_set_active_links(vif, ieee80211_vif_usable_links(vif));
+	    old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC) {
+		/* TODO: for now only do link selection for single device
+		 * MLO case. Other cases would be handled in the future.
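+		 * Until then, the multi-device branch below simply keeps
+		 * mac80211's default of activating every usable link.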
+ */ + ab = ah->radio[0].ab; + if (ab->ag->num_devices == 1) { + ret = ath12k_mac_select_links(ab, vif, hw, &selected_links); + if (ret) { + ath12k_warn(ab, + "failed to get selected links: %d\n", ret); + goto exit; + } + } else { + selected_links = ieee80211_vif_usable_links(vif); + } + + ieee80211_set_active_links(vif, selected_links); + } /* Handle all the other state transitions in generic way */ valid_links = ahsta->links_map; @@ -5997,6 +6325,24 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, } } + if (ieee80211_vif_is_mld(vif) && vif->type == NL80211_IFTYPE_STATION && + old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTHORIZED) { + for_each_ar(ah, ar, i) { + ab = ar->ab; + if (prev_ab == ab) + continue; + + ret = ath12k_mac_mlo_sta_update_link_active(ab, hw, ahvif); + if (ret) { + ath12k_warn(ab, + "failed to update link active state on connect %d\n", + ret); + goto exit; + } + + prev_ab = ab; + } + } /* IEEE80211_STA_NONE -> IEEE80211_STA_NOTEXIST: * Remove the station from driver (handle ML sta here since that * needs special handling. Normal sta will be handled in generic diff --git a/drivers/net/wireless/ath/ath12k/mac.h b/drivers/net/wireless/ath/ath12k/mac.h index e6e74b45bfa4..cc81b1f5680f 100644 --- a/drivers/net/wireless/ath/ath12k/mac.h +++ b/drivers/net/wireless/ath/ath12k/mac.h @@ -54,6 +54,8 @@ struct ath12k_generic_iter { #define ATH12K_DEFAULT_SCAN_LINK IEEE80211_MLD_MAX_NUM_LINKS #define ATH12K_NUM_MAX_LINKS (IEEE80211_MLD_MAX_NUM_LINKS + 1) +#define ATH12K_NUM_MAX_ACTIVE_LINKS_PER_DEVICE 2 + enum ath12k_supported_bw { ATH12K_BW_20 = 0, ATH12K_BW_40 = 1, diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index 489d546390fc..1f3cfd9b89fd 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -292,10 +292,10 @@ static void ath12k_pci_enable_ltssm(struct ath12k_base *ab) ath12k_dbg(ab, ATH12K_DBG_PCI, "pci ltssm 0x%x\n", val); - val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST); + val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST(ab)); val |= GCC_GCC_PCIE_HOT_RST_VAL; - ath12k_pci_write32(ab, GCC_GCC_PCIE_HOT_RST, val); - val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST); + ath12k_pci_write32(ab, GCC_GCC_PCIE_HOT_RST(ab), val); + val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST(ab)); ath12k_dbg(ab, ATH12K_DBG_PCI, "pci pcie_hot_rst 0x%x\n", val); diff --git a/drivers/net/wireless/ath/ath12k/pci.h b/drivers/net/wireless/ath/ath12k/pci.h index 0b4c459d6d8e..d1ec8aad7f6c 100644 --- a/drivers/net/wireless/ath/ath12k/pci.h +++ b/drivers/net/wireless/ath/ath12k/pci.h @@ -28,7 +28,9 @@ #define PCIE_PCIE_PARF_LTSSM 0x1e081b0 #define PARM_LTSSM_VALUE 0x111 -#define GCC_GCC_PCIE_HOT_RST 0x1e38338 +#define GCC_GCC_PCIE_HOT_RST(ab) \ + ((ab)->hw_params->regs->gcc_gcc_pcie_hot_rst) + #define GCC_GCC_PCIE_HOT_RST_VAL 0x10 #define PCIE_PCIE_INT_ALL_CLEAR 0x1e08228 diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 60e2444fe08c..465f877fc0fb 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -91,6 +91,11 @@ struct ath12k_wmi_svc_rdy_ext2_parse { bool dma_ring_cap_done; bool spectral_bin_scaling_done; bool mac_phy_caps_ext_done; + bool hal_reg_caps_ext2_done; + bool scan_radio_caps_ext2_done; + bool twt_caps_done; + bool htt_msdu_idx_to_qtype_map_done; + bool dbs_or_sbs_cap_ext_done; }; struct ath12k_wmi_rdy_parse { @@ -4395,6 +4400,7 @@ static int ath12k_wmi_hw_mode_caps_parse(struct 
ath12k_base *soc, static int ath12k_wmi_hw_mode_caps(struct ath12k_base *soc, u16 len, const void *ptr, void *data) { + struct ath12k_svc_ext_info *svc_ext_info = &soc->wmi_ab.svc_ext_info; struct ath12k_wmi_svc_rdy_ext_parse *svc_rdy_ext = data; const struct ath12k_wmi_hw_mode_cap_params *hw_mode_caps; enum wmi_host_hw_mode_config_type mode, pref; @@ -4427,8 +4433,11 @@ static int ath12k_wmi_hw_mode_caps(struct ath12k_base *soc, } } - ath12k_dbg(soc, ATH12K_DBG_WMI, "preferred_hw_mode:%d\n", - soc->wmi_ab.preferred_hw_mode); + svc_ext_info->num_hw_modes = svc_rdy_ext->n_hw_mode_caps; + + ath12k_dbg(soc, ATH12K_DBG_WMI, "num hw modes %u preferred_hw_mode %d\n", + svc_ext_info->num_hw_modes, soc->wmi_ab.preferred_hw_mode); + if (soc->wmi_ab.preferred_hw_mode == WMI_HOST_HW_MODE_MAX) return -EINVAL; @@ -4658,6 +4667,65 @@ free_dir_buff: return ret; } +static void +ath12k_wmi_save_mac_phy_info(struct ath12k_base *ab, + const struct ath12k_wmi_mac_phy_caps_params *mac_phy_cap, + struct ath12k_svc_ext_mac_phy_info *mac_phy_info) +{ + mac_phy_info->phy_id = __le32_to_cpu(mac_phy_cap->phy_id); + mac_phy_info->supported_bands = __le32_to_cpu(mac_phy_cap->supported_bands); + mac_phy_info->hw_freq_range.low_2ghz_freq = + __le32_to_cpu(mac_phy_cap->low_2ghz_chan_freq); + mac_phy_info->hw_freq_range.high_2ghz_freq = + __le32_to_cpu(mac_phy_cap->high_2ghz_chan_freq); + mac_phy_info->hw_freq_range.low_5ghz_freq = + __le32_to_cpu(mac_phy_cap->low_5ghz_chan_freq); + mac_phy_info->hw_freq_range.high_5ghz_freq = + __le32_to_cpu(mac_phy_cap->high_5ghz_chan_freq); +} + +static void +ath12k_wmi_save_all_mac_phy_info(struct ath12k_base *ab, + struct ath12k_wmi_svc_rdy_ext_parse *svc_rdy_ext) +{ + struct ath12k_svc_ext_info *svc_ext_info = &ab->wmi_ab.svc_ext_info; + const struct ath12k_wmi_mac_phy_caps_params *mac_phy_cap; + const struct ath12k_wmi_hw_mode_cap_params *hw_mode_cap; + struct ath12k_svc_ext_mac_phy_info *mac_phy_info; + u32 hw_mode_id, phy_bit_map; + u8 hw_idx; + + mac_phy_info = &svc_ext_info->mac_phy_info[0]; + mac_phy_cap = svc_rdy_ext->mac_phy_caps; + + for (hw_idx = 0; hw_idx < svc_ext_info->num_hw_modes; hw_idx++) { + hw_mode_cap = &svc_rdy_ext->hw_mode_caps[hw_idx]; + hw_mode_id = __le32_to_cpu(hw_mode_cap->hw_mode_id); + phy_bit_map = __le32_to_cpu(hw_mode_cap->phy_id_map); + + while (phy_bit_map) { + ath12k_wmi_save_mac_phy_info(ab, mac_phy_cap, mac_phy_info); + mac_phy_info->hw_mode_config_type = + le32_get_bits(hw_mode_cap->hw_mode_config_type, + WMI_HW_MODE_CAP_CFG_TYPE); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "hw_idx %u hw_mode_id %u hw_mode_config_type %u supported_bands %u phy_id %u 2 GHz [%u - %u] 5 GHz [%u - %u]\n", + hw_idx, hw_mode_id, + mac_phy_info->hw_mode_config_type, + mac_phy_info->supported_bands, mac_phy_info->phy_id, + mac_phy_info->hw_freq_range.low_2ghz_freq, + mac_phy_info->hw_freq_range.high_2ghz_freq, + mac_phy_info->hw_freq_range.low_5ghz_freq, + mac_phy_info->hw_freq_range.high_5ghz_freq); + + mac_phy_cap++; + mac_phy_info++; + + phy_bit_map >>= 1; + } + } +} + static int ath12k_wmi_svc_rdy_ext_parse(struct ath12k_base *ab, u16 tag, u16 len, const void *ptr, void *data) @@ -4706,6 +4774,8 @@ static int ath12k_wmi_svc_rdy_ext_parse(struct ath12k_base *ab, return ret; } + ath12k_wmi_save_all_mac_phy_info(ab, svc_rdy_ext); + svc_rdy_ext->mac_phy_done = true; } else if (!svc_rdy_ext->ext_hal_reg_done) { ret = ath12k_wmi_ext_hal_reg_caps(ab, len, ptr, svc_rdy_ext); @@ -4922,10 +4992,449 @@ static int ath12k_wmi_tlv_mac_phy_caps_ext(struct ath12k_base *ab, u16 tag, return 
0; } +static void +ath12k_wmi_update_freq_info(struct ath12k_base *ab, + struct ath12k_svc_ext_mac_phy_info *mac_cap, + enum ath12k_hw_mode mode, + u32 phy_id) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *mac_range; + + mac_range = &hw_mode_info->freq_range_caps[mode][phy_id]; + + if (mac_cap->supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { + mac_range->low_2ghz_freq = max_t(u32, + mac_cap->hw_freq_range.low_2ghz_freq, + ATH12K_MIN_2GHZ_FREQ); + mac_range->high_2ghz_freq = mac_cap->hw_freq_range.high_2ghz_freq ? + min_t(u32, + mac_cap->hw_freq_range.high_2ghz_freq, + ATH12K_MAX_2GHZ_FREQ) : + ATH12K_MAX_2GHZ_FREQ; + } + + if (mac_cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { + mac_range->low_5ghz_freq = max_t(u32, + mac_cap->hw_freq_range.low_5ghz_freq, + ATH12K_MIN_5GHZ_FREQ); + mac_range->high_5ghz_freq = mac_cap->hw_freq_range.high_5ghz_freq ? + min_t(u32, + mac_cap->hw_freq_range.high_5ghz_freq, + ATH12K_MAX_6GHZ_FREQ) : + ATH12K_MAX_6GHZ_FREQ; + } +} + +static bool +ath12k_wmi_all_phy_range_updated(struct ath12k_base *ab, + enum ath12k_hw_mode hwmode) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *mac_range; + u8 phy_id; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + mac_range = &hw_mode_info->freq_range_caps[hwmode][phy_id]; + /* modify SBS/DBS range only when both phy for DBS are filled */ + if (!mac_range->low_2ghz_freq && !mac_range->low_5ghz_freq) + return false; + } + + return true; +} + +static void ath12k_wmi_update_dbs_freq_info(struct ath12k_base *ab) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *mac_range; + u8 phy_id; + + mac_range = hw_mode_info->freq_range_caps[ATH12K_HW_MODE_DBS]; + /* Reset 5 GHz range for shared mac for DBS */ + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + if (mac_range[phy_id].low_2ghz_freq && + mac_range[phy_id].low_5ghz_freq) { + mac_range[phy_id].low_5ghz_freq = 0; + mac_range[phy_id].high_5ghz_freq = 0; + } + } +} + +static u32 +ath12k_wmi_get_highest_5ghz_freq_from_range(struct ath12k_hw_mode_freq_range_arg *range) +{ + u32 highest_freq = 0; + u8 phy_id; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + if (range[phy_id].high_5ghz_freq > highest_freq) + highest_freq = range[phy_id].high_5ghz_freq; + } + + return highest_freq ? highest_freq : ATH12K_MAX_6GHZ_FREQ; +} + +static u32 +ath12k_wmi_get_lowest_5ghz_freq_from_range(struct ath12k_hw_mode_freq_range_arg *range) +{ + u32 lowest_freq = 0; + u8 phy_id; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + if ((!lowest_freq && range[phy_id].low_5ghz_freq) || + range[phy_id].low_5ghz_freq < lowest_freq) + lowest_freq = range[phy_id].low_5ghz_freq; + } + + return lowest_freq ? 
lowest_freq : ATH12K_MIN_5GHZ_FREQ; +} + +static void +ath12k_wmi_fill_upper_share_sbs_freq(struct ath12k_base *ab, + u16 sbs_range_sep, + struct ath12k_hw_mode_freq_range_arg *ref_freq) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *upper_sbs_freq_range; + u8 phy_id; + + upper_sbs_freq_range = + hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS_UPPER_SHARE]; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + upper_sbs_freq_range[phy_id].low_2ghz_freq = + ref_freq[phy_id].low_2ghz_freq; + upper_sbs_freq_range[phy_id].high_2ghz_freq = + ref_freq[phy_id].high_2ghz_freq; + + /* update for shared mac */ + if (upper_sbs_freq_range[phy_id].low_2ghz_freq) { + upper_sbs_freq_range[phy_id].low_5ghz_freq = sbs_range_sep + 10; + upper_sbs_freq_range[phy_id].high_5ghz_freq = + ath12k_wmi_get_highest_5ghz_freq_from_range(ref_freq); + } else { + upper_sbs_freq_range[phy_id].low_5ghz_freq = + ath12k_wmi_get_lowest_5ghz_freq_from_range(ref_freq); + upper_sbs_freq_range[phy_id].high_5ghz_freq = sbs_range_sep; + } + } +} + +static void +ath12k_wmi_fill_lower_share_sbs_freq(struct ath12k_base *ab, + u16 sbs_range_sep, + struct ath12k_hw_mode_freq_range_arg *ref_freq) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *lower_sbs_freq_range; + u8 phy_id; + + lower_sbs_freq_range = + hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS_LOWER_SHARE]; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + lower_sbs_freq_range[phy_id].low_2ghz_freq = + ref_freq[phy_id].low_2ghz_freq; + lower_sbs_freq_range[phy_id].high_2ghz_freq = + ref_freq[phy_id].high_2ghz_freq; + + /* update for shared mac */ + if (lower_sbs_freq_range[phy_id].low_2ghz_freq) { + lower_sbs_freq_range[phy_id].low_5ghz_freq = + ath12k_wmi_get_lowest_5ghz_freq_from_range(ref_freq); + lower_sbs_freq_range[phy_id].high_5ghz_freq = sbs_range_sep; + } else { + lower_sbs_freq_range[phy_id].low_5ghz_freq = sbs_range_sep + 10; + lower_sbs_freq_range[phy_id].high_5ghz_freq = + ath12k_wmi_get_highest_5ghz_freq_from_range(ref_freq); + } + } +} + +static const char *ath12k_wmi_hw_mode_to_str(enum ath12k_hw_mode hw_mode) +{ + static const char * const mode_str[] = { + [ATH12K_HW_MODE_SMM] = "SMM", + [ATH12K_HW_MODE_DBS] = "DBS", + [ATH12K_HW_MODE_SBS] = "SBS", + [ATH12K_HW_MODE_SBS_UPPER_SHARE] = "SBS_UPPER_SHARE", + [ATH12K_HW_MODE_SBS_LOWER_SHARE] = "SBS_LOWER_SHARE", + }; + + if (hw_mode >= ARRAY_SIZE(mode_str)) + return "Unknown"; + + return mode_str[hw_mode]; +} + +static void +ath12k_wmi_dump_freq_range_per_mac(struct ath12k_base *ab, + struct ath12k_hw_mode_freq_range_arg *freq_range, + enum ath12k_hw_mode hw_mode) +{ + u8 i; + + for (i = 0; i < MAX_RADIOS; i++) + if (freq_range[i].low_2ghz_freq || freq_range[i].low_5ghz_freq) + ath12k_dbg(ab, ATH12K_DBG_WMI, + "frequency range: %s(%d) mac %d 2 GHz [%d - %d] 5 GHz [%d - %d]", + ath12k_wmi_hw_mode_to_str(hw_mode), + hw_mode, i, + freq_range[i].low_2ghz_freq, + freq_range[i].high_2ghz_freq, + freq_range[i].low_5ghz_freq, + freq_range[i].high_5ghz_freq); +} + +static void ath12k_wmi_dump_freq_range(struct ath12k_base *ab) +{ + struct ath12k_hw_mode_freq_range_arg *freq_range; + u8 i; + + for (i = ATH12K_HW_MODE_SMM; i < ATH12K_HW_MODE_MAX; i++) { + freq_range = ab->wmi_ab.hw_mode_info.freq_range_caps[i]; + ath12k_wmi_dump_freq_range_per_mac(ab, freq_range, i); + } +} + +static int ath12k_wmi_modify_sbs_freq(struct ath12k_base *ab, u8 phy_id) +{ + struct ath12k_hw_mode_info 
*hw_mode_info = &ab->wmi_ab.hw_mode_info;
+	struct ath12k_hw_mode_freq_range_arg *sbs_mac_range, *shared_mac_range;
+	struct ath12k_hw_mode_freq_range_arg *non_shared_range;
+	u8 shared_phy_id;
+
+	sbs_mac_range = &hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS][phy_id];
+
+	/* if SBS mac range has both 2.4 and 5 GHz ranges, i.e. shared phy_id,
+	 * keep the range as it is in SBS
+	 */
+	if (sbs_mac_range->low_2ghz_freq && sbs_mac_range->low_5ghz_freq)
+		return 0;
+
+	if (sbs_mac_range->low_2ghz_freq && !sbs_mac_range->low_5ghz_freq) {
+		ath12k_err(ab, "Invalid DBS/SBS mode with only 2.4 GHz");
+		ath12k_wmi_dump_freq_range_per_mac(ab, sbs_mac_range, ATH12K_HW_MODE_SBS);
+		return -EINVAL;
+	}
+
+	non_shared_range = sbs_mac_range;
+	/* if SBS mac range has only 5 GHz then it's the non-shared phy, so
+	 * modify the range as per the shared mac.
+	 */
+	shared_phy_id = phy_id ? 0 : 1;
+	shared_mac_range =
+		&hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS][shared_phy_id];
+
+	if (shared_mac_range->low_5ghz_freq > non_shared_range->low_5ghz_freq) {
+		ath12k_dbg(ab, ATH12K_DBG_WMI, "high 5 GHz shared");
+		/* If the shared mac lower 5 GHz frequency is greater than
+		 * non-shared mac lower 5 GHz frequency then the shared mac has
+		 * high 5 GHz shared with 2.4 GHz. So non-shared mac's 5 GHz high
+		 * freq should be less than the shared mac's low 5 GHz freq.
+		 */
+		if (non_shared_range->high_5ghz_freq >=
+		    shared_mac_range->low_5ghz_freq)
+			non_shared_range->high_5ghz_freq =
+				max_t(u32, shared_mac_range->low_5ghz_freq - 10,
+				      non_shared_range->low_5ghz_freq);
+	} else if (shared_mac_range->high_5ghz_freq <
+		   non_shared_range->high_5ghz_freq) {
+		ath12k_dbg(ab, ATH12K_DBG_WMI, "low 5 GHz shared");
+		/* If the shared mac high 5 GHz frequency is less than
+		 * non-shared mac high 5 GHz frequency then the shared mac has
+		 * low 5 GHz shared with 2.4 GHz. So non-shared mac's 5 GHz low
+		 * freq should be greater than the shared mac's high 5 GHz freq.
+		 */
+		if (shared_mac_range->high_5ghz_freq >=
+		    non_shared_range->low_5ghz_freq)
+			non_shared_range->low_5ghz_freq =
+				min_t(u32, shared_mac_range->high_5ghz_freq + 10,
+				      non_shared_range->high_5ghz_freq);
+	} else {
+		ath12k_warn(ab, "invalid SBS range with all 5 GHz shared");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void ath12k_wmi_update_sbs_freq_info(struct ath12k_base *ab)
+{
+	struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info;
+	struct ath12k_hw_mode_freq_range_arg *mac_range;
+	u16 sbs_range_sep;
+	u8 phy_id;
+	int ret;
+
+	mac_range = hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS];
+
+	/* If sbs_lower_band_end_freq has a value, then the frequency range
+	 * will be split using that value.
+	 */
+	sbs_range_sep = ab->wmi_ab.sbs_lower_band_end_freq;
+	if (sbs_range_sep) {
+		ath12k_wmi_fill_upper_share_sbs_freq(ab, sbs_range_sep,
+						     mac_range);
+		ath12k_wmi_fill_lower_share_sbs_freq(ab, sbs_range_sep,
+						     mac_range);
+		/* Hardware specifies the range boundary with sbs_range_sep
+		 * (i.e. the boundary between 5 GHz high and 5 GHz low), so
+		 * reset the original one to make sure it will not get used.
+		 */
+		memset(mac_range, 0, sizeof(*mac_range) * MAX_RADIOS);
+		return;
+	}
+
+	/* If sbs_lower_band_end_freq is not set, firmware will send one
+	 * shared mac range and one non-shared mac range. So update those
+	 * frequencies accordingly.
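+	 *
+	 * Worked example (hypothetical ranges): if the shared mac reports
+	 * 2.4 GHz plus 5 GHz [5180 - 5320] and the non-shared mac reports
+	 * [5180 - 5885], ath12k_wmi_modify_sbs_freq() lifts the non-shared
+	 * low edge to 5330 so that the two mac ranges never overlap.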
+ */ + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + ret = ath12k_wmi_modify_sbs_freq(ab, phy_id); + if (ret) { + memset(mac_range, 0, sizeof(*mac_range) * MAX_RADIOS); + break; + } + } +} + +static void +ath12k_wmi_update_mac_freq_info(struct ath12k_base *ab, + enum wmi_host_hw_mode_config_type hw_config_type, + u32 phy_id, + struct ath12k_svc_ext_mac_phy_info *mac_cap) +{ + if (phy_id >= MAX_RADIOS) { + ath12k_err(ab, "mac more than two not supported: %d", phy_id); + return; + } + + ath12k_dbg(ab, ATH12K_DBG_WMI, + "hw_mode_cfg %d mac %d band 0x%x SBS cutoff freq %d 2 GHz [%d - %d] 5 GHz [%d - %d]", + hw_config_type, phy_id, mac_cap->supported_bands, + ab->wmi_ab.sbs_lower_band_end_freq, + mac_cap->hw_freq_range.low_2ghz_freq, + mac_cap->hw_freq_range.high_2ghz_freq, + mac_cap->hw_freq_range.low_5ghz_freq, + mac_cap->hw_freq_range.high_5ghz_freq); + + switch (hw_config_type) { + case WMI_HOST_HW_MODE_SINGLE: + if (phy_id) { + ath12k_dbg(ab, ATH12K_DBG_WMI, "mac phy 1 is not supported"); + break; + } + ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_SMM, phy_id); + break; + + case WMI_HOST_HW_MODE_DBS: + if (!ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_DBS)) + ath12k_wmi_update_freq_info(ab, mac_cap, + ATH12K_HW_MODE_DBS, phy_id); + break; + case WMI_HOST_HW_MODE_DBS_SBS: + case WMI_HOST_HW_MODE_DBS_OR_SBS: + ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_DBS, phy_id); + if (ab->wmi_ab.sbs_lower_band_end_freq || + mac_cap->hw_freq_range.low_5ghz_freq || + mac_cap->hw_freq_range.low_2ghz_freq) + ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_SBS, + phy_id); + + if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_DBS)) + ath12k_wmi_update_dbs_freq_info(ab); + if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS)) + ath12k_wmi_update_sbs_freq_info(ab); + break; + case WMI_HOST_HW_MODE_SBS: + case WMI_HOST_HW_MODE_SBS_PASSIVE: + ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_SBS, phy_id); + if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS)) + ath12k_wmi_update_sbs_freq_info(ab); + + break; + default: + break; + } +} + +static bool ath12k_wmi_sbs_range_present(struct ath12k_base *ab) +{ + if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS) || + (ab->wmi_ab.sbs_lower_band_end_freq && + ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS_LOWER_SHARE) && + ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS_UPPER_SHARE))) + return true; + + return false; +} + +static int ath12k_wmi_update_hw_mode_list(struct ath12k_base *ab) +{ + struct ath12k_svc_ext_info *svc_ext_info = &ab->wmi_ab.svc_ext_info; + struct ath12k_hw_mode_info *info = &ab->wmi_ab.hw_mode_info; + enum wmi_host_hw_mode_config_type hw_config_type; + struct ath12k_svc_ext_mac_phy_info *tmp; + bool dbs_mode = false, sbs_mode = false; + u32 i, j = 0; + + if (!svc_ext_info->num_hw_modes) { + ath12k_err(ab, "invalid number of hw modes"); + return -EINVAL; + } + + ath12k_dbg(ab, ATH12K_DBG_WMI, "updated HW mode list: num modes %d", + svc_ext_info->num_hw_modes); + + memset(info->freq_range_caps, 0, sizeof(info->freq_range_caps)); + + for (i = 0; i < svc_ext_info->num_hw_modes; i++) { + if (j >= ATH12K_MAX_MAC_PHY_CAP) + return -EINVAL; + + /* Update for MAC0 */ + tmp = &svc_ext_info->mac_phy_info[j++]; + hw_config_type = tmp->hw_mode_config_type; + ath12k_wmi_update_mac_freq_info(ab, hw_config_type, tmp->phy_id, tmp); + + /* SBS and DBS have dual MAC. Up to 2 MACs are considered. 
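+		 * mac_phy_info[] is laid out in hw-mode order with one entry
+		 * per MAC, so a dual-MAC mode consumes two consecutive
+		 * entries (hence the second j++ below) while a single-MAC
+		 * mode consumes one.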
*/ + if (hw_config_type == WMI_HOST_HW_MODE_DBS || + hw_config_type == WMI_HOST_HW_MODE_SBS_PASSIVE || + hw_config_type == WMI_HOST_HW_MODE_SBS || + hw_config_type == WMI_HOST_HW_MODE_DBS_OR_SBS) { + if (j >= ATH12K_MAX_MAC_PHY_CAP) + return -EINVAL; + /* Update for MAC1 */ + tmp = &svc_ext_info->mac_phy_info[j++]; + ath12k_wmi_update_mac_freq_info(ab, hw_config_type, + tmp->phy_id, tmp); + + if (hw_config_type == WMI_HOST_HW_MODE_DBS || + hw_config_type == WMI_HOST_HW_MODE_DBS_OR_SBS) + dbs_mode = true; + + if (ath12k_wmi_sbs_range_present(ab) && + (hw_config_type == WMI_HOST_HW_MODE_SBS_PASSIVE || + hw_config_type == WMI_HOST_HW_MODE_SBS || + hw_config_type == WMI_HOST_HW_MODE_DBS_OR_SBS)) + sbs_mode = true; + } + } + + info->support_dbs = dbs_mode; + info->support_sbs = sbs_mode; + + ath12k_wmi_dump_freq_range(ab); + + return 0; +} + static int ath12k_wmi_svc_rdy_ext2_parse(struct ath12k_base *ab, u16 tag, u16 len, const void *ptr, void *data) { + const struct ath12k_wmi_dbs_or_sbs_cap_params *dbs_or_sbs_caps; struct ath12k_wmi_pdev *wmi_handle = &ab->wmi_ab.wmi[0]; struct ath12k_wmi_svc_rdy_ext2_parse *parse = data; int ret; @@ -4967,7 +5476,32 @@ static int ath12k_wmi_svc_rdy_ext2_parse(struct ath12k_base *ab, } parse->mac_phy_caps_ext_done = true; + } else if (!parse->hal_reg_caps_ext2_done) { + parse->hal_reg_caps_ext2_done = true; + } else if (!parse->scan_radio_caps_ext2_done) { + parse->scan_radio_caps_ext2_done = true; + } else if (!parse->twt_caps_done) { + parse->twt_caps_done = true; + } else if (!parse->htt_msdu_idx_to_qtype_map_done) { + parse->htt_msdu_idx_to_qtype_map_done = true; + } else if (!parse->dbs_or_sbs_cap_ext_done) { + dbs_or_sbs_caps = ptr; + ab->wmi_ab.sbs_lower_band_end_freq = + __le32_to_cpu(dbs_or_sbs_caps->sbs_lower_band_end_freq); + + ath12k_dbg(ab, ATH12K_DBG_WMI, "sbs_lower_band_end_freq %u\n", + ab->wmi_ab.sbs_lower_band_end_freq); + + ret = ath12k_wmi_update_hw_mode_list(ab); + if (ret) { + ath12k_warn(ab, "failed to update hw mode list: %d\n", + ret); + return ret; + } + + parse->dbs_or_sbs_cap_ext_done = true; } + break; default: break; @@ -7626,6 +8160,64 @@ static int ath12k_wmi_pull_fw_stats(struct ath12k_base *ab, struct sk_buff *skb, &parse); } +static void ath12k_wmi_fw_stats_process(struct ath12k *ar, + struct ath12k_fw_stats *stats) +{ + struct ath12k_base *ab = ar->ab; + struct ath12k_pdev *pdev; + bool is_end = true; + size_t total_vdevs_started = 0; + int i; + + if (stats->stats_id == WMI_REQUEST_VDEV_STAT) { + if (list_empty(&stats->vdevs)) { + ath12k_warn(ab, "empty vdev stats"); + return; + } + /* FW sends all the active VDEV stats irrespective of PDEV, + * hence limit until the count of all VDEVs started + */ + rcu_read_lock(); + for (i = 0; i < ab->num_radios; i++) { + pdev = rcu_dereference(ab->pdevs_active[i]); + if (pdev && pdev->ar) + total_vdevs_started += pdev->ar->num_started_vdevs; + } + rcu_read_unlock(); + + if (total_vdevs_started) + is_end = ((++ar->fw_stats.num_vdev_recvd) == + total_vdevs_started); + + list_splice_tail_init(&stats->vdevs, + &ar->fw_stats.vdevs); + + if (is_end) + complete(&ar->fw_stats_done); + + return; + } + + if (stats->stats_id == WMI_REQUEST_BCN_STAT) { + if (list_empty(&stats->bcn)) { + ath12k_warn(ab, "empty beacon stats"); + return; + } + /* Mark end until we reached the count of all started VDEVs + * within the PDEV + */ + if (ar->num_started_vdevs) + is_end = ((++ar->fw_stats.num_bcn_recvd) == + ar->num_started_vdevs); + + list_splice_tail_init(&stats->bcn, + &ar->fw_stats.bcn); + + if 
(is_end) + complete(&ar->fw_stats_done); + } +} + static void ath12k_update_stats_event(struct ath12k_base *ab, struct sk_buff *skb) { struct ath12k_fw_stats stats = {}; @@ -7655,19 +8247,15 @@ static void ath12k_update_stats_event(struct ath12k_base *ab, struct sk_buff *sk spin_lock_bh(&ar->data_lock); - /* WMI_REQUEST_PDEV_STAT can be requested via .get_txpower mac ops or via - * debugfs fw stats. Therefore, processing it separately. - */ + /* Handle WMI_REQUEST_PDEV_STAT status update */ if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs); - ar->fw_stats.fw_stats_done = true; + complete(&ar->fw_stats_done); goto complete; } - /* WMI_REQUEST_VDEV_STAT and WMI_REQUEST_BCN_STAT are currently requested only - * via debugfs fw stats. Hence, processing these in debugfs context. - */ - ath12k_debugfs_fw_stats_process(ar, &stats); + /* Handle WMI_REQUEST_VDEV_STAT and WMI_REQUEST_BCN_STAT updates. */ + ath12k_wmi_fw_stats_process(ar, &stats); complete: complete(&ar->fw_stats_complete); @@ -9911,3 +10499,224 @@ int ath12k_wmi_send_vdev_set_tpc_power(struct ath12k *ar, return 0; } + +static int +ath12k_wmi_fill_disallowed_bmap(struct ath12k_base *ab, + struct wmi_disallowed_mlo_mode_bitmap_params *dislw_bmap, + struct wmi_mlo_link_set_active_arg *arg) +{ + struct wmi_ml_disallow_mode_bmap_arg *dislw_bmap_arg; + u8 i; + + if (arg->num_disallow_mode_comb > + ARRAY_SIZE(arg->disallow_bmap)) { + ath12k_warn(ab, "invalid num_disallow_mode_comb: %d", + arg->num_disallow_mode_comb); + return -EINVAL; + } + + dislw_bmap_arg = &arg->disallow_bmap[0]; + for (i = 0; i < arg->num_disallow_mode_comb; i++) { + dislw_bmap->tlv_header = + ath12k_wmi_tlv_cmd_hdr(0, sizeof(*dislw_bmap)); + dislw_bmap->disallowed_mode_bitmap = + cpu_to_le32(dislw_bmap_arg->disallowed_mode); + dislw_bmap->ieee_link_id_comb = + le32_encode_bits(dislw_bmap_arg->ieee_link_id[0], + WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_1) | + le32_encode_bits(dislw_bmap_arg->ieee_link_id[1], + WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_2) | + le32_encode_bits(dislw_bmap_arg->ieee_link_id[2], + WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_3) | + le32_encode_bits(dislw_bmap_arg->ieee_link_id[3], + WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_4); + + ath12k_dbg(ab, ATH12K_DBG_WMI, + "entry %d disallowed_mode %d ieee_link_id_comb 0x%x", + i, dislw_bmap_arg->disallowed_mode, + dislw_bmap_arg->ieee_link_id_comb); + dislw_bmap++; + dislw_bmap_arg++; + } + + return 0; +} + +int ath12k_wmi_send_mlo_link_set_active_cmd(struct ath12k_base *ab, + struct wmi_mlo_link_set_active_arg *arg) +{ + struct wmi_disallowed_mlo_mode_bitmap_params *disallowed_mode_bmap; + struct wmi_mlo_set_active_link_number_params *link_num_param; + u32 num_link_num_param = 0, num_vdev_bitmap = 0; + struct ath12k_wmi_base *wmi_ab = &ab->wmi_ab; + struct wmi_mlo_link_set_active_cmd *cmd; + u32 num_inactive_vdev_bitmap = 0; + u32 num_disallow_mode_comb = 0; + struct wmi_tlv *tlv; + struct sk_buff *skb; + __le32 *vdev_bitmap; + void *buf_ptr; + int i, ret; + u32 len; + + if (!arg->num_vdev_bitmap && !arg->num_link_entry) { + ath12k_warn(ab, "Invalid num_vdev_bitmap and num_link_entry"); + return -EINVAL; + } + + switch (arg->force_mode) { + case WMI_MLO_LINK_FORCE_MODE_ACTIVE_LINK_NUM: + case WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM: + num_link_num_param = arg->num_link_entry; + fallthrough; + case WMI_MLO_LINK_FORCE_MODE_ACTIVE: + case WMI_MLO_LINK_FORCE_MODE_INACTIVE: + case WMI_MLO_LINK_FORCE_MODE_NO_FORCE: + num_vdev_bitmap = 
arg->num_vdev_bitmap; + break; + case WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE: + num_vdev_bitmap = arg->num_vdev_bitmap; + num_inactive_vdev_bitmap = arg->num_inactive_vdev_bitmap; + break; + default: + ath12k_warn(ab, "Invalid force mode: %u", arg->force_mode); + return -EINVAL; + } + + num_disallow_mode_comb = arg->num_disallow_mode_comb; + len = sizeof(*cmd) + + TLV_HDR_SIZE + sizeof(*link_num_param) * num_link_num_param + + TLV_HDR_SIZE + sizeof(*vdev_bitmap) * num_vdev_bitmap + + TLV_HDR_SIZE + TLV_HDR_SIZE + TLV_HDR_SIZE + + TLV_HDR_SIZE + sizeof(*disallowed_mode_bmap) * num_disallow_mode_comb; + if (arg->force_mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE) + len += sizeof(*vdev_bitmap) * num_inactive_vdev_bitmap; + + skb = ath12k_wmi_alloc_skb(wmi_ab, len); + if (!skb) + return -ENOMEM; + + cmd = (struct wmi_mlo_link_set_active_cmd *)skb->data; + cmd->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_MLO_LINK_SET_ACTIVE_CMD, + sizeof(*cmd)); + cmd->force_mode = cpu_to_le32(arg->force_mode); + cmd->reason = cpu_to_le32(arg->reason); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "mode %d reason %d num_link_num_param %d num_vdev_bitmap %d inactive %d num_disallow_mode_comb %d", + arg->force_mode, arg->reason, num_link_num_param, + num_vdev_bitmap, num_inactive_vdev_bitmap, + num_disallow_mode_comb); + + buf_ptr = skb->data + sizeof(*cmd); + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, + sizeof(*link_num_param) * num_link_num_param); + buf_ptr += TLV_HDR_SIZE; + + if (num_link_num_param) { + cmd->ctrl_flags = + le32_encode_bits(arg->ctrl_flags.dync_force_link_num ? 1 : 0, + CRTL_F_DYNC_FORCE_LINK_NUM); + + link_num_param = buf_ptr; + for (i = 0; i < num_link_num_param; i++) { + link_num_param->tlv_header = + ath12k_wmi_tlv_cmd_hdr(0, sizeof(*link_num_param)); + link_num_param->num_of_link = + cpu_to_le32(arg->link_num[i].num_of_link); + link_num_param->vdev_type = + cpu_to_le32(arg->link_num[i].vdev_type); + link_num_param->vdev_subtype = + cpu_to_le32(arg->link_num[i].vdev_subtype); + link_num_param->home_freq = + cpu_to_le32(arg->link_num[i].home_freq); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "entry %d num_of_link %d vdev type %d subtype %d freq %d control_flags %d", + i, arg->link_num[i].num_of_link, + arg->link_num[i].vdev_type, + arg->link_num[i].vdev_subtype, + arg->link_num[i].home_freq, + __le32_to_cpu(cmd->ctrl_flags)); + link_num_param++; + } + + buf_ptr += sizeof(*link_num_param) * num_link_num_param; + } + + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, + sizeof(*vdev_bitmap) * num_vdev_bitmap); + buf_ptr += TLV_HDR_SIZE; + + if (num_vdev_bitmap) { + vdev_bitmap = buf_ptr; + for (i = 0; i < num_vdev_bitmap; i++) { + vdev_bitmap[i] = cpu_to_le32(arg->vdev_bitmap[i]); + ath12k_dbg(ab, ATH12K_DBG_WMI, "entry %d vdev_id_bitmap 0x%x", + i, arg->vdev_bitmap[i]); + } + + buf_ptr += sizeof(*vdev_bitmap) * num_vdev_bitmap; + } + + if (arg->force_mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE) { + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, + sizeof(*vdev_bitmap) * + num_inactive_vdev_bitmap); + buf_ptr += TLV_HDR_SIZE; + + if (num_inactive_vdev_bitmap) { + vdev_bitmap = buf_ptr; + for (i = 0; i < num_inactive_vdev_bitmap; i++) { + vdev_bitmap[i] = + cpu_to_le32(arg->inactive_vdev_bitmap[i]); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "entry %d inactive_vdev_id_bitmap 0x%x", + i, arg->inactive_vdev_bitmap[i]); + } + + buf_ptr += sizeof(*vdev_bitmap) * num_inactive_vdev_bitmap; + } + } else { + /* add empty vdev bitmap2 tlv */ + tlv 
= buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, 0); + buf_ptr += TLV_HDR_SIZE; + } + + /* add empty ieee_link_id_bitmap tlv */ + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, 0); + buf_ptr += TLV_HDR_SIZE; + + /* add empty ieee_link_id_bitmap2 tlv */ + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, 0); + buf_ptr += TLV_HDR_SIZE; + + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, + sizeof(*disallowed_mode_bmap) * + arg->num_disallow_mode_comb); + buf_ptr += TLV_HDR_SIZE; + + ret = ath12k_wmi_fill_disallowed_bmap(ab, buf_ptr, arg); + if (ret) + goto free_skb; + + ret = ath12k_wmi_cmd_send(&wmi_ab->wmi[0], skb, WMI_MLO_LINK_SET_ACTIVE_CMDID); + if (ret) { + ath12k_warn(ab, + "failed to send WMI_MLO_LINK_SET_ACTIVE_CMDID: %d\n", ret); + goto free_skb; + } + + ath12k_dbg(ab, ATH12K_DBG_WMI, "WMI mlo link set active cmd"); + + return ret; + +free_skb: + dev_kfree_skb(skb); + return ret; +} diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index ac18f75e0449..c640ffa180c8 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -1974,6 +1974,7 @@ enum wmi_tlv_tag { WMI_TAG_TPC_STATS_CTL_PWR_TABLE_EVENT, WMI_TAG_VDEV_SET_TPC_POWER_CMD = 0x3B5, WMI_TAG_VDEV_CH_POWER_INFO, + WMI_TAG_MLO_LINK_SET_ACTIVE_CMD = 0x3BE, WMI_TAG_EHT_RATE_SET = 0x3C4, WMI_TAG_DCS_AWGN_INT_TYPE = 0x3C5, WMI_TAG_MLO_TX_SEND_PARAMS, @@ -2617,6 +2618,8 @@ struct ath12k_wmi_soc_mac_phy_hw_mode_caps_params { __le32 num_chainmask_tables; } __packed; +#define WMI_HW_MODE_CAP_CFG_TYPE GENMASK(27, 0) + struct ath12k_wmi_hw_mode_cap_params { __le32 tlv_header; __le32 hw_mode_id; @@ -2666,6 +2669,12 @@ struct ath12k_wmi_mac_phy_caps_params { __le32 he_cap_info_2g_ext; __le32 he_cap_info_5g_ext; __le32 he_cap_info_internal; + __le32 wireless_modes; + __le32 low_2ghz_chan_freq; + __le32 high_2ghz_chan_freq; + __le32 low_5ghz_chan_freq; + __le32 high_5ghz_chan_freq; + __le32 nss_ratio; } __packed; struct ath12k_wmi_hal_reg_caps_ext_params { @@ -2739,6 +2748,11 @@ struct wmi_service_ready_ext2_event { __le32 default_num_msduq_supported_per_tid; } __packed; +struct ath12k_wmi_dbs_or_sbs_cap_params { + __le32 hw_mode_id; + __le32 sbs_lower_band_end_freq; +} __packed; + struct ath12k_wmi_caps_ext_params { __le32 hw_mode_id; __le32 pdev_and_hw_link_ids; @@ -5049,6 +5063,53 @@ struct ath12k_wmi_pdev { u32 rx_decap_mode; }; +struct ath12k_hw_mode_freq_range_arg { + u32 low_2ghz_freq; + u32 high_2ghz_freq; + u32 low_5ghz_freq; + u32 high_5ghz_freq; +}; + +struct ath12k_svc_ext_mac_phy_info { + enum wmi_host_hw_mode_config_type hw_mode_config_type; + u32 phy_id; + u32 supported_bands; + struct ath12k_hw_mode_freq_range_arg hw_freq_range; +}; + +#define ATH12K_MAX_MAC_PHY_CAP 8 + +struct ath12k_svc_ext_info { + u32 num_hw_modes; + struct ath12k_svc_ext_mac_phy_info mac_phy_info[ATH12K_MAX_MAC_PHY_CAP]; +}; + +/** + * enum ath12k_hw_mode - enum for host mode + * @ATH12K_HW_MODE_SMM: Single mac mode + * @ATH12K_HW_MODE_DBS: DBS mode + * @ATH12K_HW_MODE_SBS: SBS mode with either high share or low share + * @ATH12K_HW_MODE_SBS_UPPER_SHARE: Higher 5 GHz shared with 2.4 GHz + * @ATH12K_HW_MODE_SBS_LOWER_SHARE: Lower 5 GHz shared with 2.4 GHz + * @ATH12K_HW_MODE_MAX: Max, used to indicate invalid mode + */ +enum ath12k_hw_mode { + ATH12K_HW_MODE_SMM, + ATH12K_HW_MODE_DBS, + ATH12K_HW_MODE_SBS, + ATH12K_HW_MODE_SBS_UPPER_SHARE, + ATH12K_HW_MODE_SBS_LOWER_SHARE, + 
ATH12K_HW_MODE_MAX, +}; + +struct ath12k_hw_mode_info { + bool support_dbs:1; + bool support_sbs:1; + + struct ath12k_hw_mode_freq_range_arg freq_range_caps[ATH12K_HW_MODE_MAX] + [MAX_RADIOS]; +}; + struct ath12k_wmi_base { struct ath12k_base *ab; struct ath12k_wmi_pdev wmi[MAX_RADIOS]; @@ -5066,6 +5127,10 @@ struct ath12k_wmi_base { enum wmi_host_hw_mode_config_type preferred_hw_mode; struct ath12k_wmi_target_cap_arg *targ_cap; + + struct ath12k_svc_ext_info svc_ext_info; + u32 sbs_lower_band_end_freq; + struct ath12k_hw_mode_info hw_mode_info; }; struct wmi_pdev_set_bios_interface_cmd { @@ -5997,6 +6062,118 @@ struct wmi_vdev_set_tpc_power_cmd { */ } __packed; +#define CRTL_F_DYNC_FORCE_LINK_NUM GENMASK(3, 2) + +struct wmi_mlo_link_set_active_cmd { + __le32 tlv_header; + __le32 force_mode; + __le32 reason; + __le32 use_ieee_link_id_bitmap; + struct ath12k_wmi_mac_addr_params ap_mld_mac_addr; + __le32 ctrl_flags; +} __packed; + +struct wmi_mlo_set_active_link_number_params { + __le32 tlv_header; + __le32 num_of_link; + __le32 vdev_type; + __le32 vdev_subtype; + __le32 home_freq; +} __packed; + +#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_1 GENMASK(7, 0) +#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_2 GENMASK(15, 8) +#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_3 GENMASK(23, 16) +#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_4 GENMASK(31, 24) + +struct wmi_disallowed_mlo_mode_bitmap_params { + __le32 tlv_header; + __le32 disallowed_mode_bitmap; + __le32 ieee_link_id_comb; +} __packed; + +enum wmi_mlo_link_force_mode { + WMI_MLO_LINK_FORCE_MODE_ACTIVE = 1, + WMI_MLO_LINK_FORCE_MODE_INACTIVE = 2, + WMI_MLO_LINK_FORCE_MODE_ACTIVE_LINK_NUM = 3, + WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM = 4, + WMI_MLO_LINK_FORCE_MODE_NO_FORCE = 5, + WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE = 6, + WMI_MLO_LINK_FORCE_MODE_NON_FORCE_UPDATE = 7, +}; + +enum wmi_mlo_link_force_reason { + WMI_MLO_LINK_FORCE_REASON_NEW_CONNECT = 1, + WMI_MLO_LINK_FORCE_REASON_NEW_DISCONNECT = 2, + WMI_MLO_LINK_FORCE_REASON_LINK_REMOVAL = 3, + WMI_MLO_LINK_FORCE_REASON_TDLS = 4, + WMI_MLO_LINK_FORCE_REASON_REVERT_FAILURE = 5, + WMI_MLO_LINK_FORCE_REASON_LINK_DELETE = 6, + WMI_MLO_LINK_FORCE_REASON_SINGLE_LINK_EMLSR_OP = 7, +}; + +struct wmi_mlo_link_num_arg { + u32 num_of_link; + u32 vdev_type; + u32 vdev_subtype; + u32 home_freq; +}; + +struct wmi_mlo_control_flags_arg { + bool overwrite_force_active_bitmap; + bool overwrite_force_inactive_bitmap; + bool dync_force_link_num; + bool post_re_evaluate; + u8 post_re_evaluate_loops; + bool dont_reschedule_workqueue; +}; + +struct wmi_ml_link_force_cmd_arg { + u8 ap_mld_mac_addr[ETH_ALEN]; + u16 ieee_link_id_bitmap; + u16 ieee_link_id_bitmap2; + u8 link_num; +}; + +struct wmi_ml_disallow_mode_bmap_arg { + u32 disallowed_mode; + union { + u32 ieee_link_id_comb; + u8 ieee_link_id[4]; + }; +}; + +/* maximum size of link number param array + * for MLO link set active command + */ +#define WMI_MLO_LINK_NUM_SZ 2 + +/* maximum size of vdev bitmap array for + * MLO link set active command + */ +#define WMI_MLO_VDEV_BITMAP_SZ 2 + +/* Max number of disallowed bitmap combination + * sent to firmware + */ +#define WMI_ML_MAX_DISALLOW_BMAP_COMB 4 + +struct wmi_mlo_link_set_active_arg { + enum wmi_mlo_link_force_mode force_mode; + enum wmi_mlo_link_force_reason reason; + u32 num_link_entry; + u32 num_vdev_bitmap; + u32 num_inactive_vdev_bitmap; + struct wmi_mlo_link_num_arg link_num[WMI_MLO_LINK_NUM_SZ]; + u32 vdev_bitmap[WMI_MLO_VDEV_BITMAP_SZ]; + u32 
inactive_vdev_bitmap[WMI_MLO_VDEV_BITMAP_SZ];
+	struct wmi_mlo_control_flags_arg ctrl_flags;
+	bool use_ieee_link_id;
+	struct wmi_ml_link_force_cmd_arg force_cmd;
+	u32 num_disallow_mode_comb;
+	struct wmi_ml_disallow_mode_bmap_arg disallow_bmap[WMI_ML_MAX_DISALLOW_BMAP_COMB];
+};
+
 void ath12k_wmi_init_qcn9274(struct ath12k_base *ab,
 			     struct ath12k_wmi_resource_config_arg *config);
 void ath12k_wmi_init_wcn7850(struct ath12k_base *ab,
@@ -6195,5 +6372,6 @@ bool ath12k_wmi_supports_6ghz_cc_ext(struct ath12k *ar);
 int ath12k_wmi_send_vdev_set_tpc_power(struct ath12k *ar,
 				       u32 vdev_id,
 				       struct ath12k_reg_tpc_power_info *param);
-
+int ath12k_wmi_send_mlo_link_set_active_cmd(struct ath12k_base *ab,
+					    struct wmi_mlo_link_set_active_arg *param);
 #endif
diff --git a/drivers/net/wireless/ath/ath6kl/bmi.c b/drivers/net/wireless/ath/ath6kl/bmi.c
index af98e871199d..5a9e93fd1ef4 100644
--- a/drivers/net/wireless/ath/ath6kl/bmi.c
+++ b/drivers/net/wireless/ath/ath6kl/bmi.c
@@ -87,7 +87,9 @@ int ath6kl_bmi_get_target_info(struct ath6kl *ar,
 	 * We need to do some backwards compatibility to make this work.
 	 */
 	if (le32_to_cpu(targ_info->byte_count) != sizeof(*targ_info)) {
-		WARN_ON(1);
+		ath6kl_err("mismatched byte count %d vs. expected %zd\n",
+			   le32_to_cpu(targ_info->byte_count),
+			   sizeof(*targ_info));
 		return -EINVAL;
 	}

diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index a3e03580cd9f..564ca6a61985 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -438,14 +438,21 @@ static void carl9170_usb_rx_complete(struct urb *urb)
 		if (atomic_read(&ar->rx_anch_urbs) == 0) {
 			/*
-			 * The system is too slow to cope with
-			 * the enormous workload. We have simply
-			 * run out of active rx urbs and this
-			 * unfortunately leads to an unpredictable
-			 * device.
+			 * At this point, either the system is too slow to
+			 * cope with the enormous workload (so we have simply
+			 * run out of active rx urbs and this unfortunately
+			 * leads to an unpredictable device), or the device
+			 * is not fully functional after an unsuccessful
+			 * firmware loading attempt (so it doesn't pass
+			 * ieee80211_register_hw() and there is no internal
+			 * workqueue at all).
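+			 * Hence the guard below: the ping worker is only
+			 * scheduled once the device has actually been
+			 * registered with mac80211.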
*/ - ieee80211_queue_work(ar->hw, &ar->ping_work); + if (ar->registered) + ieee80211_queue_work(ar->hw, &ar->ping_work); + else + pr_warn_once("device %s is not registered\n", + dev_name(&ar->udev->dev)); } } else { /* diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c index 67172385a5d6..89d4394cedcf 100644 --- a/drivers/net/wireless/ath/wil6210/interrupt.c +++ b/drivers/net/wireless/ath/wil6210/interrupt.c @@ -179,9 +179,11 @@ void wil_mask_irq(struct wil6210_priv *wil) wil_dbg_irq(wil, "mask_irq\n"); wil6210_mask_irq_tx(wil); - wil6210_mask_irq_tx_edma(wil); + if (wil->use_enhanced_dma_hw) + wil6210_mask_irq_tx_edma(wil); wil6210_mask_irq_rx(wil); - wil6210_mask_irq_rx_edma(wil); + if (wil->use_enhanced_dma_hw) + wil6210_mask_irq_rx_edma(wil); wil6210_mask_irq_misc(wil, true); wil6210_mask_irq_pseudo(wil); } @@ -190,10 +192,12 @@ void wil_unmask_irq(struct wil6210_priv *wil) { wil_dbg_irq(wil, "unmask_irq\n"); - wil_w(wil, RGF_DMA_EP_RX_ICR + offsetof(struct RGF_ICR, ICC), - WIL_ICR_ICC_VALUE); - wil_w(wil, RGF_DMA_EP_TX_ICR + offsetof(struct RGF_ICR, ICC), - WIL_ICR_ICC_VALUE); + if (wil->use_enhanced_dma_hw) { + wil_w(wil, RGF_DMA_EP_RX_ICR + offsetof(struct RGF_ICR, ICC), + WIL_ICR_ICC_VALUE); + wil_w(wil, RGF_DMA_EP_TX_ICR + offsetof(struct RGF_ICR, ICC), + WIL_ICR_ICC_VALUE); + } wil_w(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, ICC), WIL_ICR_ICC_MISC_VALUE); wil_w(wil, RGF_INT_GEN_TX_ICR + offsetof(struct RGF_ICR, ICC), @@ -845,10 +849,12 @@ void wil6210_clear_irq(struct wil6210_priv *wil) offsetof(struct RGF_ICR, ICR)); wil_clear32(wil->csr + HOSTADDR(RGF_DMA_EP_TX_ICR) + offsetof(struct RGF_ICR, ICR)); - wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_RX_ICR) + - offsetof(struct RGF_ICR, ICR)); - wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_TX_ICR) + - offsetof(struct RGF_ICR, ICR)); + if (wil->use_enhanced_dma_hw) { + wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_RX_ICR) + + offsetof(struct RGF_ICR, ICR)); + wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_TX_ICR) + + offsetof(struct RGF_ICR, ICR)); + } wil_clear32(wil->csr + HOSTADDR(RGF_DMA_EP_MISC_ICR) + offsetof(struct RGF_ICR, ICR)); wmb(); /* make sure write completed */ diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c index 0e5130d1fccd..031d88bf6393 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c @@ -203,7 +203,8 @@ il4965_rs_extract_rate(u32 rate_n_flags) return (u8) (rate_n_flags & 0xFF); } -static void +/* noinline works around https://github.com/llvm/llvm-project/issues/143908 */ +static noinline_for_stack void il4965_rs_rate_scale_clear_win(struct il_rate_scale_data *win) { win->data = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c index dbfd45948e8b..66211426aa3a 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c @@ -1316,6 +1316,7 @@ static struct iwl_op_mode *iwl_op_mode_dvm_start(struct iwl_trans *trans, sizeof(trans->conf.no_reclaim_cmds)); memcpy(trans->conf.no_reclaim_cmds, no_reclaim_cmds, sizeof(no_reclaim_cmds)); + trans->conf.n_no_reclaim_cmds = ARRAY_SIZE(no_reclaim_cmds); switch (iwlwifi_mod_params.amsdu_size) { case IWL_AMSDU_DEF: diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index e8820e7cf8fa..1774bb84dd3f 100644 --- 
a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -77,6 +77,7 @@ void iwl_construct_mld(struct iwl_mld *mld, struct iwl_trans *trans, /* Setup async RX handling */ spin_lock_init(&mld->async_handlers_lock); + INIT_LIST_HEAD(&mld->async_handlers_list); wiphy_work_init(&mld->async_handlers_wk, iwl_mld_async_handlers_wk); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c index 81ca9ff67be9..3f8b840871d3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c @@ -32,9 +32,9 @@ static void iwl_mvm_mld_mac_ctxt_cmd_common(struct iwl_mvm *mvm, unsigned int link_id; int cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, WIDE_ID(MAC_CONF_GROUP, - MAC_CONFIG_CMD), 0); + MAC_CONFIG_CMD), 1); - if (WARN_ON(cmd_ver < 1 && cmd_ver > 3)) + if (WARN_ON(cmd_ver > 3)) return; cmd->id_and_color = cpu_to_le32(mvmvif->id); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c index cb36baac14da..4f2be0c1bd97 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c @@ -166,7 +166,7 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans, struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_context_info *ctxt_info; struct iwl_context_info_rbd_cfg *rx_cfg; - u32 control_flags = 0, rb_size; + u32 control_flags = 0, rb_size, cb_size; dma_addr_t phys; int ret; @@ -202,11 +202,12 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans, rb_size = IWL_CTXT_INFO_RB_SIZE_4K; } - WARN_ON(RX_QUEUE_CB_SIZE(iwl_trans_get_num_rbds(trans)) > 12); + cb_size = RX_QUEUE_CB_SIZE(iwl_trans_get_num_rbds(trans)); + if (WARN_ON(cb_size > 12)) + cb_size = 12; + control_flags = IWL_CTXT_INFO_TFD_FORMAT_LONG; - control_flags |= - u32_encode_bits(RX_QUEUE_CB_SIZE(iwl_trans_get_num_rbds(trans)), - IWL_CTXT_INFO_RB_CB_SIZE); + control_flags |= u32_encode_bits(cb_size, IWL_CTXT_INFO_RB_CB_SIZE); control_flags |= u32_encode_bits(rb_size, IWL_CTXT_INFO_RB_SIZE); ctxt_info->control.control_flags = cpu_to_le32(control_flags); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 656f8b06c27b..0a9e0dbb58fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1501,11 +1501,27 @@ static int _iwl_pci_resume(struct device *device, bool restore) * Scratch value was altered, this means the device was powered off, we * need to reset it completely. * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, - * so assume that any bits there mean that the device is usable. + * but not bits [15:8]. So if we have bits set in lower word, assume + * the device is alive. + * For older devices, just try silently to grab the NIC. */ - if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ && - !iwl_read32(trans, CSR_FUNC_SCRATCH)) - device_was_powered_off = true; + if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { + if (!(iwl_read32(trans, CSR_FUNC_SCRATCH) & + CSR_FUNC_SCRATCH_POWER_OFF_MASK)) + device_was_powered_off = true; + } else { + /* + * bh are re-enabled by iwl_trans_pcie_release_nic_access, + * so re-enable them if _iwl_trans_pcie_grab_nic_access fails. 
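+	 * (Either way bottom halves end up enabled again: the success
+	 * path re-enables them via iwl_trans_pcie_release_nic_access(),
+	 * the failure path via the explicit local_bh_enable() below.)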
+ */ + local_bh_disable(); + if (_iwl_trans_pcie_grab_nic_access(trans, true)) { + iwl_trans_pcie_release_nic_access(trans); + } else { + device_was_powered_off = true; + local_bh_enable(); + } + } if (restore || device_was_powered_off) { trans->state = IWL_TRANS_NO_FW; diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c index 738bafc3749b..66f0f5377ac1 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n.c +++ b/drivers/net/wireless/marvell/mwifiex/11n.c @@ -403,14 +403,12 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv, if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 && bss_desc->bcn_ht_oper->ht_param & - IEEE80211_HT_PARAM_CHAN_WIDTH_ANY) { - chan_list->chan_scan_param[0].radio_type |= - CHAN_BW_40MHZ << 2; + IEEE80211_HT_PARAM_CHAN_WIDTH_ANY) SET_SECONDARYCHAN(chan_list->chan_scan_param[0]. radio_type, (bss_desc->bcn_ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET)); - } + *buffer += struct_size(chan_list, chan_scan_param, 1); ret_len += struct_size(chan_list, chan_scan_param, 1); } diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 0b50da2f1175..6b3ac8ae3f34 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -429,21 +429,14 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, pdu->result = le64_to_cpu(nvme_req(req)->result.u64); /* - * For iopoll, complete it directly. Note that using the uring_cmd - * helper for this is safe only because we check blk_rq_is_poll(). - * As that returns false if we're NOT on a polled queue, then it's - * safe to use the polled completion helper. - * - * Otherwise, move the completion to task work. + * IOPOLL could potentially complete this request directly, but + * if multiple rings are polling on the same queue, then it's possible + * for one ring to find completions for another ring. Punting the + * completion via task_work will always direct it to the right + * location, rather than potentially complete requests for ringA + * under iopoll invocations from ringB. 
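With the iopoll special case removed, nvme_uring_cmd_end_io() here always bounces the completion to task context. Stripped to its essentials, the handler now has this shape (my_end_io and my_task_cb are stand-ins for the nvme names):

    static enum rq_end_io_ret my_end_io(struct request *req, blk_status_t err)
    {
            struct io_uring_cmd *ioucmd = req->end_io_data;

            /* Defer to the submitting task so the completion always
             * reaches the ring that owns the request, even when another
             * ring is polling the same queue. */
            io_uring_cmd_do_in_task_lazy(ioucmd, my_task_cb);
            return RQ_END_IO_FREE;
    }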
*/ - if (blk_rq_is_poll(req)) { - if (pdu->bio) - blk_rq_unmap_user(pdu->bio); - io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); - } else { - io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); - } - + io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); return RQ_END_IO_FREE; } diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index ebd342bda235..91d2d92717d9 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -771,7 +771,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) u16 ignored_events = PCI_EXP_SLTSTA_DLLSC; if (!ctrl->inband_presence_disabled) - ignored_events |= events & PCI_EXP_SLTSTA_PDC; + ignored_events |= PCI_EXP_SLTSTA_PDC; events &= ~ignored_events; pciehp_ignore_link_change(ctrl, pdev, irq, ignored_events); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e9448d55113b..9e42090fb108 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3217,14 +3217,14 @@ void pci_pm_init(struct pci_dev *dev) /* find PCI PM capability in list */ pm = pci_find_capability(dev, PCI_CAP_ID_PM); if (!pm) - return; + goto poweron; /* Check device's ability to generate PME# */ pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { pci_err(dev, "unsupported PM cap regs version (%u)\n", pmc & PCI_PM_CAP_VER_MASK); - return; + goto poweron; } dev->pm_cap = pm; @@ -3269,6 +3269,7 @@ void pci_pm_init(struct pci_dev *dev) pci_read_config_word(dev, PCI_STATUS, &status); if (status & PCI_STATUS_IMM_READY) dev->imm_ready = 1; +poweron: pci_pm_power_up_and_verify_state(dev); pm_runtime_forbid(&dev->dev); pm_runtime_set_active(&dev->dev); diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index fe2d52e434db..8a2ef74862d3 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -374,11 +374,6 @@ static struct st_pio_control *st_get_pio_control( } /* Low level functions.. 
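In the pciehp_ist() hunk above, the important detail is that the ignore mask no longer depends on which bits happened to be latched when the interrupt fired: a presence-detect change that arrives later in the ignore window is now suppressed as well. Schematically (a sketch of the masking only, not the full handler):

    u16 ignored_events = PCI_EXP_SLTSTA_DLLSC;

    /* Ignore PDC for the whole window, not "events & ...PDC",
     * which would miss a PDC that fires after this point. */
    if (!ctrl->inband_presence_disabled)
            ignored_events |= PCI_EXP_SLTSTA_PDC;

    events &= ~ignored_events;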
*/ -static inline int st_gpio_bank(int gpio) -{ - return gpio/ST_GPIO_PINS_PER_BANK; -} - static inline int st_gpio_pin(int gpio) { return gpio%ST_GPIO_PINS_PER_BANK; diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c index d6bb8f58978d..4edb20e61951 100644 --- a/drivers/pinctrl/pinctrl-tb10x.c +++ b/drivers/pinctrl/pinctrl-tb10x.c @@ -823,7 +823,7 @@ static struct platform_driver tb10x_pinctrl_pdrv = { .remove = tb10x_pinctrl_remove, .driver = { .name = "tb10x_pinctrl", - .of_match_table = of_match_ptr(tb10x_pinctrl_dt_ids), + .of_match_table = tb10x_pinctrl_dt_ids, } }; diff --git a/drivers/pinctrl/qcom/pinctrl-apq8064.c b/drivers/pinctrl/qcom/pinctrl-apq8064.c index 20c3b9025044..3654913f1ae5 100644 --- a/drivers/pinctrl/qcom/pinctrl-apq8064.c +++ b/drivers/pinctrl/qcom/pinctrl-apq8064.c @@ -629,7 +629,6 @@ static struct platform_driver apq8064_pinctrl_driver = { .of_match_table = apq8064_pinctrl_of_match, }, .probe = apq8064_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init apq8064_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-apq8084.c b/drivers/pinctrl/qcom/pinctrl-apq8084.c index 3fc0a40762b6..27693cd64881 100644 --- a/drivers/pinctrl/qcom/pinctrl-apq8084.c +++ b/drivers/pinctrl/qcom/pinctrl-apq8084.c @@ -1207,7 +1207,6 @@ static struct platform_driver apq8084_pinctrl_driver = { .of_match_table = apq8084_pinctrl_of_match, }, .probe = apq8084_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init apq8084_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq4019.c b/drivers/pinctrl/qcom/pinctrl-ipq4019.c index 1f7944dd829d..6ede3149b6e1 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq4019.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq4019.c @@ -710,7 +710,6 @@ static struct platform_driver ipq4019_pinctrl_driver = { .of_match_table = ipq4019_pinctrl_of_match, }, .probe = ipq4019_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq4019_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq5018.c b/drivers/pinctrl/qcom/pinctrl-ipq5018.c index e2951f81c3ee..10b99d5d8a11 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq5018.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq5018.c @@ -754,7 +754,6 @@ static struct platform_driver ipq5018_pinctrl_driver = { .of_match_table = ipq5018_pinctrl_of_match, }, .probe = ipq5018_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq5018_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq5332.c b/drivers/pinctrl/qcom/pinctrl-ipq5332.c index 625f8014051f..1ac2fc09c119 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq5332.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq5332.c @@ -834,7 +834,6 @@ static struct platform_driver ipq5332_pinctrl_driver = { .of_match_table = ipq5332_pinctrl_of_match, }, .probe = ipq5332_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq5332_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq5424.c b/drivers/pinctrl/qcom/pinctrl-ipq5424.c index 0d610b076da3..7ff1f8acc1a3 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq5424.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq5424.c @@ -791,7 +791,6 @@ static struct platform_driver ipq5424_pinctrl_driver = { .of_match_table = ipq5424_pinctrl_of_match, }, .probe = ipq5424_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq5424_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq6018.c b/drivers/pinctrl/qcom/pinctrl-ipq6018.c index 0ad08647dbcd..a4ba980252e1 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq6018.c +++ 
b/drivers/pinctrl/qcom/pinctrl-ipq6018.c @@ -1080,7 +1080,6 @@ static struct platform_driver ipq6018_pinctrl_driver = { .of_match_table = ipq6018_pinctrl_of_match, }, .probe = ipq6018_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq6018_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq8064.c b/drivers/pinctrl/qcom/pinctrl-ipq8064.c index e2bb94e86aef..0a9e357e64c6 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq8064.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq8064.c @@ -631,7 +631,6 @@ static struct platform_driver ipq8064_pinctrl_driver = { .of_match_table = ipq8064_pinctrl_of_match, }, .probe = ipq8064_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq8064_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq8074.c b/drivers/pinctrl/qcom/pinctrl-ipq8074.c index 337f3a1c92c1..482f13282fc2 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq8074.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq8074.c @@ -1041,7 +1041,6 @@ static struct platform_driver ipq8074_pinctrl_driver = { .of_match_table = ipq8074_pinctrl_of_match, }, .probe = ipq8074_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq8074_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq9574.c b/drivers/pinctrl/qcom/pinctrl-ipq9574.c index e2491617b236..89c05d8eb550 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq9574.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq9574.c @@ -799,7 +799,6 @@ static struct platform_driver ipq9574_pinctrl_driver = { .of_match_table = ipq9574_pinctrl_of_match, }, .probe = ipq9574_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq9574_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-mdm9607.c b/drivers/pinctrl/qcom/pinctrl-mdm9607.c index e7cd3ef1cf3e..3e18ba124fed 100644 --- a/drivers/pinctrl/qcom/pinctrl-mdm9607.c +++ b/drivers/pinctrl/qcom/pinctrl-mdm9607.c @@ -1059,7 +1059,6 @@ static struct platform_driver mdm9607_pinctrl_driver = { .of_match_table = mdm9607_pinctrl_of_match, }, .probe = mdm9607_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init mdm9607_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-mdm9615.c b/drivers/pinctrl/qcom/pinctrl-mdm9615.c index 0a2ae383d3d5..bea1ca3d1b7f 100644 --- a/drivers/pinctrl/qcom/pinctrl-mdm9615.c +++ b/drivers/pinctrl/qcom/pinctrl-mdm9615.c @@ -446,7 +446,6 @@ static struct platform_driver mdm9615_pinctrl_driver = { .of_match_table = mdm9615_pinctrl_of_match, }, .probe = mdm9615_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init mdm9615_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index f012ea88aa22..5c4687de1464 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -1442,7 +1442,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) girq->handler = handle_bad_irq; girq->parents[0] = pctrl->irq; - ret = gpiochip_add_data(&pctrl->chip, pctrl); + ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl); if (ret) { dev_err(pctrl->dev, "Failed register gpiochip\n"); return ret; @@ -1463,7 +1463,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) dev_name(pctrl->dev), 0, 0, chip->ngpio); if (ret) { dev_err(pctrl->dev, "Failed to add pin range\n"); - gpiochip_remove(&pctrl->chip); return ret; } } @@ -1599,13 +1598,5 @@ int msm_pinctrl_probe(struct platform_device *pdev, } EXPORT_SYMBOL(msm_pinctrl_probe); -void msm_pinctrl_remove(struct platform_device *pdev) -{ - struct msm_pinctrl *pctrl = 
platform_get_drvdata(pdev); - - gpiochip_remove(&pctrl->chip); -} -EXPORT_SYMBOL(msm_pinctrl_remove); - MODULE_DESCRIPTION("Qualcomm Technologies, Inc. TLMM driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/pinctrl/qcom/pinctrl-msm.h b/drivers/pinctrl/qcom/pinctrl-msm.h index 63852ed70295..d7dc0947bb16 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.h +++ b/drivers/pinctrl/qcom/pinctrl-msm.h @@ -171,6 +171,5 @@ extern const struct dev_pm_ops msm_pinctrl_dev_pm_ops; int msm_pinctrl_probe(struct platform_device *pdev, const struct msm_pinctrl_soc_data *soc_data); -void msm_pinctrl_remove(struct platform_device *pdev); #endif diff --git a/drivers/pinctrl/qcom/pinctrl-msm8226.c b/drivers/pinctrl/qcom/pinctrl-msm8226.c index 64fee70f1772..f9a957347340 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8226.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8226.c @@ -654,7 +654,6 @@ static struct platform_driver msm8226_pinctrl_driver = { .of_match_table = msm8226_pinctrl_of_match, }, .probe = msm8226_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8226_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8660.c b/drivers/pinctrl/qcom/pinctrl-msm8660.c index 999a5f867eb5..4dbc19ffd80e 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8660.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8660.c @@ -981,7 +981,6 @@ static struct platform_driver msm8660_pinctrl_driver = { .of_match_table = msm8660_pinctrl_of_match, }, .probe = msm8660_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8660_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8909.c b/drivers/pinctrl/qcom/pinctrl-msm8909.c index 756856d20d6b..0aa4f77b774f 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8909.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8909.c @@ -929,7 +929,6 @@ static struct platform_driver msm8909_pinctrl_driver = { .of_match_table = msm8909_pinctrl_of_match, }, .probe = msm8909_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8909_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8916.c b/drivers/pinctrl/qcom/pinctrl-msm8916.c index cea5c54f92fe..0dfc6dd33d58 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8916.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8916.c @@ -969,7 +969,6 @@ static struct platform_driver msm8916_pinctrl_driver = { .of_match_table = msm8916_pinctrl_of_match, }, .probe = msm8916_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8916_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8917.c b/drivers/pinctrl/qcom/pinctrl-msm8917.c index 350636807b07..2e1a94ab18b2 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8917.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8917.c @@ -1607,7 +1607,6 @@ static struct platform_driver msm8917_pinctrl_driver = { .of_match_table = msm8917_pinctrl_of_match, }, .probe = msm8917_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8917_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8953.c b/drivers/pinctrl/qcom/pinctrl-msm8953.c index 998351bdfee1..956383341a7a 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8953.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8953.c @@ -1816,7 +1816,6 @@ static struct platform_driver msm8953_pinctrl_driver = { .of_match_table = msm8953_pinctrl_of_match, }, .probe = msm8953_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8953_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8960.c b/drivers/pinctrl/qcom/pinctrl-msm8960.c index ebe230b3b437..a937ea867de7 100644 --- 
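Every one of the .remove = msm_pinctrl_remove deletions in the surrounding hunks follows from the core change in pinctrl-msm.c: once the gpiochip is registered with devm_gpiochip_add_data(), the driver core unregisters it automatically on unbind, so the per-SoC drivers need no remove callback at all. A minimal sketch of the device-managed pattern (driver names hypothetical):

    static int my_pinctrl_probe(struct platform_device *pdev)
    {
            struct my_pinctrl *pctrl;
            int ret;

            pctrl = devm_kzalloc(&pdev->dev, sizeof(*pctrl), GFP_KERNEL);
            if (!pctrl)
                    return -ENOMEM;

            /* ... chip setup ... */

            /* Torn down automatically on unbind: no .remove needed. */
            ret = devm_gpiochip_add_data(&pdev->dev, &pctrl->chip, pctrl);
            if (ret)
                    return ret;

            return 0;
    }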
a/drivers/pinctrl/qcom/pinctrl-msm8960.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8960.c @@ -1246,7 +1246,6 @@ static struct platform_driver msm8960_pinctrl_driver = { .of_match_table = msm8960_pinctrl_of_match, }, .probe = msm8960_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8960_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8976.c b/drivers/pinctrl/qcom/pinctrl-msm8976.c index c30d80e4e98c..3bcb03387781 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8976.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8976.c @@ -1096,7 +1096,6 @@ static struct platform_driver msm8976_pinctrl_driver = { .of_match_table = msm8976_pinctrl_of_match, }, .probe = msm8976_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8976_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8994.c b/drivers/pinctrl/qcom/pinctrl-msm8994.c index b1a6759ab4a5..7a3b6cbccb68 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8994.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8994.c @@ -1343,7 +1343,6 @@ static struct platform_driver msm8994_pinctrl_driver = { .of_match_table = msm8994_pinctrl_of_match, }, .probe = msm8994_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8994_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8996.c b/drivers/pinctrl/qcom/pinctrl-msm8996.c index 1b5d80eaab83..d86d83106d3b 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8996.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8996.c @@ -1920,7 +1920,6 @@ static struct platform_driver msm8996_pinctrl_driver = { .of_match_table = msm8996_pinctrl_of_match, }, .probe = msm8996_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8996_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8998.c b/drivers/pinctrl/qcom/pinctrl-msm8998.c index b7cbf32b3125..1daee815888f 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8998.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8998.c @@ -1535,7 +1535,6 @@ static struct platform_driver msm8998_pinctrl_driver = { .of_match_table = msm8998_pinctrl_of_match, }, .probe = msm8998_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8998_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8x74.c b/drivers/pinctrl/qcom/pinctrl-msm8x74.c index 238c83f6ec4f..8253aa25775b 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8x74.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8x74.c @@ -1083,7 +1083,6 @@ static struct platform_driver msm8x74_pinctrl_driver = { .of_match_table = msm8x74_pinctrl_of_match, }, .probe = msm8x74_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8x74_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qcm2290.c b/drivers/pinctrl/qcom/pinctrl-qcm2290.c index f885af571ec9..eeeec6434f6a 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcm2290.c +++ b/drivers/pinctrl/qcom/pinctrl-qcm2290.c @@ -167,6 +167,10 @@ static const struct pinctrl_pin_desc qcm2290_pins[] = { PINCTRL_PIN(62, "GPIO_62"), PINCTRL_PIN(63, "GPIO_63"), PINCTRL_PIN(64, "GPIO_64"), + PINCTRL_PIN(65, "GPIO_65"), + PINCTRL_PIN(66, "GPIO_66"), + PINCTRL_PIN(67, "GPIO_67"), + PINCTRL_PIN(68, "GPIO_68"), PINCTRL_PIN(69, "GPIO_69"), PINCTRL_PIN(70, "GPIO_70"), PINCTRL_PIN(71, "GPIO_71"), @@ -181,12 +185,17 @@ static const struct pinctrl_pin_desc qcm2290_pins[] = { PINCTRL_PIN(80, "GPIO_80"), PINCTRL_PIN(81, "GPIO_81"), PINCTRL_PIN(82, "GPIO_82"), + PINCTRL_PIN(83, "GPIO_83"), + PINCTRL_PIN(84, "GPIO_84"), + PINCTRL_PIN(85, "GPIO_85"), PINCTRL_PIN(86, "GPIO_86"), PINCTRL_PIN(87, "GPIO_87"), PINCTRL_PIN(88, "GPIO_88"), 
PINCTRL_PIN(89, "GPIO_89"), PINCTRL_PIN(90, "GPIO_90"), PINCTRL_PIN(91, "GPIO_91"), + PINCTRL_PIN(92, "GPIO_92"), + PINCTRL_PIN(93, "GPIO_93"), PINCTRL_PIN(94, "GPIO_94"), PINCTRL_PIN(95, "GPIO_95"), PINCTRL_PIN(96, "GPIO_96"), @@ -1125,7 +1134,6 @@ static struct platform_driver qcm2290_pinctrl_driver = { .of_match_table = qcm2290_pinctrl_of_match, }, .probe = qcm2290_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init qcm2290_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs404.c b/drivers/pinctrl/qcom/pinctrl-qcs404.c index ae7224012f8a..54e3b4435349 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs404.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs404.c @@ -1644,7 +1644,6 @@ static struct platform_driver qcs404_pinctrl_driver = { .of_match_table = qcs404_pinctrl_of_match, }, .probe = qcs404_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init qcs404_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs615.c b/drivers/pinctrl/qcom/pinctrl-qcs615.c index 17ca743c2210..2a943bc46a62 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs615.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs615.c @@ -1087,7 +1087,6 @@ static struct platform_driver qcs615_tlmm_driver = { .of_match_table = qcs615_tlmm_of_match, }, .probe = qcs615_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init qcs615_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs8300.c b/drivers/pinctrl/qcom/pinctrl-qcs8300.c index 5f5f7c4ac644..d6437e26392b 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs8300.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs8300.c @@ -1227,7 +1227,6 @@ static struct platform_driver qcs8300_pinctrl_driver = { .of_match_table = qcs8300_pinctrl_of_match, }, .probe = qcs8300_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init qcs8300_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c b/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c index b5808fcfb13c..9ecc4d40e4dc 100644 --- a/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c +++ b/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c @@ -145,7 +145,6 @@ static struct platform_driver qdf2xxx_pinctrl_driver = { .acpi_match_table = qdf2xxx_acpi_ids, }, .probe = qdf2xxx_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init qdf2xxx_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qdu1000.c b/drivers/pinctrl/qcom/pinctrl-qdu1000.c index 47bc529ef550..eacb89fa3888 100644 --- a/drivers/pinctrl/qcom/pinctrl-qdu1000.c +++ b/drivers/pinctrl/qcom/pinctrl-qdu1000.c @@ -1248,7 +1248,6 @@ static struct platform_driver qdu1000_tlmm_driver = { .of_match_table = qdu1000_tlmm_of_match, }, .probe = qdu1000_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init qdu1000_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sa8775p.c b/drivers/pinctrl/qcom/pinctrl-sa8775p.c index a5b38221aea8..1b62eb3e6620 100644 --- a/drivers/pinctrl/qcom/pinctrl-sa8775p.c +++ b/drivers/pinctrl/qcom/pinctrl-sa8775p.c @@ -1540,7 +1540,6 @@ static struct platform_driver sa8775p_pinctrl_driver = { .of_match_table = sa8775p_pinctrl_of_match, }, .probe = sa8775p_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sa8775p_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sar2130p.c b/drivers/pinctrl/qcom/pinctrl-sar2130p.c index 19a2e37826c7..3dd1b5e5cfee 100644 --- a/drivers/pinctrl/qcom/pinctrl-sar2130p.c +++ b/drivers/pinctrl/qcom/pinctrl-sar2130p.c @@ -1486,7 +1486,6 @@ static struct platform_driver sar2130p_tlmm_driver = { .of_match_table = sar2130p_tlmm_of_match, }, .probe = 
sar2130p_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sar2130p_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sc7180.c b/drivers/pinctrl/qcom/pinctrl-sc7180.c index 6eb0c73791c0..c43fe10b71ad 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc7180.c +++ b/drivers/pinctrl/qcom/pinctrl-sc7180.c @@ -1159,7 +1159,6 @@ static struct platform_driver sc7180_pinctrl_driver = { .of_match_table = sc7180_pinctrl_of_match, }, .probe = sc7180_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sc7180_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sc7280.c b/drivers/pinctrl/qcom/pinctrl-sc7280.c index 0c10eeb60b55..1b070e9d41f5 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc7280.c +++ b/drivers/pinctrl/qcom/pinctrl-sc7280.c @@ -1505,7 +1505,6 @@ static struct platform_driver sc7280_pinctrl_driver = { .of_match_table = sc7280_pinctrl_of_match, }, .probe = sc7280_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sc7280_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sc8180x.c b/drivers/pinctrl/qcom/pinctrl-sc8180x.c index d6a79ad41a40..26dd165d1543 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc8180x.c +++ b/drivers/pinctrl/qcom/pinctrl-sc8180x.c @@ -1720,7 +1720,6 @@ static struct platform_driver sc8180x_pinctrl_driver = { .acpi_match_table = sc8180x_pinctrl_acpi_match, }, .probe = sc8180x_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sc8180x_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sc8280xp.c b/drivers/pinctrl/qcom/pinctrl-sc8280xp.c index 96f4fb5a5d29..6ccd7e5648d4 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc8280xp.c +++ b/drivers/pinctrl/qcom/pinctrl-sc8280xp.c @@ -1926,7 +1926,6 @@ static struct platform_driver sc8280xp_pinctrl_driver = { .of_match_table = sc8280xp_pinctrl_of_match, }, .probe = sc8280xp_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sc8280xp_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdm660.c b/drivers/pinctrl/qcom/pinctrl-sdm660.c index 907e4ffca5e7..1a78288f1bc8 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdm660.c +++ b/drivers/pinctrl/qcom/pinctrl-sdm660.c @@ -1442,7 +1442,6 @@ static struct platform_driver sdm660_pinctrl_driver = { .of_match_table = sdm660_pinctrl_of_match, }, .probe = sdm660_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdm660_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdm670.c b/drivers/pinctrl/qcom/pinctrl-sdm670.c index c76183ba95e1..0fe1fa94cd6d 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdm670.c +++ b/drivers/pinctrl/qcom/pinctrl-sdm670.c @@ -1337,7 +1337,6 @@ static struct platform_driver sdm670_pinctrl_driver = { .of_match_table = sdm670_pinctrl_of_match, }, .probe = sdm670_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdm670_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdm845.c b/drivers/pinctrl/qcom/pinctrl-sdm845.c index cc05c415ed15..0446e291aa48 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdm845.c +++ b/drivers/pinctrl/qcom/pinctrl-sdm845.c @@ -1351,7 +1351,6 @@ static struct platform_driver sdm845_pinctrl_driver = { .acpi_match_table = ACPI_PTR(sdm845_pinctrl_acpi_match), }, .probe = sdm845_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdm845_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdx55.c b/drivers/pinctrl/qcom/pinctrl-sdx55.c index 8826db9d21d0..2c17bf889146 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdx55.c +++ b/drivers/pinctrl/qcom/pinctrl-sdx55.c @@ -990,7 +990,6 @@ static 
struct platform_driver sdx55_pinctrl_driver = { .of_match_table = sdx55_pinctrl_of_match, }, .probe = sdx55_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdx55_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdx65.c b/drivers/pinctrl/qcom/pinctrl-sdx65.c index f6f319c997fc..85b5c0206dbd 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdx65.c +++ b/drivers/pinctrl/qcom/pinctrl-sdx65.c @@ -939,7 +939,6 @@ static struct platform_driver sdx65_pinctrl_driver = { .of_match_table = sdx65_pinctrl_of_match, }, .probe = sdx65_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdx65_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdx75.c b/drivers/pinctrl/qcom/pinctrl-sdx75.c index 3cfe8c7f04df..ab13a3a57a83 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdx75.c +++ b/drivers/pinctrl/qcom/pinctrl-sdx75.c @@ -1124,7 +1124,6 @@ static struct platform_driver sdx75_pinctrl_driver = { .of_match_table = sdx75_pinctrl_of_match, }, .probe = sdx75_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdx75_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm4450.c b/drivers/pinctrl/qcom/pinctrl-sm4450.c index 622f20e6f6f8..1ecdf1ab4f27 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm4450.c +++ b/drivers/pinctrl/qcom/pinctrl-sm4450.c @@ -994,7 +994,6 @@ static struct platform_driver sm4450_tlmm_driver = { .of_match_table = sm4450_tlmm_of_match, }, .probe = sm4450_tlmm_probe, - .remove = msm_pinctrl_remove, }; MODULE_DEVICE_TABLE(of, sm4450_tlmm_of_match); diff --git a/drivers/pinctrl/qcom/pinctrl-sm6115.c b/drivers/pinctrl/qcom/pinctrl-sm6115.c index 4e91c75ad952..c273efa43996 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6115.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6115.c @@ -907,7 +907,6 @@ static struct platform_driver sm6115_tlmm_driver = { .of_match_table = sm6115_tlmm_of_match, }, .probe = sm6115_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm6115_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm6125.c b/drivers/pinctrl/qcom/pinctrl-sm6125.c index c188842047aa..5092f20e0c1b 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6125.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6125.c @@ -1266,7 +1266,6 @@ static struct platform_driver sm6125_tlmm_driver = { .of_match_table = sm6125_tlmm_of_match, }, .probe = sm6125_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm6125_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm6350.c b/drivers/pinctrl/qcom/pinctrl-sm6350.c index f3828c07b134..ba4686c86c54 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6350.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6350.c @@ -1373,7 +1373,6 @@ static struct platform_driver sm6350_tlmm_driver = { .of_match_table = sm6350_tlmm_of_match, }, .probe = sm6350_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm6350_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm6375.c b/drivers/pinctrl/qcom/pinctrl-sm6375.c index c82c8516932e..49031571e65e 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6375.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6375.c @@ -1516,7 +1516,6 @@ static struct platform_driver sm6375_tlmm_driver = { .of_match_table = sm6375_tlmm_of_match, }, .probe = sm6375_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm6375_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm7150.c b/drivers/pinctrl/qcom/pinctrl-sm7150.c index 3c7fd8af6635..6e89966cd70e 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm7150.c +++ b/drivers/pinctrl/qcom/pinctrl-sm7150.c @@ -1255,7 +1255,6 @@ static struct 
platform_driver sm7150_tlmm_driver = { .of_match_table = sm7150_tlmm_of_match, }, .probe = sm7150_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm7150_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8150.c b/drivers/pinctrl/qcom/pinctrl-sm8150.c index 01aea9c70b7a..794ed99463f7 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8150.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8150.c @@ -1542,7 +1542,6 @@ static struct platform_driver sm8150_pinctrl_driver = { .of_match_table = sm8150_pinctrl_of_match, }, .probe = sm8150_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8150_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8250.c b/drivers/pinctrl/qcom/pinctrl-sm8250.c index e9961a49ff98..fb6f005d64f5 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8250.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8250.c @@ -1351,7 +1351,6 @@ static struct platform_driver sm8250_pinctrl_driver = { .of_match_table = sm8250_pinctrl_of_match, }, .probe = sm8250_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8250_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8350.c b/drivers/pinctrl/qcom/pinctrl-sm8350.c index 9c69458bd910..c8a3f39ce6f1 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8350.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8350.c @@ -1642,7 +1642,6 @@ static struct platform_driver sm8350_tlmm_driver = { .of_match_table = sm8350_tlmm_of_match, }, .probe = sm8350_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8350_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8450.c b/drivers/pinctrl/qcom/pinctrl-sm8450.c index d11bb1ee9e3d..f2e52d5a0f93 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8450.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8450.c @@ -1677,7 +1677,6 @@ static struct platform_driver sm8450_tlmm_driver = { .of_match_table = sm8450_tlmm_of_match, }, .probe = sm8450_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8450_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8550.c b/drivers/pinctrl/qcom/pinctrl-sm8550.c index 3c847d9cb5d9..1b4496cb39eb 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8550.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8550.c @@ -1762,7 +1762,6 @@ static struct platform_driver sm8550_tlmm_driver = { .of_match_table = sm8550_tlmm_of_match, }, .probe = sm8550_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8550_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8650.c b/drivers/pinctrl/qcom/pinctrl-sm8650.c index 104708252d12..449a0077f4b1 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8650.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8650.c @@ -1742,7 +1742,6 @@ static struct platform_driver sm8650_tlmm_driver = { .of_match_table = sm8650_tlmm_of_match, }, .probe = sm8650_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8650_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8750.c b/drivers/pinctrl/qcom/pinctrl-sm8750.c index b94fb4ee0ec3..8516693d1db5 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8750.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8750.c @@ -1711,7 +1711,6 @@ static struct platform_driver sm8750_tlmm_driver = { .of_match_table = sm8750_tlmm_of_match, }, .probe = sm8750_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8750_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-x1e80100.c b/drivers/pinctrl/qcom/pinctrl-x1e80100.c index 419cb8facb2f..d4b215f34c39 100644 --- a/drivers/pinctrl/qcom/pinctrl-x1e80100.c +++ b/drivers/pinctrl/qcom/pinctrl-x1e80100.c @@ -1861,7 +1861,6 @@ static 
struct platform_driver x1e80100_pinctrl_driver = { .of_match_table = x1e80100_pinctrl_of_match, }, .probe = x1e80100_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init x1e80100_pinctrl_init(void) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c b/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c index 1833078f6877..4e34b0cd3b73 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c @@ -143,7 +143,7 @@ static struct sunxi_desc_pin *init_pins_table(struct device *dev, */ static int prepare_function_table(struct device *dev, struct device_node *pnode, struct sunxi_desc_pin *pins, int npins, - const u8 *irq_bank_muxes) + unsigned pin_base, const u8 *irq_bank_muxes) { struct device_node *node; struct property *prop; @@ -166,7 +166,7 @@ static int prepare_function_table(struct device *dev, struct device_node *pnode, */ for (i = 0; i < npins; i++) { struct sunxi_desc_pin *pin = &pins[i]; - int bank = pin->pin.number / PINS_PER_BANK; + int bank = (pin->pin.number - pin_base) / PINS_PER_BANK; if (irq_bank_muxes[bank]) { pin->variant++; @@ -211,7 +211,7 @@ static int prepare_function_table(struct device *dev, struct device_node *pnode, last_bank = 0; for (i = 0; i < npins; i++) { struct sunxi_desc_pin *pin = &pins[i]; - int bank = pin->pin.number / PINS_PER_BANK; + int bank = (pin->pin.number - pin_base) / PINS_PER_BANK; int lastfunc = pin->variant + 1; int irq_mux = irq_bank_muxes[bank]; @@ -353,7 +353,7 @@ int sunxi_pinctrl_dt_table_init(struct platform_device *pdev, return PTR_ERR(pins); ret = prepare_function_table(&pdev->dev, pnode, pins, desc->npins, - irq_bank_muxes); + desc->pin_base, irq_bank_muxes); if (ret) return ret; diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c index 538b36b97095..885e2f8136fd 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.c +++ b/drivers/platform/x86/amd/hsmp/hsmp.c @@ -97,7 +97,7 @@ static int __hsmp_send_message(struct hsmp_socket *sock, struct hsmp_message *ms short_sleep = jiffies + msecs_to_jiffies(HSMP_SHORT_SLEEP); timeout = jiffies + msecs_to_jiffies(HSMP_MSG_TIMEOUT); - while (time_before(jiffies, timeout)) { + while (true) { ret = sock->amd_hsmp_rdwr(sock, mbinfo->msg_resp_off, &mbox_status, HSMP_RD); if (ret) { dev_err(sock->dev, "Error %d reading mailbox status\n", ret); @@ -106,6 +106,10 @@ static int __hsmp_send_message(struct hsmp_socket *sock, struct hsmp_message *ms if (mbox_status != HSMP_STATUS_NOT_READY) break; + + if (!time_before(jiffies, timeout)) + break; + if (time_before(jiffies, short_sleep)) usleep_range(50, 100); else @@ -210,13 +214,7 @@ int hsmp_send_message(struct hsmp_message *msg) return -ENODEV; sock = &hsmp_pdev.sock[msg->sock_ind]; - /* - * The time taken by smu operation to complete is between - * 10us to 1ms. Sometime it may take more time. 
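The __hsmp_send_message() rework above closes a classic polling race: with the old while (time_before(...)) loop, the deadline could expire between two reads, so a response that landed at the last moment was never seen. Reading first and checking the deadline afterwards guarantees one final status sample after the timeout. The skeleton of the pattern (helper names hypothetical):

    timeout = jiffies + msecs_to_jiffies(TIMEOUT_MS);
    while (true) {
            status = read_mbox_status();
            if (status != NOT_READY)
                    break;                  /* response (or error) arrived */
            if (!time_before(jiffies, timeout))
                    break;                  /* expired, after one last read */
            usleep_range(50, 100);
    }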
- * In SMP system timeout of 100 millisecs should - * be enough for the previous thread to finish the operation - */ - ret = down_timeout(&sock->hsmp_sem, msecs_to_jiffies(HSMP_MSG_TIMEOUT)); + ret = down_interruptible(&sock->hsmp_sem); if (ret < 0) return ret; diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index 5c7c01f66cde..f292111bd065 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -225,6 +225,15 @@ static const struct dmi_system_id fwbug_list[] = { DMI_MATCH(DMI_BOARD_NAME, "WUJIE14-GX4HRXL"), } }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=220116 */ + { + .ident = "PCSpecialist Lafite Pro V 14M", + .driver_data = &quirk_spurious_8042, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "PCSpecialist"), + DMI_MATCH(DMI_PRODUCT_NAME, "Lafite Pro V 14M"), + } + }, {} }; diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index 37c7a57afee5..0b9b23eb7c2c 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -157,6 +157,8 @@ static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev) return -ENOMEM; } + memset_io(dev->smu_virt_addr, 0, sizeof(struct smu_metrics)); + /* Start the logging */ amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_RESET, false); amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, false); diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 76910601cac8..ef988605c4da 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -280,7 +280,7 @@ int amd_pmf_set_dram_addr(struct amd_pmf_dev *dev, bool alloc_buffer) dev_err(dev->dev, "Invalid CPU id: 0x%x", dev->cpu_id); } - dev->buf = kzalloc(dev->mtable_size, GFP_KERNEL); + dev->buf = devm_kzalloc(dev->dev, dev->mtable_size, GFP_KERNEL); if (!dev->buf) return -ENOMEM; } @@ -493,7 +493,6 @@ static void amd_pmf_remove(struct platform_device *pdev) mutex_destroy(&dev->lock); mutex_destroy(&dev->update_mutex); mutex_destroy(&dev->cb_mutex); - kfree(dev->buf); } static const struct attribute_group *amd_pmf_driver_groups[] = { diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index d3bd12ad036a..4f626ebcb619 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -358,30 +358,28 @@ static ssize_t amd_pmf_get_pb_data(struct file *filp, const char __user *buf, return -EINVAL; /* re-alloc to the new buffer length of the policy binary */ - new_policy_buf = memdup_user(buf, length); - if (IS_ERR(new_policy_buf)) - return PTR_ERR(new_policy_buf); + new_policy_buf = devm_kzalloc(dev->dev, length, GFP_KERNEL); + if (!new_policy_buf) + return -ENOMEM; + + if (copy_from_user(new_policy_buf, buf, length)) { + devm_kfree(dev->dev, new_policy_buf); + return -EFAULT; + } - kfree(dev->policy_buf); + devm_kfree(dev->dev, dev->policy_buf); dev->policy_buf = new_policy_buf; dev->policy_sz = length; - if (!amd_pmf_pb_valid(dev)) { - ret = -EINVAL; - goto cleanup; - } + if (!amd_pmf_pb_valid(dev)) + return -EINVAL; amd_pmf_hex_dump_pb(dev); ret = amd_pmf_start_policy_engine(dev); if (ret < 0) - goto cleanup; + return ret; return length; - -cleanup: - kfree(dev->policy_buf); - dev->policy_buf = NULL; - return ret; } static const struct file_operations pb_fops = { @@ -422,12 +420,12 @@ static int amd_pmf_ta_open_session(struct tee_context *ctx, u32 *id, const uuid_ rc = tee_client_open_session(ctx, &sess_arg, NULL); if (rc 
< 0 || sess_arg.ret != 0) { pr_err("Failed to open TEE session err:%#x, rc:%d\n", sess_arg.ret, rc); - return rc; + return rc ?: -EINVAL; } *id = sess_arg.session; - return rc; + return 0; } static int amd_pmf_register_input_device(struct amd_pmf_dev *dev) @@ -462,7 +460,9 @@ static int amd_pmf_tee_init(struct amd_pmf_dev *dev, const uuid_t *uuid) dev->tee_ctx = tee_client_open_context(NULL, amd_pmf_amdtee_ta_match, NULL, NULL); if (IS_ERR(dev->tee_ctx)) { dev_err(dev->dev, "Failed to open TEE context\n"); - return PTR_ERR(dev->tee_ctx); + ret = PTR_ERR(dev->tee_ctx); + dev->tee_ctx = NULL; + return ret; } ret = amd_pmf_ta_open_session(dev->tee_ctx, &dev->session_id, uuid); @@ -502,9 +502,12 @@ out_ctx: static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev) { + if (!dev->tee_ctx) + return; tee_shm_free(dev->fw_shm_pool); tee_client_close_session(dev->tee_ctx, dev->session_id); tee_client_close_context(dev->tee_ctx); + dev->tee_ctx = NULL; } int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) @@ -527,64 +530,45 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) ret = amd_pmf_set_dram_addr(dev, true); if (ret) - goto err_cancel_work; + return ret; dev->policy_base = devm_ioremap_resource(dev->dev, dev->res); - if (IS_ERR(dev->policy_base)) { - ret = PTR_ERR(dev->policy_base); - goto err_free_dram_buf; - } + if (IS_ERR(dev->policy_base)) + return PTR_ERR(dev->policy_base); - dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL); - if (!dev->policy_buf) { - ret = -ENOMEM; - goto err_free_dram_buf; - } + dev->policy_buf = devm_kzalloc(dev->dev, dev->policy_sz, GFP_KERNEL); + if (!dev->policy_buf) + return -ENOMEM; memcpy_fromio(dev->policy_buf, dev->policy_base, dev->policy_sz); if (!amd_pmf_pb_valid(dev)) { dev_info(dev->dev, "No Smart PC policy present\n"); - ret = -EINVAL; - goto err_free_policy; + return -EINVAL; } amd_pmf_hex_dump_pb(dev); - dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL); - if (!dev->prev_data) { - ret = -ENOMEM; - goto err_free_policy; - } + dev->prev_data = devm_kzalloc(dev->dev, sizeof(*dev->prev_data), GFP_KERNEL); + if (!dev->prev_data) + return -ENOMEM; for (i = 0; i < ARRAY_SIZE(amd_pmf_ta_uuid); i++) { ret = amd_pmf_tee_init(dev, &amd_pmf_ta_uuid[i]); if (ret) - goto err_free_prev_data; + return ret; ret = amd_pmf_start_policy_engine(dev); - switch (ret) { - case TA_PMF_TYPE_SUCCESS: - status = true; - break; - case TA_ERROR_CRYPTO_INVALID_PARAM: - case TA_ERROR_CRYPTO_BIN_TOO_LARGE: - amd_pmf_tee_deinit(dev); - status = false; - break; - default: - ret = -EINVAL; - amd_pmf_tee_deinit(dev); - goto err_free_prev_data; - } - + dev_dbg(dev->dev, "start policy engine ret: %d\n", ret); + status = ret == TA_PMF_TYPE_SUCCESS; if (status) break; + amd_pmf_tee_deinit(dev); } if (!status && !pb_side_load) { ret = -EINVAL; - goto err_free_prev_data; + goto err; } if (pb_side_load) @@ -592,22 +576,12 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) ret = amd_pmf_register_input_device(dev); if (ret) - goto err_pmf_remove_pb; + goto err; return 0; -err_pmf_remove_pb: - if (pb_side_load && dev->esbin) - amd_pmf_remove_pb(dev); - amd_pmf_tee_deinit(dev); -err_free_prev_data: - kfree(dev->prev_data); -err_free_policy: - kfree(dev->policy_buf); -err_free_dram_buf: - kfree(dev->buf); -err_cancel_work: - cancel_delayed_work_sync(&dev->pb_work); +err: + amd_pmf_deinit_smart_pc(dev); return ret; } @@ -621,11 +595,5 @@ void amd_pmf_deinit_smart_pc(struct amd_pmf_dev *dev) amd_pmf_remove_pb(dev); cancel_delayed_work_sync(&dev->pb_work); - kfree(dev->prev_data); - 
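The tee-if.c changes above make teardown idempotent: dev->tee_ctx is cleared both when opening the context fails and after a successful teardown, and amd_pmf_tee_deinit() bails out when it is NULL. That lets the error paths in amd_pmf_init_smart_pc() collapse into a single call. The shape of the pattern (structure and function names hypothetical):

    static void my_tee_deinit(struct my_dev *dev)
    {
            if (!dev->tee_ctx)      /* never initialised, or already freed */
                    return;

            tee_client_close_session(dev->tee_ctx, dev->session_id);
            tee_client_close_context(dev->tee_ctx);
            dev->tee_ctx = NULL;    /* a repeat call is now a no-op */
    }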
dev->prev_data = NULL; - kfree(dev->policy_buf); - dev->policy_buf = NULL; - kfree(dev->buf); - dev->buf = NULL; amd_pmf_tee_deinit(dev); } diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index c42f9228b0b2..20ec122a9fe0 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -119,7 +119,7 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R1 AMD"), }, - .driver_data = &g_series_quirks, + .driver_data = &generic_quirks, }, { .ident = "Alienware m16 R2", diff --git a/drivers/platform/x86/dell/dell_rbu.c b/drivers/platform/x86/dell/dell_rbu.c index e30ca325938c..9dd9f2cb074f 100644 --- a/drivers/platform/x86/dell/dell_rbu.c +++ b/drivers/platform/x86/dell/dell_rbu.c @@ -45,7 +45,7 @@ MODULE_AUTHOR("Abhay Salunke <abhay_salunke@dell.com>"); MODULE_DESCRIPTION("Driver for updating BIOS image on DELL systems"); MODULE_LICENSE("GPL"); -MODULE_VERSION("3.2"); +MODULE_VERSION("3.3"); #define BIOS_SCAN_LIMIT 0xffffffff #define MAX_IMAGE_LENGTH 16 @@ -91,7 +91,7 @@ static void init_packet_head(void) rbu_data.imagesize = 0; } -static int create_packet(void *data, size_t length) +static int create_packet(void *data, size_t length) __must_hold(&rbu_data.lock) { struct packet_data *newpacket; int ordernum = 0; @@ -292,7 +292,7 @@ static int packet_read_list(char *data, size_t * pread_length) remaining_bytes = *pread_length; bytes_read = rbu_data.packet_read_count; - list_for_each_entry(newpacket, (&packet_data_head.list)->next, list) { + list_for_each_entry(newpacket, &packet_data_head.list, list) { bytes_copied = do_packet_read(pdest, newpacket, remaining_bytes, bytes_read, &temp_count); remaining_bytes -= bytes_copied; @@ -315,14 +315,14 @@ static void packet_empty_list(void) { struct packet_data *newpacket, *tmp; - list_for_each_entry_safe(newpacket, tmp, (&packet_data_head.list)->next, list) { + list_for_each_entry_safe(newpacket, tmp, &packet_data_head.list, list) { list_del(&newpacket->list); /* * zero out the RBU packet memory before freeing * to make sure there are no stale RBU packets left in memory */ - memset(newpacket->data, 0, rbu_data.packetsize); + memset(newpacket->data, 0, newpacket->length); set_memory_wb((unsigned long)newpacket->data, 1 << newpacket->ordernum); free_pages((unsigned long) newpacket->data, diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index ede483573fe0..b5e4da6a6779 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -15,6 +15,7 @@ #include <linux/bug.h> #include <linux/cleanup.h> #include <linux/debugfs.h> +#include <linux/delay.h> #include <linux/device.h> #include <linux/dmi.h> #include <linux/i8042.h> @@ -267,6 +268,20 @@ static void ideapad_shared_exit(struct ideapad_private *priv) */ #define IDEAPAD_EC_TIMEOUT 200 /* in ms */ +/* + * Some models (e.g., ThinkBook since 2024) have a low tolerance for being + * polled too frequently. Doing so may break the state machine in the EC, + * resulting in a hard shutdown. + * + * It is also observed that frequent polls may disturb the ongoing operation + * and notably delay the availability of EC response. + * + * These values are used as the delay before the first poll and the interval + * between subsequent polls to solve the above issues. 
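With the constants defined just below, the EC wait loops in read_ec_data() and write_ec_cmd() swap their busy schedule() call for a bounded sleep, polling at most once every 150 to 300 us until the VPC flag clears or IDEAPAD_EC_TIMEOUT expires. Roughly:

    end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1;

    while (time_before(jiffies, end_jiffies)) {
            /* Give the EC breathing room instead of hammering it. */
            usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US);

            err = eval_vpcr(handle, 1, &val);
            if (err)
                    return err;
            if (val == 0)
                    return 0;       /* EC acknowledged the command */
    }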
+ */ +#define IDEAPAD_EC_POLL_MIN_US 150 +#define IDEAPAD_EC_POLL_MAX_US 300 + static int eval_int(acpi_handle handle, const char *name, unsigned long *res) { unsigned long long result; @@ -383,7 +398,7 @@ static int read_ec_data(acpi_handle handle, unsigned long cmd, unsigned long *da end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1; while (time_before(jiffies, end_jiffies)) { - schedule(); + usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US); err = eval_vpcr(handle, 1, &val); if (err) @@ -414,7 +429,7 @@ static int write_ec_cmd(acpi_handle handle, unsigned long cmd, unsigned long dat end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1; while (time_before(jiffies, end_jiffies)) { - schedule(); + usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US); err = eval_vpcr(handle, 1, &val); if (err) diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index e136d18b1d38..4a94a4ee031e 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -299,6 +299,13 @@ enum ppfear_regs { #define PTL_PCD_PMC_MMIO_REG_LEN 0x31A8 /* SSRAM PMC Device ID */ +/* LNL */ +#define PMC_DEVID_LNL_SOCM 0xa87f + +/* PTL */ +#define PMC_DEVID_PTL_PCDH 0xe37f +#define PMC_DEVID_PTL_PCDP 0xe47f + /* ARL */ #define PMC_DEVID_ARL_SOCM 0x777f #define PMC_DEVID_ARL_SOCS 0xae7f diff --git a/drivers/platform/x86/intel/pmc/ssram_telemetry.c b/drivers/platform/x86/intel/pmc/ssram_telemetry.c index b207247eb5dd..93579152188e 100644 --- a/drivers/platform/x86/intel/pmc/ssram_telemetry.c +++ b/drivers/platform/x86/intel/pmc/ssram_telemetry.c @@ -187,6 +187,9 @@ static const struct pci_device_id intel_pmc_ssram_telemetry_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_MTL_SOCM) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_ARL_SOCS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_ARL_SOCM) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_LNL_SOCM) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_PTL_PCDH) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_PTL_PCDP) }, { } }; MODULE_DEVICE_TABLE(pci, intel_pmc_ssram_telemetry_pci_ids); diff --git a/drivers/platform/x86/intel/tpmi_power_domains.c b/drivers/platform/x86/intel/tpmi_power_domains.c index 0c5c88eb7baf..9d8247bb9cfa 100644 --- a/drivers/platform/x86/intel/tpmi_power_domains.c +++ b/drivers/platform/x86/intel/tpmi_power_domains.c @@ -228,8 +228,10 @@ static int __init tpmi_init(void) domain_die_map = kcalloc(size_mul(topology_max_packages(), MAX_POWER_DOMAINS), sizeof(*domain_die_map), GFP_KERNEL); - if (!domain_die_map) + if (!domain_die_map) { + ret = -ENOMEM; goto free_domain_mask; + } ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "platform/x86/tpmi_power_domains:online", diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c index 0f8aea18275b..65897fae17df 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c @@ -58,7 +58,7 @@ static ssize_t show_agent_types(struct kobject *kobj, struct kobj_attribute *att if (length) length += sysfs_emit_at(buf, length, " "); - length += sysfs_emit_at(buf, length, agent_name[agent]); + length += sysfs_emit_at(buf, length, "%s", agent_name[agent]); } length += sysfs_emit_at(buf, length, "\n"); diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c 
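The uncore-frequency change above is the standard fix for passing variable data as a printf-style format: if an agent name ever contained a '%', sysfs_emit_at() would try to interpret it. Variable strings should only ever travel as an argument behind a literal "%s" format:

    /* Risky: the string itself is treated as the format. */
    length += sysfs_emit_at(buf, length, agent_name[agent]);

    /* Safe: the string is plain format data. */
    length += sysfs_emit_at(buf, length, "%s", agent_name[agent]);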
b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c index 1c7b2f2716ca..44d9948ed224 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c @@ -511,10 +511,13 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_ /* Get the package ID from the TPMI core */ plat_info = tpmi_get_platform_data(auxdev); - if (plat_info) - pkg = plat_info->package_id; - else + if (unlikely(!plat_info)) { dev_info(&auxdev->dev, "Platform information is NULL\n"); + ret = -ENODEV; + goto err_rem_common; + } + + pkg = plat_info->package_id; for (i = 0; i < num_resources; ++i) { struct tpmi_uncore_power_domain_info *pd_info; diff --git a/drivers/platform/x86/samsung-galaxybook.c b/drivers/platform/x86/samsung-galaxybook.c index 5878a351993e..3c13e13d4885 100644 --- a/drivers/platform/x86/samsung-galaxybook.c +++ b/drivers/platform/x86/samsung-galaxybook.c @@ -1403,6 +1403,7 @@ static int galaxybook_probe(struct platform_device *pdev) } static const struct acpi_device_id galaxybook_device_ids[] = { + { "SAM0426" }, { "SAM0427" }, { "SAM0428" }, { "SAM0429" }, diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 35a5994bf64f..36f57d7b4a66 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -121,7 +121,8 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx) struct ptp_clock_info *ops; int err = -EOPNOTSUPP; - if (ptp_clock_freerun(ptp)) { + if (tx->modes & (ADJ_SETOFFSET | ADJ_FREQUENCY | ADJ_OFFSET) && + ptp_clock_freerun(ptp)) { pr_err("ptp: physical clock is free running\n"); return -EBUSY; } diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 18934e28469e..a6aad743c282 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -100,10 +100,20 @@ static inline bool ptp_vclock_in_use(struct ptp_clock *ptp) { bool in_use = false; + /* Virtual clocks can't be stacked on top of virtual clocks. + * Avoid acquiring the n_vclocks_mux on virtual clocks, to allow this + * function to be called from code paths where the n_vclocks_mux of the + * parent physical clock is already held. Functionally that's not an + * issue, but lockdep would complain, because they have the same lock + * class. 
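The guard added here works around a lockdep false positive rather than a real deadlock: parent and virtual clocks initialise n_vclocks_mux at the same site, so the mutexes share one lock class, and taking a vclock's mutex while the parent's is held looks like recursive locking. Returning early for virtual clocks (which can never parent other vclocks) means the second mutex in that class is simply never taken; condensed:

    if (ptp->is_virtual_clock)
            return false;           /* vclocks cannot be stacked */

    if (mutex_lock_interruptible(&ptp->n_vclocks_mux))
            return true;
    in_use = ptp->n_vclocks != 0;
    mutex_unlock(&ptp->n_vclocks_mux);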
+ */ + if (ptp->is_virtual_clock) + return false; + if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) return true; - if (!ptp->is_virtual_clock && ptp->n_vclocks) + if (ptp->n_vclocks) in_use = true; mutex_unlock(&ptp->n_vclocks_mux); diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index 97287e838ce1..66464674223f 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -783,6 +783,9 @@ static int riocm_ch_send(u16 ch_id, void *buf, int len) if (buf == NULL || ch_id == 0 || len == 0 || len > RIO_MAX_MSG_SIZE) return -EINVAL; + if (len < sizeof(struct rio_ch_chan_hdr)) + return -EINVAL; /* insufficient data from user */ + ch = riocm_get_channel(ch_id); if (!ch) { riocm_error("%s(%d) ch_%d not found", current->comm, diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c index bd9447dac596..c282236959b1 100644 --- a/drivers/regulator/fan53555.c +++ b/drivers/regulator/fan53555.c @@ -147,6 +147,7 @@ struct fan53555_device_info { unsigned int slew_mask; const unsigned int *ramp_delay_table; unsigned int n_ramp_values; + unsigned int enable_time; unsigned int slew_rate; }; @@ -282,6 +283,7 @@ static int fan53526_voltages_setup_fairchild(struct fan53555_device_info *di) di->slew_mask = CTL_SLEW_MASK; di->ramp_delay_table = slew_rates; di->n_ramp_values = ARRAY_SIZE(slew_rates); + di->enable_time = 250; di->vsel_count = FAN53526_NVOLTAGES; return 0; @@ -296,10 +298,12 @@ static int fan53555_voltages_setup_fairchild(struct fan53555_device_info *di) case FAN53555_CHIP_REV_00: di->vsel_min = 600000; di->vsel_step = 10000; + di->enable_time = 400; break; case FAN53555_CHIP_REV_13: di->vsel_min = 800000; di->vsel_step = 10000; + di->enable_time = 400; break; default: dev_err(di->dev, @@ -311,13 +315,19 @@ static int fan53555_voltages_setup_fairchild(struct fan53555_device_info *di) case FAN53555_CHIP_ID_01: case FAN53555_CHIP_ID_03: case FAN53555_CHIP_ID_05: + di->vsel_min = 600000; + di->vsel_step = 10000; + di->enable_time = 400; + break; case FAN53555_CHIP_ID_08: di->vsel_min = 600000; di->vsel_step = 10000; + di->enable_time = 175; break; case FAN53555_CHIP_ID_04: di->vsel_min = 603000; di->vsel_step = 12826; + di->enable_time = 400; break; default: dev_err(di->dev, @@ -350,6 +360,7 @@ static int fan53555_voltages_setup_rockchip(struct fan53555_device_info *di) di->slew_mask = CTL_SLEW_MASK; di->ramp_delay_table = slew_rates; di->n_ramp_values = ARRAY_SIZE(slew_rates); + di->enable_time = 360; di->vsel_count = FAN53555_NVOLTAGES; return 0; @@ -372,6 +383,7 @@ static int rk8602_voltages_setup_rockchip(struct fan53555_device_info *di) di->slew_mask = CTL_SLEW_MASK; di->ramp_delay_table = slew_rates; di->n_ramp_values = ARRAY_SIZE(slew_rates); + di->enable_time = 360; di->vsel_count = RK8602_NVOLTAGES; return 0; @@ -395,6 +407,7 @@ static int fan53555_voltages_setup_silergy(struct fan53555_device_info *di) di->slew_mask = CTL_SLEW_MASK; di->ramp_delay_table = slew_rates; di->n_ramp_values = ARRAY_SIZE(slew_rates); + di->enable_time = 400; di->vsel_count = FAN53555_NVOLTAGES; return 0; @@ -594,6 +607,7 @@ static int fan53555_regulator_register(struct fan53555_device_info *di, rdesc->ramp_mask = di->slew_mask; rdesc->ramp_delay_table = di->ramp_delay_table; rdesc->n_ramp_values = di->n_ramp_values; + rdesc->enable_time = di->enable_time; rdesc->owner = THIS_MODULE; rdev = devm_regulator_register(di->dev, &di->desc, config); diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c index b4fe76e33ff2..fcdd2d0317a5 
100644 --- a/drivers/regulator/max20086-regulator.c +++ b/drivers/regulator/max20086-regulator.c @@ -5,6 +5,7 @@ // Copyright (C) 2022 Laurent Pinchart <laurent.pinchart@idesonboard.com> // Copyright (C) 2018 Avnet, Inc. +#include <linux/cleanup.h> #include <linux/err.h> #include <linux/gpio/consumer.h> #include <linux/i2c.h> @@ -133,11 +134,11 @@ static int max20086_regulators_register(struct max20086 *chip) static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) { struct of_regulator_match *matches; - struct device_node *node; unsigned int i; int ret; - node = of_get_child_by_name(chip->dev->of_node, "regulators"); + struct device_node *node __free(device_node) = + of_get_child_by_name(chip->dev->of_node, "regulators"); if (!node) { dev_err(chip->dev, "regulators node not found\n"); return -ENODEV; @@ -153,7 +154,6 @@ static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) ret = of_regulator_match(chip->dev, node, matches, chip->info->num_outputs); - of_node_put(node); if (ret < 0) { dev_err(chip->dev, "Failed to match regulators\n"); return -EINVAL; diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 41e36af35488..90a84ae98b97 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -449,6 +449,8 @@ static ssize_t zfcp_sysfs_unit_add_store(struct device *dev, if (kstrtoull(buf, 0, (unsigned long long *) &fcp_lun)) return -EINVAL; + flush_work(&port->rport_work); + retval = zfcp_unit_add(port, fcp_lun); if (retval) return retval; diff --git a/drivers/scsi/elx/efct/efct_hw.c b/drivers/scsi/elx/efct/efct_hw.c index 5a5525054d71..5b079b8b7a08 100644 --- a/drivers/scsi/elx/efct/efct_hw.c +++ b/drivers/scsi/elx/efct/efct_hw.c @@ -1120,7 +1120,7 @@ int efct_hw_parse_filter(struct efct_hw *hw, void *value) { int rc = 0; - char *p = NULL; + char *p = NULL, *pp = NULL; char *token; u32 idx = 0; @@ -1132,6 +1132,7 @@ efct_hw_parse_filter(struct efct_hw *hw, void *value) efc_log_err(hw->os, "p is NULL\n"); return -ENOMEM; } + pp = p; idx = 0; while ((token = strsep(&p, ",")) && *token) { @@ -1144,7 +1145,7 @@ efct_hw_parse_filter(struct efct_hw *hw, void *value) if (idx == ARRAY_SIZE(hw->config.filter_def)) break; } - kfree(p); + kfree(pp); return rc; } diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c index f8ab69c51dab..ae37f85f618b 100644 --- a/drivers/scsi/fnic/fdls_disc.c +++ b/drivers/scsi/fnic/fdls_disc.c @@ -763,50 +763,86 @@ static void fdls_send_fabric_abts(struct fnic_iport_s *iport) iport->fabric.timer_pending = 1; } -static void fdls_send_fdmi_abts(struct fnic_iport_s *iport) +static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport, + uint16_t oxid) { - uint8_t *frame; + struct fc_frame_header *pfdmi_abts; uint8_t d_id[3]; + uint8_t *frame; struct fnic *fnic = iport->fnic; - struct fc_frame_header *pfabric_abts; - unsigned long fdmi_tov; - uint16_t oxid; - uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET + - sizeof(struct fc_frame_header); frame = fdls_alloc_frame(iport); if (frame == NULL) { FNIC_FCS_DBG(KERN_ERR, fnic->host, fnic->fnic_num, "Failed to allocate frame to send FDMI ABTS"); - return; + return NULL; } - pfabric_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET); + pfdmi_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET); fdls_init_fabric_abts_frame(frame, iport); hton24(d_id, FC_FID_MGMT_SERV); - FNIC_STD_SET_D_ID(*pfabric_abts, d_id); + FNIC_STD_SET_D_ID(*pfdmi_abts, d_id); + 
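The efct_hw_parse_filter() fix above is the canonical strsep() idiom: strsep() advances the cursor it is given, so freeing the cursor after the loop frees nothing (it ends up NULL) or, if the loop breaks early, frees an interior pointer. Keep a second pointer to the original allocation and free that one. In isolation (handler name hypothetical):

    char *dup, *cursor, *token;

    dup = kstrdup(csv, GFP_KERNEL);
    if (!dup)
            return -ENOMEM;
    cursor = dup;

    while ((token = strsep(&cursor, ",")) && *token)
            handle_token(token);    /* hypothetical per-token consumer */

    kfree(dup);     /* cursor has moved (or is NULL); free the original */
    return 0;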
FNIC_STD_SET_OX_ID(*pfdmi_abts, oxid); + + return frame; +} + +static void fdls_send_fdmi_abts(struct fnic_iport_s *iport) +{ + uint8_t *frame; + struct fnic *fnic = iport->fnic; + unsigned long fdmi_tov; + uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET + + sizeof(struct fc_frame_header); if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) { - oxid = iport->active_oxid_fdmi_plogi; - FNIC_STD_SET_OX_ID(*pfabric_abts, oxid); + frame = fdls_alloc_init_fdmi_abts_frame(iport, + iport->active_oxid_fdmi_plogi); + if (frame == NULL) + return; + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: FDLS send FDMI PLOGI abts. iport->fabric.state: %d oxid: 0x%x", + iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_plogi); fnic_send_fcoe_frame(iport, frame, frame_size); } else { if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) { - oxid = iport->active_oxid_fdmi_rhba; - FNIC_STD_SET_OX_ID(*pfabric_abts, oxid); + frame = fdls_alloc_init_fdmi_abts_frame(iport, + iport->active_oxid_fdmi_rhba); + if (frame == NULL) + return; + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: FDLS send FDMI RHBA abts. iport->fabric.state: %d oxid: 0x%x", + iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_rhba); fnic_send_fcoe_frame(iport, frame, frame_size); } if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) { - oxid = iport->active_oxid_fdmi_rpa; - FNIC_STD_SET_OX_ID(*pfabric_abts, oxid); + frame = fdls_alloc_init_fdmi_abts_frame(iport, + iport->active_oxid_fdmi_rpa); + if (frame == NULL) { + if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) + goto arm_timer; + else + return; + } + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: FDLS send FDMI RPA abts. iport->fabric.state: %d oxid: 0x%x", + iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_rpa); fnic_send_fcoe_frame(iport, frame, frame_size); } } +arm_timer: fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov); mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov)); iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING; + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); } static void fdls_send_fabric_flogi(struct fnic_iport_s *iport) @@ -2245,6 +2281,21 @@ void fdls_fabric_timer_callback(struct timer_list *t) spin_unlock_irqrestore(&fnic->fnic_lock, flags); } +void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport) +{ + struct fnic *fnic = iport->fnic; + + iport->fabric.fdmi_pending = 0; + /* If max retries not exhausted, start over from fdmi plogi */ + if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) { + iport->fabric.fdmi_retry++; + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "Retry FDMI PLOGI. FDMI retry: %d", + iport->fabric.fdmi_retry); + fdls_send_fdmi_plogi(iport); + } +} + void fdls_fdmi_timer_callback(struct timer_list *t) { struct fnic_fdls_fabric_s *fabric = timer_container_of(fabric, t, @@ -2257,7 +2308,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t) spin_lock_irqsave(&fnic->fnic_lock, flags); FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); if (!iport->fabric.fdmi_pending) { /* timer expired after fdmi responses received. 
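One subtlety in the reworked fdls_send_fdmi_abts() above: if the RPA frame allocation fails after an RHBA ABTS has already been sent, the function still has to arm the timer, otherwise the in-flight ABTS would never time out. Condensed from the hunk:

        frame = fdls_alloc_init_fdmi_abts_frame(iport,
                                                iport->active_oxid_fdmi_rpa);
        if (frame == NULL) {
                if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)
                        goto arm_timer; /* RHBA ABTS already on the wire */
                return;                 /* nothing sent, nothing to time out */
        }
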
*/ @@ -2265,7 +2316,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t) return; } FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); /* if not abort pending, send an abort */ if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) { @@ -2274,33 +2325,37 @@ void fdls_fdmi_timer_callback(struct timer_list *t) return; } FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); /* ABTS pending for an active fdmi request that is pending. * That means FDMI ABTS timed out * Schedule to free the OXID after 2*r_a_tov and proceed */ if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) { + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "FDMI PLOGI ABTS timed out. Schedule oxid free: 0x%x\n", + iport->active_oxid_fdmi_plogi); fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_plogi); } else { - if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) + if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) { + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "FDMI RHBA ABTS timed out. Schedule oxid free: 0x%x\n", + iport->active_oxid_fdmi_rhba); fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_rhba); - if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) + } + if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) { + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "FDMI RPA ABTS timed out. Schedule oxid free: 0x%x\n", + iport->active_oxid_fdmi_rpa); fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_rpa); + } } FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); - iport->fabric.fdmi_pending = 0; - /* If max retries not exhaused, start over from fdmi plogi */ - if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) { - iport->fabric.fdmi_retry++; - FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "retry fdmi timer %d", iport->fabric.fdmi_retry); - fdls_send_fdmi_plogi(iport); - } + fdls_fdmi_retry_plogi(iport); FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); spin_unlock_irqrestore(&fnic->fnic_lock, flags); } @@ -3715,13 +3770,60 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport, switch (FNIC_FRAME_TYPE(oxid)) { case FNIC_FRAME_TYPE_FDMI_PLOGI: + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "Received FDMI PLOGI ABTS rsp with oxid: 0x%x", oxid); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi); + + iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING; + iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); break; case FNIC_FRAME_TYPE_FDMI_RHBA: + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "Received FDMI RHBA ABTS rsp with oxid: 0x%x", oxid); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); + + 
iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING; + + /* If RPA is still pending, don't turn off ABORT PENDING. + * We count on the timer to detect the ABTS timeout and take + * corrective action. + */ + if (!(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING)) + iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; + fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); break; case FNIC_FRAME_TYPE_FDMI_RPA: + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "Received FDMI RPA ABTS rsp with oxid: 0x%x", oxid); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); + + iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING; + + /* If RHBA is still pending, don't turn off ABORT PENDING. + * We count on the timer to detect the ABTS timeout and take + * corrective action. + */ + if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)) + iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; + fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); break; default: FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, @@ -3730,10 +3832,16 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport, break; } - timer_delete_sync(&iport->fabric.fdmi_timer); - iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; - - fdls_send_fdmi_plogi(iport); + /* + * Only if ABORT PENDING is off, delete the timer, and if no other + * operations are pending, retry FDMI. + * Otherwise, let the timer pop and take the appropriate action. 
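The flag handling in fdls_process_fdmi_abts_rsp() above encodes one rule: FDLS_FDMI_ABORT_PENDING may only be cleared once neither registration exchange is still outstanding, so a lone RHBA (or RPA) completion leaves the timer armed to catch the other ABTS timing out. Condensed sketch of the RPA case:

        iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING;
        if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING))
                iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
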
+ */ + if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) { + timer_delete_sync(&iport->fabric.fdmi_timer); + if (!iport->fabric.fdmi_pending) + fdls_fdmi_retry_plogi(iport); + } } static void @@ -4972,9 +5080,12 @@ void fnic_fdls_link_down(struct fnic_iport_s *iport) fdls_delete_tport(iport, tport); } - if ((fnic_fdmi_support == 1) && (iport->fabric.fdmi_pending > 0)) { - timer_delete_sync(&iport->fabric.fdmi_timer); - iport->fabric.fdmi_pending = 0; + if (fnic_fdmi_support == 1) { + if (iport->fabric.fdmi_pending > 0) { + timer_delete_sync(&iport->fabric.fdmi_timer); + iport->fabric.fdmi_pending = 0; + } + iport->flags &= ~FNIC_FDMI_ACTIVE; } FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 6c5f6046b1f5..c2fdc6553e62 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -30,7 +30,7 @@ #define DRV_NAME "fnic" #define DRV_DESCRIPTION "Cisco FCoE HBA Driver" -#define DRV_VERSION "1.8.0.0" +#define DRV_VERSION "1.8.0.2" #define PFX DRV_NAME ": " #define DFX DRV_NAME "%d: " diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c index 1e8cd64f9a5c..103ab6f1f7cd 100644 --- a/drivers/scsi/fnic/fnic_fcs.c +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -636,6 +636,8 @@ static int fnic_send_frame(struct fnic *fnic, void *frame, int frame_len) unsigned long flags; pa = dma_map_single(&fnic->pdev->dev, frame, frame_len, DMA_TO_DEVICE); + if (dma_mapping_error(&fnic->pdev->dev, pa)) + return -ENOMEM; if ((fnic_fc_trace_set_data(fnic->fnic_num, FNIC_FC_SEND | 0x80, (char *) frame, diff --git a/drivers/scsi/fnic/fnic_fdls.h b/drivers/scsi/fnic/fnic_fdls.h index 8e610b65ad57..531d0b37e450 100644 --- a/drivers/scsi/fnic/fnic_fdls.h +++ b/drivers/scsi/fnic/fnic_fdls.h @@ -394,6 +394,7 @@ void fdls_send_tport_abts(struct fnic_iport_s *iport, bool fdls_delete_tport(struct fnic_iport_s *iport, struct fnic_tport_s *tport); void fdls_fdmi_timer_callback(struct timer_list *t); +void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport); /* fnic_fcs.c */ void fnic_fdls_init(struct fnic *fnic, int usefip); diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 7133b254cbe4..75b29a018d1f 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -1046,7 +1046,7 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, unsigned int cq_ind if (icmnd_cmpl->scsi_status == SAM_STAT_TASK_SET_FULL) atomic64_inc(&fnic_stats->misc_stats.queue_fulls); - FNIC_SCSI_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + FNIC_SCSI_DBG(KERN_DEBUG, fnic->host, fnic->fnic_num, "xfer_len: %llu", xfer_len); break; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 3aac0e17cb00..9179f8aee964 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5910,7 +5910,11 @@ megasas_set_high_iops_queue_affinity_and_hint(struct megasas_instance *instance) const struct cpumask *mask; if (instance->perf_mode == MR_BALANCED_PERF_MODE) { - mask = cpumask_of_node(dev_to_node(&instance->pdev->dev)); + int nid = dev_to_node(&instance->pdev->dev); + + if (nid == NUMA_NO_NODE) + nid = 0; + mask = cpumask_of_node(nid); for (i = 0; i < instance->low_latency_index_start; i++) { irq = pci_irq_vector(instance->pdev, i); diff --git a/drivers/scsi/mvsas/mv_defs.h b/drivers/scsi/mvsas/mv_defs.h index 8ef174cd4d37..3e4124177b2a 100644 --- a/drivers/scsi/mvsas/mv_defs.h +++ b/drivers/scsi/mvsas/mv_defs.h @@ 
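The fnic_send_frame() fix above adds the mandatory check on dma_map_single(): mappings can fail (IOMMU or bounce-buffer exhaustion), and a dma_addr_t must be validated with dma_mapping_error() before it is handed to hardware. The pattern:

        pa = dma_map_single(&fnic->pdev->dev, frame, frame_len, DMA_TO_DEVICE);
        if (dma_mapping_error(&fnic->pdev->dev, pa))
                return -ENOMEM; /* never post an unchecked DMA address */
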
-215,7 +215,7 @@ enum hw_register_bits { /* MVS_Px_INT_STAT, MVS_Px_INT_MASK (per-phy events) */ PHYEV_DEC_ERR = (1U << 24), /* Phy Decoding Error */ - PHYEV_DCDR_ERR = (1U << 23), /* STP Deocder Error */ + PHYEV_DCDR_ERR = (1U << 23), /* STP Decoder Error */ PHYEV_CRC_ERR = (1U << 22), /* STP CRC Error */ PHYEV_UNASSOC_FIS = (1U << 19), /* unassociated FIS rx'd */ PHYEV_AN = (1U << 18), /* SATA async notification */ @@ -347,7 +347,7 @@ enum sas_cmd_port_registers { CMD_SATA_PORT_MEM_CTL0 = 0x158, /* SATA Port Memory Control 0 */ CMD_SATA_PORT_MEM_CTL1 = 0x15c, /* SATA Port Memory Control 1 */ CMD_XOR_MEM_BIST_CTL = 0x160, /* XOR Memory BIST Control */ - CMD_XOR_MEM_BIST_STAT = 0x164, /* XOR Memroy BIST Status */ + CMD_XOR_MEM_BIST_STAT = 0x164, /* XOR Memory BIST Status */ CMD_DMA_MEM_BIST_CTL = 0x168, /* DMA Memory BIST Control */ CMD_DMA_MEM_BIST_STAT = 0x16c, /* DMA Memory BIST Status */ CMD_PORT_MEM_BIST_CTL = 0x170, /* Port Memory BIST Control */ diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 376b8897ab90..746ff6a1f309 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -665,7 +665,8 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd) * if the device is in the process of becoming ready, we * should retry. */ - if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01)) + if ((sshdr.asc == 0x04) && + (sshdr.ascq == 0x01 || sshdr.ascq == 0x0a)) return NEEDS_RETRY; /* * if the device is not started, we need to wake diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0b8c91bf793f..c75a806496d6 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -3499,7 +3499,7 @@ static int iscsi_new_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.new_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_new_fnode; } index = transport->new_flashnode(shost, data, len); @@ -3509,7 +3509,6 @@ static int iscsi_new_flashnode(struct iscsi_transport *transport, else err = -EIO; -put_host: scsi_host_put(shost); exit_new_fnode: @@ -3534,7 +3533,7 @@ static int iscsi_del_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.del_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_del_fnode; } idx = ev->u.del_flashnode.flashnode_idx; @@ -3576,7 +3575,7 @@ static int iscsi_login_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.login_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_login_fnode; } idx = ev->u.login_flashnode.flashnode_idx; @@ -3628,7 +3627,7 @@ static int iscsi_logout_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_logout_fnode; } idx = ev->u.logout_flashnode.flashnode_idx; @@ -3678,7 +3677,7 @@ static int iscsi_logout_flashnode_sid(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_logout_sid; } session = iscsi_session_lookup(ev->u.logout_flashnode_sid.sid); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 2e6b2412d2c9..d9e59204a9c3 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -362,7 +362,7 @@ MODULE_PARM_DESC(ring_avail_percent_lowater, /* * Timeout in seconds for all devices managed by this 
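The scsi_transport_iscsi relabeling above is an error-path scoping fix: when the host lookup fails, no reference was taken, so jumping to a label that calls scsi_host_put() would drop a reference the function never owned. The shape of the corrected flow, sketched with a hypothetical body:

        shost = scsi_host_lookup(host_no);      /* referenced on success */
        if (!shost) {
                err = -ENODEV;
                goto exit;      /* nothing acquired, skip the put */
        }

        err = do_flashnode_op(shost);   /* hypothetical */
        scsi_host_put(shost);           /* release only what was acquired */
exit:
        return err;
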
driver. */ -static int storvsc_timeout = 180; +static const int storvsc_timeout = 180; #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) static struct scsi_transport_template *fc_transport_template; @@ -768,7 +768,7 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) return; } - t = wait_for_completion_timeout(&request->wait_event, 10*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) { dev_err(dev, "Failed to create sub-channel: timed out\n"); return; @@ -833,7 +833,7 @@ static int storvsc_execute_vstor_op(struct hv_device *device, if (ret != 0) return ret; - t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) return -ETIMEDOUT; @@ -1350,6 +1350,8 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size, return ret; ret = storvsc_channel_init(device, is_fc); + if (ret) + vmbus_close(device->channel); return ret; } @@ -1668,7 +1670,7 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) if (ret != 0) return FAILED; - t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) return TIMEOUT_ERROR; diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index fe0f122f07b0..aa1932ba17cb 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1958,10 +1958,10 @@ static int cqspi_probe(struct platform_device *pdev) goto probe_setup_failed; } - ret = devm_pm_runtime_enable(dev); - if (ret) { - if (cqspi->rx_chan) - dma_release_channel(cqspi->rx_chan); + pm_runtime_enable(dev); + + if (cqspi->rx_chan) { + dma_release_channel(cqspi->rx_chan); goto probe_setup_failed; } @@ -1981,6 +1981,7 @@ static int cqspi_probe(struct platform_device *pdev) return 0; probe_setup_failed: cqspi_controller_enable(cqspi, 0); + pm_runtime_disable(dev); probe_reset_failed: if (cqspi->is_jh7110) cqspi_jh7110_disable_clk(pdev, cqspi); @@ -1999,7 +2000,8 @@ static void cqspi_remove(struct platform_device *pdev) if (cqspi->rx_chan) dma_release_channel(cqspi->rx_chan); - clk_disable_unprepare(cqspi->clk); + if (pm_runtime_get_sync(&pdev->dev) >= 0) + clk_disable(cqspi->clk); if (cqspi->is_jh7110) cqspi_jh7110_disable_clk(pdev, cqspi); diff --git a/drivers/spi/spi-loongson-core.c b/drivers/spi/spi-loongson-core.c index 4fec226456d1..b46f072a0387 100644 --- a/drivers/spi/spi-loongson-core.c +++ b/drivers/spi/spi-loongson-core.c @@ -5,6 +5,7 @@ #include <linux/clk.h> #include <linux/delay.h> #include <linux/err.h> +#include <linux/export.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/io.h> diff --git a/drivers/spi/spi-offload.c b/drivers/spi/spi-offload.c index e674097bf3be..d336f4d228d5 100644 --- a/drivers/spi/spi-offload.c +++ b/drivers/spi/spi-offload.c @@ -297,7 +297,7 @@ int spi_offload_trigger_enable(struct spi_offload *offload, if (trigger->ops->enable) { ret = trigger->ops->enable(trigger, config); if (ret) { - if (offload->ops->trigger_disable) + if (offload->ops && offload->ops->trigger_disable) offload->ops->trigger_disable(offload); return ret; } diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 29c616e2c408..70bb74b3bd9c 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -134,6 +134,7 @@ struct omap2_mcspi { size_t max_xfer_len; u32 ref_clk_hz; bool use_multi_mode; + bool 
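The storvsc change above replaces the scattered 5*HZ and 10*HZ waits with the driver's one existing timeout knob, now const since nothing may change it at runtime. Note the return convention being relied on:

        /* wait_for_completion_timeout() returns 0 if the timeout elapsed,
         * otherwise the number of jiffies remaining.
         */
        t = wait_for_completion_timeout(&request->wait_event,
                                        storvsc_timeout * HZ);
        if (t == 0)
                return -ETIMEDOUT;
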
last_msg_kept_cs; }; struct omap2_mcspi_cs { @@ -1269,6 +1270,10 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, * multi-mode is applicable. */ mcspi->use_multi_mode = true; + + if (mcspi->last_msg_kept_cs) + mcspi->use_multi_mode = false; + list_for_each_entry(tr, &msg->transfers, transfer_list) { if (!tr->bits_per_word) bits_per_word = msg->spi->bits_per_word; @@ -1287,18 +1292,19 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, mcspi->use_multi_mode = false; } - /* Check if transfer asks to change the CS status after the transfer */ - if (!tr->cs_change) - mcspi->use_multi_mode = false; - - /* - * If at least one message is not compatible, switch back to single mode - * - * The bits_per_word of certain transfer can be different, but it will have no - * impact on the signal itself. - */ - if (!mcspi->use_multi_mode) - break; + if (list_is_last(&tr->transfer_list, &msg->transfers)) { + /* Check if transfer asks to keep the CS status after the whole message */ + if (tr->cs_change) { + mcspi->use_multi_mode = false; + mcspi->last_msg_kept_cs = true; + } else { + mcspi->last_msg_kept_cs = false; + } + } else { + /* Check if transfer asks to change the CS status after the transfer */ + if (!tr->cs_change) + mcspi->use_multi_mode = false; + } } omap2_mcspi_set_mode(ctlr); diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c index c6489e90b8b9..e27642c4dea4 100644 --- a/drivers/spi/spi-pci1xxxx.c +++ b/drivers/spi/spi-pci1xxxx.c @@ -762,10 +762,10 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id * return -EINVAL; num_vector = pci_alloc_irq_vectors(pdev, 1, hw_inst_cnt, - PCI_IRQ_ALL_TYPES); + PCI_IRQ_INTX | PCI_IRQ_MSI); if (num_vector < 0) { dev_err(&pdev->dev, "Error allocating MSI vectors\n"); - return ret; + return num_vector; } init_completion(&spi_sub_ptr->spi_xfer_done); diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c index 7c1fa55fbc47..4ab7e86f4bd5 100644 --- a/drivers/spi/spi-stm32-ospi.c +++ b/drivers/spi/spi-stm32-ospi.c @@ -804,7 +804,7 @@ static int stm32_ospi_get_resources(struct platform_device *pdev) return ret; } - ospi->rstc = devm_reset_control_array_get_exclusive(dev); + ospi->rstc = devm_reset_control_array_get_exclusive_released(dev); if (IS_ERR(ospi->rstc)) return dev_err_probe(dev, PTR_ERR(ospi->rstc), "Can't get reset\n"); @@ -936,12 +936,16 @@ static int stm32_ospi_probe(struct platform_device *pdev) if (ret < 0) goto err_pm_enable; - if (ospi->rstc) { - reset_control_assert(ospi->rstc); - udelay(2); - reset_control_deassert(ospi->rstc); + ret = reset_control_acquire(ospi->rstc); + if (ret) { + dev_err_probe(dev, ret, "Can not acquire reset %d\n", ret); + goto err_pm_resume; } + reset_control_assert(ospi->rstc); + udelay(2); + reset_control_deassert(ospi->rstc); + ret = spi_register_controller(ctrl); if (ret) { /* Disable ospi */ @@ -987,6 +991,8 @@ static void stm32_ospi_remove(struct platform_device *pdev) if (ospi->dma_chrx) dma_release_channel(ospi->dma_chrx); + reset_control_release(ospi->rstc); + pm_runtime_put_sync_suspend(ospi->dev); pm_runtime_force_suspend(ospi->dev); } @@ -997,6 +1003,8 @@ static int __maybe_unused stm32_ospi_suspend(struct device *dev) pinctrl_pm_select_sleep_state(dev); + reset_control_release(ospi->rstc); + return pm_runtime_force_suspend(ospi->dev); } @@ -1016,6 +1024,12 @@ static int __maybe_unused stm32_ospi_resume(struct device *dev) if (ret < 0) return ret; + ret = reset_control_acquire(ospi->rstc); + if (ret) { + 
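The omap2-mcspi logic above hinges on the double meaning of spi_transfer.cs_change: on an intermediate transfer it requests a chip-select toggle between transfers, but on the last transfer of a message it asks for CS to stay asserted after the message ends. The driver must fall back from multi-mode in both cases, and remember the latter for the following message. Illustrative use, with buf/len assumed:

        struct spi_transfer last_xfer = {
                .tx_buf    = buf,
                .len       = len,
                .cs_change = 1, /* last transfer: keep CS asserted afterwards */
        };
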
dev_err(dev, "Can not acquire reset\n"); + return ret; + } + writel_relaxed(ospi->cr_reg, regs_base + OSPI_CR); writel_relaxed(ospi->dcr_reg, regs_base + OSPI_DCR1); pm_runtime_mark_last_busy(ospi->dev); diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 3581757a269b..3be7499db21e 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -407,9 +407,6 @@ tegra_qspi_read_rx_fifo_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_tra static void tegra_qspi_copy_client_txbuf_to_qspi_txbuf(struct tegra_qspi *tqspi, struct spi_transfer *t) { - dma_sync_single_for_cpu(tqspi->dev, tqspi->tx_dma_phys, - tqspi->dma_buf_size, DMA_TO_DEVICE); - /* * In packed mode, each word in FIFO may contain multiple packets * based on bits per word. So all bytes in each FIFO word are valid. @@ -442,17 +439,11 @@ tegra_qspi_copy_client_txbuf_to_qspi_txbuf(struct tegra_qspi *tqspi, struct spi_ tqspi->cur_tx_pos += write_bytes; } - - dma_sync_single_for_device(tqspi->dev, tqspi->tx_dma_phys, - tqspi->dma_buf_size, DMA_TO_DEVICE); } static void tegra_qspi_copy_qspi_rxbuf_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_transfer *t) { - dma_sync_single_for_cpu(tqspi->dev, tqspi->rx_dma_phys, - tqspi->dma_buf_size, DMA_FROM_DEVICE); - if (tqspi->is_packed) { tqspi->cur_rx_pos += tqspi->curr_dma_words * tqspi->bytes_per_word; } else { @@ -478,9 +469,6 @@ tegra_qspi_copy_qspi_rxbuf_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_ tqspi->cur_rx_pos += read_bytes; } - - dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys, - tqspi->dma_buf_size, DMA_FROM_DEVICE); } static void tegra_qspi_dma_complete(void *args) @@ -701,8 +689,6 @@ static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct return ret; } - dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys, - tqspi->dma_buf_size, DMA_FROM_DEVICE); ret = tegra_qspi_start_rx_dma(tqspi, t, len); if (ret < 0) { dev_err(tqspi->dev, "failed to start RX DMA: %d\n", ret); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 34cf2c399b39..70905805cb17 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1842,7 +1842,9 @@ out: } kmem_cache_free(t10_pr_reg_cache, dest_pr_reg); - core_scsi3_lunacl_undepend_item(dest_se_deve); + + if (dest_se_deve) + core_scsi3_lunacl_undepend_item(dest_se_deve); if (is_local) continue; diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 4410e7d93b7d..50adfb8b335b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6623,9 +6623,14 @@ static void ufshcd_err_handler(struct work_struct *work) up(&hba->host_sem); return; } - ufshcd_set_eh_in_progress(hba); spin_unlock_irqrestore(hba->host->host_lock, flags); + ufshcd_err_handling_prepare(hba); + + spin_lock_irqsave(hba->host->host_lock, flags); + ufshcd_set_eh_in_progress(hba); + spin_unlock_irqrestore(hba->host->host_lock, flags); + /* Complete requests that have door-bell cleared by h/w */ ufshcd_complete_requests(hba, false); spin_lock_irqsave(hba->host->host_lock, flags); @@ -7802,7 +7807,8 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) hba->silence_err_logs = false; /* scale up clocks to max frequency before full reinitialization */ - ufshcd_scale_clks(hba, ULONG_MAX, true); + if (ufshcd_is_clkscaling_supported(hba)) + ufshcd_scale_clks(hba, ULONG_MAX, true); err = ufshcd_hba_enable(hba); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 
3651a296d506..5a1cede2febf 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -296,7 +296,6 @@ do { \ #define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n") void bch2_print_str(struct bch_fs *, const char *, const char *); -void bch2_print_str_nonblocking(struct bch_fs *, const char *, const char *); __printf(2, 3) void bch2_print_opts(struct bch_opts *, const char *, ...); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 9ddcbe1bda78..e92cf3928c63 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -397,7 +397,11 @@ again: continue; } - ret = btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan); + ret = lockrestart_do(trans, + btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan)); + if (ret < 0) + goto err; + if (ret == DID_FILL_FROM_SCAN) { new_pass = true; ret = 0; @@ -438,7 +442,8 @@ again: if (!ret && !IS_ERR_OR_NULL(prev)) { BUG_ON(cur); - ret = btree_repair_node_end(trans, b, prev, pulled_from_scan); + ret = lockrestart_do(trans, + btree_repair_node_end(trans, b, prev, pulled_from_scan)); if (ret == DID_FILL_FROM_SCAN) { new_pass = true; ret = 0; @@ -519,6 +524,46 @@ fsck_err: bch2_bkey_buf_exit(&prev_k, c); bch2_bkey_buf_exit(&cur_k, c); printbuf_exit(&buf); + bch_err_fn(c, ret); + return ret; +} + +static int bch2_check_root(struct btree_trans *trans, enum btree_id i, + bool *reconstructed_root) +{ + struct bch_fs *c = trans->c; + struct btree_root *r = bch2_btree_id_root(c, i); + struct printbuf buf = PRINTBUF; + int ret = 0; + + bch2_btree_id_to_text(&buf, i); + + if (r->error) { + bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); + + r->alive = false; + r->error = 0; + + if (!bch2_btree_has_scanned_nodes(c, i)) { + __fsck_err(trans, + FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0), + btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", buf.buf); + bch2_btree_root_alloc_fake_trans(trans, i, 0); + } else { + bch2_btree_root_alloc_fake_trans(trans, i, 1); + bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX); + if (ret) + goto err; + } + + *reconstructed_root = true; + } +err: +fsck_err: + printbuf_exit(&buf); + bch_err_fn(c, ret); return ret; } @@ -526,42 +571,18 @@ int bch2_check_topology(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); struct bpos pulled_from_scan = POS_MIN; - struct printbuf buf = PRINTBUF; int ret = 0; bch2_trans_srcu_unlock(trans); for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) { - struct btree_root *r = bch2_btree_id_root(c, i); bool reconstructed_root = false; +recover: + ret = lockrestart_do(trans, bch2_check_root(trans, i, &reconstructed_root)); + if (ret) + break; - printbuf_reset(&buf); - bch2_btree_id_to_text(&buf, i); - - if (r->error) { -reconstruct_root: - bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); - - r->alive = false; - r->error = 0; - - if (!bch2_btree_has_scanned_nodes(c, i)) { - __fsck_err(trans, - FSCK_CAN_FIX|(!btree_id_important(i) ? 
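The btree_gc change above wraps the repair helpers in lockrestart_do(), the bcachefs idiom that re-begins the transaction and re-evaluates the expression whenever it returns a transaction-restart error, so the helper may drop and retake locks without the caller seeing a spurious failure. Usage shape:

        ret = lockrestart_do(trans,
                btree_check_node_boundaries(trans, b, prev, cur,
                                            pulled_from_scan));
        if (ret < 0)
                goto err;       /* restarts were consumed inside the macro */
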
FSCK_AUTOFIX : 0), - btree_root_unreadable_and_scan_found_nothing, - "no nodes found for btree %s, continue?", buf.buf); - bch2_btree_root_alloc_fake_trans(trans, i, 0); - } else { - bch2_btree_root_alloc_fake_trans(trans, i, 1); - bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX); - if (ret) - break; - } - - reconstructed_root = true; - } - + struct btree_root *r = bch2_btree_id_root(c, i); struct btree *b = r->b; btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); @@ -575,17 +596,21 @@ reconstruct_root: r->b = NULL; - if (!reconstructed_root) - goto reconstruct_root; + if (!reconstructed_root) { + r->error = -EIO; + goto recover; + } + struct printbuf buf = PRINTBUF; + bch2_btree_id_to_text(&buf, i); bch_err(c, "empty btree root %s", buf.buf); + printbuf_exit(&buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); r->alive = false; ret = 0; } } -fsck_err: - printbuf_exit(&buf); + bch2_trans_put(trans); return ret; } diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 57eff3012a7b..d8f3c4c65e90 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -741,16 +741,22 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BCH_VERSION_MAJOR(version), BCH_VERSION_MINOR(version)); - if (btree_err_on(version < c->sb.version_min, + if (c->recovery.curr_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes && + btree_err_on(version < c->sb.version_min, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, NULL, btree_node_bset_older_than_sb_min, "bset version %u older than superblock version_min %u", version, c->sb.version_min)) { - mutex_lock(&c->sb_lock); - c->disk_sb.sb->version_min = cpu_to_le16(version); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + if (bch2_version_compatible(version)) { + mutex_lock(&c->sb_lock); + c->disk_sb.sb->version_min = cpu_to_le16(version); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } else { + /* We have no idea what's going on: */ + i->version = cpu_to_le16(c->sb.version); + } } if (btree_err_on(BCH_VERSION_MAJOR(version) > @@ -1045,6 +1051,7 @@ got_good_key: le16_add_cpu(&i->u64s, -next_good_key); memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k); set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_error(b); } fsck_err: printbuf_exit(&buf); @@ -1305,6 +1312,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, (u64 *) vstruct_end(i) - (u64 *) k); set_btree_bset_end(b, b->set); set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_error(b); continue; } if (ret) @@ -1329,12 +1337,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); - if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) + if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_degraded(b); + } } - if (!ptr_written) + if (!ptr_written) { set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_ptr_written_zero(b); + } fsck_err: mempool_free(iter, &c->fill_iter); printbuf_exit(&buf); diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 47035aae232e..91a51aef82f1 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -213,7 +213,7 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g) prt_newline(&buf); } - 
bch2_print_str_nonblocking(g->g->trans->c, KERN_ERR, buf.buf); + bch2_print_str(g->g->trans->c, KERN_ERR, buf.buf); printbuf_exit(&buf); BUG(); } diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 9adca77e2580..f2173a3316f4 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -417,8 +417,10 @@ static inline void btree_path_set_should_be_locked(struct btree_trans *trans, st EBUG_ON(!btree_node_locked(path, path->level)); EBUG_ON(path->uptodate); - path->should_be_locked = true; - trace_btree_path_should_be_locked(trans, path); + if (!path->should_be_locked) { + path->should_be_locked = true; + trace_btree_path_should_be_locked(trans, path); + } } static inline void __btree_path_set_level_up(struct btree_trans *trans, diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index c61c4171ae50..3aa4a602bd02 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -617,6 +617,9 @@ enum btree_write_type { x(dying) \ x(fake) \ x(need_rewrite) \ + x(need_rewrite_error) \ + x(need_rewrite_degraded) \ + x(need_rewrite_ptr_written_zero) \ x(never_write) \ x(pinned) @@ -641,6 +644,32 @@ static inline void clear_btree_node_ ## flag(struct btree *b) \ BTREE_FLAGS() #undef x +#define BTREE_NODE_REWRITE_REASON() \ + x(none) \ + x(unknown) \ + x(error) \ + x(degraded) \ + x(ptr_written_zero) + +enum btree_node_rewrite_reason { +#define x(n) BTREE_NODE_REWRITE_##n, + BTREE_NODE_REWRITE_REASON() +#undef x +}; + +static inline enum btree_node_rewrite_reason btree_node_rewrite_reason(struct btree *b) +{ + if (btree_node_need_rewrite_ptr_written_zero(b)) + return BTREE_NODE_REWRITE_ptr_written_zero; + if (btree_node_need_rewrite_degraded(b)) + return BTREE_NODE_REWRITE_degraded; + if (btree_node_need_rewrite_error(b)) + return BTREE_NODE_REWRITE_error; + if (btree_node_need_rewrite(b)) + return BTREE_NODE_REWRITE_unknown; + return BTREE_NODE_REWRITE_none; +} + static inline struct btree_write *btree_current_write(struct btree *b) { return b->writes + btree_node_write_idx(b); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index d2ecb782919b..e77584607f0d 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1138,6 +1138,13 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans * start_time); } +static const char * const btree_node_reawrite_reason_strs[] = { +#define x(n) #n, + BTREE_NODE_REWRITE_REASON() +#undef x + NULL, +}; + static struct btree_update * bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, unsigned level_start, bool split, @@ -1232,6 +1239,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, list_add_tail(&as->list, &c->btree_interior_update_list); mutex_unlock(&c->btree_interior_update_lock); + struct btree *b = btree_path_node(path, path->level); + as->node_start = b->data->min_key; + as->node_end = b->data->max_key; + as->node_needed_rewrite = btree_node_rewrite_reason(b); + as->node_written = b->written; + as->node_sectors = btree_buf_bytes(b) >> 9; + as->node_remaining = __bch2_btree_u64s_remaining(b, + btree_bkey_last(b, bset_tree_last(b))); + /* * We don't want to allocate if we're in an error state, that can cause * deadlock on emergency shutdown due to open buckets getting stuck in @@ -2108,6 +2124,9 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, if (ret) goto err; + as->node_start = prev->data->min_key; + as->node_end = 
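The new rewrite-reason flags above ride on the X-macro pattern bcachefs uses throughout: a single list expands into btree-node flags, an enum, and (in btree_update_interior.c) the matching string table, so the three can never drift apart. A generic sketch of the technique:

        #define REASONS() x(none) x(error) x(degraded)

        enum reason {
        #define x(n) REASON_##n,
                REASONS()
        #undef x
        };

        static const char * const reason_strs[] = {
        #define x(n) #n,
                REASONS()
        #undef x
                NULL,
        };
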
next->data->max_key; + trace_and_count(c, btree_node_merge, trans, b); n = bch2_btree_node_alloc(as, trans, b->c.level); @@ -2681,9 +2700,19 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update prt_str(out, " "); bch2_btree_id_to_text(out, as->btree_id); - prt_printf(out, " l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", + prt_printf(out, " l=%u-%u ", as->update_level_start, - as->update_level_end, + as->update_level_end); + bch2_bpos_to_text(out, as->node_start); + prt_char(out, ' '); + bch2_bpos_to_text(out, as->node_end); + prt_printf(out, "\nwritten %u/%u u64s_remaining %u need_rewrite %s", + as->node_written, + as->node_sectors, + as->node_remaining, + btree_node_reawrite_reason_strs[as->node_needed_rewrite]); + + prt_printf(out, "\nmode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", bch2_btree_update_modes[as->mode], as->nodes_written, closure_nr_remaining(&as->cl), diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 7fe793788a79..b649c36c3fbb 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -57,6 +57,13 @@ struct btree_update { unsigned took_gc_lock:1; enum btree_id btree_id; + struct bpos node_start; + struct bpos node_end; + enum btree_node_rewrite_reason node_needed_rewrite; + u16 node_written; + u16 node_sectors; + u16 node_remaining; + unsigned update_level_start; unsigned update_level_end; diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 2d38466eddfd..fde3c2380e28 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -399,7 +399,7 @@ static long bch2_ioctl_data(struct bch_fs *c, return ret; } -static long bch2_ioctl_fs_usage(struct bch_fs *c, +static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c, struct bch_ioctl_fs_usage __user *user_arg) { struct bch_ioctl_fs_usage arg = {}; @@ -469,7 +469,7 @@ err: } /* obsolete, didn't allow for new data types: */ -static long bch2_ioctl_dev_usage(struct bch_fs *c, +static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c, struct bch_ioctl_dev_usage __user *user_arg) { struct bch_ioctl_dev_usage arg; diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 3d59a57a5256..f7528cd69c73 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -618,7 +618,9 @@ int bch2_gc_accounting_done(struct bch_fs *c) for (unsigned j = 0; j < nr; j++) src_v[j] -= dst_v[j]; - if (fsck_err(trans, accounting_mismatch, "%s", buf.buf)) { + bch2_trans_unlock_long(trans); + + if (fsck_err(c, accounting_mismatch, "%s", buf.buf)) { percpu_up_write(&c->mark_lock); ret = commit_do(trans, NULL, NULL, 0, bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false)); diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 63951e293c47..a8ec6aae5738 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -69,7 +69,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra if (trans) bch2_trans_updates_to_text(&buf, trans); bool ret = __bch2_inconsistent_error(c, &buf); - bch2_print_str_nonblocking(c, KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); return ret; @@ -620,6 +620,9 @@ print: if (s) s->ret = ret; + + if (trans) + ret = bch2_trans_log_str(trans, bch2_sb_error_strs[err]) ?: ret; err_unlock: mutex_unlock(&c->fsck_error_msgs_lock); err: diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 85d13f800165..3063a8ddc2df 100644 --- a/fs/bcachefs/fs.c +++ 
b/fs/bcachefs/fs.c @@ -2490,6 +2490,14 @@ static int bch2_fs_get_tree(struct fs_context *fc) if (ret) goto err_stop_fs; + /* + * We might be doing a RO mount because other options required it, or we + * have no alloc info and it's a small image with no room to regenerate + * it + */ + if (c->opts.read_only) + fc->sb_flags |= SB_RDONLY; + sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c); ret = PTR_ERR_OR_ZERO(sb); if (ret) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index a77779afad01..04bbdcf58e40 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -343,6 +343,10 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans, *bounce = true; *read_full = promote_full; + + if (have_io_error(failed)) + orig->self_healing = true; + return promote; nopromote: trace_io_read_nopromote(c, ret); @@ -635,12 +639,15 @@ static void bch2_rbio_retry(struct work_struct *work) prt_str(&buf, "(internal move) "); prt_str(&buf, "data read error, "); - if (!ret) + if (!ret) { prt_str(&buf, "successful retry"); - else + if (rbio->self_healing) + prt_str(&buf, ", self healing"); + } else prt_str(&buf, bch2_err_str(ret)); prt_newline(&buf); + if (!bkey_deleted(&sk.k->k)) { bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k)); prt_newline(&buf); diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h index 45c959018919..9c5ddbf861b3 100644 --- a/fs/bcachefs/io_read.h +++ b/fs/bcachefs/io_read.h @@ -44,6 +44,7 @@ struct bch_read_bio { have_ioref:1, narrow_crcs:1, saw_error:1, + self_healing:1, context:2; }; u16 _state; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 6d7b1d5f7697..27e68d470ad0 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -28,7 +28,7 @@ #include <linux/wait.h> struct buckets_in_flight { - struct rhashtable table; + struct rhashtable *table; struct move_bucket *first; struct move_bucket *last; size_t nr; @@ -98,7 +98,7 @@ out: static void move_bucket_free(struct buckets_in_flight *list, struct move_bucket *b) { - int ret = rhashtable_remove_fast(&list->table, &b->hash, + int ret = rhashtable_remove_fast(list->table, &b->hash, bch_move_bucket_params); BUG_ON(ret); kfree(b); @@ -133,7 +133,7 @@ static void move_buckets_wait(struct moving_context *ctxt, static bool bucket_in_flight(struct buckets_in_flight *list, struct move_bucket_key k) { - return rhashtable_lookup_fast(&list->table, &k, bch_move_bucket_params); + return rhashtable_lookup_fast(list->table, &k, bch_move_bucket_params); } static int bch2_copygc_get_buckets(struct moving_context *ctxt, @@ -185,7 +185,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, goto err; } - ret2 = rhashtable_lookup_insert_fast(&buckets_in_flight->table, &b_i->hash, + ret2 = rhashtable_lookup_insert_fast(buckets_in_flight->table, &b_i->hash, bch_move_bucket_params); BUG_ON(ret2); @@ -350,10 +350,13 @@ static int bch2_copygc_thread(void *arg) struct buckets_in_flight buckets = {}; u64 last, wait; - int ret = rhashtable_init(&buckets.table, &bch_move_bucket_params); + buckets.table = kzalloc(sizeof(*buckets.table), GFP_KERNEL); + int ret = !buckets.table + ? 
-ENOMEM + : rhashtable_init(buckets.table, &bch_move_bucket_params); bch_err_msg(c, ret, "allocating copygc buckets in flight"); if (ret) - return ret; + goto err; set_freezable(); @@ -421,11 +424,12 @@ static int bch2_copygc_thread(void *arg) } move_buckets_wait(&ctxt, &buckets, true); - rhashtable_destroy(&buckets.table); + rhashtable_destroy(buckets.table); bch2_moving_ctxt_exit(&ctxt); bch2_move_stats_exit(&move_stats, c); - - return 0; +err: + kfree(buckets.table); + return ret; } void bch2_copygc_stop(struct bch_fs *c) diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 24120037c031..779c22eb3979 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -175,6 +175,16 @@ int bch2_create_trans(struct btree_trans *trans, new_inode->bi_dir_offset = dir_offset; } + if (S_ISDIR(mode)) { + ret = bch2_maybe_propagate_has_case_insensitive(trans, + (subvol_inum) { + new_inode->bi_subvol ?: dir.subvol, + new_inode->bi_inum }, + new_inode); + if (ret) + goto err; + } + if (S_ISDIR(mode) && !new_inode->bi_subvol) new_inode->bi_depth = dir_u->bi_depth + 1; diff --git a/fs/bcachefs/rcu_pending.c b/fs/bcachefs/rcu_pending.c index bef2aa1b8bcd..b1438be9d690 100644 --- a/fs/bcachefs/rcu_pending.c +++ b/fs/bcachefs/rcu_pending.c @@ -182,11 +182,6 @@ static inline void kfree_bulk(size_t nr, void ** p) while (nr--) kfree(*p); } - -#define local_irq_save(flags) \ -do { \ - flags = 0; \ -} while (0) #endif static noinline void __process_finished_items(struct rcu_pending *pending, @@ -429,9 +424,15 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN)); - local_irq_save(flags); - p = this_cpu_ptr(pending->p); - spin_lock(&p->lock); + /* We could technically be scheduled before taking the lock and end up + * using a different cpu's rcu_pending_pcpu: that's ok, it needs a lock + * anyways + * + * And we have to do it this way to avoid breaking PREEMPT_RT, which + * redefines how spinlocks work: + */ + p = raw_cpu_ptr(pending->p); + spin_lock_irqsave(&p->lock, flags); rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu); restart: if (may_sleep && @@ -520,9 +521,8 @@ check_expired: goto free_node; } - local_irq_save(flags); - p = this_cpu_ptr(pending->p); - spin_lock(&p->lock); + p = raw_cpu_ptr(pending->p); + spin_lock_irqsave(&p->lock, flags); goto restart; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 1e68e61f08e8..0b21fa6ff062 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -99,9 +99,11 @@ int bch2_btree_lost_data(struct bch_fs *c, goto out; case BTREE_ID_snapshots: ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret; goto out; default: + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret; goto out; } @@ -271,13 +273,24 @@ static int bch2_journal_replay_key(struct btree_trans *trans, goto out; struct btree_path *path = btree_iter_path(trans, &iter); - if (unlikely(!btree_path_node(path, k->level))) { + if (unlikely(!btree_path_node(path, k->level) && + !k->allocated)) { + struct bch_fs *c = trans->c; + + if (!(c->recovery.passes_complete & 
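The rcu_pending change above matters on PREEMPT_RT, where spinlocks are sleeping locks: disabling interrupts by hand around this_cpu_ptr() and then taking such a lock is illegal. Letting spin_lock_irqsave() manage the interrupt state is correct, and a migration between raw_cpu_ptr() and the lock is harmless because every per-CPU queue is lock-protected anyway:

        p = raw_cpu_ptr(pending->p);            /* may race with migration: ok */
        spin_lock_irqsave(&p->lock, flags);     /* primitive handles irq state */
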
(BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)| + BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) { + bch_err(c, "have key in journal replay for btree depth that does not exist, confused"); + ret = -EINVAL; + } +#if 0 bch2_trans_iter_exit(trans, &iter); bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, BTREE_MAX_DEPTH, 0, iter_flags); ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_increase_depth(trans, iter.path, 0) ?: -BCH_ERR_transaction_restart_nested; +#endif + k->overwritten = true; goto out; } @@ -739,9 +752,11 @@ int bch2_fs_recovery(struct bch_fs *c) ? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read) : BCH_RECOVERY_PASS_snapshots_read; c->opts.nochanges = true; - c->opts.read_only = true; } + if (c->opts.nochanges) + c->opts.read_only = true; + mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; @@ -1093,9 +1108,6 @@ use_clean: out: bch2_flush_fsck_errs(c); - if (!IS_ERR(clean)) - kfree(clean); - if (!ret && test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) && !c->opts.nochanges) { @@ -1104,6 +1116,9 @@ out: } bch_err_fn(c, ret); +final_out: + if (!IS_ERR(clean)) + kfree(clean); return ret; err: fsck_err: @@ -1117,7 +1132,7 @@ fsck_err: bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); } - return ret; + goto final_out; } int bch2_fs_initialize(struct bch_fs *c) diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 605588e33fb3..35ac0d64d73a 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -294,8 +294,13 @@ static bool recovery_pass_needs_set(struct bch_fs *c, enum bch_run_recovery_pass_flags *flags) { struct bch_fs_recovery *r = &c->recovery; - bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); - bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent); + + /* + * Never run scan_for_btree_nodes persistently: check_topology will run + * it if required + */ + if (pass == BCH_RECOVERY_PASS_scan_for_btree_nodes) + *flags |= RUN_RECOVERY_PASS_nopersistent; if ((*flags & RUN_RECOVERY_PASS_ratelimit) && !bch2_recovery_pass_want_ratelimit(c, pass)) @@ -310,6 +315,8 @@ static bool recovery_pass_needs_set(struct bch_fs *c, * Otherwise, we run run_explicit_recovery_pass when we find damage, so * it should run again even if it's already run: */ + bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); + bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent); if (persistent ? 
!(c->sb.recovery_passes_required & BIT_ULL(pass)) @@ -334,6 +341,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, struct bch_fs_recovery *r = &c->recovery; int ret = 0; + lockdep_assert_held(&c->sb_lock); bch2_printbuf_make_room(out, 1024); @@ -446,7 +454,7 @@ int bch2_require_recovery_pass(struct bch_fs *c, int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) { - enum bch_run_recovery_pass_flags flags = RUN_RECOVERY_PASS_nopersistent; + enum bch_run_recovery_pass_flags flags = 0; if (!recovery_pass_needs_set(c, pass, &flags)) return 0; diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index b61f88450a6d..1506d05e0665 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -253,6 +253,7 @@ DOWNGRADE_TABLE() static int downgrade_table_extra(struct bch_fs *c, darray_char *table) { + unsigned dst_offset = table->nr; struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table); unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors); int ret = 0; @@ -268,6 +269,9 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) if (ret) return ret; + dst = (void *) &table->data[dst_offset]; + dst->nr_errors = cpu_to_le16(nr_errors + 1); + /* open coded __set_bit_le64, as dst is packed and * dst->recovery_passes is misaligned */ unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations; @@ -278,7 +282,6 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) break; } - dst->nr_errors = cpu_to_le16(nr_errors); return ret; } diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 6fdbf265e4c0..d06e73884871 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -134,7 +134,7 @@ enum bch_fsck_flags { x(bucket_gens_to_invalid_buckets, 121, FSCK_AUTOFIX) \ x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \ x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \ - x(need_discard_freespace_key_bad, 124, 0) \ + x(need_discard_freespace_key_bad, 124, FSCK_AUTOFIX) \ x(discarding_bucket_not_in_need_discard_btree, 291, 0) \ x(backpointer_bucket_offset_wrong, 125, 0) \ x(backpointer_level_bad, 294, 0) \ @@ -165,7 +165,7 @@ enum bch_fsck_flags { x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \ x(ptr_to_missing_stripe, 150, 0) \ x(ptr_to_incorrect_stripe, 151, 0) \ - x(ptr_gen_newer_than_bucket_gen, 152, 0) \ + x(ptr_gen_newer_than_bucket_gen, 152, FSCK_AUTOFIX) \ x(ptr_too_stale, 153, 0) \ x(stale_dirty_ptr, 154, FSCK_AUTOFIX) \ x(ptr_bucket_data_type_mismatch, 155, 0) \ @@ -236,7 +236,7 @@ enum bch_fsck_flags { x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ - x(inode_has_child_snapshots_wrong, 287, 0) \ + x(inode_has_child_snapshots_wrong, 287, FSCK_AUTOFIX) \ x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ @@ -279,8 +279,8 @@ enum bch_fsck_flags { x(root_dir_missing, 239, 0) \ x(root_inode_not_dir, 240, 0) \ x(dir_loop, 241, 0) \ - x(hash_table_key_duplicate, 242, 0) \ - x(hash_table_key_wrong_offset, 243, 0) \ + x(hash_table_key_duplicate, 242, FSCK_AUTOFIX) \ + x(hash_table_key_wrong_offset, 243, FSCK_AUTOFIX) \ x(unlinked_inode_not_on_deleted_list, 244, FSCK_AUTOFIX) \ x(reflink_p_front_pad_bad, 245, 0) \ x(journal_entry_dup_same_device, 246, 0) \ diff --git a/fs/bcachefs/sb-members.c 
b/fs/bcachefs/sb-members.c index 363eb0c6eb7c..6245e342a8a8 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -325,9 +325,17 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, { struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); - unsigned i; - for (i = 0; i < sb->nr_devices; i++) + if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) { + prt_printf(out, "field ends before start of entries"); + return; + } + + unsigned nr = (vstruct_end(&mi->field) - (void *) &mi->_members[0]) / sizeof(mi->_members[0]); + if (nr != sb->nr_devices) + prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices); + + for (unsigned i = 0; i < min(sb->nr_devices, nr); i++) member_to_text(out, members_v1_get(mi, i), gi, sb, i); } @@ -341,9 +349,27 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, { struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); - unsigned i; - for (i = 0; i < sb->nr_devices; i++) + if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) { + prt_printf(out, "field ends before start of entries"); + return; + } + + if (!le16_to_cpu(mi->member_bytes)) { + prt_printf(out, "member_bytes 0"); + return; + } + + unsigned nr = (vstruct_end(&mi->field) - (void *) &mi->_members[0]) / le16_to_cpu(mi->member_bytes); + if (nr != sb->nr_devices) + prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices); + + /* + * We call to_text() on superblock sections that haven't passed + * validate, so we can't trust sb->nr_devices. + */ + + for (unsigned i = 0; i < min(sb->nr_devices, nr); i++) member_to_text(out, members_v2_get(mi, i), gi, sb, i); } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 397a69da5a75..a5b97c9c5163 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -104,7 +104,7 @@ const char * const bch2_dev_write_refs[] = { #undef x static void __bch2_print_str(struct bch_fs *c, const char *prefix, - const char *str, bool nonblocking) + const char *str) { #ifdef __KERNEL__ struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c); @@ -114,17 +114,12 @@ static void __bch2_print_str(struct bch_fs *c, const char *prefix, return; } #endif - bch2_print_string_as_lines(KERN_ERR, str, nonblocking); + bch2_print_string_as_lines(KERN_ERR, str); } void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) { - __bch2_print_str(c, prefix, str, false); -} - -void bch2_print_str_nonblocking(struct bch_fs *c, const char *prefix, const char *str) -{ - __bch2_print_str(c, prefix, str, true); + __bch2_print_str(c, prefix, str); } __printf(2, 0) @@ -1072,12 +1067,13 @@ noinline_for_stack static void print_mount_opts(struct bch_fs *c) { enum bch_opt_id i; - struct printbuf p = PRINTBUF; - bool first = true; + CLASS(printbuf, p)(); + bch2_log_msg_start(c, &p); prt_str(&p, "starting version "); bch2_version_to_text(&p, c->sb.version); + bool first = true; for (i = 0; i < bch2_opts_nr; i++) { const struct bch_option *opt = &bch2_opt_table[i]; u64 v = bch2_opt_get_by_id(&c->opts, i); @@ -1094,17 +1090,24 @@ static void print_mount_opts(struct bch_fs *c) } if (c->sb.version_incompat_allowed != c->sb.version) { - prt_printf(&p, "\n allowing incompatible features above "); + prt_printf(&p, "\nallowing incompatible features above "); bch2_version_to_text(&p, 
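print_mount_opts() above switches to CLASS(printbuf, p)(), the cleanup.h-style scope guard: the printbuf's destructor (printbuf_exit()) runs automatically on every return path, which is why the explicit exit disappears from the function. Sketch:

        CLASS(printbuf, p)();   /* printbuf_exit(&p) runs at scope exit */
        prt_str(&p, "hello");
        bch2_print_str(c, KERN_INFO, p.buf);
        /* no printbuf_exit() needed, even on early returns */
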
c->sb.version_incompat_allowed); } if (c->opts.verbose) { - prt_printf(&p, "\n features: "); + prt_printf(&p, "\nfeatures: "); prt_bitflags(&p, bch2_sb_features, c->sb.features); } - bch_info(c, "%s", p.buf); - printbuf_exit(&p); + if (c->sb.multi_device) { + prt_printf(&p, "\nwith devices"); + for_each_online_member(c, ca, BCH_DEV_READ_REF_bch2_online_devs) { + prt_char(&p, ' '); + prt_str(&p, ca->name); + } + } + + bch2_print_str(c, KERN_INFO, p.buf); } static bool bch2_fs_may_start(struct bch_fs *c) @@ -1995,6 +1998,22 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_late; } + /* + * We just changed the superblock UUID, invalidate cache and send a + * uevent to update /dev/disk/by-uuid + */ + invalidate_bdev(ca->disk_sb.bdev); + + char uuid_str[37]; + snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid); + + char *envp[] = { + "CHANGE=uuid", + uuid_str, + NULL, + }; + kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp); + up_write(&c->state_lock); out: printbuf_exit(&label); diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index dc3817f545fa..df9a6071fe18 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -262,8 +262,7 @@ static bool string_is_spaces(const char *str) return true; } -void bch2_print_string_as_lines(const char *prefix, const char *lines, - bool nonblocking) +void bch2_print_string_as_lines(const char *prefix, const char *lines) { bool locked = false; const char *p; @@ -273,12 +272,7 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines, return; } - if (!nonblocking) { - console_lock(); - locked = true; - } else { - locked = console_trylock(); - } + locked = console_trylock(); while (*lines) { p = strchrnul(lines, '\n'); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 0a4b1d433621..6488f098d140 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -214,7 +214,7 @@ u64 bch2_read_flag_list(const char *, const char * const[]); void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned); void bch2_prt_u64_base2(struct printbuf *, u64); -void bch2_print_string_as_lines(const char *, const char *, bool); +void bch2_print_string_as_lines(const char *, const char *); typedef DARRAY(unsigned long) bch_stacktrace; int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c7cc24a5dd5e..8c597fa60523 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1377,7 +1377,10 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info) { - WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root)); + struct btrfs_delayed_node *node = btrfs_first_delayed_node(fs_info->delayed_root); + + if (WARN_ON(node)) + refcount_dec(&node->refs); } static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1beb9458f622..0d6ad7512f21 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1835,6 +1835,8 @@ void btrfs_put_root(struct btrfs_root *root) if (refcount_dec_and_test(&root->refs)) { if (WARN_ON(!xa_empty(&root->inodes))) xa_destroy(&root->inodes); + if (WARN_ON(!xa_empty(&root->delayed_nodes))) + xa_destroy(&root->delayed_nodes); WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)); if (root->anon_dev) free_anon_bdev(root->anon_dev); @@ -2156,8 +2158,7 @@ static int load_global_roots_objectid(struct btrfs_root 
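The btrfs delayed-inode fix above closes a reference leak on an assertion path: btrfs_first_delayed_node() returns its result with an elevated refcount, so even the WARN_ON() path must drop it. The counted-lookup pattern:

        struct btrfs_delayed_node *node;

        node = btrfs_first_delayed_node(fs_info->delayed_root); /* +1 ref */
        if (WARN_ON(node))
                refcount_dec(&node->refs);      /* drop it on this path too */
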
*tree_root, found = true; root = read_tree_root_path(tree_root, path, &key); if (IS_ERR(root)) { - if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) - ret = PTR_ERR(root); + ret = PTR_ERR(root); break; } set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); @@ -4310,8 +4311,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) * * So wait for all ongoing ordered extents to complete and then run * delayed iputs. This works because once we reach this point no one - * can either create new ordered extents nor create delayed iputs - * through some other means. + * can create new ordered extents, but delayed iputs can still be added + * by a reclaim worker (see comments further below). * * Also note that btrfs_wait_ordered_roots() is not safe here, because * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent, @@ -4322,15 +4323,29 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_flush_workqueue(fs_info->endio_write_workers); /* Ordered extents for free space inodes. */ btrfs_flush_workqueue(fs_info->endio_freespace_worker); + /* + * Run delayed iputs in case an async reclaim worker is waiting for them + * to be run as mentioned above. + */ btrfs_run_delayed_iputs(fs_info); - /* There should be no more workload to generate new delayed iputs. */ - set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state); cancel_work_sync(&fs_info->async_reclaim_work); cancel_work_sync(&fs_info->async_data_reclaim_work); cancel_work_sync(&fs_info->preempt_reclaim_work); cancel_work_sync(&fs_info->em_shrinker_work); + /* + * Run delayed iputs again because an async reclaim worker may have + * added new ones if it was flushing delalloc: + * + * shrink_delalloc() -> btrfs_start_delalloc_roots() -> + * start_delalloc_inodes() -> btrfs_add_delayed_iput() + */ + btrfs_run_delayed_iputs(fs_info); + + /* There should be no more workload to generate new delayed iputs. */ + set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state); + /* Cancel or finish ongoing discard work */ btrfs_discard_cleanup(fs_info); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 849199768664..1dc931c4937f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4312,7 +4312,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio) spin_unlock(&eb->refs_lock); continue; } - xa_unlock_irq(&fs_info->buffer_tree); /* * If tree ref isn't set then we know the ref on this eb is a @@ -4329,6 +4328,7 @@ static int try_release_subpage_extent_buffer(struct folio *folio) * check the folio private at the end. And * release_extent_buffer() will release the refs_lock. */ + xa_unlock_irq(&fs_info->buffer_tree); release_extent_buffer(eb); xa_lock_irq(&fs_info->buffer_tree); } diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 0c573d46639a..a3e2a2a81461 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1115,11 +1115,21 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); if (ret < 0) goto out_locked; - ASSERT(ret == 0); + /* + * If ret is 1 (no key found), it means this is an empty block group, + * without any extents allocated from it and there's no block group + * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree + * because we are using the block group tree feature, so block group + * items are stored in the block group tree. 
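+ * (Without the block group tree feature the block group item itself + * would be found in the extent tree, so the search could not return 1.)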
It also means there are no + * extents allocated for block groups with a start offset beyond this + * block group's end offset (this is the last, highest, block group). + */ + if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE)) + ASSERT(ret == 0); start = block_group->start; end = block_group->start + block_group->length; - while (1) { + while (ret == 0) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.type == BTRFS_EXTENT_ITEM_KEY || @@ -1149,8 +1159,6 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = btrfs_next_item(extent_root, path); if (ret < 0) goto out_locked; - if (ret) - break; } if (start < end) { ret = __add_to_free_space_tree(trans, block_group, path2, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c0c778243bf1..26d6ed170a19 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4250,9 +4250,9 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index); if (ret) { - btrfs_info(fs_info, - "failed to delete reference to %.*s, inode %llu parent %llu", - name->len, name->name, ino, dir_ino); + btrfs_crit(fs_info, + "failed to delete reference to %.*s, root %llu inode %llu parent %llu", + name->len, name->name, btrfs_root_id(root), ino, dir_ino); btrfs_abort_transaction(trans, ret); goto err; } @@ -8059,6 +8059,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, int ret; int ret2; bool need_abort = false; + bool logs_pinned = false; struct fscrypt_name old_fname, new_fname; struct fscrypt_str *old_name, *new_name; @@ -8182,6 +8183,31 @@ static int btrfs_rename_exchange(struct inode *old_dir, inode_inc_iversion(new_inode); simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); + if (old_ino != BTRFS_FIRST_FREE_OBJECTID && + new_ino != BTRFS_FIRST_FREE_OBJECTID) { + /* + * If we are renaming in the same directory (and it's not for + * root entries) pin the log early to prevent any concurrent + * task from logging the directory after we removed the old + * entries and before we add the new entries, otherwise that + * task can sync a log without any entry for the inodes we are + * renaming and therefore replaying that log, if a power failure + * happens after syncing the log, would result in deleting the + * inodes. + * + * If the rename affects two different directories, we want to + * make sure that there's no log commit that contains + * updates for only one of the directories but not for the + * other. + * + * If we are renaming an entry for a root, we don't care about + * log updates since we called btrfs_set_log_full_commit(). + */ + btrfs_pin_log_trans(root); + btrfs_pin_log_trans(dest); + logs_pinned = true; + } + if (old_dentry->d_parent != new_dentry->d_parent) { btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), BTRFS_I(old_inode), true); @@ -8253,30 +8279,23 @@ static int btrfs_rename_exchange(struct inode *old_dir, BTRFS_I(new_inode)->dir_index = new_idx; /* - * Now pin the logs of the roots. We do it to ensure that no other task - * can sync the logs while we are in progress with the rename, because - * that could result in an inconsistency in case any of the inodes that - * are part of this rename operation were logged before. + * Do the log updates for all inodes. + * + * If either entry is for a root we don't need to update the logs since + * we've called btrfs_set_log_full_commit() before.
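+ * A full transaction commit supersedes any tree-log update, so logging + * the new names here would be pointless in that case.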
*/ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) - btrfs_pin_log_trans(root); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) - btrfs_pin_log_trans(dest); - - /* Do the log updates for all inodes. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) + if (logs_pinned) { btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir), old_rename_ctx.index, new_dentry->d_parent); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir), new_rename_ctx.index, old_dentry->d_parent); + } - /* Now unpin the logs. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) +out_fail: + if (logs_pinned) { btrfs_end_log_trans(root); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) btrfs_end_log_trans(dest); -out_fail: + } ret2 = btrfs_end_transaction(trans); ret = ret ? ret : ret2; out_notrans: @@ -8326,6 +8345,7 @@ static int btrfs_rename(struct mnt_idmap *idmap, int ret2; u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); struct fscrypt_name old_fname, new_fname; + bool logs_pinned = false; if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return -EPERM; @@ -8460,6 +8480,29 @@ static int btrfs_rename(struct mnt_idmap *idmap, inode_inc_iversion(old_inode); simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); + if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { + /* + * If we are renaming in the same directory (and it's not a + * root entry) pin the log to prevent any concurrent task from + * logging the directory after we removed the old entry and + * before we add the new entry, otherwise that task can sync + * a log without any entry for the inode we are renaming and + * therefore replaying that log, if a power failure happens + * after syncing the log, would result in deleting the inode. + * + * If the rename affects two different directories, we want to + * make sure that there's no log commit that contains + * updates for only one of the directories but not for the + * other. + * + * If we are renaming an entry for a root, we don't care about + * log updates since we called btrfs_set_log_full_commit(). + */ + btrfs_pin_log_trans(root); + btrfs_pin_log_trans(dest); + logs_pinned = true; + } + if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), BTRFS_I(old_inode), true); @@ -8524,7 +8567,7 @@ static int btrfs_rename(struct mnt_idmap *idmap, if (old_inode->i_nlink == 1) BTRFS_I(old_inode)->dir_index = index; - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) + if (logs_pinned) btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir), rename_ctx.index, new_dentry->d_parent); @@ -8540,6 +8583,10 @@ static int btrfs_rename(struct mnt_idmap *idmap, } } out_fail: + if (logs_pinned) { + btrfs_end_log_trans(root); + btrfs_end_log_trans(dest); + } ret2 = btrfs_end_transaction(trans); ret = ret ?
ret : ret2; out_notrans: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 913acef3f0a9..4eda35bdba71 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3139,7 +3139,7 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg) return -EPERM; if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { - btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet"); + btrfs_err(fs_info, "scrub: extent tree v2 not yet supported"); return -EINVAL; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ce36fafc771e..7cd5e76a783c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -557,7 +557,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, */ for (i = 0; i < ipath->fspath->elem_cnt; ++i) btrfs_warn_in_rcu(fs_info, -"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)", +"scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu length %u links %u (path: %s)", swarn->errstr, swarn->logical, btrfs_dev_name(swarn->dev), swarn->physical, @@ -571,7 +571,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, err: btrfs_warn_in_rcu(fs_info, - "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d", + "scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu: path resolving failed with ret=%d", swarn->errstr, swarn->logical, btrfs_dev_name(swarn->dev), swarn->physical, @@ -596,7 +596,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * /* Super block error, no need to search extent tree. */ if (is_super) { - btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu", + btrfs_warn_in_rcu(fs_info, "scrub: %s on device %s, physical %llu", errstr, btrfs_dev_name(dev), physical); return; } @@ -631,14 +631,14 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * &ref_level); if (ret < 0) { btrfs_warn(fs_info, - "failed to resolve tree backref for logical %llu: %d", - swarn.logical, ret); + "scrub: failed to resolve tree backref for logical %llu: %d", + swarn.logical, ret); break; } if (ret > 0) break; btrfs_warn_in_rcu(fs_info, -"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", +"scrub: %s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", errstr, swarn.logical, btrfs_dev_name(dev), swarn.physical, (ref_level ? 
"node" : "leaf"), ref_level, ref_root); @@ -718,7 +718,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad bytenr, has %llu want %llu", + "scrub: tree block %llu mirror %u has bad bytenr, has %llu want %llu", logical, stripe->mirror_num, btrfs_stack_header_bytenr(header), logical); return; @@ -728,7 +728,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad fsid, has %pU want %pU", + "scrub: tree block %llu mirror %u has bad fsid, has %pU want %pU", logical, stripe->mirror_num, header->fsid, fs_info->fs_devices->fsid); return; @@ -738,7 +738,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU", + "scrub: tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU", logical, stripe->mirror_num, header->chunk_tree_uuid, fs_info->chunk_tree_uuid); return; @@ -760,7 +760,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT, +"scrub: tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT, logical, stripe->mirror_num, CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum), CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum)); @@ -771,7 +771,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_gen_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad generation, has %llu want %llu", + "scrub: tree block %llu mirror %u has bad generation, has %llu want %llu", logical, stripe->mirror_num, btrfs_stack_header_generation(header), stripe->sectors[sector_nr].generation); @@ -814,7 +814,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr) */ if (unlikely(sector_nr + sectors_per_tree > stripe->nr_sectors)) { btrfs_warn_rl(fs_info, - "tree block at %llu crosses stripe boundary %llu", + "scrub: tree block at %llu crosses stripe boundary %llu", stripe->logical + (sector_nr << fs_info->sectorsize_bits), stripe->logical); @@ -1046,12 +1046,12 @@ skip: if (repaired) { if (dev) { btrfs_err_rl_in_rcu(fs_info, - "fixed up error at logical %llu on dev %s physical %llu", + "scrub: fixed up error at logical %llu on dev %s physical %llu", stripe->logical, btrfs_dev_name(dev), physical); } else { btrfs_err_rl_in_rcu(fs_info, - "fixed up error at logical %llu on mirror %u", + "scrub: fixed up error at logical %llu on mirror %u", stripe->logical, stripe->mirror_num); } continue; @@ -1060,12 +1060,12 @@ skip: /* The remaining are all for unrepaired. 
*/ if (dev) { btrfs_err_rl_in_rcu(fs_info, - "unable to fixup (regular) error at logical %llu on dev %s physical %llu", +"scrub: unable to fixup (regular) error at logical %llu on dev %s physical %llu", stripe->logical, btrfs_dev_name(dev), physical); } else { btrfs_err_rl_in_rcu(fs_info, - "unable to fixup (regular) error at logical %llu on mirror %u", + "scrub: unable to fixup (regular) error at logical %llu on mirror %u", stripe->logical, stripe->mirror_num); } @@ -1593,8 +1593,7 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical, physical, sctx->write_pointer); if (ret) - btrfs_err(fs_info, - "zoned: failed to recover write pointer"); + btrfs_err(fs_info, "scrub: zoned: failed to recover write pointer"); } mutex_unlock(&sctx->wr_lock); btrfs_dev_clear_zone_empty(sctx->wr_tgtdev, physical); @@ -1658,7 +1657,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, int ret; if (unlikely(!extent_root || !csum_root)) { - btrfs_err(fs_info, "no valid extent or csum root for scrub"); + btrfs_err(fs_info, "scrub: no valid extent or csum root found"); return -EUCLEAN; } memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) * @@ -1907,7 +1906,7 @@ static bool stripe_has_metadata_error(struct scrub_stripe *stripe) struct btrfs_fs_info *fs_info = stripe->bg->fs_info; btrfs_err(fs_info, - "stripe %llu has unrepaired metadata sector at %llu", + "scrub: stripe %llu has unrepaired metadata sector at logical %llu", stripe->logical, stripe->logical + (i << fs_info->sectorsize_bits)); return true; @@ -2167,7 +2166,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, bitmap_and(&error, &error, &has_extent, stripe->nr_sectors); if (!bitmap_empty(&error, stripe->nr_sectors)) { btrfs_err(fs_info, -"unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl", +"scrub: unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl", full_stripe_start, i, stripe->nr_sectors, &error); ret = -EIO; @@ -2789,14 +2788,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, ro_set = 0; } else if (ret == -ETXTBSY) { btrfs_warn(fs_info, - "skipping scrub of block group %llu due to active swapfile", + "scrub: skipping scrub of block group %llu due to active swapfile", cache->start); scrub_pause_off(fs_info); ret = 0; goto skip_unfreeze; } else { - btrfs_warn(fs_info, - "failed setting block group ro: %d", ret); + btrfs_warn(fs_info, "scrub: failed setting block group ro: %d", + ret); btrfs_unfreeze_block_group(cache); btrfs_put_block_group(cache); scrub_pause_off(fs_info); @@ -2892,13 +2891,13 @@ static int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev, ret = btrfs_check_super_csum(fs_info, sb); if (ret != 0) { btrfs_err_rl(fs_info, - "super block at physical %llu devid %llu has bad csum", + "scrub: super block at physical %llu devid %llu has bad csum", physical, dev->devid); return -EIO; } if (btrfs_super_generation(sb) != generation) { btrfs_err_rl(fs_info, -"super block at physical %llu devid %llu has bad generation %llu expect %llu", +"scrub: super block at physical %llu devid %llu has bad generation %llu expect %llu", physical, dev->devid, btrfs_super_generation(sb), generation); return -EUCLEAN; @@ -3059,7 +3058,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); btrfs_err_in_rcu(fs_info, - "scrub on devid %llu: filesystem on %s is not writable", + "scrub: 
devid %llu: filesystem on %s is not writable", devid, btrfs_dev_name(dev)); ret = -EROFS; goto out; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 97e933113b82..858b609e292c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -668,15 +668,15 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, extent_end = ALIGN(start + size, fs_info->sectorsize); } else { - ret = 0; - goto out; + btrfs_err(fs_info, + "unexpected extent type=%d root=%llu inode=%llu offset=%llu", + found_type, btrfs_root_id(root), key->objectid, key->offset); + return -EUCLEAN; } inode = read_one_inode(root, key->objectid); - if (!inode) { - ret = -EIO; - goto out; - } + if (!inode) + return -EIO; /* * first check to see if we already have this extent in the @@ -961,7 +961,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, ret = unlink_inode_for_log_replay(trans, dir, inode, &name); out: kfree(name.name); - iput(&inode->vfs_inode); + if (inode) + iput(&inode->vfs_inode); return ret; } @@ -1176,8 +1177,8 @@ again: ret = unlink_inode_for_log_replay(trans, victim_parent, inode, &victim_name); + iput(&victim_parent->vfs_inode); } - iput(&victim_parent->vfs_inode); kfree(victim_name.name); if (ret) return ret; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 89835071cfea..f475b4b7c457 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3282,6 +3282,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) device->bytes_used - dev_extent_len); atomic64_add(dev_extent_len, &fs_info->free_chunk_space); btrfs_clear_space_info_full(fs_info); + + if (list_empty(&device->post_commit_list)) { + list_add_tail(&device->post_commit_list, + &trans->transaction->dev_update_list); + } + mutex_unlock(&fs_info->chunk_mutex); } } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index b5b0156d5b95..9430b34d3cbb 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1403,7 +1403,8 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg, static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; @@ -1426,6 +1427,13 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, zone_info[1].physical); return -EIO; } + + if (zone_info[0].alloc_offset == WP_CONVENTIONAL) + zone_info[0].alloc_offset = last_alloc; + + if (zone_info[1].alloc_offset == WP_CONVENTIONAL) + zone_info[1].alloc_offset = last_alloc; + if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) { btrfs_err(bg->fs_info, "zoned: write pointer offset mismatch of zones in DUP profile"); @@ -1446,7 +1454,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; int i; @@ -1461,10 +1470,12 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); for (i = 0; i < map->num_stripes; i++) { - if (zone_info[i].alloc_offset == WP_MISSING_DEV || - zone_info[i].alloc_offset == WP_CONVENTIONAL) + if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) + 
zone_info[i].alloc_offset = last_alloc; + if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) && !btrfs_test_opt(fs_info, DEGRADED)) { btrfs_err(fs_info, @@ -1494,7 +1505,8 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; @@ -1505,10 +1517,29 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, } for (int i = 0; i < map->num_stripes; i++) { - if (zone_info[i].alloc_offset == WP_MISSING_DEV || - zone_info[i].alloc_offset == WP_CONVENTIONAL) + if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { + u64 stripe_nr, full_stripe_nr; + u64 stripe_offset; + int stripe_index; + + stripe_nr = div64_u64(last_alloc, map->stripe_size); + stripe_offset = stripe_nr * map->stripe_size; + full_stripe_nr = div_u64(stripe_nr, map->num_stripes); + div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); + + zone_info[i].alloc_offset = + full_stripe_nr * map->stripe_size; + + if (stripe_index > i) + zone_info[i].alloc_offset += map->stripe_size; + else if (stripe_index == i) + zone_info[i].alloc_offset += + (last_alloc - stripe_offset); + } + if (test_bit(0, active) != test_bit(i, active)) { if (!btrfs_zone_activate(bg)) return -EIO; @@ -1526,7 +1557,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; @@ -1537,8 +1569,7 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, } for (int i = 0; i < map->num_stripes; i++) { - if (zone_info[i].alloc_offset == WP_MISSING_DEV || - zone_info[i].alloc_offset == WP_CONVENTIONAL) + if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; if (test_bit(0, active) != test_bit(i, active)) { @@ -1549,6 +1580,29 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); } + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { + u64 stripe_nr, full_stripe_nr; + u64 stripe_offset; + int stripe_index; + + stripe_nr = div64_u64(last_alloc, map->stripe_size); + stripe_offset = stripe_nr * map->stripe_size; + full_stripe_nr = div_u64(stripe_nr, + map->num_stripes / map->sub_stripes); + div_u64_rem(stripe_nr, + (map->num_stripes / map->sub_stripes), + &stripe_index); + + zone_info[i].alloc_offset = + full_stripe_nr * map->stripe_size; + + if (stripe_index > (i / map->sub_stripes)) + zone_info[i].alloc_offset += map->stripe_size; + else if (stripe_index == (i / map->sub_stripes)) + zone_info[i].alloc_offset += + (last_alloc - stripe_offset); + } + if ((i % map->sub_stripes) == 0) { bg->zone_capacity += zone_info[i].capacity; bg->alloc_offset += zone_info[i].alloc_offset; @@ -1637,18 +1691,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) ret = btrfs_load_block_group_single(cache, &zone_info[0], active); break; case BTRFS_BLOCK_GROUP_DUP: - ret = btrfs_load_block_group_dup(cache, map, zone_info, active); + ret = btrfs_load_block_group_dup(cache, map, zone_info, active, + last_alloc); break; case BTRFS_BLOCK_GROUP_RAID1: case 
BTRFS_BLOCK_GROUP_RAID1C3: case BTRFS_BLOCK_GROUP_RAID1C4: - ret = btrfs_load_block_group_raid1(cache, map, zone_info, active); + ret = btrfs_load_block_group_raid1(cache, map, zone_info, + active, last_alloc); break; case BTRFS_BLOCK_GROUP_RAID0: - ret = btrfs_load_block_group_raid0(cache, map, zone_info, active); + ret = btrfs_load_block_group_raid0(cache, map, zone_info, + active, last_alloc); break; case BTRFS_BLOCK_GROUP_RAID10: - ret = btrfs_load_block_group_raid10(cache, map, zone_info, active); + ret = btrfs_load_block_group_raid10(cache, map, zone_info, + active, last_alloc); break; case BTRFS_BLOCK_GROUP_RAID5: case BTRFS_BLOCK_GROUP_RAID6: diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index 7d81f504bff0..df5cc63f2c01 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -47,6 +47,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq) { + const struct cred *old_cred; struct iov_iter iter; int ret; @@ -60,7 +61,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq) rq->iocb.ki_flags = IOCB_DIRECT; iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt, rq->bio.bi_iter.bi_size); + old_cred = override_creds(rq->iocb.ki_filp->f_cred); ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter); + revert_creds(old_cred); if (ret != -EIOCBQUEUED) erofs_fileio_ki_complete(&rq->iocb, ret); } diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 14ea47f954f5..0bebc6e3a4d7 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -597,6 +597,10 @@ static int z_erofs_map_blocks_ext(struct inode *inode, if (la > map->m_la) { r = mid; + if (la > lend) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } lend = la; } else { l = mid + 1; @@ -635,12 +639,6 @@ static int z_erofs_map_blocks_ext(struct inode *inode, } } map->m_llen = lend - map->m_la; - if (!last && map->m_llen < sb->s_blocksize) { - erofs_err(sb, "extent too small %llu @ offset %llu of nid %llu", - map->m_llen, map->m_la, vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } return 0; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6bd3de64f2a8..696131e655ed 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -35,6 +35,17 @@ #include <trace/events/f2fs.h> #include <uapi/linux/f2fs.h> +static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size) +{ + loff_t old_size = i_size_read(inode); + + if (old_size >= new_size) + return; + + /* zero or drop pages only in range of [old_size, new_size] */ + truncate_pagecache(inode, old_size); +} + static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); @@ -103,8 +114,13 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT); + filemap_invalidate_unlock(inode->i_mapping); + file_update_time(vmf->vma->vm_file); filemap_invalidate_lock_shared(inode->i_mapping); + folio_lock(folio); if (unlikely(folio->mapping != inode->i_mapping || folio_pos(folio) > i_size_read(inode) || @@ -1109,6 +1125,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); + if (attr->ia_size > old_size) + f2fs_zero_post_eof_page(inode, attr->ia_size); truncate_setsize(inode, attr->ia_size); if (attr->ia_size <= old_size) @@ -1227,6 +1245,10 @@ static int f2fs_punch_hole(struct inode *inode, 
loff_t offset, loff_t len) if (ret) return ret; + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(inode->i_mapping); + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1510,6 +1532,8 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + f2fs_lock_op(sbi); f2fs_drop_extent_tree(inode); truncate_pagecache(inode, offset); @@ -1631,6 +1655,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; + filemap_invalidate_lock(mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(mapping); + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1762,6 +1790,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) /* avoid gc operation during block exchange */ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(mapping); + + f2fs_zero_post_eof_page(inode, offset + len); truncate_pagecache(inode, offset); while (!ret && idx > pg_start) { @@ -1819,6 +1849,10 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, if (err) return err; + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(inode->i_mapping); + f2fs_balance_fs(sbi, true); pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; @@ -4860,6 +4894,10 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) err = file_modified(file); if (err) return err; + + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from)); + filemap_invalidate_unlock(inode->i_mapping); return count; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1cb4cba7f961..bfe104db284e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2078,7 +2078,6 @@ write_node: if (!__write_node_folio(folio, false, &submitted, wbc, do_balance, io_type, NULL)) { - folio_unlock(folio); folio_batch_release(&fbatch); ret = -EIO; goto out; diff --git a/fs/file.c b/fs/file.c index 3a3146664cf3..b6db031545e6 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1198,8 +1198,12 @@ bool file_seek_cur_needs_f_lock(struct file *file) if (!(file->f_mode & FMODE_ATOMIC_POS) && !file->f_op->iterate_shared) return false; - VFS_WARN_ON_ONCE((file_count(file) > 1) && - !mutex_is_locked(&file->f_pos_lock)); + /* + * Note that we are not guaranteed to be called after fdget_pos() on + * this file obj, in which case the caller is expected to provide the + * appropriate locking. + */ + return true; } diff --git a/fs/namei.c b/fs/namei.c index 4bb889fc980b..f761cafaeaad 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2917,7 +2917,8 @@ static int lookup_one_common(struct mnt_idmap *idmap, * @base: base directory to lookup from * * Look up a dentry by name in the dcache, returning NULL if it does not - * currently exist. The function does not try to create a dentry. + * currently exist. The function does not try to create a dentry and if one + * is found it doesn't try to revalidate it. * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. It does no permission checking. 
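With this change the two helpers diverge cleanly: try_lookup_noperm() becomes a pure dcache probe via d_lookup(), never calling ->lookup() or revalidating, while lookup_noperm_unlocked() (reworked below) keeps the revalidating behaviour. A minimal caller sketch, assuming the usual VFS locking and refcounting rules; the helper name child_is_cached is hypothetical and not part of this patch:

static bool child_is_cached(struct dentry *base, const char *name)
{
	/* Dcache-only probe: no ->lookup() call, no ->d_revalidate(). */
	struct dentry *child = try_lookup_noperm(&QSTR_LEN(name, strlen(name)), base);

	if (IS_ERR_OR_NULL(child))
		return false;
	dput(child);	/* d_lookup() returned a referenced dentry */
	return true;
}

Callers that need a dentry to actually use should prefer lookup_noperm_unlocked(), which revalidates anything found in the dcache and falls back to lookup_slow().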
@@ -2933,7 +2934,7 @@ struct dentry *try_lookup_noperm(struct qstr *name, struct dentry *base) if (err) return ERR_PTR(err); - return lookup_dcache(name, base, 0); + return d_lookup(base, name); } EXPORT_SYMBOL(try_lookup_noperm); @@ -3057,14 +3058,22 @@ EXPORT_SYMBOL(lookup_one_positive_unlocked); * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. It does no permission checking. * - * Unlike lookup_noperm, it should be called without the parent + * Unlike lookup_noperm(), it should be called without the parent * i_rwsem held, and will take the i_rwsem itself if necessary. + * + * Unlike try_lookup_noperm() it *does* revalidate the dentry if it already + * existed. */ struct dentry *lookup_noperm_unlocked(struct qstr *name, struct dentry *base) { struct dentry *ret; + int err; - ret = try_lookup_noperm(name, base); + err = lookup_noperm_common(name, base); + if (err) + return ERR_PTR(err); + + ret = lookup_dcache(name, base, 0); if (!ret) ret = lookup_slow(name, base, 0); return ret; diff --git a/fs/namespace.c b/fs/namespace.c index e13d9ab4f564..54c59e091919 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2310,21 +2310,62 @@ out: return dst_mnt; } -/* Caller should check returned pointer for errors */ +static inline bool extend_array(struct path **res, struct path **to_free, + unsigned n, unsigned *count, unsigned new_count) +{ + struct path *p; -struct vfsmount *collect_mounts(const struct path *path) + if (likely(n < *count)) + return true; + p = kmalloc_array(new_count, sizeof(struct path), GFP_KERNEL); + if (p && *count) + memcpy(p, *res, *count * sizeof(struct path)); + *count = new_count; + kfree(*to_free); + *to_free = *res = p; + return p; +} + +struct path *collect_paths(const struct path *path, + struct path *prealloc, unsigned count) { - struct mount *tree; - namespace_lock(); - if (!check_mnt(real_mount(path->mnt))) - tree = ERR_PTR(-EINVAL); - else - tree = copy_tree(real_mount(path->mnt), path->dentry, - CL_COPY_ALL | CL_PRIVATE); - namespace_unlock(); - if (IS_ERR(tree)) - return ERR_CAST(tree); - return &tree->mnt; + struct mount *root = real_mount(path->mnt); + struct mount *child; + struct path *res = prealloc, *to_free = NULL; + unsigned n = 0; + + guard(rwsem_read)(&namespace_sem); + + if (!check_mnt(root)) + return ERR_PTR(-EINVAL); + if (!extend_array(&res, &to_free, 0, &count, 32)) + return ERR_PTR(-ENOMEM); + res[n++] = *path; + list_for_each_entry(child, &root->mnt_mounts, mnt_child) { + if (!is_subdir(child->mnt_mountpoint, path->dentry)) + continue; + for (struct mount *m = child; m; m = next_mnt(m, child)) { + if (!extend_array(&res, &to_free, n, &count, 2 * count)) + return ERR_PTR(-ENOMEM); + res[n].mnt = &m->mnt; + res[n].dentry = m->mnt.mnt_root; + n++; + } + } + if (!extend_array(&res, &to_free, n, &count, count + 1)) + return ERR_PTR(-ENOMEM); + memset(res + n, 0, (count - n) * sizeof(struct path)); + for (struct path *p = res; p->mnt; p++) + path_get(p); + return res; +} + +void drop_collected_paths(struct path *paths, struct path *prealloc) +{ + for (struct path *p = paths; p->mnt; p++) + path_put(p); + if (paths != prealloc) + kfree(paths); } static void free_mnt_ns(struct mnt_namespace *); @@ -2401,15 +2442,6 @@ void dissolve_on_fput(struct vfsmount *mnt) free_mnt_ns(ns); } -void drop_collected_mounts(struct vfsmount *mnt) -{ - namespace_lock(); - lock_mount_hash(); - umount_tree(real_mount(mnt), 0); - unlock_mount_hash(); - namespace_unlock(); -} - static bool 
__has_locked_children(struct mount *mnt, struct dentry *dentry) { struct mount *child; @@ -2511,21 +2543,6 @@ struct vfsmount *clone_private_mount(const struct path *path) } EXPORT_SYMBOL_GPL(clone_private_mount); -int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, - struct vfsmount *root) -{ - struct mount *mnt; - int res = f(root, arg); - if (res) - return res; - list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) { - res = f(&mnt->mnt, arg); - if (res) - return res; - } - return 0; -} - static void lock_mnt_tree(struct mount *mnt) { struct mount *p; @@ -2751,14 +2768,14 @@ static int attach_recursive_mnt(struct mount *source_mnt, hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { struct mount *q; hlist_del_init(&child->mnt_hash); - q = __lookup_mnt(&child->mnt_parent->mnt, - child->mnt_mountpoint); - if (q) - mnt_change_mountpoint(child, smp, q); /* Notice when we are propagating across user namespaces */ if (child->mnt_parent->mnt_ns->user_ns != user_ns) lock_mnt_tree(child); child->mnt.mnt_flags &= ~MNT_LOCKED; + q = __lookup_mnt(&child->mnt_parent->mnt, + child->mnt_mountpoint); + if (q) + mnt_change_mountpoint(child, smp, q); commit_tree(child); } put_mountpoint(smp); @@ -5290,16 +5307,12 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, kattr.kflags |= MOUNT_KATTR_RECURSE; ret = wants_mount_setattr(uattr, usize, &kattr); - if (ret < 0) - return ret; - - if (ret) { + if (ret > 0) { ret = do_mount_setattr(&file->f_path, &kattr); - if (ret) - return ret; - finish_mount_kattr(&kattr); } + if (ret) + return ret; } fd = get_unused_fd_flags(flags & O_CLOEXEC); @@ -6262,7 +6275,11 @@ void put_mnt_ns(struct mnt_namespace *ns) { if (!refcount_dec_and_test(&ns->ns.count)) return; - drop_collected_mounts(&ns->root->mnt); + namespace_lock(); + lock_mount_hash(); + umount_tree(ns->root, 0); + unlock_mount_hash(); + namespace_unlock(); free_mnt_ns(ns); } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ccb00aa93be0..e00b2aea8da2 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1409,6 +1409,7 @@ void nfsd41_cb_referring_call(struct nfsd4_callback *cb, out: if (!rcl->__nr_referring_calls) { cb->cb_nr_referring_call_list--; + list_del(&rcl->__list); kfree(rcl); } } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3f3e9f6c4250..6a42cc7a845a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1611,7 +1611,7 @@ out_unlock: */ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) { - int *nthreads, count = 0, nrpools, i, ret = -EOPNOTSUPP, rem; + int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem; struct net *net = genl_info_net(info); struct nfsd_net *nn = net_generic(net, nfsd_net_id); const struct nlattr *attr; @@ -1623,12 +1623,11 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) /* count number of SERVER_THREADS values */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { if (nla_type(attr) == NFSD_A_SERVER_THREADS) - count++; + nrpools++; } mutex_lock(&nfsd_mutex); - nrpools = max(count, nfsd_nrpools(net)); nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL); if (!nthreads) { ret = -ENOMEM; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 0b8b28392eb7..2043f0369059 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -1393,7 +1393,7 @@ out: bool ovl_lower_positive(struct dentry *dentry) { struct ovl_entry *poe = OVL_E(dentry->d_parent); - struct qstr *name = &dentry->d_name; + const struct qstr *name = 
&dentry->d_name; const struct cred *old_cred; unsigned int i; bool positive = false; @@ -1416,9 +1416,15 @@ bool ovl_lower_positive(struct dentry *dentry) struct dentry *this; struct ovl_path *parentpath = &ovl_lowerstack(poe)[i]; + /* + * We need to make a non-const copy of dentry->d_name, + * because lookup_one_positive_unlocked() will hash name + * with parentpath base, which is on another (lower fs). + */ this = lookup_one_positive_unlocked( mnt_idmap(parentpath->layer->mnt), - name, parentpath->dentry); + &QSTR_LEN(name->name, name->len), + parentpath->dentry); if (IS_ERR(this)) { switch (PTR_ERR(this)) { case -ENOENT: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 8baaba0a3fe5..497323128e5f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -246,9 +246,11 @@ static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs, struct dentry *dentry, umode_t mode) { - dentry = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(dentry)); - return dentry; + struct dentry *ret; + + ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); + pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret)); + return ret; } static inline int ovl_do_mknod(struct ovl_fs *ofs, diff --git a/fs/pidfs.c b/fs/pidfs.c index c1f0a067be40..69919be1c9d8 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -366,7 +366,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) kinfo.pid = task_pid_vnr(task); kinfo.mask |= PIDFD_INFO_PID; - if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1)) + if (kinfo.pid == 0 || kinfo.tgid == 0) return -ESRCH; copy_out: diff --git a/fs/pnode.h b/fs/pnode.h index 34b6247af01d..2d026fb98b18 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -28,8 +28,6 @@ #define CL_SHARED_TO_SLAVE 0x20 #define CL_COPY_MNT_NS_FILE 0x40 -#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE) - static inline void set_mnt_shared(struct mount *mnt) { mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK; diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 6ed2dfd4dbbd..d98e0d2de09f 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -594,9 +594,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) struct rmid_read rr = {0}; struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; + int domid, cpu, ret = 0; struct rdt_resource *r; + struct cacheinfo *ci; struct mon_data *md; - int domid, ret = 0; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { @@ -623,10 +624,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) * one that matches this cache id. */ list_for_each_entry(d, &r->mon_domains, hdr.list) { - if (d->ci->id == domid) { - rr.ci = d->ci; + if (d->ci_id == domid) { + rr.ci_id = d->ci_id; + cpu = cpumask_any(&d->hdr.cpu_mask); + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) + continue; mon_event_read(&rr, r, NULL, rdtgrp, - &d->ci->shared_cpu_map, evtid, false); + &ci->shared_cpu_map, evtid, false); goto checkresult; } } diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 9a8cf6f11151..0a1eedba2b03 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -98,7 +98,7 @@ struct mon_data { * domains in @r sharing L3 @ci.id * @evtid: Which monitor event to read. * @first: Initialize MBM counter when true. - * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains. + * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. 
Used when summing domains. * @err: Error encountered when reading counter. * @val: Returned value of event counter. If @rgrp is a parent resource group, * @val includes the sum of event counts from its child resource groups. @@ -112,7 +112,7 @@ struct rmid_read { struct rdt_mon_domain *d; enum resctrl_event_id evtid; bool first; - struct cacheinfo *ci; + unsigned int ci_id; int err; u64 val; void *arch_mon_ctx; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index bde2801289d3..f5637855c3ac 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -361,6 +361,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) { int cpu = smp_processor_id(); struct rdt_mon_domain *d; + struct cacheinfo *ci; struct mbm_state *m; int err, ret; u64 tval = 0; @@ -388,7 +389,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) } /* Summing domains that share a cache, must be on a CPU for that cache. */ - if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map)) + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci || ci->id != rr->ci_id) return -EINVAL; /* @@ -400,7 +402,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) */ ret = -EINVAL; list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { - if (d->ci->id != rr->ci->id) + if (d->ci_id != rr->ci_id) continue; err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, rr->evtid, &tval, rr->arch_mon_ctx); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 1beb124e25f6..77d08229d855 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3036,7 +3036,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, char name[32]; snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); if (snc_mode) sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); @@ -3061,7 +3061,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, return -EPERM; list_for_each_entry(mevt, &r->evt_list, list) { - domid = do_sum ? d->ci->id : d->hdr.id; + domid = do_sum ? d->ci_id : d->hdr.id; priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum); if (WARN_ON_ONCE(!priv)) return -EINVAL; @@ -3089,7 +3089,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, lockdep_assert_held(&rdtgroup_mutex); snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); kn = kernfs_find_and_get(parent_kn, name); if (kn) { /* diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 5200a0f3cafc..368e870624da 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -509,8 +509,17 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb) spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { tmp_list = kmalloc(sizeof(*tmp_list), GFP_ATOMIC); - if (tmp_list == NULL) - break; + if (tmp_list == NULL) { + /* + * If the malloc() fails, we won't drop all + * dentries, and unmounting is likely to trigger + * a 'Dentry still in use' error. 
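+ * Bail out, but still put the dentries collected so far so that + * as few as possible remain pinned.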
+ */ + cifs_tcon_dbg(VFS, "Out of memory while dropping dentries\n"); + spin_unlock(&cfids->cfid_list_lock); + spin_unlock(&cifs_sb->tlink_tree_lock); + goto done; + } spin_lock(&cfid->fid_lock); tmp_list->dentry = cfid->dentry; cfid->dentry = NULL; @@ -522,6 +531,7 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb) } spin_unlock(&cifs_sb->tlink_tree_lock); +done: list_for_each_entry_safe(tmp_list, q, &entry, entry) { list_del(&tmp_list->entry); dput(tmp_list->dentry); diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h index 1dfe79d947a6..a28f7cae3caa 100644 --- a/fs/smb/client/cached_dir.h +++ b/fs/smb/client/cached_dir.h @@ -21,12 +21,12 @@ struct cached_dirent { struct cached_dirents { bool is_valid:1; bool is_failed:1; - struct dir_context *ctx; /* - * Only used to make sure we only take entries - * from a single context. Never dereferenced. - */ + struct file *file; /* + * Used to associate the cache with a single + * open file instance. + */ struct mutex de_mutex; - int pos; /* Expected ctx->pos */ + loff_t pos; /* Expected ctx->pos */ struct list_head entries; }; diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index c0196be0e65f..3fdf75737d43 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -1105,7 +1105,7 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, if ((count < 1) || (count > 11)) return -EINVAL; - memset(flags_string, 0, 12); + memset(flags_string, 0, sizeof(flags_string)); if (copy_from_user(flags_string, buffer, count)) return -EFAULT; diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h index 26327442e383..b51ce64fcccf 100644 --- a/fs/smb/client/cifs_ioctl.h +++ b/fs/smb/client/cifs_ioctl.h @@ -61,7 +61,7 @@ struct smb_query_info { struct smb3_key_debug_info { __u64 Suid; __u16 cipher_type; - __u8 auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */ + __u8 auth_key[SMB2_NTLMV2_SESSKEY_SIZE]; __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; } __packed; diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 28bc33496623..c48869c29e15 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3718,9 +3718,15 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) goto out; } - /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */ - if (tcon->posix_extensions) + /* + * if new SMB3.11 POSIX extensions are supported, do not change anything in the + * path (i.e., do not remap / and \ and do not map any special characters) + */ + if (tcon->posix_extensions) { cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; + cifs_sb->mnt_cifs_flags &= ~(CIFS_MOUNT_MAP_SFM_CHR | + CIFS_MOUNT_MAP_SPECIAL_CHR); + } #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY /* tell server which Unix caps we support */ @@ -4193,6 +4199,7 @@ retry: return 0; } + server->lstrp = jiffies; server->tcpStatus = CifsInNegotiate; spin_unlock(&server->srv_lock); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index d2df10b8e6fd..e9212da32f01 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -52,6 +52,7 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq) struct netfs_io_stream *stream = &req->rreq.io_streams[subreq->stream_nr]; struct TCP_Server_Info *server; struct cifsFileInfo *open_file = req->cfile; + struct cifs_sb_info *cifs_sb = CIFS_SB(wdata->rreq->inode->i_sb); size_t wsize = req->rreq.wsize; int rc; @@ -63,6 +64,10 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq) server 
= cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); wdata->server = server; + if (cifs_sb->ctx->wsize == 0) + cifs_negotiate_wsize(server, cifs_sb->ctx, + tlink_tcon(req->cfile->tlink)); + retry: if (open_file->invalidHandle) { rc = cifs_reopen_file(open_file, false); @@ -160,10 +165,9 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; - if (cifs_sb->ctx->rsize == 0) { + if (cifs_sb->ctx->rsize == 0) cifs_negotiate_rsize(server, cifs_sb->ctx, tlink_tcon(req->cfile->tlink)); - } rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &size, &rdata->credits); @@ -999,15 +1003,18 @@ int cifs_open(struct inode *inode, struct file *file) rc = cifs_get_readable_path(tcon, full_path, &cfile); } if (rc == 0) { - if (file->f_flags == cfile->f_flags) { + unsigned int oflags = file->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); + unsigned int cflags = cfile->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); + + if (cifs_convert_flags(oflags, 0) == cifs_convert_flags(cflags, 0) && + (oflags & (O_SYNC|O_DIRECT)) == (cflags & (O_SYNC|O_DIRECT))) { file->private_data = cfile; spin_lock(&CIFS_I(inode)->deferred_lock); cifs_del_deferred_close(cfile); spin_unlock(&CIFS_I(inode)->deferred_lock); goto use_cache; - } else { - _cifsFileInfo_put(cfile, true, false); } + _cifsFileInfo_put(cfile, true, false); } else { /* hard link on the deferred close file */ rc = cifs_get_hardlink_path(tcon, inode, file); diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 56439da4f119..0a9935ce05a5 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -506,7 +506,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) le16_to_cpu(tcon->ses->server->cipher_type); pkey_inf.Suid = tcon->ses->Suid; memcpy(pkey_inf.auth_key, tcon->ses->auth_key.response, - 16 /* SMB2_NTLMV2_SESSKEY_SIZE */); + SMB2_NTLMV2_SESSKEY_SIZE); memcpy(pkey_inf.smb3decryptionkey, tcon->ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE); memcpy(pkey_inf.smb3encryptionkey, diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index f9f11cbf89be..ba0193cf9033 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -851,9 +851,9 @@ static bool emit_cached_dirents(struct cached_dirents *cde, } static void update_cached_dirents_count(struct cached_dirents *cde, - struct dir_context *ctx) + struct file *file) { - if (cde->ctx != ctx) + if (cde->file != file) return; if (cde->is_valid || cde->is_failed) return; @@ -862,9 +862,9 @@ static void update_cached_dirents_count(struct cached_dirents *cde, } static void finished_cached_dirents_count(struct cached_dirents *cde, - struct dir_context *ctx) + struct dir_context *ctx, struct file *file) { - if (cde->ctx != ctx) + if (cde->file != file) return; if (cde->is_valid || cde->is_failed) return; @@ -877,11 +877,12 @@ static void finished_cached_dirents_count(struct cached_dirents *cde, static void add_cached_dirent(struct cached_dirents *cde, struct dir_context *ctx, const char *name, int namelen, - struct cifs_fattr *fattr) + struct cifs_fattr *fattr, + struct file *file) { struct cached_dirent *de; - if (cde->ctx != ctx) + if (cde->file != file) return; if (cde->is_valid || cde->is_failed) return; @@ -911,7 +912,8 @@ static void add_cached_dirent(struct cached_dirents *cde, static bool cifs_dir_emit(struct dir_context *ctx, const char *name, int namelen, struct cifs_fattr *fattr, - struct cached_fid *cfid) + struct cached_fid *cfid, + struct file *file) {
bool rc; ino_t ino = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); @@ -923,7 +925,7 @@ static bool cifs_dir_emit(struct dir_context *ctx, if (cfid) { mutex_lock(&cfid->dirents.de_mutex); add_cached_dirent(&cfid->dirents, ctx, name, namelen, - fattr); + fattr, file); mutex_unlock(&cfid->dirents.de_mutex); } @@ -1023,7 +1025,7 @@ static int cifs_filldir(char *find_entry, struct file *file, cifs_prime_dcache(file_dentry(file), &name, &fattr); return !cifs_dir_emit(ctx, name.name, name.len, - &fattr, cfid); + &fattr, cfid, file); } @@ -1074,8 +1076,8 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) * we need to initialize scanning and storing the * directory content. */ - if (ctx->pos == 0 && cfid->dirents.ctx == NULL) { - cfid->dirents.ctx = ctx; + if (ctx->pos == 0 && cfid->dirents.file == NULL) { + cfid->dirents.file = file; cfid->dirents.pos = 2; } /* @@ -1143,7 +1145,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) } else { if (cfid) { mutex_lock(&cfid->dirents.de_mutex); - finished_cached_dirents_count(&cfid->dirents, ctx); + finished_cached_dirents_count(&cfid->dirents, ctx, file); mutex_unlock(&cfid->dirents.de_mutex); } cifs_dbg(FYI, "Could not find entry\n"); @@ -1184,7 +1186,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) ctx->pos++; if (cfid) { mutex_lock(&cfid->dirents.de_mutex); - update_cached_dirents_count(&cfid->dirents, ctx); + update_cached_dirents_count(&cfid->dirents, file); mutex_unlock(&cfid->dirents.de_mutex); } diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index bb25e77c5540..511611206dab 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -1172,7 +1172,6 @@ out: if (!have_xattr_dev && (tag == IO_REPARSE_TAG_LX_CHR || tag == IO_REPARSE_TAG_LX_BLK)) return false; - fattr->cf_dtype = S_DT(fattr->cf_mode); return true; } diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index ec0db32c7d98..330bc3d25bad 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -498,8 +498,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, ctx->domainauto = ses->domainAuto; ctx->domainname = ses->domainName; - /* no hostname for extra channels */ - ctx->server_hostname = ""; + ctx->server_hostname = ses->server->hostname; ctx->username = ses->user_name; ctx->password = ses->password; diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 5ae847919da5..cbc85bca006f 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2589,13 +2589,14 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, size_t fsize = folioq_folio_size(folioq, slot); if (offset < fsize) { - size_t part = umin(maxsize - ret, fsize - offset); + size_t part = umin(maxsize, fsize - offset); if (!smb_set_sge(rdma, folio_page(folio, 0), offset, part)) return -EIO; offset += part; ret += part; + maxsize -= part; } if (offset >= fsize) { @@ -2610,7 +2611,7 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, slot = 0; } } - } while (rdma->nr_sge < rdma->max_sge || maxsize > 0); + } while (rdma->nr_sge < rdma->max_sge && maxsize > 0); iter->folioq = folioq; iter->folioq_slot = slot; diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 83764c230e9d..3f04a2977ba8 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -40,7 +40,7 @@ void ksmbd_conn_free(struct ksmbd_conn *conn) kvfree(conn->request_buf); kfree(conn->preauth_info); if (atomic_dec_and_test(&conn->refcnt)) { - ksmbd_free_transport(conn->transport); + 
conn->transport->ops->free_transport(conn->transport); kfree(conn); } } diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 6efed923bd68..dd3e0e3f7bf0 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -133,6 +133,7 @@ struct ksmbd_transport_ops { void *buf, unsigned int len, struct smb2_buffer_desc_v1 *desc, unsigned int desc_len); + void (*free_transport)(struct ksmbd_transport *kt); }; struct ksmbd_transport { diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 1a308171b599..fafa86273f12 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1607,17 +1607,18 @@ static int krb5_authenticate(struct ksmbd_work *work, out_len = work->response_sz - (le16_to_cpu(rsp->SecurityBufferOffset) + 4); - /* Check previous session */ - prev_sess_id = le64_to_cpu(req->PreviousSessionId); - if (prev_sess_id && prev_sess_id != sess->id) - destroy_previous_session(conn, sess->user, prev_sess_id); - retval = ksmbd_krb5_authenticate(sess, in_blob, in_len, out_blob, &out_len); if (retval) { ksmbd_debug(SMB, "krb5 authentication failed\n"); return -EINVAL; } + + /* Check previous session */ + prev_sess_id = le64_to_cpu(req->PreviousSessionId); + if (prev_sess_id && prev_sess_id != sess->id) + destroy_previous_session(conn, sess->user, prev_sess_id); + rsp->SecurityBufferLength = cpu_to_le16(out_len); if ((conn->sign || server_conf.enforced_signing) || @@ -4871,8 +4872,13 @@ static int get_file_standard_info(struct smb2_query_info_rsp *rsp, sinfo = (struct smb2_file_standard_info *)rsp->Buffer; delete_pending = ksmbd_inode_pending_delete(fp); - sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9); - sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + if (ksmbd_stream_fd(fp) == false) { + sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9); + sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + } else { + sinfo->AllocationSize = cpu_to_le64(fp->stream.size); + sinfo->EndOfFile = cpu_to_le64(fp->stream.size); + } sinfo->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending); sinfo->DeletePending = delete_pending; sinfo->Directory = S_ISDIR(stat.mode) ? 1 : 0; @@ -4935,9 +4941,14 @@ static int get_file_all_info(struct ksmbd_work *work, file_info->ChangeTime = cpu_to_le64(time); file_info->Attributes = fp->f_ci->m_fattr; file_info->Pad1 = 0; - file_info->AllocationSize = - cpu_to_le64(stat.blocks << 9); - file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + if (ksmbd_stream_fd(fp) == false) { + file_info->AllocationSize = + cpu_to_le64(stat.blocks << 9); + file_info->EndOfFile = S_ISDIR(stat.mode) ? 
0 : cpu_to_le64(stat.size); + } else { + file_info->AllocationSize = cpu_to_le64(fp->stream.size); + file_info->EndOfFile = cpu_to_le64(fp->stream.size); + } file_info->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending); file_info->DeletePending = delete_pending; @@ -4946,7 +4957,10 @@ static int get_file_all_info(struct ksmbd_work *work, file_info->IndexNumber = cpu_to_le64(stat.ino); file_info->EASize = 0; file_info->AccessFlags = fp->daccess; - file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + if (ksmbd_stream_fd(fp) == false) + file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + else + file_info->CurrentByteOffset = cpu_to_le64(fp->stream.pos); file_info->Mode = fp->coption; file_info->AlignmentRequirement = 0; conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename, @@ -5134,8 +5148,13 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp, time = ksmbd_UnixTimeToNT(stat.ctime); file_info->ChangeTime = cpu_to_le64(time); file_info->Attributes = fp->f_ci->m_fattr; - file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); - file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + if (ksmbd_stream_fd(fp) == false) { + file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); + file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + } else { + file_info->AllocationSize = cpu_to_le64(fp->stream.size); + file_info->EndOfFile = cpu_to_le64(fp->stream.size); + } file_info->Reserved = cpu_to_le32(0); rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_ntwrk_info)); @@ -5158,7 +5177,11 @@ static void get_file_position_info(struct smb2_query_info_rsp *rsp, struct smb2_file_pos_info *file_info; file_info = (struct smb2_file_pos_info *)rsp->Buffer; - file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + if (ksmbd_stream_fd(fp) == false) + file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + else + file_info->CurrentByteOffset = cpu_to_le64(fp->stream.pos); + rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_pos_info)); } @@ -5247,8 +5270,13 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp, file_info->ChangeTime = cpu_to_le64(time); file_info->DosAttributes = fp->f_ci->m_fattr; file_info->Inode = cpu_to_le64(stat.ino); - file_info->EndOfFile = cpu_to_le64(stat.size); - file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); + if (ksmbd_stream_fd(fp) == false) { + file_info->EndOfFile = cpu_to_le64(stat.size); + file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); + } else { + file_info->EndOfFile = cpu_to_le64(fp->stream.size); + file_info->AllocationSize = cpu_to_le64(fp->stream.size); + } file_info->HardLinks = cpu_to_le32(stat.nlink); file_info->Mode = cpu_to_le32(stat.mode & 0777); switch (stat.mode & S_IFMT) { @@ -6190,6 +6218,9 @@ static int set_file_allocation_info(struct ksmbd_work *work, if (!(fp->daccess & FILE_WRITE_DATA_LE)) return -EACCES; + if (ksmbd_stream_fd(fp) == true) + return 0; + rc = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (rc) @@ -6248,7 +6279,8 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp, * truncate on some filesystems like FAT32 fills zero data in * the truncated range.
*/ - if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC) { + if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC && + ksmbd_stream_fd(fp) == false) { ksmbd_debug(SMB, "truncated to newsize %lld\n", newsize); rc = ksmbd_vfs_truncate(work, fp, newsize); if (rc) { @@ -6321,7 +6353,13 @@ static int set_file_position_info(struct ksmbd_file *fp, return -EINVAL; } - fp->filp->f_pos = current_byte_offset; + if (ksmbd_stream_fd(fp) == false) + fp->filp->f_pos = current_byte_offset; + else { + if (current_byte_offset > XATTR_SIZE_MAX) + current_byte_offset = XATTR_SIZE_MAX; + fp->stream.pos = current_byte_offset; + } return 0; } diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 4998df04ab95..64a428a06ace 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -159,7 +159,8 @@ struct smb_direct_transport { }; #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport)) - +#define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \ + struct smb_direct_transport, transport)) enum { SMB_DIRECT_MSG_NEGOTIATE_REQ = 0, SMB_DIRECT_MSG_DATA_TRANSFER @@ -410,6 +411,11 @@ err: return NULL; } +static void smb_direct_free_transport(struct ksmbd_transport *kt) +{ + kfree(SMBD_TRANS(kt)); +} + static void free_transport(struct smb_direct_transport *t) { struct smb_direct_recvmsg *recvmsg; @@ -455,7 +461,6 @@ static void free_transport(struct smb_direct_transport *t) smb_direct_destroy_pools(t); ksmbd_conn_free(KSMBD_TRANS(t)->conn); - kfree(t); } static struct smb_direct_sendmsg @@ -2281,4 +2286,5 @@ static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .read = smb_direct_read, .rdma_read = smb_direct_rdma_read, .rdma_write = smb_direct_rdma_write, + .free_transport = smb_direct_free_transport, }; diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index abedf510899a..4e9f98db9ff4 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -93,7 +93,7 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk) return t; } -void ksmbd_free_transport(struct ksmbd_transport *kt) +static void ksmbd_tcp_free_transport(struct ksmbd_transport *kt) { struct tcp_transport *t = TCP_TRANS(kt); @@ -656,4 +656,5 @@ static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops = { .read = ksmbd_tcp_read, .writev = ksmbd_tcp_writev, .disconnect = ksmbd_tcp_disconnect, + .free_transport = ksmbd_tcp_free_transport, }; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index ba45e809555a..0f3aad12e495 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -293,6 +293,7 @@ static int ksmbd_vfs_stream_read(struct ksmbd_file *fp, char *buf, loff_t *pos, if (v_len - *pos < count) count = v_len - *pos; + fp->stream.pos = v_len; memcpy(buf, &stream_buf[*pos], count); @@ -456,8 +457,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, true); if (err < 0) goto out; - - fp->filp->f_pos = *pos; + else + fp->stream.pos = size; err = 0; out: kvfree(stream_buf); diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 5bbb179736c2..0708155b5caf 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -44,6 +44,7 @@ struct ksmbd_lock { struct stream { char *name; ssize_t size; + loff_t pos; }; struct ksmbd_inode { diff --git a/fs/super.c b/fs/super.c index 21799e213fd7..80418ca8e215 100644 --- a/fs/super.c +++ b/fs/super.c @@ -964,8 +964,10 @@ void iterate_supers_type(struct file_system_type *type, spin_unlock(&sb_lock); 
locked = super_lock_shared(sb); - if (locked) + if (locked) { f(sb, arg); + super_unlock_shared(sb); + } spin_lock(&sb_lock); if (p) diff --git a/fs/xattr.c b/fs/xattr.c index 8ec5b0204bfd..600ae97969cf 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -1479,6 +1479,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, buffer += err; } remaining_size -= err; + err = 0; read_lock(&xattrs->lock); for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) { diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 6f6b9de12cd3..db294d452e8c 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -202,6 +202,8 @@ struct shash_desc { #define HASH_REQUEST_CLONE(name, gfp) \ hash_request_clone(name, sizeof(__##name##_req), gfp) +#define CRYPTO_HASH_STATESIZE(coresize, blocksize) (coresize + blocksize + 1) + /** * struct shash_alg - synchronous message digest definition * @init: see struct ahash_alg diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h index 7e7f1ac3b7fd..9e338e7aafbd 100644 --- a/include/crypto/internal/simd.h +++ b/include/crypto/internal/simd.h @@ -44,9 +44,11 @@ void simd_unregister_aeads(struct aead_alg *algs, int count, * * This delegates to may_use_simd(), except that this also returns false if SIMD * in crypto code has been temporarily disabled on this CPU by the crypto - * self-tests, in order to test the no-SIMD fallback code. + * self-tests, in order to test the no-SIMD fallback code. This override is + * currently limited to configurations where the "full" self-tests are enabled, + * because it might be a bit too invasive to be part of the "fast" self-tests. */ -#ifdef CONFIG_CRYPTO_SELFTESTS +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL DECLARE_PER_CPU(bool, crypto_simd_disabled_for_test); #define crypto_simd_usable() \ (may_use_simd() && !this_cpu_read(crypto_simd_disabled_for_test)) diff --git a/include/crypto/md5.h b/include/crypto/md5.h index 198b5d69b92f..28ee533a0507 100644 --- a/include/crypto/md5.h +++ b/include/crypto/md5.h @@ -2,6 +2,7 @@ #ifndef _CRYPTO_MD5_H #define _CRYPTO_MD5_H +#include <crypto/hash.h> #include <linux/types.h> #define MD5_DIGEST_SIZE 16 @@ -15,6 +16,9 @@ #define MD5_H2 0x98badcfeUL #define MD5_H3 0x10325476UL +#define CRYPTO_MD5_STATESIZE \ + CRYPTO_HASH_STATESIZE(MD5_STATE_SIZE, MD5_HMAC_BLOCK_SIZE) + extern const u8 md5_zero_message_hash[MD5_DIGEST_SIZE]; struct md5_state { diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 9b02961d65ee..45f2f278b50a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -249,6 +249,12 @@ static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb) ATM_SKB(skb)->atm_options = vcc->atm_options; } +static inline void atm_return_tx(struct atm_vcc *vcc, struct sk_buff *skb) +{ + WARN_ON_ONCE(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, + &sk_atm(vcc)->sk_wmem_alloc)); +} + static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) { atomic_add(truesize, &sk_atm(vcc)->sk_rmem_alloc); diff --git a/include/linux/bio.h b/include/linux/bio.h index 9c37c66ef9ca..46ffac5caab7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -291,7 +291,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + - PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, 
fi->_seg_count); fi->_next = folio_next(fi->folio); diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 0985221d5478..c9e6b26abab6 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -63,6 +63,7 @@ struct cgroup_bpf { */ struct hlist_head progs[MAX_CGROUP_BPF_ATTACH_TYPE]; u8 flags[MAX_CGROUP_BPF_ATTACH_TYPE]; + u64 revisions[MAX_CGROUP_BPF_ATTACH_TYPE]; /* list of cgroup shared storages */ struct list_head storages; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5b25d278409b..5dd556e89cce 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2288,6 +2288,9 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array *array, return ret; } +bool bpf_jit_bypass_spec_v1(void); +bool bpf_jit_bypass_spec_v4(void); + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; @@ -2475,12 +2478,16 @@ static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) { - return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); + return bpf_jit_bypass_spec_v1() || + cpu_mitigations_off() || + bpf_token_capable(token, CAP_PERFMON); } static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) { - return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); + return bpf_jit_bypass_spec_v4() || + cpu_mitigations_off() || + bpf_token_capable(token, CAP_PERFMON); } int bpf_map_new_fd(struct bpf_map *map, int flags); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 256274acb1d8..7e459e839f8b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -344,7 +344,7 @@ struct bpf_func_state { #define MAX_CALL_FRAMES 8 -/* instruction history flags, used in bpf_insn_hist_entry.flags field */ +/* instruction history flags, used in bpf_jmp_history_entry.flags field */ enum { /* instruction references stack slot through PTR_TO_STACK register; * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8) @@ -366,7 +366,7 @@ enum { static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); -struct bpf_insn_hist_entry { +struct bpf_jmp_history_entry { u32 idx; /* insn idx can't be bigger than 1 million */ u32 prev_idx : 20; @@ -449,32 +449,20 @@ struct bpf_verifier_state { /* first and last insn idx of this verifier state */ u32 first_insn_idx; u32 last_insn_idx; - /* If this state is a part of states loop this field points to some - * parent of this state such that: - * - it is also a member of the same states loop; - * - DFS states traversal starting from initial state visits loop_entry - * state before this state. - * Used to compute topmost loop entry for state loops. - * State loops might appear because of open coded iterators logic. - * See get_loop_entry() for more information. + /* if this state is a backedge state then equal_state + * records cached state to which this state is equal. */ - struct bpf_verifier_state *loop_entry; - /* Sub-range of env->insn_hist[] corresponding to this state's - * instruction history. - * Backtracking is using it to go from last to first. - * For most states instruction history is short, 0-3 instructions. + struct bpf_verifier_state *equal_state; + /* jmp history recorded from first to last. + * backtracking is using it to go from last to first. + * For most states jmp_history_cnt is [0-3]. 
* For loops can go up to ~40. */ - u32 insn_hist_start; - u32 insn_hist_end; + struct bpf_jmp_history_entry *jmp_history; + u32 jmp_history_cnt; u32 dfs_depth; u32 callback_unroll_depth; u32 may_goto_depth; - /* If this state was ever pointed-to by other state's loop_entry field - * this flag would be set to true. Used to avoid freeing such states - * while they are still in use. - */ - u32 used_as_loop_entry; }; #define bpf_get_spilled_reg(slot, frame, mask) \ @@ -580,7 +568,8 @@ struct bpf_insn_aux_data { u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ - bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ + bool nospec; /* do not execute this instruction speculatively */ + bool nospec_result; /* result is unsafe under speculation, nospec must follow */ bool zext_dst; /* this insn zero extends dst reg */ bool needs_zext; /* alu op needs to clear upper bits */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ @@ -609,6 +598,11 @@ struct bpf_insn_aux_data { * accepts callback function as a parameter. */ bool calls_callback; + /* + * CFG strongly connected component this instruction belongs to, + * zero if it is a singleton SCC. + */ + u32 scc; /* registers alive before this instruction. */ u16 live_regs_before; }; @@ -718,6 +712,38 @@ struct bpf_idset { u32 ids[BPF_ID_MAP_SIZE]; }; +/* see verifier.c:compute_scc_callchain() */ +struct bpf_scc_callchain { + /* call sites from bpf_verifier_state->frame[*]->callsite leading to this SCC */ + u32 callsites[MAX_CALL_FRAMES - 1]; + /* last frame in a chain is identified by SCC id */ + u32 scc; +}; + +/* verifier state waiting for propagate_backedges() */ +struct bpf_scc_backedge { + struct bpf_scc_backedge *next; + struct bpf_verifier_state state; +}; + +struct bpf_scc_visit { + struct bpf_scc_callchain callchain; + /* first state in current verification path that entered SCC + * identified by the callchain + */ + struct bpf_verifier_state *entry_state; + struct bpf_scc_backedge *backedges; /* list of backedges */ + u32 num_backedges; +}; + +/* An array of bpf_scc_visit structs sharing the same bpf_scc_callchain->scc + * but having different bpf_scc_callchain->callsites.
+ */ +struct bpf_scc_info { + u32 num_visits; + struct bpf_scc_visit visits[]; +}; + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -775,9 +801,7 @@ struct bpf_verifier_env { int cur_postorder; } cfg; struct backtrack_state bt; - struct bpf_insn_hist_entry *insn_hist; - struct bpf_insn_hist_entry *cur_hist_ent; - u32 insn_hist_cap; + struct bpf_jmp_history_entry *cur_hist_ent; u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ @@ -799,6 +823,7 @@ struct bpf_verifier_env { u32 longest_mark_read_walk; u32 free_list_size; u32 explored_states_size; + u32 num_backedges; bpfptr_t fd_array; /* bit mask to keep track of whether a register has been accessed @@ -816,6 +841,9 @@ struct bpf_verifier_env { char tmp_str_buf[TMP_STR_BUF_LEN]; struct bpf_insn insn_buf[INSN_BUF_SIZE]; struct bpf_insn epilogue_buf[INSN_BUF_SIZE]; + /* array of pointers to bpf_scc_info indexed by SCC id */ + struct bpf_scc_info **scc_info; + u32 scc_cnt; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) diff --git a/include/linux/btf.h b/include/linux/btf.h index b2983706292f..a40beb9cf160 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -221,6 +221,8 @@ bool btf_is_vmlinux(const struct btf *btf); struct module *btf_try_get_module(const struct btf *btf); u32 btf_nr_types(const struct btf *btf); struct btf *btf_base_btf(const struct btf *btf); +bool btf_type_is_i32(const struct btf_type *t); +bool btf_type_is_i64(const struct btf_type *t); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 204b22a99c4b..0a80e1f9aa20 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -57,9 +57,12 @@ static inline void bvec_set_page(struct bio_vec *bv, struct page *page, * @offset: offset into the folio */ static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio, - unsigned int len, unsigned int offset) + size_t len, size_t offset) { - bvec_set_page(bv, &folio->page, len, offset); + unsigned long nr = offset / PAGE_SIZE; + + WARN_ON_ONCE(len > UINT_MAX); + bvec_set_page(bv, folio_page(folio, nr), len, offset % PAGE_SIZE); } /** diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e6089abc28e2..96a3a0d6a60e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -120,6 +120,7 @@ extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); void bringup_nonboot_cpus(unsigned int max_cpus); +int arch_cpu_rescan_dead_smt_siblings(void); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 @@ -134,6 +135,8 @@ static inline void cpu_maps_update_done(void) static inline int add_cpu(unsigned int cpu) { return 0;} +static inline int arch_cpu_rescan_dead_smt_siblings(void) { return 0; } + #endif /* CONFIG_SMP */ extern const struct bus_type cpu_subsys; diff --git a/include/linux/execmem.h b/include/linux/execmem.h index ca42d5e46ccc..3be35680a54f 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -54,7 +54,7 @@ enum execmem_range_flags { EXECMEM_ROX_CACHE = (1 << 1), }; -#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM) +#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX /** * execmem_fill_trapping_insns - set memory to contain instructions that * will trap @@ -94,15 +94,9 @@ int 
execmem_make_temp_rw(void *ptr, size_t size); * Return: 0 on success or negative error code on failure. */ int execmem_restore_rox(void *ptr, size_t size); - -/* - * Called from mark_readonly(), where the system transitions to ROX. - */ -void execmem_cache_make_ro(void); #else static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } -static inline void execmem_cache_make_ro(void) { } #endif /** diff --git a/include/linux/filter.h b/include/linux/filter.h index f5cf4d35d83e..eca229752cbe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -82,7 +82,7 @@ struct ctl_table_header; #define BPF_CALL_ARGS 0xe0 /* unused opcode to mark speculation barrier for mitigating - * Speculative Store Bypass + * Spectre v1 and v4 */ #define BPF_NOSPEC 0xc0 diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..b085f161ed22 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -399,7 +399,9 @@ struct readahead_control; { IOCB_WAITQ, "WAITQ" }, \ { IOCB_NOIO, "NOIO" }, \ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \ - { IOCB_DIO_CALLER_COMP, "CALLER_COMP" } + { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \ + { IOCB_AIO_RW, "AIO_RW" }, \ + { IOCB_HAS_METADATA, "AIO_HAS_METADATA" } struct kiocb { struct file *ki_filp; @@ -2274,10 +2276,12 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) return true; } +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); + static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { - if (WARN_ON_ONCE(file->f_op->mmap_prepare)) - return -EINVAL; + if (file->f_op->mmap_prepare) + return compat_vma_mmap_prepare(file, vma); return file->f_op->mmap(file, vma); } diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ce377f7fb912..22f39e5e2ff1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1278,7 +1278,7 @@ struct ieee80211_ext { u8 sa[ETH_ALEN]; __le32 timestamp; u8 change_seq; - u8 variable[0]; + u8 variable[]; } __packed s1g_beacon; } u; } __packed __aligned(2); @@ -1536,7 +1536,7 @@ struct ieee80211_mgmt { u8 action_code; u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed tdls_discover_resp; struct { u8 action_code; @@ -1721,35 +1721,35 @@ struct ieee80211_tdls_data { struct { u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed setup_req; struct { __le16 status_code; u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed setup_resp; struct { __le16 status_code; u8 dialog_token; - u8 variable[0]; + u8 variable[]; } __packed setup_cfm; struct { __le16 reason_code; - u8 variable[0]; + u8 variable[]; } __packed teardown; struct { u8 dialog_token; - u8 variable[0]; + u8 variable[]; } __packed discover_req; struct { u8 target_channel; u8 oper_class; - u8 variable[0]; + u8 variable[]; } __packed chan_switch_req; struct { __le16 status_code; - u8 variable[0]; + u8 variable[]; } __packed chan_switch_resp; } u; } __packed; diff --git a/include/linux/key.h b/include/linux/key.h index ba05de8579ec..81b8f05c6898 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -236,7 +236,7 @@ struct key { #define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ #define KEY_FLAG_KEEP 8 /* set if key should not be removed */ #define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ -#define KEY_FLAG_FINAL_PUT 10 /* set if 
final put has happened on key */ +#define KEY_FLAG_USER_ALIVE 10 /* set if final put has not happened on key yet */ /* the key type and key description string * - the desc is used to match a key against search criteria diff --git a/include/linux/libata.h b/include/linux/libata.h index 31be45fd47a6..1e5aec839041 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1352,7 +1352,7 @@ int ata_acpi_stm(struct ata_port *ap, const struct ata_acpi_gtm *stm); int ata_acpi_gtm(struct ata_port *ap, struct ata_acpi_gtm *stm); unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, const struct ata_acpi_gtm *gtm); -int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm); +int ata_acpi_cbl_pata_type(struct ata_port *ap); #else static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) { @@ -1377,10 +1377,9 @@ static inline unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, return 0; } -static inline int ata_acpi_cbl_80wire(struct ata_port *ap, - const struct ata_acpi_gtm *gtm) +static inline int ata_acpi_cbl_pata_type(struct ata_port *ap) { - return 0; + return ATA_CBL_PATA40; } #endif diff --git a/include/linux/module.h b/include/linux/module.h index 92e1420fccdf..5faa1fb1f4b4 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -586,11 +586,6 @@ struct module { atomic_t refcnt; #endif -#ifdef CONFIG_MITIGATION_ITS - int its_num_pages; - void **its_page_array; -#endif - #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; diff --git a/include/linux/mount.h b/include/linux/mount.h index 4880f434c021..1a508beba446 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -116,10 +116,8 @@ extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); int do_mount(const char *, const char __user *, const char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(const struct path *); -extern void drop_collected_mounts(struct vfsmount *); -extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, - struct vfsmount *); +extern struct path *collect_paths(const struct path *, struct path *, unsigned); +extern void drop_collected_paths(struct path *, struct path *); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); extern int cifs_root_data(char **dev, char **opts); diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 5daf80df9e89..b74a539ec581 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -108,7 +108,7 @@ extern void deregister_mtd_parser(struct mtd_part_parser *parser); deregister_mtd_parser) int mtd_add_partition(struct mtd_info *master, const char *name, - long long offset, long long length, struct mtd_info **part); + long long offset, long long length); int mtd_del_partition(struct mtd_info *master, int partno); uint64_t mtd_get_device_size(const struct mtd_info *mtd); diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 811a0f356315..15eaa09da998 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -113,11 +113,12 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len, ...) 
\ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ SPI_MEM_OP_ADDR(2, addr, 2), \ SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ @@ -151,11 +152,12 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(2, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 52dc7cfab0e0..ec9d96025683 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -635,8 +635,46 @@ struct perf_addr_filter_range { unsigned long size; }; -/** - * enum perf_event_state - the states of an event: +/* + * The normal states are: + * + * ACTIVE --. + * ^ | + * | | + * sched_{in,out}() | + * | | + * v | + * ,---> INACTIVE --+ <-. + * | | | + * | {dis,en}able() + * sched_in() | | + * | OFF <--' --+ + * | | + * `---> ERROR ------' + * + * That is: + * + * sched_in: INACTIVE -> {ACTIVE,ERROR} + * sched_out: ACTIVE -> INACTIVE + * disable: {ACTIVE,INACTIVE} -> OFF + * enable: {OFF,ERROR} -> INACTIVE + * + * Where {OFF,ERROR} are disabled states. + * + * Then we have the {EXIT,REVOKED,DEAD} states which are various shades of + * defunct events: + * + * - EXIT means the task that the event was assigned to died, but child events + * still live, and further children can still be created. But the event + * itself will never be active again. It can only transition to + * {REVOKED,DEAD}; + * + * - REVOKED means the PMU the event was associated with is gone; all + * functionality is stopped but the event is still alive. Can only + * transition to DEAD; + * + * - DEAD event really is DYING, tearing down state and freeing bits.
+ * */ enum perf_event_state { PERF_EVENT_STATE_DEAD = -5, diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 9ba771f2ddea..6fb4894b8cfd 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -159,7 +159,7 @@ struct rdt_ctrl_domain { /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types - * @ci: cache info for this domain + * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -170,7 +170,7 @@ struct rdt_ctrl_domain { */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; - struct cacheinfo *ci; + unsigned int ci_id; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local; diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 0cdbfc42f153..6f8a4965f9b9 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -99,7 +99,7 @@ static inline bool sg_is_last(struct scatterlist *sg) * @sg: The current sg entry * * Description: - * Usually the next entry will be @sg@ + 1, but if this sg element is part + * Usually the next entry will be @sg + 1, but if this sg element is part * of a chained scatterlist, it could jump to the start of a new * scatterlist array. * @@ -254,7 +254,7 @@ static inline void __sg_chain(struct scatterlist *chain_sg, * @sgl: Second scatterlist * * Description: - * Links @prv@ and @sgl@ together, to form a longer scatterlist. + * Links @prv and @sgl together, to form a longer scatterlist. * **/ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 3c13240077b8..57ed3035cc30 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -40,6 +40,8 @@ struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness); struct tnum tnum_add(struct tnum a, struct tnum b); /* Subtract two tnums, return @a - @b */ struct tnum tnum_sub(struct tnum a, struct tnum b); +/* Neg of a tnum, return 0 - @a */ +struct tnum tnum_neg(struct tnum a); /* Bitwise-AND, return @a & @b */ struct tnum tnum_and(struct tnum a, struct tnum b); /* Bitwise-OR, return @a | @b */ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 7c06f4795670..1beb5b395d81 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -296,6 +296,8 @@ static inline bool pagefault_disabled(void) */ #define faulthandler_disabled() (pagefault_disabled() || in_atomic()) +DEFINE_LOCK_GUARD_0(pagefault, pagefault_disable(), pagefault_enable()) + #ifndef CONFIG_ARCH_HAS_SUBPAGE_FAULTS /** diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2b261e74e2c4..9fc8f544e20e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -29,6 +29,7 @@ #include <linux/idr.h> #include <linux/leds.h> #include <linux/rculist.h> +#include <linux/srcu.h> #include <net/bluetooth/hci.h> #include <net/bluetooth/hci_drv.h> @@ -242,6 +243,7 @@ struct adv_info { __u8 mesh; __u8 instance; __u8 handle; + __u8 sid; __u32 flags; __u16 timeout; __u16 remaining_time; @@ -346,6 +348,7 @@ struct adv_monitor { struct hci_dev { struct list_head list; + struct srcu_struct srcu; struct mutex lock; struct ida unset_handle_ida; @@ -546,6 +549,7 @@ struct hci_dev { struct hci_conn_hash conn_hash; struct list_head mesh_pending; + struct mutex mgmt_pending_lock; 
struct list_head mgmt_pending; struct list_head reject_list; struct list_head accept_list; @@ -1550,13 +1554,14 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, u16 timeout); struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos); -struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, +struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, struct bt_iso_qos *qos, __u8 base_len, __u8 *base); struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos); struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, - __u8 dst_type, struct bt_iso_qos *qos, + __u8 dst_type, __u8 sid, + struct bt_iso_qos *qos, __u8 data_len, __u8 *data); struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); @@ -1831,6 +1836,7 @@ int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, void hci_adv_instances_clear(struct hci_dev *hdev); struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance); +struct adv_info *hci_find_adv_sid(struct hci_dev *hdev, u8 sid); struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance); struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, @@ -1838,7 +1844,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u16 timeout, u16 duration, s8 tx_power, u32 min_interval, u32 max_interval, u8 mesh_handle); -struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, +struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u8 sid, u32 flags, u8 data_len, u8 *data, u32 min_interval, u32 max_interval); int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, @@ -2400,7 +2406,6 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); -void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 72558c826aa1..5224f57f6af2 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -115,8 +115,8 @@ int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance); int hci_enable_advertising_sync(struct hci_dev *hdev); int hci_enable_advertising(struct hci_dev *hdev); -int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, - u8 *data, u32 flags, u16 min_interval, +int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 sid, + u8 data_len, u8 *data, u32 flags, u16 min_interval, u16 max_interval, u16 sync_interval); int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 629368ab2787..638948be4c50 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -973,14 +973,6 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, *backlog = qstats.backlog; } -static inline void qdisc_tree_flush_backlog(struct Qdisc *sch) -{ - __u32 qlen, backlog; - - qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); - qdisc_tree_reduce_backlog(sch, qlen, 
backlog); -} - static inline void qdisc_purge_queue(struct Qdisc *sch) { __u32 qlen, backlog; diff --git a/include/net/sock.h b/include/net/sock.h index 92e7c1aae3cc..4c37015b7cf7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -3010,8 +3010,11 @@ int sock_ioctl_inout(struct sock *sk, unsigned int cmd, int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); static inline bool sk_is_readable(struct sock *sk) { - if (sk->sk_prot->sock_is_readable) - return sk->sk_prot->sock_is_readable(sk); + const struct proto *prot = READ_ONCE(sk->sk_prot); + + if (prot->sock_is_readable) + return prot->sock_is_readable(sk); + return false; } #endif /* _SOCK_H */ diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index a5f4b9234f46..dad7360f42f9 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -211,24 +211,6 @@ TRACE_EVENT(erofs_map_blocks_exit, show_mflags(__entry->mflags), __entry->ret) ); -TRACE_EVENT(erofs_destroy_inode, - TP_PROTO(struct inode *inode), - - TP_ARGS(inode), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( erofs_nid_t, nid ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->nid = EROFS_I(inode)->nid; - ), - - TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry)) -); - #endif /* _TRACE_EROFS_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0b4a2f124d11..719ba230032f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1794,6 +1794,13 @@ union bpf_attr { }; __u64 expected_revision; } netkit; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } cgroup; }; } link_create; @@ -2403,7 +2410,7 @@ union bpf_attr { * into it. An example is available in file * *samples/bpf/trace_output_user.c* in the Linux kernel source * tree (the eBPF program counterpart is in - * *samples/bpf/trace_output_kern.c*). + * *samples/bpf/trace_output.bpf.c*). 
* * **bpf_perf_event_output**\ () achieves better performance * than **bpf_trace_printk**\ () for sharing data with user @@ -6653,11 +6660,15 @@ struct bpf_link_info { struct { __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */ __u32 tp_name_len; /* in/out: tp_name buffer len */ + __u32 :32; + __u64 cookie; } raw_tracepoint; struct { __u32 attach_type; __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */ __u32 target_btf_id; /* BTF type id inside the object */ + __u32 :32; + __u64 cookie; } tracing; struct { __u64 cgroup_id; diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 9ff72cfb2e98..09a75bdb6560 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -208,10 +208,6 @@ enum { ETHTOOL_A_STATS_PHY_MAX = (__ETHTOOL_A_STATS_PHY_CNT - 1) }; -/* generic netlink info */ -#define ETHTOOL_GENL_NAME "ethtool" -#define ETHTOOL_GENL_VERSION 1 - #define ETHTOOL_MCGRP_MONITOR_NAME "monitor" #endif /* _UAPI_LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 9a02f579de22..aa8ab5227c1e 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -6,8 +6,8 @@ #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H -#define ETHTOOL_FAMILY_NAME "ethtool" -#define ETHTOOL_FAMILY_VERSION 1 +#define ETHTOOL_GENL_NAME "ethtool" +#define ETHTOOL_GENL_VERSION 1 enum { ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d00b85cb168c..37891580d05d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -447,6 +448,27 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; + }; + } tdx; /* Fix the size of the union. */ char padding[256]; }; diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h index 84fa8a21dfd0..6ac84b2f636c 100644 --- a/include/uapi/linux/mptcp_pm.h +++ b/include/uapi/linux/mptcp_pm.h @@ -27,14 +27,14 @@ * token, rem_id. * @MPTCP_EVENT_SUB_ESTABLISHED: A new subflow has been established. 'error' * should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | - * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error]. + * saddr6, daddr4 | daddr6, sport, dport, backup, if-idx [, error]. * @MPTCP_EVENT_SUB_CLOSED: A subflow has been closed. An error (copy of * sk_err) could be set if an error has been detected for this subflow. * Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - * daddr6, sport, dport, backup, if_idx [, error]. + * daddr6, sport, dport, backup, if-idx [, error]. * @MPTCP_EVENT_SUB_PRIORITY: The priority of a subflow has changed. 'error' * should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | - * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error]. + * saddr6, daddr4 | daddr6, sport, dport, backup, if-idx [, error]. 
* @MPTCP_EVENT_LISTENER_CREATED: A new PM listener is created. Attributes: * family, sport, saddr4 | saddr6. * @MPTCP_EVENT_LISTENER_CLOSED: A PM listener is closed. Attributes: family, diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index ed07181d4eff..e05280e41522 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -17,6 +17,10 @@ #ifndef _UAPI_VM_SOCKETS_H #define _UAPI_VM_SOCKETS_H +#ifndef __KERNEL__ +#include <sys/socket.h> /* for struct sockaddr and sa_family_t */ +#endif + #include <linux/socket.h> #include <linux/types.h> diff --git a/init/initramfs.c b/init/initramfs.c index 72bad44a1d41..097673b97784 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> #include <linux/async.h> +#include <linux/export.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/types.h> diff --git a/init/main.c b/init/main.c index ed576c7f475d..225a58279acd 100644 --- a/init/main.c +++ b/init/main.c @@ -13,6 +13,7 @@ #define DEBUG /* Enable initcall_debug */ #include <linux/types.h> +#include <linux/export.h> #include <linux/extable.h> #include <linux/module.h> #include <linux/proc_fs.h> diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index e9355276ab5d..9798d6fb4ec7 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -141,18 +141,26 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) if (ctx->flags & IORING_SETUP_SQPOLL) { struct io_sq_data *sq = ctx->sq_data; + struct task_struct *tsk; + rcu_read_lock(); + tsk = rcu_dereference(sq->thread); /* * sq->thread might be NULL if we raced with the sqpoll * thread termination. */ - if (sq->thread) { + if (tsk) { + get_task_struct(tsk); + rcu_read_unlock(); + getrusage(tsk, RUSAGE_SELF, &sq_usage); + put_task_struct(tsk); sq_pid = sq->task_pid; sq_cpu = sq->sq_cpu; - getrusage(sq->thread, RUSAGE_SELF, &sq_usage); sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000 + sq_usage.ru_stime.tv_usec); sq_work_time = sq->work_time; + } else { + rcu_read_unlock(); } } diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index cd1fcb115739..be91edf34f01 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1259,8 +1259,10 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) atomic_set(&wq->worker_refs, 1); init_completion(&wq->worker_done); ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); - if (ret) + if (ret) { + put_task_struct(wq->task); goto err; + } return wq; err: diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index cf759c172083..5111ec040c53 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1523,6 +1523,9 @@ static __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) } } mutex_unlock(&ctx->uring_lock); + + if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) + io_move_task_work_from_local(ctx); } static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events) @@ -2906,7 +2909,7 @@ static __cold void io_ring_exit_work(struct work_struct *work) struct task_struct *tsk; io_sq_thread_park(sqd); - tsk = sqd->thread; + tsk = sqpoll_task_locked(sqd); if (tsk && tsk->io_uring && tsk->io_uring->io_wq) io_wq_cancel_cb(tsk->io_uring->io_wq, io_cancel_ctx_cb, ctx, true); @@ -3142,7 +3145,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) s64 inflight; DEFINE_WAIT(wait); - WARN_ON_ONCE(sqd && sqd->thread != current); + WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current); if 
(!current->io_uring) return; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index d59c12277d58..66c1ca73f55e 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -98,8 +98,6 @@ struct llist_node *io_handle_tw_list(struct llist_node *node, unsigned int *coun struct llist_node *tctx_task_work_run(struct io_uring_task *tctx, unsigned int max_entries, unsigned int *count); void tctx_task_work(struct callback_head *cb); __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); -int io_uring_alloc_task_context(struct task_struct *task, - struct io_ring_ctx *ctx); int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file, int start, int end); diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 2ea65f3cef72..ce95e3af44a9 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -270,8 +270,11 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, /* truncate end piece, if needed, for non partial buffers */ if (len > arg->max_len) { len = arg->max_len; - if (!(bl->flags & IOBL_INC)) + if (!(bl->flags & IOBL_INC)) { + if (iov != arg->iovs) + break; buf->len = len; + } } iov->iov_base = u64_to_user_ptr(buf->addr); diff --git a/io_uring/net.c b/io_uring/net.c index e16633fd6630..9550d4c8f866 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -821,7 +821,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, if (sr->flags & IORING_RECVSEND_BUNDLE) { size_t this_ret = *ret - sr->done_io; - cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, this_ret), + cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret), issue_flags); if (sr->retry) cflags = req->cqe.flags | (cflags & CQE_F_MASK); diff --git a/io_uring/register.c b/io_uring/register.c index cc23a4c205cd..a59589249fce 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -273,6 +273,8 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, if (ctx->flags & IORING_SETUP_SQPOLL) { sqd = ctx->sq_data; if (sqd) { + struct task_struct *tsk; + /* * Observe the correct sqd->lock -> ctx->uring_lock * ordering. 
Fine to drop uring_lock here, we hold @@ -282,8 +284,9 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, mutex_unlock(&ctx->uring_lock); mutex_lock(&sqd->lock); mutex_lock(&ctx->uring_lock); - if (sqd->thread) - tctx = sqd->thread->io_uring; + tsk = sqpoll_task_locked(sqd); + if (tsk) + tctx = tsk->io_uring; } } else { tctx = current->io_uring; diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index c592ceace97d..d724602697e7 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -809,10 +809,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, imu->nr_bvecs = nr_pages; ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage); - if (ret) { - unpin_user_pages(pages, nr_pages); + if (ret) goto done; - } size = iov->iov_len; /* store original address for later verification */ @@ -842,6 +840,8 @@ done: if (ret) { if (imu) io_free_imu(ctx, imu); + if (pages) + unpin_user_pages(pages, nr_pages); io_cache_free(&ctx->node_cache, node); node = ERR_PTR(ret); } @@ -1177,6 +1177,8 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx return -EINVAL; if (check_add_overflow(arg->nr, arg->dst_off, &nbufs)) return -EOVERFLOW; + if (nbufs > IORING_MAX_REG_BUFFERS) + return -EINVAL; ret = io_rsrc_data_alloc(&data, max(nbufs, ctx->buf_table.nr)); if (ret) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 03c699493b5a..a3f11349ce06 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -16,6 +16,7 @@ #include <uapi/linux/io_uring.h> #include "io_uring.h" +#include "tctx.h" #include "napi.h" #include "sqpoll.h" @@ -30,7 +31,7 @@ enum { void io_sq_thread_unpark(struct io_sq_data *sqd) __releases(&sqd->lock) { - WARN_ON_ONCE(sqd->thread == current); + WARN_ON_ONCE(sqpoll_task_locked(sqd) == current); /* * Do the dance but not conditional clear_bit() because it'd race with @@ -46,24 +47,32 @@ void io_sq_thread_unpark(struct io_sq_data *sqd) void io_sq_thread_park(struct io_sq_data *sqd) __acquires(&sqd->lock) { - WARN_ON_ONCE(data_race(sqd->thread) == current); + struct task_struct *tsk; atomic_inc(&sqd->park_pending); set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); mutex_lock(&sqd->lock); - if (sqd->thread) - wake_up_process(sqd->thread); + + tsk = sqpoll_task_locked(sqd); + if (tsk) { + WARN_ON_ONCE(tsk == current); + wake_up_process(tsk); + } } void io_sq_thread_stop(struct io_sq_data *sqd) { - WARN_ON_ONCE(sqd->thread == current); + struct task_struct *tsk; + WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)); set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); mutex_lock(&sqd->lock); - if (sqd->thread) - wake_up_process(sqd->thread); + tsk = sqpoll_task_locked(sqd); + if (tsk) { + WARN_ON_ONCE(tsk == current); + wake_up_process(tsk); + } mutex_unlock(&sqd->lock); wait_for_completion(&sqd->exited); } @@ -270,7 +279,8 @@ static int io_sq_thread(void *data) /* offload context creation failed, just exit */ if (!current->io_uring) { mutex_lock(&sqd->lock); - sqd->thread = NULL; + rcu_assign_pointer(sqd->thread, NULL); + put_task_struct(current); mutex_unlock(&sqd->lock); goto err_out; } @@ -379,7 +389,8 @@ static int io_sq_thread(void *data) io_sq_tw(&retry_list, UINT_MAX); io_uring_cancel_generic(true, sqd); - sqd->thread = NULL; + rcu_assign_pointer(sqd->thread, NULL); + put_task_struct(current); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags); io_run_task_work(); @@ -409,7 +420,6 @@ void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) __cold int 
io_sq_offload_create(struct io_ring_ctx *ctx, struct io_uring_params *p) { - struct task_struct *task_to_put = NULL; int ret; /* Retain compatibility with failing for an invalid attach attempt */ @@ -484,8 +494,11 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, goto err_sqpoll; } - sqd->thread = tsk; - task_to_put = get_task_struct(tsk); + mutex_lock(&sqd->lock); + rcu_assign_pointer(sqd->thread, tsk); + mutex_unlock(&sqd->lock); + + get_task_struct(tsk); ret = io_uring_alloc_task_context(tsk, ctx); wake_up_new_task(tsk); if (ret) @@ -495,16 +508,11 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, ret = -EINVAL; goto err; } - - if (task_to_put) - put_task_struct(task_to_put); return 0; err_sqpoll: complete(&ctx->sq_data->exited); err: io_sq_thread_finish(ctx); - if (task_to_put) - put_task_struct(task_to_put); return ret; } @@ -515,10 +523,13 @@ __cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, int ret = -EINVAL; if (sqd) { + struct task_struct *tsk; + io_sq_thread_park(sqd); /* Don't set affinity for a dying thread */ - if (sqd->thread) - ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask); + tsk = sqpoll_task_locked(sqd); + if (tsk) + ret = io_wq_cpu_affinity(tsk->io_uring, mask); io_sq_thread_unpark(sqd); } diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h index 4171666b1cf4..b83dcdec9765 100644 --- a/io_uring/sqpoll.h +++ b/io_uring/sqpoll.h @@ -8,7 +8,7 @@ struct io_sq_data { /* ctx's that are using this sqd */ struct list_head ctx_list; - struct task_struct *thread; + struct task_struct __rcu *thread; struct wait_queue_head wait; unsigned sq_thread_idle; @@ -29,3 +29,9 @@ void io_sq_thread_unpark(struct io_sq_data *sqd); void io_put_sq_data(struct io_sq_data *sqd); void io_sqpoll_wait_sq(struct io_ring_ctx *ctx); int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask); + +static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd) +{ + return rcu_dereference_protected(sqd->thread, + lockdep_is_held(&sqd->lock)); +} diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index f2f38903b2fe..b0eae2a3c895 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -668,12 +668,6 @@ int audit_remove_tree_rule(struct audit_krule *rule) return 0; } -static int compare_root(struct vfsmount *mnt, void *arg) -{ - return inode_to_key(d_backing_inode(mnt->mnt_root)) == - (unsigned long)arg; -} - void audit_trim_trees(void) { struct list_head cursor; @@ -683,8 +677,9 @@ void audit_trim_trees(void) while (cursor.next != &tree_list) { struct audit_tree *tree; struct path path; - struct vfsmount *root_mnt; struct audit_node *node; + struct path *paths; + struct path array[16]; int err; tree = container_of(cursor.next, struct audit_tree, list); @@ -696,9 +691,9 @@ void audit_trim_trees(void) if (err) goto skip_it; - root_mnt = collect_mounts(&path); + paths = collect_paths(&path, array, 16); path_put(&path); - if (IS_ERR(root_mnt)) + if (IS_ERR(paths)) goto skip_it; spin_lock(&hash_lock); @@ -706,14 +701,17 @@ void audit_trim_trees(void) struct audit_chunk *chunk = find_chunk(node); /* this could be NULL if the watch is dying elsewhere...
*/ node->index |= 1U<<31; - if (iterate_mounts(compare_root, - (void *)(chunk->key), - root_mnt)) - node->index &= ~(1U<<31); + for (struct path *p = paths; p->dentry; p++) { + struct inode *inode = p->dentry->d_inode; + if (inode_to_key(inode) == chunk->key) { + node->index &= ~(1U<<31); + break; + } + } } spin_unlock(&hash_lock); trim_marked(tree); - drop_collected_mounts(root_mnt); + drop_collected_paths(paths, array); skip_it: put_tree(tree); mutex_lock(&audit_filter_mutex); @@ -742,9 +740,14 @@ void audit_put_tree(struct audit_tree *tree) put_tree(tree); } -static int tag_mount(struct vfsmount *mnt, void *arg) +static int tag_mounts(struct path *paths, struct audit_tree *tree) { - return tag_chunk(d_backing_inode(mnt->mnt_root), arg); + for (struct path *p = paths; p->dentry; p++) { + int err = tag_chunk(p->dentry->d_inode, tree); + if (err) + return err; + } + return 0; } /* @@ -801,7 +804,8 @@ int audit_add_tree_rule(struct audit_krule *rule) { struct audit_tree *seed = rule->tree, *tree; struct path path; - struct vfsmount *mnt; + struct path array[16]; + struct path *paths; int err; rule->tree = NULL; @@ -828,16 +832,16 @@ int audit_add_tree_rule(struct audit_krule *rule) err = kern_path(tree->pathname, 0, &path); if (err) goto Err; - mnt = collect_mounts(&path); + paths = collect_paths(&path, array, 16); path_put(&path); - if (IS_ERR(mnt)) { - err = PTR_ERR(mnt); + if (IS_ERR(paths)) { + err = PTR_ERR(paths); goto Err; } get_tree(tree); - err = iterate_mounts(tag_mount, tree, mnt); - drop_collected_mounts(mnt); + err = tag_mounts(paths, tree); + drop_collected_paths(paths, array); if (!err) { struct audit_node *node; @@ -872,20 +876,21 @@ int audit_tag_tree(char *old, char *new) struct list_head cursor, barrier; int failed = 0; struct path path1, path2; - struct vfsmount *tagged; + struct path array[16]; + struct path *paths; int err; err = kern_path(new, 0, &path2); if (err) return err; - tagged = collect_mounts(&path2); + paths = collect_paths(&path2, array, 16); path_put(&path2); - if (IS_ERR(tagged)) - return PTR_ERR(tagged); + if (IS_ERR(paths)) + return PTR_ERR(paths); err = kern_path(old, 0, &path1); if (err) { - drop_collected_mounts(tagged); + drop_collected_paths(paths, array); return err; } @@ -914,7 +919,7 @@ int audit_tag_tree(char *old, char *new) continue; } - failed = iterate_mounts(tag_mount, tree, tagged); + failed = tag_mounts(paths, tree); if (failed) { put_tree(tree); mutex_lock(&audit_filter_mutex); @@ -955,7 +960,7 @@ int audit_tag_tree(char *old, char *new) list_del(&cursor); mutex_unlock(&audit_filter_mutex); path_put(&path1); - drop_collected_mounts(tagged); + drop_collected_paths(paths, array); return failed; } diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index eb28c0f219ee..3d080916faf9 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -530,8 +530,6 @@ static int array_map_check_btf(const struct bpf_map *map, const struct btf_type *key_type, const struct btf_type *value_type) { - u32 int_data; - /* One exception for keyless BTF: .bss/.data/.rodata map */ if (btf_type_is_void(key_type)) { if (map->map_type != BPF_MAP_TYPE_ARRAY || @@ -544,14 +542,11 @@ static int array_map_check_btf(const struct bpf_map *map, return 0; } - if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) - return -EINVAL; - - int_data = *(u32 *)(key_type + 1); - /* bpf array can only take a u32 key. This check makes sure + /* + * Bpf array can only take a u32 key. This check makes sure * that the btf matches the attr used during map_create. 
*/ - if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) + if (!btf_type_is_i32(key_type)) return -EINVAL; return 0; diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 380e9a7cac75..303ab1f42d3a 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -38,8 +38,7 @@ static DEFINE_MUTEX(link_mutex); /* incremented on every opened seq_file */ static atomic64_t session_id; -static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, - const struct bpf_iter_seq_info *seq_info); +static int prepare_seq_file(struct file *file, struct bpf_iter_link *link); static void bpf_iter_inc_seq_num(struct seq_file *seq) { @@ -257,7 +256,7 @@ static int iter_open(struct inode *inode, struct file *file) { struct bpf_iter_link *link = inode->i_private; - return prepare_seq_file(file, link, __get_seq_info(link)); + return prepare_seq_file(file, link); } static int iter_release(struct inode *inode, struct file *file) @@ -586,9 +585,9 @@ static void init_seq_meta(struct bpf_iter_priv_data *priv_data, priv_data->done_stop = false; } -static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, - const struct bpf_iter_seq_info *seq_info) +static int prepare_seq_file(struct file *file, struct bpf_iter_link *link) { + const struct bpf_iter_seq_info *seq_info = __get_seq_info(link); struct bpf_iter_priv_data *priv_data; struct bpf_iter_target_info *tinfo; struct bpf_prog *prog; @@ -653,7 +652,7 @@ int bpf_iter_new_fd(struct bpf_link *link) } iter_link = container_of(link, struct bpf_iter_link, link); - err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link)); + err = prepare_seq_file(file, iter_link); if (err) goto free_file; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index fa56c30833ff..b931fbceb54d 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -722,13 +722,7 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, const struct btf_type *key_type, const struct btf_type *value_type) { - u32 int_data; - - if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) - return -EINVAL; - - int_data = *(u32 *)(key_type + 1); - if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) + if (!btf_type_is_i32(key_type)) return -EINVAL; return 0; diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c index 3dabdd137d10..2d6e1c98d8ad 100644 --- a/kernel/bpf/bpf_lru_list.c +++ b/kernel/bpf/bpf_lru_list.c @@ -337,12 +337,12 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, list) { __bpf_lru_node_move_to_free(l, node, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); - if (++nfree == LOCAL_FREE_TARGET) + if (++nfree == lru->target_free) break; } - if (nfree < LOCAL_FREE_TARGET) - __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree, + if (nfree < lru->target_free) + __bpf_lru_list_shrink(lru, l, lru->target_free - nfree, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); @@ -577,6 +577,9 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); buf += elem_size; } + + lru->target_free = clamp((nr_elems / num_possible_cpus()) / 2, + 1, LOCAL_FREE_TARGET); } static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index cbd8d3720c2b..fe2661a58ea9 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -58,6 +58,7 @@ struct bpf_lru { del_from_htab_func del_from_htab; void *del_arg; unsigned 
int hash_offset; + unsigned int target_free; unsigned int nr_scans; bool percpu; }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 1d2cf898e21e..05fd64a371af 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -858,26 +858,37 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) EXPORT_SYMBOL_GPL(btf_type_by_id); /* - * Regular int is not a bit field and it must be either - * u8/u16/u32/u64 or __int128. + * Check that the type @t is a regular int. This means that @t is not + * a bit field and it has the same size as either of u8/u16/u32/u64 + * or __int128. If @expected_size is not zero, then size of @t should + * be the same. A caller should already have checked that the type @t + * is an integer. */ +static bool __btf_type_int_is_regular(const struct btf_type *t, size_t expected_size) +{ + u32 int_data = btf_type_int(t); + u8 nr_bits = BTF_INT_BITS(int_data); + u8 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits); + + return BITS_PER_BYTE_MASKED(nr_bits) == 0 && + BTF_INT_OFFSET(int_data) == 0 && + (nr_bytes <= 16 && is_power_of_2(nr_bytes)) && + (expected_size == 0 || nr_bytes == expected_size); +} + static bool btf_type_int_is_regular(const struct btf_type *t) { - u8 nr_bits, nr_bytes; - u32 int_data; + return __btf_type_int_is_regular(t, 0); +} - int_data = btf_type_int(t); - nr_bits = BTF_INT_BITS(int_data); - nr_bytes = BITS_ROUNDUP_BYTES(nr_bits); - if (BITS_PER_BYTE_MASKED(nr_bits) || - BTF_INT_OFFSET(int_data) || - (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) && - nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) && - nr_bytes != (2 * sizeof(u64)))) { - return false; - } +bool btf_type_is_i32(const struct btf_type *t) +{ + return btf_type_is_int(t) && __btf_type_int_is_regular(t, 4); +} - return true; +bool btf_type_is_i64(const struct btf_type *t) +{ + return btf_type_is_int(t) && __btf_type_int_is_regular(t, 8); } /* @@ -3443,7 +3454,8 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt, node_field_name = strstr(value_type, ":"); if (!node_field_name) return -EINVAL; - value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN); + value_type = kstrndup(value_type, node_field_name - value_type, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!value_type) return -ENOMEM; id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT); @@ -3958,7 +3970,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type /* This needs to be kzalloc to zero out padding and unused fields, see * comment in btf_record_equal. */ - rec = kzalloc(struct_size(rec, fields, cnt), GFP_KERNEL | __GFP_NOWARN); + rec = kzalloc(struct_size(rec, fields, cnt), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!rec) return ERR_PTR(-ENOMEM); @@ -9019,7 +9031,7 @@ static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands, bpf_free_cands_from_cache(*cc); *cc = NULL; } - new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL); + new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL_ACCOUNT); if (!new_cands) { bpf_free_cands(cands); return ERR_PTR(-ENOMEM); @@ -9027,7 +9039,7 @@ static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands, /* strdup the name, since it will stay in cache. * the cands->name points to strings in prog's BTF and the prog can be unloaded. 
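To make the __btf_type_int_is_regular() helper introduced above concrete, here are a few BTF_KIND_INT encodings and how they fare, written with the BTF_INT_ENC() packing macro used by the BTF selftests (illustrative only):

/* BTF_INT_ENC(encoding, bits_offset, nr_bits) packs the u32 that
 * follows struct btf_type for BTF_KIND_INT.
 */
BTF_INT_ENC(0, 0, 32);	/* regular; also satisfies btf_type_is_i32() */
BTF_INT_ENC(0, 0, 64);	/* regular; also satisfies btf_type_is_i64() */
BTF_INT_ENC(0, 0, 12);	/* rejected: 12 bits is not a whole number of bytes */
BTF_INT_ENC(0, 0, 24);	/* rejected: 3 bytes is not a power of two */
BTF_INT_ENC(0, 3, 8);	/* rejected: nonzero bit offset (bitfield) */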
*/ - new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL); + new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL_ACCOUNT); bpf_free_cands(cands); if (!new_cands->name) { kfree(new_cands); @@ -9111,7 +9123,7 @@ bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf, continue; /* most of the time there is only one candidate for a given kind+name pair */ - new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL); + new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL_ACCOUNT); if (!new_cands) { bpf_free_cands(cands); return ERR_PTR(-ENOMEM); @@ -9228,7 +9240,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, /* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5" * into arrays of btf_ids of struct fields and array indices. */ - specs = kcalloc(3, sizeof(*specs), GFP_KERNEL); + specs = kcalloc(3, sizeof(*specs), GFP_KERNEL_ACCOUNT); if (!specs) return -ENOMEM; @@ -9253,7 +9265,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, goto out; } if (cc->cnt) { - cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL); + cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL_ACCOUNT); if (!cands.cands) { err = -ENOMEM; goto out; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 9122c39870bf..cd220e861d67 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -658,6 +658,116 @@ static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs, return NULL; } +static struct bpf_link *bpf_get_anchor_link(u32 flags, u32 id_or_fd) +{ + struct bpf_link *link = ERR_PTR(-EINVAL); + + if (flags & BPF_F_ID) + link = bpf_link_by_id(id_or_fd); + else if (id_or_fd) + link = bpf_link_get_from_fd(id_or_fd); + return link; +} + +static struct bpf_prog *bpf_get_anchor_prog(u32 flags, u32 id_or_fd) +{ + struct bpf_prog *prog = ERR_PTR(-EINVAL); + + if (flags & BPF_F_ID) + prog = bpf_prog_by_id(id_or_fd); + else if (id_or_fd) + prog = bpf_prog_get(id_or_fd); + return prog; +} + +static struct bpf_prog_list *get_prog_list(struct hlist_head *progs, struct bpf_prog *prog, + struct bpf_cgroup_link *link, u32 flags, u32 id_or_fd) +{ + bool is_link = flags & BPF_F_LINK, is_id = flags & BPF_F_ID; + struct bpf_prog_list *pltmp, *pl = ERR_PTR(-EINVAL); + bool preorder = flags & BPF_F_PREORDER; + struct bpf_link *anchor_link = NULL; + struct bpf_prog *anchor_prog = NULL; + bool is_before, is_after; + + is_before = flags & BPF_F_BEFORE; + is_after = flags & BPF_F_AFTER; + if (is_link || is_id || id_or_fd) { + /* flags must have either BPF_F_BEFORE or BPF_F_AFTER */ + if (is_before == is_after) + return ERR_PTR(-EINVAL); + if ((is_link && !link) || (!is_link && !prog)) + return ERR_PTR(-EINVAL); + } else if (!hlist_empty(progs)) { + /* flags cannot have both BPF_F_BEFORE and BPF_F_AFTER */ + if (is_before && is_after) + return ERR_PTR(-EINVAL); + } + + if (is_link) { + anchor_link = bpf_get_anchor_link(flags, id_or_fd); + if (IS_ERR(anchor_link)) + return ERR_PTR(PTR_ERR(anchor_link)); + } else if (is_id || id_or_fd) { + anchor_prog = bpf_get_anchor_prog(flags, id_or_fd); + if (IS_ERR(anchor_prog)) + return ERR_PTR(PTR_ERR(anchor_prog)); + } + + if (!anchor_prog && !anchor_link) { + /* if there is no anchor_prog/anchor_link, then BPF_F_PREORDER + * doesn't matter since either prepend or append to a combined + * list of progs will end up with correct result. 
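From userspace, the anchor resolution done by get_prog_list() is driven by the attach flags plus a relative fd/id and an optional list revision. Assuming libbpf's bpf_prog_attach_opts() with its mprog-style fields, attaching before an existing program at a known revision would look roughly like this (sketch; anchor_fd and revision are hypothetical variables obtained via BPF_PROG_QUERY):

LIBBPF_OPTS(bpf_prog_attach_opts, opts,
	.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
	.relative_fd = anchor_fd,	/* insert before this program */
	.expected_revision = revision);	/* fails with -ESTALE if the list changed */

err = bpf_prog_attach_opts(prog_fd, cgroup_fd,
			   BPF_CGROUP_INET_EGRESS, &opts);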
+ */ + hlist_for_each_entry(pltmp, progs, node) { + if (is_before) + return pltmp; + if (pltmp->node.next) + continue; + return pltmp; + } + return NULL; + } + + hlist_for_each_entry(pltmp, progs, node) { + if ((anchor_prog && anchor_prog == pltmp->prog) || + (anchor_link && anchor_link == &pltmp->link->link)) { + if (!!(pltmp->flags & BPF_F_PREORDER) != preorder) + goto out; + pl = pltmp; + goto out; + } + } + + pl = ERR_PTR(-ENOENT); +out: + if (anchor_link) + bpf_link_put(anchor_link); + else + bpf_prog_put(anchor_prog); + return pl; +} + +static int insert_pl_to_hlist(struct bpf_prog_list *pl, struct hlist_head *progs, + struct bpf_prog *prog, struct bpf_cgroup_link *link, + u32 flags, u32 id_or_fd) +{ + struct bpf_prog_list *pltmp; + + pltmp = get_prog_list(progs, prog, link, flags, id_or_fd); + if (IS_ERR(pltmp)) + return PTR_ERR(pltmp); + + if (!pltmp) + hlist_add_head(&pl->node, progs); + else if (flags & BPF_F_BEFORE) + hlist_add_before(&pl->node, &pltmp->node); + else + hlist_add_behind(&pl->node, &pltmp->node); + + return 0; +} + /** * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and * propagate the change to descendants @@ -667,6 +777,8 @@ static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs, * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set * @type: Type of attach operation * @flags: Option flags + * @id_or_fd: Relative prog id or fd + * @revision: bpf_prog_list revision * * Exactly one of @prog or @link can be non-null. * Must be called with cgroup_mutex held. @@ -674,7 +786,8 @@ static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs, static int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, struct bpf_prog *replace_prog, struct bpf_cgroup_link *link, - enum bpf_attach_type type, u32 flags) + enum bpf_attach_type type, u32 flags, u32 id_or_fd, + u64 revision) { u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)); struct bpf_prog *old_prog = NULL; @@ -690,6 +803,9 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI))) /* invalid combination */ return -EINVAL; + if ((flags & BPF_F_REPLACE) && (flags & (BPF_F_BEFORE | BPF_F_AFTER))) + /* only either replace or insertion with before/after */ + return -EINVAL; if (link && (prog || replace_prog)) /* only either link or prog/replace_prog can be specified */ return -EINVAL; @@ -700,6 +816,8 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id); if (atype < 0) return -EINVAL; + if (revision && revision != cgrp->bpf.revisions[atype]) + return -ESTALE; progs = &cgrp->bpf.progs[atype]; @@ -728,22 +846,18 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, if (pl) { old_prog = pl->prog; } else { - struct hlist_node *last = NULL; - pl = kmalloc(sizeof(*pl), GFP_KERNEL); if (!pl) { bpf_cgroup_storages_free(new_storage); return -ENOMEM; } - if (hlist_empty(progs)) - hlist_add_head(&pl->node, progs); - else - hlist_for_each(last, progs) { - if (last->next) - continue; - hlist_add_behind(&pl->node, last); - break; - } + + err = insert_pl_to_hlist(pl, progs, prog, link, flags, id_or_fd); + if (err) { + kfree(pl); + bpf_cgroup_storages_free(new_storage); + return err; + } } pl->prog = prog; @@ -762,6 +876,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, if (err) goto cleanup_trampoline; + cgrp->bpf.revisions[atype] += 1; if (old_prog) { if (type == BPF_LSM_CGROUP) 
bpf_trampoline_unlink_cgroup_shim(old_prog); @@ -793,12 +908,13 @@ static int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, struct bpf_prog *replace_prog, struct bpf_cgroup_link *link, enum bpf_attach_type type, - u32 flags) + u32 flags, u32 id_or_fd, u64 revision) { int ret; cgroup_lock(); - ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags); + ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags, + id_or_fd, revision); cgroup_unlock(); return ret; } @@ -886,6 +1002,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp, if (!found) return -ENOENT; + cgrp->bpf.revisions[atype] += 1; old_prog = xchg(&link->link.prog, new_prog); replace_effective_prog(cgrp, atype, link); bpf_prog_put(old_prog); @@ -1011,12 +1128,14 @@ found: * @prog: A program to detach or NULL * @link: A link to detach or NULL * @type: Type of detach operation + * @revision: bpf_prog_list revision * * At most one of @prog or @link can be non-NULL. * Must be called with cgroup_mutex held. */ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - struct bpf_cgroup_link *link, enum bpf_attach_type type) + struct bpf_cgroup_link *link, enum bpf_attach_type type, + u64 revision) { enum cgroup_bpf_attach_type atype; struct bpf_prog *old_prog; @@ -1034,6 +1153,9 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, if (atype < 0) return -EINVAL; + if (revision && revision != cgrp->bpf.revisions[atype]) + return -ESTALE; + progs = &cgrp->bpf.progs[atype]; flags = cgrp->bpf.flags[atype]; @@ -1059,6 +1181,7 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, /* now can actually delete it from this cgroup list */ hlist_del(&pl->node); + cgrp->bpf.revisions[atype] += 1; kfree(pl); if (hlist_empty(progs)) @@ -1074,12 +1197,12 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, } static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type) + enum bpf_attach_type type, u64 revision) { int ret; cgroup_lock(); - ret = __cgroup_bpf_detach(cgrp, prog, NULL, type); + ret = __cgroup_bpf_detach(cgrp, prog, NULL, type, revision); cgroup_unlock(); return ret; } @@ -1097,6 +1220,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, struct bpf_prog_array *effective; int cnt, ret = 0, i; int total_cnt = 0; + u64 revision = 0; u32 flags; if (effective_query && prog_attach_flags) @@ -1134,6 +1258,10 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, return -EFAULT; if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt))) return -EFAULT; + if (!effective_query && from_atype == to_atype) + revision = cgrp->bpf.revisions[from_atype]; + if (copy_to_user(&uattr->query.revision, &revision, sizeof(revision))) + return -EFAULT; if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt) /* return early if user requested only program count + flags */ return 0; @@ -1216,7 +1344,8 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr, } ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL, - attr->attach_type, attr->attach_flags); + attr->attach_type, attr->attach_flags, + attr->relative_fd, attr->expected_revision); if (replace_prog) bpf_prog_put(replace_prog); @@ -1238,7 +1367,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) if (IS_ERR(prog)) prog = NULL; - ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type); + ret = cgroup_bpf_detach(cgrp, prog, 
attr->attach_type, attr->expected_revision); if (prog) bpf_prog_put(prog); @@ -1267,7 +1396,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link) } WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link, - cg_link->type)); + cg_link->type, 0)); if (cg_link->type == BPF_LSM_CGROUP) bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog); @@ -1339,6 +1468,13 @@ static const struct bpf_link_ops bpf_cgroup_link_lops = { .fill_link_info = bpf_cgroup_link_fill_link_info, }; +#define BPF_F_LINK_ATTACH_MASK \ + (BPF_F_ID | \ + BPF_F_BEFORE | \ + BPF_F_AFTER | \ + BPF_F_PREORDER | \ + BPF_F_LINK) + int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_link_primer link_primer; @@ -1346,7 +1482,7 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) struct cgroup *cgrp; int err; - if (attr->link_create.flags) + if (attr->link_create.flags & (~BPF_F_LINK_ATTACH_MASK)) return -EINVAL; cgrp = cgroup_get_from_fd(attr->link_create.target_fd); @@ -1370,7 +1506,9 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) } err = cgroup_bpf_attach(cgrp, NULL, NULL, link, - link->type, BPF_F_ALLOW_MULTI); + link->type, BPF_F_ALLOW_MULTI | attr->link_create.flags, + attr->link_create.cgroup.relative_fd, + attr->link_create.cgroup.expected_revision); if (err) { bpf_link_cleanup(&link_primer); goto out_put_cgroup; @@ -2134,7 +2272,7 @@ static const struct bpf_func_proto bpf_sysctl_get_name_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c20babbf998f..e536a34a32c8 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2102,14 +2102,15 @@ out: #undef COND_JMP /* ST, STX and LDX*/ ST_NOSPEC: - /* Speculation barrier for mitigating Speculative Store Bypass. - * In case of arm64, we rely on the firmware mitigation as - * controlled via the ssbd kernel parameter. Whenever the - * mitigation is enabled, it works for all of the kernel code - * with no need to provide any additional instructions here. - * In case of x86, we use 'lfence' insn for mitigation. We - * reuse preexisting logic from Spectre v1 mitigation that - * happens to produce the required code on x86 for v4 as well. + /* Speculation barrier for mitigating Speculative Store Bypass, + * Bounds-Check Bypass and Type Confusion. In case of arm64, we + * rely on the firmware mitigation as controlled via the ssbd + * kernel parameter. Whenever the mitigation is enabled, it + * works for all of the kernel code with no need to provide any + * additional instructions here. In case of x86, we use 'lfence' + * insn for mitigation. We reuse preexisting logic from Spectre + * v1 mitigation that happens to produce the required code on + * x86 for v4 as well. */ barrier_nospec(); CONT; @@ -3034,6 +3035,21 @@ bool __weak bpf_jit_needs_zext(void) return false; } +/* By default, enable the verifier's mitigations against Spectre v1 and v4 for + * all archs. The value returned must not change at runtime as there is + * currently no support for reloading programs that were loaded without + * mitigations. + */ +bool __weak bpf_jit_bypass_spec_v1(void) +{ + return false; +} + +bool __weak bpf_jit_bypass_spec_v4(void) +{ + return false; +} + /* Return true if the JIT inlines the call to the helper corresponding to * the imm. 
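bpf_jit_bypass_spec_v1()/bpf_jit_bypass_spec_v4() above are weak definitions, so an architecture whose JIT or hardware makes a given Spectre variant a non-issue can opt out. A hypothetical arch-side override is simply:

#include <linux/filter.h>

/* Hypothetical override in an arch's JIT: report that speculative
 * store bypass (v4) cannot be exploited here, letting the verifier
 * skip the corresponding barriers.
 */
bool bpf_jit_bypass_spec_v4(void)
{
	return true;
}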
* diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 9ff1b4090289..6d051416c184 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -24,6 +24,7 @@ #include <linux/bpf_mem_alloc.h> #include <linux/kasan.h> #include <linux/bpf_verifier.h> +#include <linux/uaccess.h> #include "../../lib/kstrtox.h" @@ -3278,6 +3279,376 @@ __bpf_kfunc void __bpf_trap(void) { } +/* + * Kfuncs for string operations. + * + * Since strings are not necessarily %NUL-terminated, we cannot directly call + * in-kernel implementations. Instead, we open-code the implementations using + * __get_kernel_nofault instead of plain dereference to make them safe. + */ + +/** + * bpf_strcmp - Compare two strings + * @s1__ign: One string + * @s2__ign: Another string + * + * Return: + * * %0 - Strings are equal + * * %-1 - @s1__ign is smaller + * * %1 - @s2__ign is smaller + * * %-EFAULT - Cannot read one of the strings + * * %-E2BIG - One of strings is too large + * * %-ERANGE - One of strings is outside of kernel address space + */ +__bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign) +{ + char c1, c2; + int i; + + if (!copy_from_kernel_nofault_allowed(s1__ign, 1) || + !copy_from_kernel_nofault_allowed(s2__ign, 1)) { + return -ERANGE; + } + + guard(pagefault)(); + for (i = 0; i < XATTR_SIZE_MAX; i++) { + __get_kernel_nofault(&c1, s1__ign, char, err_out); + __get_kernel_nofault(&c2, s2__ign, char, err_out); + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (c1 == '\0') + return 0; + s1__ign++; + s2__ign++; + } + return -E2BIG; +err_out: + return -EFAULT; +} + +/** + * bpf_strnchr - Find a character in a length limited string + * @s__ign: The string to be searched + * @count: The number of characters to be searched + * @c: The character to search for + * + * Note that the %NUL-terminator is considered part of the string, and can + * be searched for. + * + * Return: + * * >=0 - Index of the first occurrence of @c within @s__ign + * * %-ENOENT - @c not found in the first @count characters of @s__ign + * * %-EFAULT - Cannot read @s__ign + * * %-E2BIG - @s__ign is too large + * * %-ERANGE - @s__ign is outside of kernel address space + */ +__bpf_kfunc int bpf_strnchr(const char *s__ign, size_t count, char c) +{ + char sc; + int i; + + if (!copy_from_kernel_nofault_allowed(s__ign, 1)) + return -ERANGE; + + guard(pagefault)(); + for (i = 0; i < count && i < XATTR_SIZE_MAX; i++) { + __get_kernel_nofault(&sc, s__ign, char, err_out); + if (sc == c) + return i; + if (sc == '\0') + return -ENOENT; + s__ign++; + } + return i == XATTR_SIZE_MAX ? -E2BIG : -ENOENT; +err_out: + return -EFAULT; +} + +/** + * bpf_strchr - Find the first occurrence of a character in a string + * @s__ign: The string to be searched + * @c: The character to search for + * + * Note that the %NUL-terminator is considered part of the string, and can + * be searched for. 
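On the BPF program side these kfuncs are reached as kernel symbols. A minimal illustrative consumer (the attach point and extern declarations are assumptions, not part of this patch; recent trees may provide the declarations in a header):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

extern int bpf_strcmp(const char *s1__ign, const char *s2__ign) __ksym;
extern int bpf_strlen(const char *s__ign) __ksym;

char pattern[] = "ext4";

SEC("fentry/vfs_open")	/* hypothetical attach point */
int BPF_PROG(probe, const struct path *path)
{
	const unsigned char *name = path->dentry->d_name.name;

	if (bpf_strcmp((const char *)name, pattern) == 0)
		bpf_printk("match, len=%d", bpf_strlen((const char *)name));
	return 0;
}

char LICENSE[] SEC("license") = "GPL";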
+ * + * Return: + * * >=0 - The index of the first occurrence of @c within @s__ign + * * %-ENOENT - @c not found in @s__ign + * * %-EFAULT - Cannot read @s__ign + * * %-E2BIG - @s__ign is too large + * * %-ERANGE - @s__ign is outside of kernel address space + */ +__bpf_kfunc int bpf_strchr(const char *s__ign, char c) +{ + return bpf_strnchr(s__ign, XATTR_SIZE_MAX, c); +} + +/** + * bpf_strchrnul - Find and return a character in a string, or end of string + * @s__ign: The string to be searched + * @c: The character to search for + * + * Return: + * * >=0 - Index of the first occurrence of @c within @s__ign or index of + * the null byte at the end of @s__ign when @c is not found + * * %-EFAULT - Cannot read @s__ign + * * %-E2BIG - @s__ign is too large + * * %-ERANGE - @s__ign is outside of kernel address space + */ +__bpf_kfunc int bpf_strchrnul(const char *s__ign, char c) +{ + char sc; + int i; + + if (!copy_from_kernel_nofault_allowed(s__ign, 1)) + return -ERANGE; + + guard(pagefault)(); + for (i = 0; i < XATTR_SIZE_MAX; i++) { + __get_kernel_nofault(&sc, s__ign, char, err_out); + if (sc == '\0' || sc == c) + return i; + s__ign++; + } + return -E2BIG; +err_out: + return -EFAULT; +} + +/** + * bpf_strrchr - Find the last occurrence of a character in a string + * @s__ign: The string to be searched + * @c: The character to search for + * + * Return: + * * >=0 - Index of the last occurrence of @c within @s__ign + * * %-ENOENT - @c not found in @s__ign + * * %-EFAULT - Cannot read @s__ign + * * %-E2BIG - @s__ign is too large + * * %-ERANGE - @s__ign is outside of kernel address space + */ +__bpf_kfunc int bpf_strrchr(const char *s__ign, int c) +{ + char sc; + int i, last = -ENOENT; + + if (!copy_from_kernel_nofault_allowed(s__ign, 1)) + return -ERANGE; + + guard(pagefault)(); + for (i = 0; i < XATTR_SIZE_MAX; i++) { + __get_kernel_nofault(&sc, s__ign, char, err_out); + if (sc == c) + last = i; + if (sc == '\0') + return last; + s__ign++; + } + return -E2BIG; +err_out: + return -EFAULT; +} + +/** + * bpf_strnlen - Calculate the length of a length-limited string + * @s__ign: The string + * @count: The maximum number of characters to count + * + * Return: + * * >=0 - The length of @s__ign + * * %-EFAULT - Cannot read @s__ign + * * %-E2BIG - @s__ign is too large + * * %-ERANGE - @s__ign is outside of kernel address space + */ +__bpf_kfunc int bpf_strnlen(const char *s__ign, size_t count) +{ + char c; + int i; + + if (!copy_from_kernel_nofault_allowed(s__ign, 1)) + return -ERANGE; + + guard(pagefault)(); + for (i = 0; i < count && i < XATTR_SIZE_MAX; i++) { + __get_kernel_nofault(&c, s__ign, char, err_out); + if (c == '\0') + return i; + s__ign++; + } + return i == XATTR_SIZE_MAX ? 
-E2BIG : i; +err_out: + return -EFAULT; +} + +/** + * bpf_strlen - Calculate the length of a string + * @s__ign: The string + * + * Return: + * * >=0 - The length of @s__ign + * * %-EFAULT - Cannot read @s__ign + * * %-E2BIG - @s__ign is too large + * * %-ERANGE - @s__ign is outside of kernel address space + */ +__bpf_kfunc int bpf_strlen(const char *s__ign) +{ + return bpf_strnlen(s__ign, XATTR_SIZE_MAX); +} + +/** + * bpf_strspn - Calculate the length of the initial substring of @s__ign which + * only contains letters in @accept__ign + * @s__ign: The string to be searched + * @accept__ign: The string to search for + * + * Return: + * * >=0 - The length of the initial substring of @s__ign which only + * contains letters from @accept__ign + * * %-EFAULT - Cannot read one of the strings + * * %-E2BIG - One of the strings is too large + * * %-ERANGE - One of the strings is outside of kernel address space + */ +__bpf_kfunc int bpf_strspn(const char *s__ign, const char *accept__ign) +{ + char cs, ca; + int i, j; + + if (!copy_from_kernel_nofault_allowed(s__ign, 1) || + !copy_from_kernel_nofault_allowed(accept__ign, 1)) { + return -ERANGE; + } + + guard(pagefault)(); + for (i = 0; i < XATTR_SIZE_MAX; i++) { + __get_kernel_nofault(&cs, s__ign, char, err_out); + if (cs == '\0') + return i; + for (j = 0; j < XATTR_SIZE_MAX; j++) { + __get_kernel_nofault(&ca, accept__ign + j, char, err_out); + if (cs == ca || ca == '\0') + break; + } + if (j == XATTR_SIZE_MAX) + return -E2BIG; + if (ca == '\0') + return i; + s__ign++; + } + return -E2BIG; +err_out: + return -EFAULT; +} + +/** + * bpf_strcspn - Calculate the length of the initial substring of @s__ign which + * does not contain letters in @reject__ign + * @s__ign: The string to be searched + * @reject__ign: The string to search for + * + * Return: + * * >=0 - The length of the initial substring of @s__ign which does not + * contain letters from @reject__ign + * * %-EFAULT - Cannot read one of the strings + * * %-E2BIG - One of the strings is too large + * * %-ERANGE - One of the strings is outside of kernel address space + */ +__bpf_kfunc int bpf_strcspn(const char *s__ign, const char *reject__ign) +{ + char cs, cr; + int i, j; + + if (!copy_from_kernel_nofault_allowed(s__ign, 1) || + !copy_from_kernel_nofault_allowed(reject__ign, 1)) { + return -ERANGE; + } + + guard(pagefault)(); + for (i = 0; i < XATTR_SIZE_MAX; i++) { + __get_kernel_nofault(&cs, s__ign, char, err_out); + if (cs == '\0') + return i; + for (j = 0; j < XATTR_SIZE_MAX; j++) { + __get_kernel_nofault(&cr, reject__ign + j, char, err_out); + if (cs == cr || cr == '\0') + break; + } + if (j == XATTR_SIZE_MAX) + return -E2BIG; + if (cr != '\0') + return i; + s__ign++; + } + return -E2BIG; +err_out: + return -EFAULT; +} + +/** + * bpf_strnstr - Find the first substring in a length-limited string + * @s1__ign: The string to be searched + * @s2__ign: The string to search for + * @len: the maximum number of characters to search + * + * Return: + * * >=0 - Index of the first character of the first occurrence of @s2__ign + * within the first @len characters of @s1__ign + * * %-ENOENT - @s2__ign not found in the first @len characters of @s1__ign + * * %-EFAULT - Cannot read one of the strings + * * %-E2BIG - One of the strings is too large + * * %-ERANGE - One of the strings is outside of kernel address space + */ +__bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign, size_t len) +{ + char c1, c2; + int i, j; + + if (!copy_from_kernel_nofault_allowed(s1__ign, 1) || + 
!copy_from_kernel_nofault_allowed(s2__ign, 1)) { + return -ERANGE; + } + + guard(pagefault)(); + for (i = 0; i < XATTR_SIZE_MAX; i++) { + for (j = 0; i + j < len && j < XATTR_SIZE_MAX; j++) { + __get_kernel_nofault(&c2, s2__ign + j, char, err_out); + if (c2 == '\0') + return i; + __get_kernel_nofault(&c1, s1__ign + j, char, err_out); + if (c1 == '\0') + return -ENOENT; + if (c1 != c2) + break; + } + if (j == XATTR_SIZE_MAX) + return -E2BIG; + if (i + j == len) + return -ENOENT; + s1__ign++; + } + return -E2BIG; +err_out: + return -EFAULT; +} + +/** + * bpf_strstr - Find the first substring in a string + * @s1__ign: The string to be searched + * @s2__ign: The string to search for + * + * Return: + * * >=0 - Index of the first character of the first occurrence of @s2__ign + * within @s1__ign + * * %-ENOENT - @s2__ign is not a substring of @s1__ign + * * %-EFAULT - Cannot read one of the strings + * * %-E2BIG - One of the strings is too large + * * %-ERANGE - One of the strings is outside of kernel address space + */ +__bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign) +{ + return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX); +} + __bpf_kfunc_end_defs(); BTF_KFUNCS_START(generic_btf_ids) @@ -3397,6 +3768,17 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPAB BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) #endif BTF_ID_FLAGS(func, __bpf_trap) +BTF_ID_FLAGS(func, bpf_strcmp); +BTF_ID_FLAGS(func, bpf_strchr); +BTF_ID_FLAGS(func, bpf_strchrnul); +BTF_ID_FLAGS(func, bpf_strnchr); +BTF_ID_FLAGS(func, bpf_strrchr); +BTF_ID_FLAGS(func, bpf_strlen); +BTF_ID_FLAGS(func, bpf_strnlen); +BTF_ID_FLAGS(func, bpf_strspn); +BTF_ID_FLAGS(func, bpf_strcspn); +BTF_ID_FLAGS(func, bpf_strstr); +BTF_ID_FLAGS(func, bpf_strnstr); #ifdef CONFIG_CGROUPS BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU) #endif diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 3969eb0382af..632d51b05fe9 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -394,17 +394,10 @@ static int cgroup_storage_check_btf(const struct bpf_map *map, if (!btf_member_is_reg_int(btf, key_type, m, offset, size)) return -EINVAL; } else { - u32 int_data; - /* * Key is expected to be u64, which stores the cgroup_inode_id */ - - if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) - return -EINVAL; - - int_data = *(u32 *)(key_type + 1); - if (BTF_INT_BITS(int_data) != 64 || BTF_INT_OFFSET(int_data)) + if (!btf_type_is_i64(key_type)) return -EINVAL; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index dd5304c6ac3c..56500381c28a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3403,10 +3403,12 @@ static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, seq_printf(seq, "attach_type:\t%d\n" "target_obj_id:\t%u\n" - "target_btf_id:\t%u\n", + "target_btf_id:\t%u\n" + "cookie:\t%llu\n", tr_link->attach_type, target_obj_id, - target_btf_id); + target_btf_id, + tr_link->link.cookie); } static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, @@ -3416,6 +3418,7 @@ static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, container_of(link, struct bpf_tracing_link, link.link); info->tracing.attach_type = tr_link->attach_type; + info->tracing.cookie = tr_link->link.cookie; bpf_trampoline_unpack_key(tr_link->trampoline->key, &info->tracing.target_obj_id, &info->tracing.target_btf_id); @@ -3651,8 +3654,10 @@ static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, 
container_of(link, struct bpf_raw_tp_link, link); seq_printf(seq, - "tp_name:\t%s\n", - raw_tp_link->btp->tp->name); + "tp_name:\t%s\n" + "cookie:\t%llu\n", + raw_tp_link->btp->tp->name, + raw_tp_link->cookie); } static int bpf_copy_to_user(char __user *ubuf, const char *buf, u32 ulen, @@ -3688,6 +3693,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, return -EINVAL; info->raw_tracepoint.tp_name_len = tp_len + 1; + info->raw_tracepoint.cookie = raw_tp_link->cookie; if (!ubuf) return 0; @@ -3794,6 +3800,32 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, info->perf_event.kprobe.cookie = event->bpf_cookie; return 0; } + +static void bpf_perf_link_fdinfo_kprobe(const struct perf_event *event, + struct seq_file *seq) +{ + const char *name; + int err; + u32 prog_id, type; + u64 offset, addr; + unsigned long missed; + + err = bpf_get_perf_event_info(event, &prog_id, &type, &name, + &offset, &addr, &missed); + if (err) + return; + + seq_printf(seq, + "name:\t%s\n" + "offset:\t%#llx\n" + "missed:\t%lu\n" + "addr:\t%#llx\n" + "event_type:\t%s\n" + "cookie:\t%llu\n", + name, offset, missed, addr, + type == BPF_FD_TYPE_KRETPROBE ? "kretprobe" : "kprobe", + event->bpf_cookie); +} #endif #ifdef CONFIG_UPROBE_EVENTS @@ -3822,6 +3854,31 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset; return 0; } + +static void bpf_perf_link_fdinfo_uprobe(const struct perf_event *event, + struct seq_file *seq) +{ + const char *name; + int err; + u32 prog_id, type; + u64 offset, ref_ctr_offset; + unsigned long missed; + + err = bpf_get_perf_event_info(event, &prog_id, &type, &name, + &offset, &ref_ctr_offset, &missed); + if (err) + return; + + seq_printf(seq, + "name:\t%s\n" + "offset:\t%#llx\n" + "ref_ctr_offset:\t%#llx\n" + "event_type:\t%s\n" + "cookie:\t%llu\n", + name, offset, ref_ctr_offset, + type == BPF_FD_TYPE_URETPROBE ? 
"uretprobe" : "uprobe", + event->bpf_cookie); +} #endif static int bpf_perf_link_fill_probe(const struct perf_event *event, @@ -3890,10 +3947,79 @@ static int bpf_perf_link_fill_link_info(const struct bpf_link *link, } } +static void bpf_perf_event_link_show_fdinfo(const struct perf_event *event, + struct seq_file *seq) +{ + seq_printf(seq, + "type:\t%u\n" + "config:\t%llu\n" + "event_type:\t%s\n" + "cookie:\t%llu\n", + event->attr.type, event->attr.config, + "event", event->bpf_cookie); +} + +static void bpf_tracepoint_link_show_fdinfo(const struct perf_event *event, + struct seq_file *seq) +{ + int err; + const char *name; + u32 prog_id; + + err = bpf_get_perf_event_info(event, &prog_id, NULL, &name, NULL, + NULL, NULL); + if (err) + return; + + seq_printf(seq, + "tp_name:\t%s\n" + "event_type:\t%s\n" + "cookie:\t%llu\n", + name, "tracepoint", event->bpf_cookie); +} + +static void bpf_probe_link_show_fdinfo(const struct perf_event *event, + struct seq_file *seq) +{ +#ifdef CONFIG_KPROBE_EVENTS + if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE) + return bpf_perf_link_fdinfo_kprobe(event, seq); +#endif + +#ifdef CONFIG_UPROBE_EVENTS + if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE) + return bpf_perf_link_fdinfo_uprobe(event, seq); +#endif +} + +static void bpf_perf_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_perf_link *perf_link; + const struct perf_event *event; + + perf_link = container_of(link, struct bpf_perf_link, link); + event = perf_get_event(perf_link->perf_file); + if (IS_ERR(event)) + return; + + switch (event->prog->type) { + case BPF_PROG_TYPE_PERF_EVENT: + return bpf_perf_event_link_show_fdinfo(event, seq); + case BPF_PROG_TYPE_TRACEPOINT: + return bpf_tracepoint_link_show_fdinfo(event, seq); + case BPF_PROG_TYPE_KPROBE: + return bpf_probe_link_show_fdinfo(event, seq); + default: + return; + } +} + static const struct bpf_link_ops bpf_perf_link_lops = { .release = bpf_perf_link_release, .dealloc = bpf_perf_link_dealloc, .fill_link_info = bpf_perf_link_fill_link_info, + .show_fdinfo = bpf_perf_link_show_fdinfo, }; static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) @@ -4185,6 +4311,25 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, } } +static bool is_cgroup_prog_type(enum bpf_prog_type ptype, enum bpf_attach_type atype, + bool check_atype) +{ + switch (ptype) { + case BPF_PROG_TYPE_CGROUP_DEVICE: + case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: + case BPF_PROG_TYPE_CGROUP_SOCKOPT: + case BPF_PROG_TYPE_CGROUP_SYSCTL: + case BPF_PROG_TYPE_SOCK_OPS: + return true; + case BPF_PROG_TYPE_LSM: + return check_atype ? 
atype == BPF_LSM_CGROUP : true; + default: + return false; + } +} + #define BPF_PROG_ATTACH_LAST_FIELD expected_revision #define BPF_F_ATTACH_MASK_BASE \ @@ -4215,6 +4360,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) if (bpf_mprog_supported(ptype)) { if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) return -EINVAL; + } else if (is_cgroup_prog_type(ptype, 0, false)) { + if (attr->attach_flags & ~(BPF_F_ATTACH_MASK_BASE | BPF_F_ATTACH_MASK_MPROG)) + return -EINVAL; } else { if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE) return -EINVAL; @@ -4232,6 +4380,11 @@ static int bpf_prog_attach(const union bpf_attr *attr) return -EINVAL; } + if (is_cgroup_prog_type(ptype, prog->expected_attach_type, true)) { + ret = cgroup_bpf_prog_attach(attr, ptype, prog); + goto out; + } + switch (ptype) { case BPF_PROG_TYPE_SK_SKB: case BPF_PROG_TYPE_SK_MSG: @@ -4243,20 +4396,6 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_FLOW_DISSECTOR: ret = netns_bpf_prog_attach(attr, prog); break; - case BPF_PROG_TYPE_CGROUP_DEVICE: - case BPF_PROG_TYPE_CGROUP_SKB: - case BPF_PROG_TYPE_CGROUP_SOCK: - case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - case BPF_PROG_TYPE_CGROUP_SOCKOPT: - case BPF_PROG_TYPE_CGROUP_SYSCTL: - case BPF_PROG_TYPE_SOCK_OPS: - case BPF_PROG_TYPE_LSM: - if (ptype == BPF_PROG_TYPE_LSM && - prog->expected_attach_type != BPF_LSM_CGROUP) - ret = -EINVAL; - else - ret = cgroup_bpf_prog_attach(attr, ptype, prog); - break; case BPF_PROG_TYPE_SCHED_CLS: if (attr->attach_type == BPF_TCX_INGRESS || attr->attach_type == BPF_TCX_EGRESS) @@ -4267,7 +4406,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) default: ret = -EINVAL; } - +out: if (ret) bpf_prog_put(prog); return ret; @@ -4295,6 +4434,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) if (IS_ERR(prog)) return PTR_ERR(prog); } + } else if (is_cgroup_prog_type(ptype, 0, false)) { + if (attr->attach_flags || attr->relative_fd) + return -EINVAL; } else if (attr->attach_flags || attr->relative_fd || attr->expected_revision) { diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index 9dbc31b25e3d..fa353c5d550f 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c @@ -83,6 +83,11 @@ struct tnum tnum_sub(struct tnum a, struct tnum b) return TNUM(dv & ~mu, mu); } +struct tnum tnum_neg(struct tnum a) +{ + return tnum_sub(TNUM(0, 0), a); +} + struct tnum tnum_and(struct tnum a, struct tnum b) { u64 alpha, beta, v; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index c4b1a98ff726..b1e358c16eeb 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -911,27 +911,32 @@ static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram return bpf_prog_start_time(); } -static void notrace update_prog_stats(struct bpf_prog *prog, - u64 start) +static void notrace __update_prog_stats(struct bpf_prog *prog, u64 start) { struct bpf_prog_stats *stats; + unsigned long flags; + u64 duration; - if (static_branch_unlikely(&bpf_stats_enabled_key) && - /* static_key could be enabled in __bpf_prog_enter* - * and disabled in __bpf_prog_exit*. - * And vice versa. - * Hence check that 'start' is valid. 
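Stepping back to the tnum_neg() helper added in kernel/bpf/tnum.c above: negation is expressed as 0 - a in tnum arithmetic, and a small worked example (computed by hand from tnum_sub()'s definition) shows the precision this keeps:

/* Let x = TNUM(value=0b10, mask=0b01), i.e. the set {2, 3}, and
 * Z = TNUM(0, 0). Then tnum_neg(x) = tnum_sub(Z, x) computes:
 *   dv    = 0 - 2          = ...11111110  (-2)
 *   alpha = dv + Z.mask    = ...11111110
 *   beta  = dv - x.mask    = ...11111101  (-3)
 *   mu    = (alpha ^ beta) | Z.mask | x.mask = 0b11
 *   value = dv & ~mu       = ...11111100  (-4)
 * Result: TNUM(-4, 0b11) = {-4, -3, -2, -1}, a sound over-approximation
 * of the exact negation {-3, -2}. (TNUM() is the constructor internal
 * to kernel/bpf/tnum.c.)
 */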
- */ - start > NO_START_TIME) { - u64 duration = sched_clock() - start; - unsigned long flags; - - stats = this_cpu_ptr(prog->stats); - flags = u64_stats_update_begin_irqsave(&stats->syncp); - u64_stats_inc(&stats->cnt); - u64_stats_add(&stats->nsecs, duration); - u64_stats_update_end_irqrestore(&stats->syncp, flags); - } + /* + * static_key could be enabled in __bpf_prog_enter* and disabled in + * __bpf_prog_exit*. And vice versa. Check that 'start' is valid. + */ + if (start <= NO_START_TIME) + return; + + duration = sched_clock() - start; + stats = this_cpu_ptr(prog->stats); + flags = u64_stats_update_begin_irqsave(&stats->syncp); + u64_stats_inc(&stats->cnt); + u64_stats_add(&stats->nsecs, duration); + u64_stats_update_end_irqrestore(&stats->syncp, flags); +} + +static __always_inline void notrace update_prog_stats(struct bpf_prog *prog, + u64 start) +{ + if (static_branch_unlikely(&bpf_stats_enabled_key)) + __update_prog_stats(prog, start); } static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index db01bf51c792..90e688f81a48 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -44,6 +44,11 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { #undef BPF_LINK_TYPE }; +enum bpf_features { + BPF_FEAT_RDONLY_CAST_TO_VOID = 0, + __MAX_BPF_FEAT, +}; + struct bpf_mem_alloc bpf_global_percpu_ma; static bool bpf_global_percpu_ma_set; @@ -405,7 +410,8 @@ static bool reg_not_null(const struct bpf_reg_state *reg) type == PTR_TO_MAP_KEY || type == PTR_TO_SOCK_COMMON || (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) || - type == PTR_TO_MEM; + type == PTR_TO_MEM || + type == CONST_PTR_TO_MAP; } static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg) @@ -1403,7 +1409,7 @@ static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) goto out; alloc_size = kmalloc_size_roundup(size_mul(new_n, size)); - new_arr = krealloc(arr, alloc_size, GFP_KERNEL); + new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT); if (!new_arr) { kfree(arr); return NULL; @@ -1420,7 +1426,7 @@ out: static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src) { dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs, - sizeof(struct bpf_reference_state), GFP_KERNEL); + sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT); if (!dst->refs) return -ENOMEM; @@ -1439,7 +1445,7 @@ static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_st size_t n = src->allocated_stack / BPF_REG_SIZE; dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!dst->stack) return -ENOMEM; @@ -1647,7 +1653,7 @@ static void update_peak_states(struct bpf_verifier_env *env) { u32 cur_states; - cur_states = env->explored_states_size + env->free_list_size; + cur_states = env->explored_states_size + env->free_list_size + env->num_backedges; env->peak_states = max(env->peak_states, cur_states); } @@ -1659,6 +1665,13 @@ static void free_func_state(struct bpf_func_state *state) kfree(state); } +static void clear_jmp_history(struct bpf_verifier_state *state) +{ + kfree(state->jmp_history); + state->jmp_history = NULL; + state->jmp_history_cnt = 0; +} + static void free_verifier_state(struct bpf_verifier_state *state, bool free_self) { @@ -1669,11 +1682,12 @@ static void free_verifier_state(struct bpf_verifier_state *state, state->frame[i] = NULL; } kfree(state->refs); + 
clear_jmp_history(state); if (free_self) kfree(state); } -/* struct bpf_verifier_state->{parent,loop_entry} refer to states +/* struct bpf_verifier_state->parent refers to states * that are in either of env->{expored_states,free_list}. * In both cases the state is contained in struct bpf_verifier_state_list. */ @@ -1684,37 +1698,24 @@ static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_ return NULL; } -static struct bpf_verifier_state_list *state_loop_entry_as_list(struct bpf_verifier_state *st) -{ - if (st->loop_entry) - return container_of(st->loop_entry, struct bpf_verifier_state_list, state); - return NULL; -} +static bool incomplete_read_marks(struct bpf_verifier_env *env, + struct bpf_verifier_state *st); /* A state can be freed if it is no longer referenced: * - is in the env->free_list; * - has no children states; - * - is not used as loop_entry. - * - * Freeing a state can make it's loop_entry free-able. */ static void maybe_free_verifier_state(struct bpf_verifier_env *env, struct bpf_verifier_state_list *sl) { - struct bpf_verifier_state_list *loop_entry_sl; - - while (sl && sl->in_free_list && - sl->state.branches == 0 && - sl->state.used_as_loop_entry == 0) { - loop_entry_sl = state_loop_entry_as_list(&sl->state); - if (loop_entry_sl) - loop_entry_sl->state.used_as_loop_entry--; - list_del(&sl->node); - free_verifier_state(&sl->state, false); - kfree(sl); - env->free_list_size--; - sl = loop_entry_sl; - } + if (!sl->in_free_list + || sl->state.branches != 0 + || incomplete_read_marks(env, &sl->state)) + return; + list_del(&sl->node); + free_verifier_state(&sl->state, false); + kfree(sl); + env->free_list_size--; } /* copy verifier state from src to dst growing dst stack space @@ -1733,6 +1734,13 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, struct bpf_func_state *dst; int i, err; + dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history, + src->jmp_history_cnt, sizeof(*dst_state->jmp_history), + GFP_KERNEL_ACCOUNT); + if (!dst_state->jmp_history) + return -ENOMEM; + dst_state->jmp_history_cnt = src->jmp_history_cnt; + /* if dst has more stack frames then src frame, free them, this is also * necessary in case of exceptional exits using bpf_throw. */ @@ -1750,17 +1758,14 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->parent = src->parent; dst_state->first_insn_idx = src->first_insn_idx; dst_state->last_insn_idx = src->last_insn_idx; - dst_state->insn_hist_start = src->insn_hist_start; - dst_state->insn_hist_end = src->insn_hist_end; dst_state->dfs_depth = src->dfs_depth; dst_state->callback_unroll_depth = src->callback_unroll_depth; - dst_state->used_as_loop_entry = src->used_as_loop_entry; dst_state->may_goto_depth = src->may_goto_depth; - dst_state->loop_entry = src->loop_entry; + dst_state->equal_state = src->equal_state; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { - dst = kzalloc(sizeof(*dst), GFP_KERNEL); + dst = kzalloc(sizeof(*dst), GFP_KERNEL_ACCOUNT); if (!dst) return -ENOMEM; dst_state->frame[i] = dst; @@ -1799,173 +1804,232 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta return true; } -/* Open coded iterators allow back-edges in the state graph in order to - * check unbounded loops that iterators. 
- * - * In is_state_visited() it is necessary to know if explored states are - * part of some loops in order to decide whether non-exact states - * comparison could be used: - * - non-exact states comparison establishes sub-state relation and uses - * read and precision marks to do so, these marks are propagated from - * children states and thus are not guaranteed to be final in a loop; - * - exact states comparison just checks if current and explored states - * are identical (and thus form a back-edge). - * - * Paper "A New Algorithm for Identifying Loops in Decompilation" - * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient - * algorithm for loop structure detection and gives an overview of - * relevant terminology. It also has helpful illustrations. - * - * [1] https://api.semanticscholar.org/CorpusID:15784067 - * - * We use a similar algorithm but because loop nested structure is - * irrelevant for verifier ours is significantly simpler and resembles - * strongly connected components algorithm from Sedgewick's textbook. - * - * Define topmost loop entry as a first node of the loop traversed in a - * depth first search starting from initial state. The goal of the loop - * tracking algorithm is to associate topmost loop entries with states - * derived from these entries. - * - * For each step in the DFS states traversal algorithm needs to identify - * the following situations: - * - * initial initial initial - * | | | - * V V V - * ... ... .---------> hdr - * | | | | - * V V | V - * cur .-> succ | .------... - * | | | | | | - * V | V | V V - * succ '-- cur | ... ... - * | | | - * | V V - * | succ <- cur - * | | - * | V - * | ... - * | | - * '----' - * - * (A) successor state of cur (B) successor state of cur or it's entry - * not yet traversed are in current DFS path, thus cur and succ - * are members of the same outermost loop - * - * initial initial - * | | - * V V - * ... ... - * | | - * V V - * .------... .------... - * | | | | - * V V V V - * .-> hdr ... ... ... - * | | | | | - * | V V V V - * | succ <- cur succ <- cur - * | | | - * | V V - * | ... ... - * | | | - * '----' exit - * - * (C) successor state of cur is a part of some loop but this loop - * does not include cur or successor state is not in a loop at all. - * - * Algorithm could be described as the following python code: - * - * traversed = set() # Set of traversed nodes - * entries = {} # Mapping from node to loop entry - * depths = {} # Depth level assigned to graph node - * path = set() # Current DFS path - * - * # Find outermost loop entry known for n - * def get_loop_entry(n): - * h = entries.get(n, None) - * while h in entries: - * h = entries[h] - * return h - * - * # Update n's loop entry if h comes before n in current DFS path. - * def update_loop_entry(n, h): - * if h in path and depths[entries.get(n, n)] < depths[n]: - * entries[n] = h1 +/* Return IP for a given frame in a call stack */ +static u32 frame_insn_idx(struct bpf_verifier_state *st, u32 frame) +{ + return frame == st->curframe + ? st->insn_idx + : st->frame[frame + 1]->callsite; +} + +/* For state @st look for a topmost frame with frame_insn_idx() in some SCC, + * if such frame exists form a corresponding @callchain as an array of + * call sites leading to this frame and SCC id. 
+ * E.g.: * - * def dfs(n, depth): - * traversed.add(n) - * path.add(n) - * depths[n] = depth - * for succ in G.successors(n): - * if succ not in traversed: - * # Case A: explore succ and update cur's loop entry - * # only if succ's entry is in current DFS path. - * dfs(succ, depth + 1) - * h = entries.get(succ, None) - * update_loop_entry(n, h) - * else: - * # Case B or C depending on `h1 in path` check in update_loop_entry(). - * update_loop_entry(n, succ) - * path.remove(n) + * void foo() { A: loop {... SCC#1 ...}; } + * void bar() { B: loop { C: foo(); ... SCC#2 ... } + * D: loop { E: foo(); ... SCC#3 ... } } + * void main() { F: bar(); } * - * To adapt this algorithm for use with verifier: - * - use st->branch == 0 as a signal that DFS of succ had been finished - * and cur's loop entry has to be updated (case A), handle this in - * update_branch_counts(); - * - use st->branch > 0 as a signal that st is in the current DFS path; - * - handle cases B and C in is_state_visited(). + * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending + * on @st frame call sites being (F,C,A) or (F,E,A). */ -static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env, - struct bpf_verifier_state *st) +static bool compute_scc_callchain(struct bpf_verifier_env *env, + struct bpf_verifier_state *st, + struct bpf_scc_callchain *callchain) { - struct bpf_verifier_state *topmost = st->loop_entry; - u32 steps = 0; + u32 i, scc, insn_idx; - while (topmost && topmost->loop_entry) { - if (verifier_bug_if(steps++ > st->dfs_depth, env, "infinite loop")) - return ERR_PTR(-EFAULT); - topmost = topmost->loop_entry; + memset(callchain, 0, sizeof(*callchain)); + for (i = 0; i <= st->curframe; i++) { + insn_idx = frame_insn_idx(st, i); + scc = env->insn_aux_data[insn_idx].scc; + if (scc) { + callchain->scc = scc; + break; + } else if (i < st->curframe) { + callchain->callsites[i] = insn_idx; + } else { + return false; + } } - return topmost; + return true; } -static void update_loop_entry(struct bpf_verifier_env *env, - struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr) +/* Check if bpf_scc_visit instance for @callchain exists. */ +static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env, + struct bpf_scc_callchain *callchain) { - /* The hdr->branches check decides between cases B and C in - * comment for get_loop_entry(). If hdr->branches == 0 then - * head's topmost loop entry is not in current DFS path, - * hence 'cur' and 'hdr' are not in the same loop and there is - * no need to update cur->loop_entry. - */ - if (hdr->branches && hdr->dfs_depth < (cur->loop_entry ?: cur)->dfs_depth) { - if (cur->loop_entry) { - cur->loop_entry->used_as_loop_entry--; - maybe_free_verifier_state(env, state_loop_entry_as_list(cur)); - } - cur->loop_entry = hdr; - hdr->used_as_loop_entry++; + struct bpf_scc_info *info = env->scc_info[callchain->scc]; + struct bpf_scc_visit *visits = info ? info->visits : NULL; + u32 i; + + if (!info) + return NULL; + for (i = 0; i < info->num_visits; i++) + if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0) + return &visits[i]; + return NULL; +} + +/* Allocate a new bpf_scc_visit instance corresponding to @callchain. + * Allocated instances are alive for a duration of the do_check_common() + * call and are freed by free_states(). 
+ */ +static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env, + struct bpf_scc_callchain *callchain) +{ + struct bpf_scc_visit *visit; + struct bpf_scc_info *info; + u32 scc, num_visits; + u64 new_sz; + + scc = callchain->scc; + info = env->scc_info[scc]; + num_visits = info ? info->num_visits : 0; + new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1); + info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT); + if (!info) + return NULL; + env->scc_info[scc] = info; + info->num_visits = num_visits + 1; + visit = &info->visits[num_visits]; + memset(visit, 0, sizeof(*visit)); + memcpy(&visit->callchain, callchain, sizeof(*callchain)); + return visit; +} + +/* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */ +static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain) +{ + char *buf = env->tmp_str_buf; + int i, delta = 0; + + delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "("); + for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) { + if (!callchain->callsites[i]) + break; + delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,", + callchain->callsites[i]); + } + delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc); + return env->tmp_str_buf; +} + +/* If callchain for @st exists (@st is in some SCC), ensure that + * bpf_scc_visit instance for this callchain exists. + * If instance does not exist or is empty, assign visit->entry_state to @st. + */ +static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st) +{ + struct bpf_scc_callchain callchain; + struct bpf_scc_visit *visit; + + if (!compute_scc_callchain(env, st, &callchain)) + return 0; + visit = scc_visit_lookup(env, &callchain); + visit = visit ?: scc_visit_alloc(env, &callchain); + if (!visit) + return -ENOMEM; + if (!visit->entry_state) { + visit->entry_state = st; + if (env->log.level & BPF_LOG_LEVEL2) + verbose(env, "SCC enter %s\n", format_callchain(env, &callchain)); + } + return 0; +} + +static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit); + +/* If callchain for @st exists (@st is in some SCC), make it empty: + * - set visit->entry_state to NULL; + * - flush accumulated backedges. + */ +static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st) +{ + struct bpf_scc_callchain callchain; + struct bpf_scc_visit *visit; + + if (!compute_scc_callchain(env, st, &callchain)) + return 0; + visit = scc_visit_lookup(env, &callchain); + if (!visit) { + verifier_bug(env, "scc exit: no visit info for call chain %s", + format_callchain(env, &callchain)); + return -EFAULT; + } + if (visit->entry_state != st) + return 0; + if (env->log.level & BPF_LOG_LEVEL2) + verbose(env, "SCC exit %s\n", format_callchain(env, &callchain)); + visit->entry_state = NULL; + env->num_backedges -= visit->num_backedges; + visit->num_backedges = 0; + update_peak_states(env); + return propagate_backedges(env, visit); +} + +/* Lookup an bpf_scc_visit instance corresponding to @st callchain + * and add @backedge to visit->backedges. @st callchain must exist. 
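scc_visit_alloc() above grows the per-SCC visits flexible array by one element with kvrealloc(); an equivalent sizing using struct_size() to guard the multiply-add against overflow would be (a sketch of the alternative, not what the patch uses; context variables as in scc_visit_alloc()):

u64 new_sz = struct_size(info, visits, num_visits + 1);

info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT);
if (!info)
	return NULL;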
+ */ +static int add_scc_backedge(struct bpf_verifier_env *env, + struct bpf_verifier_state *st, + struct bpf_scc_backedge *backedge) +{ + struct bpf_scc_callchain callchain; + struct bpf_scc_visit *visit; + + if (!compute_scc_callchain(env, st, &callchain)) { + verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d", + st->insn_idx); + return -EFAULT; + } + visit = scc_visit_lookup(env, &callchain); + if (!visit) { + verifier_bug(env, "add backedge: no visit info for call chain %s", + format_callchain(env, &callchain)); + return -EFAULT; + } + if (env->log.level & BPF_LOG_LEVEL2) + verbose(env, "SCC backedge %s\n", format_callchain(env, &callchain)); + backedge->next = visit->backedges; + visit->backedges = backedge; + visit->num_backedges++; + env->num_backedges++; + update_peak_states(env); + return 0; +} + +/* bpf_reg_state->live marks for registers in a state @st are incomplete, + * if state @st is in some SCC and not all execution paths starting at this + * SCC are fully explored. + */ +static bool incomplete_read_marks(struct bpf_verifier_env *env, + struct bpf_verifier_state *st) +{ + struct bpf_scc_callchain callchain; + struct bpf_scc_visit *visit; + + if (!compute_scc_callchain(env, st, &callchain)) + return false; + visit = scc_visit_lookup(env, &callchain); + if (!visit) + return false; + return !!visit->backedges; +} + +static void free_backedges(struct bpf_scc_visit *visit) +{ + struct bpf_scc_backedge *backedge, *next; + + for (backedge = visit->backedges; backedge; backedge = next) { + free_verifier_state(&backedge->state, false); + next = backedge->next; + kvfree(backedge); } + visit->backedges = NULL; } -static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) +static int update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) { struct bpf_verifier_state_list *sl = NULL, *parent_sl; struct bpf_verifier_state *parent; + int err; while (st) { u32 br = --st->branches; - /* br == 0 signals that DFS exploration for 'st' is finished, - * thus it is necessary to update parent's loop entry if it - * turned out that st is a part of some loop. - * This is a part of 'case A' in get_loop_entry() comment. - */ - if (br == 0 && st->parent && st->loop_entry) - update_loop_entry(env, st->parent, st->loop_entry); - /* WARN_ON(br > 1) technically makes sense here, * but see comment in push_stack(), hence: */ @@ -1974,6 +2038,9 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi br); if (br) break; + err = maybe_exit_scc(env, st); + if (err) + return err; parent = st->parent; parent_sl = state_parent_as_list(st); if (sl) @@ -1981,6 +2048,7 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi st = parent; sl = parent_sl; } + return 0; } static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, @@ -2012,6 +2080,18 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, return 0; } +static bool error_recoverable_with_nospec(int err) +{ + /* Should only return true for non-fatal errors that are allowed to + * occur during speculative verification. For these we can insert a + * nospec and the program might still be accepted. Do not include + * something like ENOMEM because it is likely to re-occur for the next + * architectural path once it has been recovered-from in all speculative + * paths. 
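In practice this classification is consumed by do_check() (see further below): when do_check_insn() fails with one of these codes on a speculative path, the instruction is flagged for a nospec barrier and the path is abandoned instead of rejecting the program. A simplified decision sketch assuming the surrounding kernel helpers, not the verbatim flow:

/* Only -EPERM/-EACCES/-EINVAL on a speculative path turn into a
 * barrier; anything else, e.g. -ENOMEM, still fails verification.
 */
static int handle_insn_err(int err, bool speculative,
                           struct bpf_insn_aux_data *aux)
{
        if (err >= 0)
                return err;
        if (speculative && error_recoverable_with_nospec(err)) {
                aux->nospec = true;  /* fence off this speculative path */
                aux->alu_state = 0;  /* drop stale alu sanitation state */
                return 0;            /* path ends as if it exited */
        }
        return err;                  /* fatal */
}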
+ */ + return err == -EPERM || err == -EACCES || err == -EINVAL; +} + static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx, bool speculative) @@ -2020,9 +2100,9 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, struct bpf_verifier_stack_elem *elem; int err; - elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); + elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL_ACCOUNT); if (!elem) - goto err; + return NULL; elem->insn_idx = insn_idx; elem->prev_insn_idx = prev_insn_idx; @@ -2032,12 +2112,12 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, env->stack_size++; err = copy_verifier_state(&elem->st, cur); if (err) - goto err; + return NULL; elem->st.speculative |= speculative; if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { verbose(env, "The sequence of %d jumps is too complex.\n", env->stack_size); - goto err; + return NULL; } if (elem->st.parent) { ++elem->st.parent->branches; @@ -2052,12 +2132,6 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, */ } return &elem->st; -err: - free_verifier_state(env->cur_state, true); - env->cur_state = NULL; - /* pop all elements and return */ - while (!pop_stack(env, NULL, NULL, false)); - return NULL; } #define CALLER_SAVED_REGS 6 @@ -2787,9 +2861,9 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, struct bpf_verifier_stack_elem *elem; struct bpf_func_state *frame; - elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); + elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL_ACCOUNT); if (!elem) - goto err; + return NULL; elem->insn_idx = insn_idx; elem->prev_insn_idx = prev_insn_idx; @@ -2801,35 +2875,24 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, verbose(env, "The sequence of %d jumps is too complex for async cb.\n", env->stack_size); - goto err; + return NULL; } /* Unlike push_stack() do not copy_verifier_state(). * The caller state doesn't matter. * This is async callback. It starts in a fresh stack. * Initialize it similar to do_check_common(). - * But we do need to make sure to not clobber insn_hist, so we keep - * chaining insn_hist_start/insn_hist_end indices as for a normal - * child state. 
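Note the changed failure contract for push_stack() and push_async_cb(): they used to tear down env->cur_state and drain the whole stack on error, whereas now they simply return NULL and leave unwinding to the caller. A sketch of the simplified contract with hypothetical names:

#include <stdlib.h>

struct elem { struct elem *next; };

static struct elem *push(struct elem **stack)
{
        struct elem *e = calloc(1, sizeof(*e));

        if (!e)
                return NULL; /* no draining of the stack here anymore */
        e->next = *stack;
        *stack = e;
        return e;
}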
*/ elem->st.branches = 1; elem->st.in_sleepable = is_sleepable; - elem->st.insn_hist_start = env->cur_state->insn_hist_end; - elem->st.insn_hist_end = elem->st.insn_hist_start; - frame = kzalloc(sizeof(*frame), GFP_KERNEL); + frame = kzalloc(sizeof(*frame), GFP_KERNEL_ACCOUNT); if (!frame) - goto err; + return NULL; init_func_state(env, frame, BPF_MAIN_FUNC /* callsite */, 0 /* frameno within this callchain */, subprog /* subprog number within this prog */); elem->st.frame[0] = frame; return &elem->st; -err: - free_verifier_state(env->cur_state, true); - env->cur_state = NULL; - /* pop all elements and return */ - while (!pop_stack(env, NULL, NULL, false)); - return NULL; } @@ -3167,7 +3230,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) return -EINVAL; } - tab = kzalloc(sizeof(*tab), GFP_KERNEL); + tab = kzalloc(sizeof(*tab), GFP_KERNEL_ACCOUNT); if (!tab) return -ENOMEM; prog_aux->kfunc_tab = tab; @@ -3183,7 +3246,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) return 0; if (!btf_tab && offset) { - btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL); + btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL_ACCOUNT); if (!btf_tab) return -ENOMEM; prog_aux->kfunc_btf_tab = btf_tab; @@ -3843,10 +3906,11 @@ static void linked_regs_unpack(u64 val, struct linked_regs *s) } /* for any branch, call, exit record the history of jmps in the given state */ -static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur, - int insn_flags, u64 linked_regs) +static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur, + int insn_flags, u64 linked_regs) { - struct bpf_insn_hist_entry *p; + u32 cnt = cur->jmp_history_cnt; + struct bpf_jmp_history_entry *p; size_t alloc_size; /* combine instruction flags if we already recorded this instruction */ @@ -3866,32 +3930,29 @@ static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_s return 0; } - if (cur->insn_hist_end + 1 > env->insn_hist_cap) { - alloc_size = size_mul(cur->insn_hist_end + 1, sizeof(*p)); - p = kvrealloc(env->insn_hist, alloc_size, GFP_USER); - if (!p) - return -ENOMEM; - env->insn_hist = p; - env->insn_hist_cap = alloc_size / sizeof(*p); - } + cnt++; + alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p))); + p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT); + if (!p) + return -ENOMEM; + cur->jmp_history = p; - p = &env->insn_hist[cur->insn_hist_end]; + p = &cur->jmp_history[cnt - 1]; p->idx = env->insn_idx; p->prev_idx = env->prev_insn_idx; p->flags = insn_flags; p->linked_regs = linked_regs; - - cur->insn_hist_end++; + cur->jmp_history_cnt = cnt; env->cur_hist_ent = p; return 0; } -static struct bpf_insn_hist_entry *get_insn_hist_entry(struct bpf_verifier_env *env, - u32 hist_start, u32 hist_end, int insn_idx) +static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st, + u32 hist_end, int insn_idx) { - if (hist_end > hist_start && env->insn_hist[hist_end - 1].idx == insn_idx) - return &env->insn_hist[hist_end - 1]; + if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx) + return &st->jmp_history[hist_end - 1]; return NULL; } @@ -3908,26 +3969,25 @@ static struct bpf_insn_hist_entry *get_insn_hist_entry(struct bpf_verifier_env * * history entry recording a jump from last instruction of parent state and * first instruction of given state. 
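push_jmp_history() above now reallocates the per-state history on every push; pairing krealloc() with kmalloc_size_roundup() keeps that cheap, since a krealloc() within the same size bucket does not move the buffer. A userspace sketch of the idiom, with a power-of-two round-up standing in for kmalloc_size_roundup():

#include <stdlib.h>

struct entry { unsigned int idx, prev_idx; };

/* stand-in for kmalloc_size_roundup(): next power of two */
static size_t roundup_pow2(size_t sz)
{
        size_t p = 16;

        while (p < sz)
                p <<= 1;
        return p;
}

static struct entry *hist_push(struct entry **hist, unsigned int *cnt)
{
        size_t sz = roundup_pow2(sizeof(struct entry) * (*cnt + 1));
        struct entry *p = realloc(*hist, sz);

        if (!p)
                return NULL;
        *hist = p;
        return &p[(*cnt)++]; /* caller fills the new tail entry */
}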
*/ -static int get_prev_insn_idx(const struct bpf_verifier_env *env, - struct bpf_verifier_state *st, - int insn_idx, u32 hist_start, u32 *hist_endp) +static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, + u32 *history) { - u32 hist_end = *hist_endp; - u32 cnt = hist_end - hist_start; + u32 cnt = *history; - if (insn_idx == st->first_insn_idx) { + if (i == st->first_insn_idx) { if (cnt == 0) return -ENOENT; - if (cnt == 1 && env->insn_hist[hist_start].idx == insn_idx) + if (cnt == 1 && st->jmp_history[0].idx == i) return -ENOENT; } - if (cnt && env->insn_hist[hist_end - 1].idx == insn_idx) { - (*hist_endp)--; - return env->insn_hist[hist_end - 1].prev_idx; + if (cnt && st->jmp_history[cnt - 1].idx == i) { + i = st->jmp_history[cnt - 1].prev_idx; + (*history)--; } else { - return insn_idx - 1; + i--; } + return i; } static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) @@ -4108,7 +4168,7 @@ static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask) /* If any register R in hist->linked_regs is marked as precise in bt, * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs. */ -static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_insn_hist_entry *hist) +static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist) { struct linked_regs linked_regs; bool some_precise = false; @@ -4153,7 +4213,7 @@ static bool calls_callback(struct bpf_verifier_env *env, int insn_idx); * - *was* processed previously during backtracking. */ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, - struct bpf_insn_hist_entry *hist, struct backtrack_state *bt) + struct bpf_jmp_history_entry *hist, struct backtrack_state *bt) { struct bpf_insn *insn = env->prog->insnsi + idx; u8 class = BPF_CLASS(insn->code); @@ -4571,7 +4631,7 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_ * SCALARS, as well as any other registers and slots that contribute to * a tracked state of given registers/stack slots, depending on specific BPF * assembly instructions (see backtrack_insns() for exact instruction handling - * logic). This backtracking relies on recorded insn_hist and is able to + * logic). This backtracking relies on recorded jmp_history and is able to * traverse entire chain of parent states. This process ends only when all the * necessary registers/slots and their transitive dependencies are marked as * precise. @@ -4651,23 +4711,27 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_ * mark_all_scalars_imprecise() to hopefully get more permissive and generic * finalized states which help in short circuiting more future states. 
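The backward walk in get_prev_insn_idx() either follows the recorded jump at the top of the history or falls through to i - 1. A self-contained sketch of the core step, skipping the first_insn_idx corner cases, with made-up instruction indices:

#include <stdio.h>

struct hist { unsigned int idx, prev_idx; };

static int prev_insn(const struct hist *h, unsigned int *cnt, int i)
{
        if (*cnt && h[*cnt - 1].idx == (unsigned int)i) {
                i = h[*cnt - 1].prev_idx; /* follow recorded jump */
                (*cnt)--;
        } else {
                i--;                      /* plain fall-through */
        }
        return i;
}

int main(void)
{
        struct hist h[] = { { .idx = 7, .prev_idx = 3 } };
        unsigned int cnt = 1;
        int i = 9;

        while (i > 3)
                i = prev_insn(h, &cnt, i); /* 9 -> 8 -> 7 -> 3 */
        printf("%d\n", i);
        return 0;
}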
*/ -static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) +static int __mark_chain_precision(struct bpf_verifier_env *env, + struct bpf_verifier_state *starting_state, + int regno, + bool *changed) { + struct bpf_verifier_state *st = starting_state; struct backtrack_state *bt = &env->bt; - struct bpf_verifier_state *st = env->cur_state; int first_idx = st->first_insn_idx; - int last_idx = env->insn_idx; + int last_idx = starting_state->insn_idx; int subseq_idx = -1; struct bpf_func_state *func; + bool tmp, skip_first = true; struct bpf_reg_state *reg; - bool skip_first = true; int i, fr, err; if (!env->bpf_capable) return 0; + changed = changed ?: &tmp; /* set frame number from which we are starting to backtrack */ - bt_init(bt, env->cur_state->curframe); + bt_init(bt, starting_state->curframe); /* Do sanity checks against current state of register and/or stack * slot, but don't set precise flag in current state, as precision @@ -4688,9 +4752,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) for (;;) { DECLARE_BITMAP(mask, 64); - u32 hist_start = st->insn_hist_start; - u32 hist_end = st->insn_hist_end; - struct bpf_insn_hist_entry *hist; + u32 history = st->jmp_history_cnt; + struct bpf_jmp_history_entry *hist; if (env->log.level & BPF_LOG_LEVEL2) { verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n", @@ -4712,8 +4775,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) for_each_set_bit(i, mask, 32) { reg = &st->frame[0]->regs[i]; bt_clear_reg(bt, i); - if (reg->type == SCALAR_VALUE) + if (reg->type == SCALAR_VALUE) { reg->precise = true; + *changed = true; + } } return 0; } @@ -4728,11 +4793,11 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) err = 0; skip_first = false; } else { - hist = get_insn_hist_entry(env, hist_start, hist_end, i); + hist = get_jmp_hist_entry(st, history, i); err = backtrack_insn(env, i, subseq_idx, hist, bt); } if (err == -ENOTSUPP) { - mark_all_scalars_precise(env, env->cur_state); + mark_all_scalars_precise(env, starting_state); bt_reset(bt); return 0; } else if (err) { @@ -4745,7 +4810,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) */ return 0; subseq_idx = i; - i = get_prev_insn_idx(env, st, i, hist_start, &hist_end); + i = get_prev_insn_idx(st, i, &history); if (i == -ENOENT) break; if (i >= env->prog->len) { @@ -4772,10 +4837,12 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) bt_clear_frame_reg(bt, fr, i); continue; } - if (reg->precise) + if (reg->precise) { bt_clear_frame_reg(bt, fr, i); - else + } else { reg->precise = true; + *changed = true; + } } bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); @@ -4790,10 +4857,12 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) continue; } reg = &func->stack[i].spilled_ptr; - if (reg->precise) + if (reg->precise) { bt_clear_frame_slot(bt, fr, i); - else + } else { reg->precise = true; + *changed = true; + } } if (env->log.level & BPF_LOG_LEVEL2) { fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, @@ -4820,7 +4889,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) * fallback to marking all precise */ if (!bt_empty(bt)) { - mark_all_scalars_precise(env, env->cur_state); + mark_all_scalars_precise(env, starting_state); bt_reset(bt); } @@ -4829,15 +4898,16 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) int mark_chain_precision(struct 
bpf_verifier_env *env, int regno) { - return __mark_chain_precision(env, regno); + return __mark_chain_precision(env, env->cur_state, regno, NULL); } /* mark_chain_precision_batch() assumes that env->bt is set in the caller to * desired reg and stack masks across all relevant frames */ -static int mark_chain_precision_batch(struct bpf_verifier_env *env) +static int mark_chain_precision_batch(struct bpf_verifier_env *env, + struct bpf_verifier_state *starting_state) { - return __mark_chain_precision(env, -1); + return __mark_chain_precision(env, starting_state, -1, NULL); } static bool is_spillable_regtype(enum bpf_reg_type type) @@ -5026,7 +5096,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, } if (sanitize) - env->insn_aux_data[insn_idx].sanitize_stack_spill = true; + env->insn_aux_data[insn_idx].nospec_result = true; } err = destroy_if_dynptr_stack_slot(env, state, spi); @@ -5109,7 +5179,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, } if (insn_flags) - return push_insn_history(env, env->cur_state, insn_flags, 0); + return push_jmp_history(env, env->cur_state, insn_flags, 0); return 0; } @@ -5416,7 +5486,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, insn_flags = 0; /* we are not restoring spilled register */ } if (insn_flags) - return push_insn_history(env, env->cur_state, insn_flags, 0); + return push_jmp_history(env, env->cur_state, insn_flags, 0); return 0; } @@ -7031,8 +7101,7 @@ BTF_TYPE_SAFE_TRUSTED(struct file) { struct inode *f_inode; }; -BTF_TYPE_SAFE_TRUSTED(struct dentry) { - /* no negative dentry-s in places where bpf can see it */ +BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) { struct inode *d_inode; }; @@ -7071,7 +7140,6 @@ static bool type_is_trusted(struct bpf_verifier_env *env, BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file)); - BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry)); return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted"); } @@ -7081,6 +7149,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env, const char *field_name, u32 btf_id) { BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry)); return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted_or_null"); @@ -7475,6 +7544,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } } else if (base_type(reg->type) == PTR_TO_MEM) { bool rdonly_mem = type_is_rdonly_mem(reg->type); + bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED); if (type_may_be_null(reg->type)) { verbose(env, "R%d invalid mem access '%s'\n", regno, @@ -7494,8 +7564,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return -EACCES; } - err = check_mem_region_access(env, regno, off, size, - reg->mem_size, false); + /* + * Accesses to untrusted PTR_TO_MEM are done through probe + * instructions, hence no need to check bounds in that case. 
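Returning to the dentry hunks above: moving d_inode from the __safe_trusted set to __safe_trusted_or_null means a load of dentry->d_inode now yields a maybe-NULL trusted pointer, since negative dentries are visible to BPF. An illustrative BPF-side sketch; the hook and its argument trust are assumptions, and vmlinux.h, bpf_helpers.h and bpf_tracing.h are assumed:

SEC("lsm/inode_mkdir")
int BPF_PROG(chk_mkdir, struct inode *dir, struct dentry *dentry,
             umode_t mode)
{
        struct inode *inode = dentry->d_inode; /* may be NULL now */

        if (!inode) /* the verifier requires this check */
                return 0;
        bpf_printk("ino=%lu", inode->i_ino);
        return 0;
}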
+ */ + if (!rdonly_untrusted) + err = check_mem_region_access(env, regno, off, size, + reg->mem_size, false); if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) mark_reg_unknown(env, regs, value_regno); } else if (reg->type == PTR_TO_CTX) { @@ -8959,7 +9034,7 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env, if (!meta->map_ptr) { /* kernel subsystem misconfigured verifier */ verbose(env, "invalid map_ptr to access map->type\n"); - return -EACCES; + return -EFAULT; } switch (meta->map_ptr->map_type) { @@ -9495,7 +9570,7 @@ static int get_constant_map_key(struct bpf_verifier_env *env, * to prevent pruning on it. */ bt_set_frame_slot(&env->bt, key->frameno, spi); - err = mark_chain_precision_batch(env); + err = mark_chain_precision_batch(env, env->cur_state); if (err < 0) return err; @@ -9647,7 +9722,7 @@ skip_type_check: * that kernel subsystem misconfigured verifier */ verbose(env, "invalid map_ptr to access map->key\n"); - return -EACCES; + return -EFAULT; } key_size = meta->map_ptr->key_size; err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL); @@ -9674,7 +9749,7 @@ skip_type_check: if (!meta->map_ptr) { /* kernel subsystem misconfigured verifier */ verbose(env, "invalid map_ptr to access map->value\n"); - return -EACCES; + return -EFAULT; } meta->raw_mode = arg_type & MEM_UNINIT; err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, @@ -10284,7 +10359,7 @@ static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int calls } caller = state->frame[state->curframe]; - callee = kzalloc(sizeof(*callee), GFP_KERNEL); + callee = kzalloc(sizeof(*callee), GFP_KERNEL_ACCOUNT); if (!callee) return -ENOMEM; state->frame[state->curframe + 1] = callee; @@ -10970,7 +11045,7 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, if (map == NULL) { verbose(env, "kernel subsystem misconfigured verifier\n"); - return -EINVAL; + return -EFAULT; } /* In case of read-only, some additional restrictions @@ -11009,7 +11084,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, return 0; if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) { verbose(env, "kernel subsystem misconfigured verifier\n"); - return -EINVAL; + return -EFAULT; } reg = ®s[BPF_REG_3]; @@ -11151,7 +11226,7 @@ static int check_get_func_ip(struct bpf_verifier_env *env) return -ENOTSUPP; } -static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) +static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env) { return &env->insn_aux_data[env->insn_idx]; } @@ -11263,7 +11338,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", func_id_name(func_id), func_id); - return -EINVAL; + return -EFAULT; } memset(&meta, 0, sizeof(meta)); @@ -11565,7 +11640,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (meta.map_ptr == NULL) { verbose(env, "kernel subsystem misconfigured verifier\n"); - return -EINVAL; + return -EFAULT; } if (func_id == BPF_FUNC_map_lookup_elem && @@ -11658,7 +11733,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn verbose(env, "verifier internal error:"); verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n", func_id_name(func_id)); - return -EINVAL; + return -EFAULT; } ret_btf = btf_vmlinux; ret_btf_id = *fn->ret_btf_id; @@ 
-13542,16 +13617,24 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca regs[BPF_REG_0].btf_id = meta->ret_btf_id; } else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value); - if (!ret_t || !btf_type_is_struct(ret_t)) { + if (!ret_t) { + verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n", + meta->arg_constant.value); + return -EINVAL; + } else if (btf_type_is_struct(ret_t)) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED; + regs[BPF_REG_0].btf = desc_btf; + regs[BPF_REG_0].btf_id = meta->arg_constant.value; + } else if (btf_type_is_void(ret_t)) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED; + regs[BPF_REG_0].mem_size = 0; + } else { verbose(env, - "kfunc bpf_rdonly_cast type ID argument must be of a struct\n"); + "kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n"); return -EINVAL; } - - mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED; - regs[BPF_REG_0].btf = desc_btf; - regs[BPF_REG_0].btf_id = meta->arg_constant.value; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] || meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) { enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->initialized_dynptr.type); @@ -14019,7 +14102,9 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, const struct bpf_insn *insn) { - return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K; + return env->bypass_spec_v1 || + BPF_SRC(insn->code) == BPF_K || + cur_aux(env)->nospec; } static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, @@ -14219,7 +14304,7 @@ static int sanitize_err(struct bpf_verifier_env *env, case REASON_STACK: verbose(env, "R%d could not be pushed for speculative verification, %s\n", dst, err); - break; + return -ENOMEM; default: verbose(env, "verifier internal error: unknown reason (%d)\n", reason); @@ -14289,7 +14374,7 @@ static int sanitize_check_bounds(struct bpf_verifier_env *env, } break; default: - break; + return -EOPNOTSUPP; } return 0; @@ -14316,7 +14401,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, struct bpf_sanitize_info info = {}; u8 opcode = BPF_OP(insn->code); u32 dst = insn->dst_reg; - int ret; + int ret, bounds_ret; dst_reg = ®s[dst]; @@ -14348,6 +14433,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, return -EACCES; } + /* + * Accesses to untrusted PTR_TO_MEM are done through probe + * instructions, hence no need to track offsets. 
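Together with the check_mem_access() change above, skipping offset tracking is what makes the void cast usable: bpf_rdonly_cast() with a void type ID returns PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED with mem_size 0, and loads through it are emitted as probe instructions that read 0 on fault. An illustrative BPF-side sketch, not from the patch; the extern declaration follows the kernel selftests, while the hook, the offset, and BTF id 0 standing for void are assumptions here (vmlinux.h and libbpf helpers assumed):

extern void *bpf_rdonly_cast(const void *obj__ign, u32 btf_id__k) __ksym;

SEC("fentry/do_sys_openat2")
int BPF_PROG(peek, int dfd, const char *filename)
{
        const char *p = bpf_rdonly_cast(filename, 0 /* void */);
        char c = p[4096]; /* no bounds check: emitted as a probe load */

        return c == 0;
}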
+ */ + if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED)) + return 0; + switch (base_type(ptr_reg->type)) { case PTR_TO_CTX: case PTR_TO_MAP_VALUE: @@ -14516,11 +14608,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) return -EINVAL; reg_bounds_sync(dst_reg); - if (sanitize_check_bounds(env, insn, dst_reg) < 0) - return -EACCES; + bounds_ret = sanitize_check_bounds(env, insn, dst_reg); + if (bounds_ret == -EACCES) + return bounds_ret; if (sanitize_needed(opcode)) { ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, &info, true); + if (verifier_bug_if(!can_skip_alu_sanitation(env, insn) + && !env->cur_state->speculative + && bounds_ret + && !ret, + env, "Pointer type unsupported by sanitize_check_bounds() not rejected by retrieve_ptr_limit() as required")) { + return -EFAULT; + } if (ret < 0) return sanitize_err(env, insn, ret, off_reg, dst_reg); } @@ -14535,14 +14635,25 @@ static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, s32 *dst_smax = &dst_reg->s32_max_value; u32 *dst_umin = &dst_reg->u32_min_value; u32 *dst_umax = &dst_reg->u32_max_value; + u32 umin_val = src_reg->u32_min_value; + u32 umax_val = src_reg->u32_max_value; + bool min_overflow, max_overflow; if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) || check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) { *dst_smin = S32_MIN; *dst_smax = S32_MAX; } - if (check_add_overflow(*dst_umin, src_reg->u32_min_value, dst_umin) || - check_add_overflow(*dst_umax, src_reg->u32_max_value, dst_umax)) { + + /* If either all additions overflow or no additions overflow, then + * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax = + * dst_umax + src_umax. Otherwise (some additions overflow), set + * the output bounds to unbounded. + */ + min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin); + max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax); + + if (!min_overflow && max_overflow) { *dst_umin = 0; *dst_umax = U32_MAX; } @@ -14555,14 +14666,25 @@ static void scalar_min_max_add(struct bpf_reg_state *dst_reg, s64 *dst_smax = &dst_reg->smax_value; u64 *dst_umin = &dst_reg->umin_value; u64 *dst_umax = &dst_reg->umax_value; + u64 umin_val = src_reg->umin_value; + u64 umax_val = src_reg->umax_value; + bool min_overflow, max_overflow; if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) || check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) { *dst_smin = S64_MIN; *dst_smax = S64_MAX; } - if (check_add_overflow(*dst_umin, src_reg->umin_value, dst_umin) || - check_add_overflow(*dst_umax, src_reg->umax_value, dst_umax)) { + + /* If either all additions overflow or no additions overflow, then + * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax = + * dst_umax + src_umax. Otherwise (some additions overflow), set + * the output bounds to unbounded. 
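A worked example of the rule above at u8 width (the real code operates on u32/u64 with check_add_overflow()):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* The result stays tight unless exactly one of the two edge
 * additions wraps: then min > max would be an invalid range, so it
 * becomes unbounded. If both edges wrap, the whole range wrapped
 * once and stays contiguous.
 */
static void add_bounds(uint8_t *umin, uint8_t *umax, uint8_t smin, uint8_t smax)
{
        bool min_ovf = __builtin_add_overflow(*umin, smin, umin);
        bool max_ovf = __builtin_add_overflow(*umax, smax, umax);

        if (!min_ovf && max_ovf) {
                *umin = 0;
                *umax = UINT8_MAX;
        }
}

int main(void)
{
        uint8_t lo = 200, hi = 250;

        add_bounds(&lo, &hi, 10, 100);  /* only max wraps -> [0, 255] */
        printf("[%u, %u]\n", lo, hi);

        lo = 200; hi = 250;
        add_bounds(&lo, &hi, 100, 110); /* both wrap -> [44, 104] */
        printf("[%u, %u]\n", lo, hi);
        return 0;
}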
+ */ + min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin); + max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax); + + if (!min_overflow && max_overflow) { *dst_umin = 0; *dst_umax = U64_MAX; } @@ -14573,8 +14695,11 @@ static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, { s32 *dst_smin = &dst_reg->s32_min_value; s32 *dst_smax = &dst_reg->s32_max_value; + u32 *dst_umin = &dst_reg->u32_min_value; + u32 *dst_umax = &dst_reg->u32_max_value; u32 umin_val = src_reg->u32_min_value; u32 umax_val = src_reg->u32_max_value; + bool min_underflow, max_underflow; if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) || check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) { @@ -14582,14 +14707,18 @@ static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, *dst_smin = S32_MIN; *dst_smax = S32_MAX; } - if (dst_reg->u32_min_value < umax_val) { - /* Overflow possible, we know nothing */ - dst_reg->u32_min_value = 0; - dst_reg->u32_max_value = U32_MAX; - } else { - /* Cannot overflow (as long as bounds are consistent) */ - dst_reg->u32_min_value -= umax_val; - dst_reg->u32_max_value -= umin_val; + + /* If either all subtractions underflow or no subtractions + * underflow, it is okay to set: dst_umin = dst_umin - src_umax, + * dst_umax = dst_umax - src_umin. Otherwise (some subtractions + * underflow), set the output bounds to unbounded. + */ + min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin); + max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax); + + if (min_underflow && !max_underflow) { + *dst_umin = 0; + *dst_umax = U32_MAX; } } @@ -14598,8 +14727,11 @@ static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, { s64 *dst_smin = &dst_reg->smin_value; s64 *dst_smax = &dst_reg->smax_value; + u64 *dst_umin = &dst_reg->umin_value; + u64 *dst_umax = &dst_reg->umax_value; u64 umin_val = src_reg->umin_value; u64 umax_val = src_reg->umax_value; + bool min_underflow, max_underflow; if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) || check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) { @@ -14607,14 +14739,18 @@ static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, *dst_smin = S64_MIN; *dst_smax = S64_MAX; } - if (dst_reg->umin_value < umax_val) { - /* Overflow possible, we know nothing */ - dst_reg->umin_value = 0; - dst_reg->umax_value = U64_MAX; - } else { - /* Cannot overflow (as long as bounds are consistent) */ - dst_reg->umin_value -= umax_val; - dst_reg->umax_value -= umin_val; + + /* If either all subtractions underflow or no subtractions + * underflow, it is okay to set: dst_umin = dst_umin - src_umax, + * dst_umax = dst_umax - src_umin. Otherwise (some subtractions + * underflow), set the output bounds to unbounded. 
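The same rule for subtraction, again at u8 width, plus the reduction that the BPF_NEG handling just below relies on: neg(x) is evaluated as 0 - x with a known-zero minuend, so it reuses this sub logic together with tnum_neg():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static void sub_bounds(uint8_t *umin, uint8_t *umax, uint8_t smin, uint8_t smax)
{
        /* min edge subtracts the largest src, max edge the smallest */
        bool min_unf = __builtin_sub_overflow(*umin, smax, umin);
        bool max_unf = __builtin_sub_overflow(*umax, smin, umax);

        if (min_unf && !max_unf) { /* only the min wrapped: unbounded */
                *umin = 0;
                *umax = UINT8_MAX;
        }
}

static void neg_bounds(uint8_t *umin, uint8_t *umax)
{
        uint8_t smin = *umin, smax = *umax;

        *umin = 0; /* known-zero "dst", as in the BPF_NEG case */
        *umax = 0;
        sub_bounds(umin, umax, smin, smax); /* 0 - x */
}

int main(void)
{
        uint8_t lo = 3, hi = 5;

        neg_bounds(&lo, &hi);
        printf("[%u, %u]\n", lo, hi); /* [251, 253] == [-5, -3] mod 256 */
        return 0;
}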
+ */ + min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin); + max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax); + + if (min_underflow && !max_underflow) { + *dst_umin = 0; + *dst_umax = U64_MAX; } } @@ -15076,6 +15212,7 @@ static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn, switch (BPF_OP(insn->code)) { case BPF_ADD: case BPF_SUB: + case BPF_NEG: case BPF_AND: case BPF_XOR: case BPF_OR: @@ -15144,6 +15281,13 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, scalar_min_max_sub(dst_reg, &src_reg); dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); break; + case BPF_NEG: + env->fake_reg[0] = *dst_reg; + __mark_reg_known(dst_reg, 0); + scalar32_min_max_sub(dst_reg, &env->fake_reg[0]); + scalar_min_max_sub(dst_reg, &env->fake_reg[0]); + dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off); + break; case BPF_MUL: dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off); scalar32_min_max_mul(dst_reg, &src_reg); @@ -15283,12 +15427,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, if (WARN_ON_ONCE(ptr_reg)) { print_verifier_state(env, vstate, vstate->curframe, true); verbose(env, "verifier internal error: unexpected ptr_reg\n"); - return -EINVAL; + return -EFAULT; } if (WARN_ON(!src_reg)) { print_verifier_state(env, vstate, vstate->curframe, true); verbose(env, "verifier internal error: no src_reg\n"); - return -EINVAL; + return -EFAULT; } err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); if (err) @@ -15367,7 +15511,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } /* check dest operand */ - err = check_reg_arg(env, insn->dst_reg, DST_OP); + if (opcode == BPF_NEG) { + err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); + err = err ?: adjust_scalar_min_max_vals(env, insn, + ®s[insn->dst_reg], + regs[insn->dst_reg]); + } else { + err = check_reg_arg(env, insn->dst_reg, DST_OP); + } if (err) return err; @@ -16478,7 +16629,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } if (insn_flags) { - err = push_insn_history(env, this_branch, insn_flags, 0); + err = push_jmp_history(env, this_branch, insn_flags, 0); if (err) return err; } @@ -16536,7 +16687,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, if (dst_reg->type == SCALAR_VALUE && dst_reg->id) collect_linked_regs(this_branch, dst_reg->id, &linked_regs); if (linked_regs.cnt > 1) { - err = push_insn_history(env, this_branch, 0, linked_regs_pack(&linked_regs)); + err = push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs)); if (err) return err; } @@ -16734,7 +16885,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) dst_reg->type = CONST_PTR_TO_MAP; } else { verbose(env, "bpf verifier is misconfigured\n"); - return -EINVAL; + return -EFAULT; } return 0; @@ -16781,7 +16932,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) if (!env->ops->gen_ld_abs) { verbose(env, "bpf verifier is misconfigured\n"); - return -EINVAL; + return -EFAULT; } if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || @@ -17611,17 +17762,18 @@ static int check_cfg(struct bpf_verifier_env *env) int *insn_stack, *insn_state, *insn_postorder; int ex_insn_beg, i, ret = 0; - insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); if (!insn_state) return -ENOMEM; - insn_stack = env->cfg.insn_stack = 
kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); if (!insn_stack) { kvfree(insn_state); return -ENOMEM; } - insn_postorder = env->cfg.insn_postorder = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + insn_postorder = env->cfg.insn_postorder = + kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); if (!insn_postorder) { kvfree(insn_state); kvfree(insn_stack); @@ -17755,7 +17907,7 @@ static int check_btf_func_early(struct bpf_verifier_env *env, urecord = make_bpfptr(attr->func_info, uattr.is_kernel); min_size = min_t(u32, krec_size, urec_size); - krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); + krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!krecord) return -ENOMEM; @@ -17855,7 +18007,7 @@ static int check_btf_func(struct bpf_verifier_env *env, urecord = make_bpfptr(attr->func_info, uattr.is_kernel); krecord = prog->aux->func_info; - info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN); + info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!info_aux) return -ENOMEM; @@ -17941,7 +18093,7 @@ static int check_btf_line(struct bpf_verifier_env *env, * pass in a smaller bpf_line_info object. */ linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info), - GFP_KERNEL | __GFP_NOWARN); + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!linfo) return -ENOMEM; @@ -18264,10 +18416,6 @@ static void clean_verifier_state(struct bpf_verifier_env *env, { int i; - if (st->frame[0]->regs[0].live & REG_LIVE_DONE) - /* all regs in this state in all frames were already marked */ - return; - for (i = 0; i <= st->curframe; i++) clean_func_state(env, st->frame[i]); } @@ -18307,7 +18455,6 @@ static void clean_verifier_state(struct bpf_verifier_env *env, static void clean_live_states(struct bpf_verifier_env *env, int insn, struct bpf_verifier_state *cur) { - struct bpf_verifier_state *loop_entry; struct bpf_verifier_state_list *sl; struct list_head *pos, *head; @@ -18316,12 +18463,14 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, sl = container_of(pos, struct bpf_verifier_state_list, node); if (sl->state.branches) continue; - loop_entry = get_loop_entry(env, &sl->state); - if (!IS_ERR_OR_NULL(loop_entry) && loop_entry->branches) - continue; if (sl->state.insn_idx != insn || !same_callsites(&sl->state, cur)) continue; + if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) + /* all regs in this state in all frames were already marked */ + continue; + if (incomplete_read_marks(env, &sl->state)) + continue; clean_verifier_state(env, &sl->state); } } @@ -18768,9 +18917,7 @@ static bool states_equal(struct bpf_verifier_env *env, * and all frame states need to be equivalent */ for (i = 0; i <= old->curframe; i++) { - insn_idx = i == old->curframe - ? 
env->insn_idx - : old->frame[i + 1]->callsite; + insn_idx = frame_insn_idx(old, i); if (old->frame[i]->callsite != cur->frame[i]->callsite) return false; if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact)) @@ -18817,12 +18964,15 @@ static int propagate_liveness_reg(struct bpf_verifier_env *env, */ static int propagate_liveness(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, - struct bpf_verifier_state *vparent) + struct bpf_verifier_state *vparent, + bool *changed) { struct bpf_reg_state *state_reg, *parent_reg; struct bpf_func_state *state, *parent; int i, frame, err = 0; + bool tmp = false; + changed = changed ?: &tmp; if (vparent->curframe != vstate->curframe) { WARN(1, "propagate_live: parent frame %d current frame %d\n", vparent->curframe, vstate->curframe); @@ -18841,6 +18991,7 @@ static int propagate_liveness(struct bpf_verifier_env *env, &parent_reg[i]); if (err < 0) return err; + *changed |= err > 0; if (err == REG_LIVE_READ64) mark_insn_zext(env, &parent_reg[i]); } @@ -18852,6 +19003,7 @@ static int propagate_liveness(struct bpf_verifier_env *env, state_reg = &state->stack[i].spilled_ptr; err = propagate_liveness_reg(env, state_reg, parent_reg); + *changed |= err > 0; if (err < 0) return err; } @@ -18863,7 +19015,9 @@ static int propagate_liveness(struct bpf_verifier_env *env, * propagate them into the current state */ static int propagate_precision(struct bpf_verifier_env *env, - const struct bpf_verifier_state *old) + const struct bpf_verifier_state *old, + struct bpf_verifier_state *cur, + bool *changed) { struct bpf_reg_state *state_reg; struct bpf_func_state *state; @@ -18911,13 +19065,53 @@ static int propagate_precision(struct bpf_verifier_env *env, verbose(env, "\n"); } - err = mark_chain_precision_batch(env); + err = __mark_chain_precision(env, cur, -1, changed); if (err < 0) return err; return 0; } +#define MAX_BACKEDGE_ITERS 64 + +/* Propagate read and precision marks from visit->backedges[*].state->equal_state + * to corresponding parent states of visit->backedges[*].state until fixed point is reached, + * then free visit->backedges. + * After execution of this function incomplete_read_marks() will return false + * for all states corresponding to @visit->callchain. 
+ */ +static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit) +{ + struct bpf_scc_backedge *backedge; + struct bpf_verifier_state *st; + bool changed; + int i, err; + + i = 0; + do { + if (i++ > MAX_BACKEDGE_ITERS) { + if (env->log.level & BPF_LOG_LEVEL2) + verbose(env, "%s: too many iterations\n", __func__); + for (backedge = visit->backedges; backedge; backedge = backedge->next) + mark_all_scalars_precise(env, &backedge->state); + break; + } + changed = false; + for (backedge = visit->backedges; backedge; backedge = backedge->next) { + st = &backedge->state; + err = propagate_liveness(env, st->equal_state, st, &changed); + if (err) + return err; + err = propagate_precision(env, st->equal_state, st, &changed); + if (err) + return err; + } + } while (changed); + + free_backedges(visit); + return 0; +} + static bool states_maybe_looping(struct bpf_verifier_state *old, struct bpf_verifier_state *cur) { @@ -19027,14 +19221,14 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl; - struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry; + struct bpf_verifier_state *cur = env->cur_state, *new; + bool force_new_state, add_new_state, loop; int i, j, n, err, states_cnt = 0; - bool force_new_state, add_new_state, force_exact; struct list_head *pos, *tmp, *head; force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) || /* Avoid accumulating infinitely long jmp history */ - cur->insn_hist_end - cur->insn_hist_start > 40; + cur->jmp_history_cnt > 40; /* bpf progs typically have pruning point every 4 instructions * http://vger.kernel.org/bpfconf2019.html#session-1 @@ -19051,6 +19245,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) clean_live_states(env, insn_idx, cur); + loop = false; head = explored_state(env, insn_idx); list_for_each_safe(pos, tmp, head) { sl = container_of(pos, struct bpf_verifier_state_list, node); @@ -19130,7 +19325,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) spi = __get_spi(iter_reg->off + iter_reg->var_off.value); iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr; if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) { - update_loop_entry(env, cur, &sl->state); + loop = true; goto hit; } } @@ -19139,7 +19334,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) if (is_may_goto_insn_at(env, insn_idx)) { if (sl->state.may_goto_depth != cur->may_goto_depth && states_equal(env, &sl->state, cur, RANGE_WITHIN)) { - update_loop_entry(env, cur, &sl->state); + loop = true; goto hit; } } @@ -19181,38 +19376,9 @@ skip_inf_loop_check: add_new_state = false; goto miss; } - /* If sl->state is a part of a loop and this loop's entry is a part of - * current verification path then states have to be compared exactly. - * 'force_exact' is needed to catch the following case: - * - * initial Here state 'succ' was processed first, - * | it was eventually tracked to produce a - * V state identical to 'hdr'. - * .---------> hdr All branches from 'succ' had been explored - * | | and thus 'succ' has its .branches == 0. - * | V - * | .------... Suppose states 'cur' and 'succ' correspond - * | | | to the same instruction + callsites. - * | V V In such case it is necessary to check - * | ... ... if 'succ' and 'cur' are states_equal(). - * | | | If 'succ' and 'cur' are a part of the - * | V V same loop exact flag has to be set. 
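propagate_backedges() above is a classic chaotic-iteration fixed point: keep sweeping all backedges until neither liveness nor precision marks change, with MAX_BACKEDGE_ITERS as a safety valve that degrades to marking everything precise (still correct, merely conservative). The same loop shape in a minimal sketch with hypothetical callbacks:

#include <stdbool.h>

#define MAX_ITERS 64

struct item { struct item *next; };

static int fixpoint(struct item *list,
                    bool (*propagate)(struct item *),
                    void (*give_up)(struct item *))
{
        struct item *it;
        bool changed;
        int i = 0;

        do {
                if (i++ > MAX_ITERS) {
                        /* conservative fallback preserves soundness */
                        for (it = list; it; it = it->next)
                                give_up(it);
                        break;
                }
                changed = false;
                for (it = list; it; it = it->next)
                        changed |= propagate(it);
        } while (changed);
        return 0;
}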
- * | succ <- cur To check if that is the case, verify - * | | if loop entry of 'succ' is in current - * | V DFS path. - * | ... - * | | - * '----' - * - * Additional details are in the comment before get_loop_entry(). - */ - loop_entry = get_loop_entry(env, &sl->state); - if (IS_ERR(loop_entry)) - return PTR_ERR(loop_entry); - force_exact = loop_entry && loop_entry->branches > 0; - if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) { - if (force_exact) - update_loop_entry(env, cur, loop_entry); + /* See comments for mark_all_regs_read_and_precise() */ + loop = incomplete_read_marks(env, &sl->state); + if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) { hit: sl->hit_cnt++; /* reached equivalent register/stack state, @@ -19225,7 +19391,7 @@ hit: * they'll be immediately forgotten as we're pruning * this state and will pop a new one. */ - err = propagate_liveness(env, &sl->state, cur); + err = propagate_liveness(env, &sl->state, cur, NULL); /* if previous state reached the exit with precision and * current state is equivalent to it (except precision marks) @@ -19233,10 +19399,98 @@ hit: * the current state. */ if (is_jmp_point(env, env->insn_idx)) - err = err ? : push_insn_history(env, cur, 0, 0); - err = err ? : propagate_precision(env, &sl->state); + err = err ? : push_jmp_history(env, cur, 0, 0); + err = err ? : propagate_precision(env, &sl->state, cur, NULL); if (err) return err; + /* When processing iterator based loops above propagate_liveness and + * propagate_precision calls are not sufficient to transfer all relevant + * read and precision marks. E.g. consider the following case: + * + * .-> A --. Assume the states are visited in the order A, B, C. + * | | | Assume that state B reaches a state equivalent to state A. + * | v v At this point, state C is not processed yet, so state A + * '-- B C has not received any read or precision marks from C. + * Thus, marks propagated from A to B are incomplete. + * + * The verifier mitigates this by performing the following steps: + * + * - Prior to the main verification pass, strongly connected components + * (SCCs) are computed over the program's control flow graph, + * intraprocedurally. + * + * - During the main verification pass, `maybe_enter_scc()` checks + * whether the current verifier state is entering an SCC. If so, an + * instance of a `bpf_scc_visit` object is created, and the state + * entering the SCC is recorded as the entry state. + * + * - This instance is associated not with the SCC itself, but with a + * `bpf_scc_callchain`: a tuple consisting of the call sites leading to + * the SCC and the SCC id. See `compute_scc_callchain()`. + * + * - When a verification path encounters a `states_equal(..., + * RANGE_WITHIN)` condition, there exists a call chain describing the + * current state and a corresponding `bpf_scc_visit` instance. A copy + * of the current state is created and added to + * `bpf_scc_visit->backedges`. + * + * - When a verification path terminates, `maybe_exit_scc()` is called + * from `update_branch_counts()`. For states with `branches == 0`, it + * checks whether the state is the entry state of any `bpf_scc_visit` + * instance. If it is, this indicates that all paths originating from + * this SCC visit have been explored. `propagate_backedges()` is then + * called, which propagates read and precision marks through the + * backedges until a fixed point is reached. 
+ * (In the earlier example, this would propagate marks from A to B, + * from C to A, and then again from A to B.) + * + * A note on callchains + * -------------------- + * + * Consider the following example: + * + * void foo() { loop { ... SCC#1 ... } } + * void main() { + * A: foo(); + * B: ... + * C: foo(); + * } + * + * Here, there are two distinct callchains leading to SCC#1: + * - (A, SCC#1) + * - (C, SCC#1) + * + * Each callchain identifies a separate `bpf_scc_visit` instance that + * accumulates backedge states. The `propagate_{liveness,precision}()` + * functions traverse the parent state of each backedge state, which + * means these parent states must remain valid (i.e., not freed) while + * the corresponding `bpf_scc_visit` instance exists. + * + * Associating `bpf_scc_visit` instances directly with SCCs instead of + * callchains would break this invariant: + * - States explored during `C: foo()` would contribute backedges to + * SCC#1, but SCC#1 would only be exited once the exploration of + * `A: foo()` completes. + * - By that time, the states explored between `A: foo()` and `C: foo()` + * (i.e., `B: ...`) may have already been freed, causing the parent + * links for states from `C: foo()` to become invalid. + */ + if (loop) { + struct bpf_scc_backedge *backedge; + + backedge = kzalloc(sizeof(*backedge), GFP_KERNEL_ACCOUNT); + if (!backedge) + return -ENOMEM; + err = copy_verifier_state(&backedge->state, cur); + backedge->state.equal_state = &sl->state; + backedge->state.insn_idx = insn_idx; + err = err ?: add_scc_backedge(env, &sl->state, backedge); + if (err) { + free_verifier_state(&backedge->state, false); + kvfree(backedge); + return err; + } + } return 1; } miss: @@ -19288,7 +19542,7 @@ miss: * When looping the sl->state.branches will be > 0 and this state * will not be considered for equivalence until branches == 0. */ - new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); + new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL_ACCOUNT); if (!new_sl) return -ENOMEM; env->total_states++; @@ -19312,11 +19566,17 @@ miss: new->insn_idx = insn_idx; WARN_ONCE(new->branches != 1, "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); + err = maybe_enter_scc(env, new); + if (err) { + free_verifier_state(new, false); + kvfree(new_sl); + return err; + } cur->parent = new; cur->first_insn_idx = insn_idx; - cur->insn_hist_start = cur->insn_hist_end; cur->dfs_depth = new->dfs_depth + 1; + clear_jmp_history(cur); list_add(&new_sl->node, head); /* connect new state to parentage chain. Current frame needs all @@ -19388,10 +19648,27 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) !reg_type_mismatch_ok(prev)); } +static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type) +{ + switch (base_type(type)) { + case PTR_TO_MEM: + case PTR_TO_BTF_ID: + return true; + default: + return false; + } +} + +static bool is_ptr_to_mem(enum bpf_reg_type type) +{ + return base_type(type) == PTR_TO_MEM; +} + static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type, bool allow_trust_mismatch) { enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type; + enum bpf_reg_type merged_type; if (*prev_type == NOT_INIT) { /* Saw a valid insn @@ -19408,15 +19685,24 @@ static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type typ * Reject it. 
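The backedge setup above leans on the `err = err ?: next_step()` chaining idiom (the GNU ?: extension, common in kernel code): each step runs only if all earlier ones succeeded, and cleanup happens at a single site. A toy standalone version with hypothetical step functions:

struct thing { int stage; };

static int step_one(struct thing *t)   { t->stage = 1; return 0; }
static int step_two(struct thing *t)   { t->stage = 2; return -1; }
static int step_three(struct thing *t) { t->stage = 3; return 0; }
static void teardown(struct thing *t)  { t->stage = 0; }

static int build(struct thing *t)
{
        int err;

        err = step_one(t);
        err = err ?: step_two(t);   /* runs: err was 0 */
        err = err ?: step_three(t); /* skipped: err is -1 */
        if (err)
                teardown(t);        /* single cleanup site */
        return err;
}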
*/ if (allow_trust_mismatch && - base_type(type) == PTR_TO_BTF_ID && - base_type(*prev_type) == PTR_TO_BTF_ID) { + is_ptr_to_mem_or_btf_id(type) && + is_ptr_to_mem_or_btf_id(*prev_type)) { /* * Have to support a use case when one path through * the program yields TRUSTED pointer while another * is UNTRUSTED. Fallback to UNTRUSTED to generate * BPF_PROBE_MEM/BPF_PROBE_MEMSX. + * Same behavior of MEM_RDONLY flag. */ - *prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED; + if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type)) + merged_type = PTR_TO_MEM; + else + merged_type = PTR_TO_BTF_ID; + if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED)) + merged_type |= PTR_UNTRUSTED; + if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY)) + merged_type |= MEM_RDONLY; + *prev_type = merged_type; } else { verbose(env, "same insn cannot be used with different pointers\n"); return -EINVAL; @@ -19426,20 +19712,223 @@ static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type typ return 0; } +enum { + PROCESS_BPF_EXIT = 1 +}; + +static int process_bpf_exit_full(struct bpf_verifier_env *env, + bool *do_print_state, + bool exception_exit) +{ + /* We must do check_reference_leak here before + * prepare_func_exit to handle the case when + * state->curframe > 0, it may be a callback function, + * for which reference_state must match caller reference + * state when it exits. + */ + int err = check_resource_leak(env, exception_exit, + !env->cur_state->curframe, + "BPF_EXIT instruction in main prog"); + if (err) + return err; + + /* The side effect of the prepare_func_exit which is + * being skipped is that it frees bpf_func_state. + * Typically, process_bpf_exit will only be hit with + * outermost exit. copy_verifier_state in pop_stack will + * handle freeing of any extra bpf_func_state left over + * from not processing all nested function exits. We + * also skip return code checks as they are not needed + * for exceptional exits. + */ + if (exception_exit) + return PROCESS_BPF_EXIT; + + if (env->cur_state->curframe) { + /* exit from nested function */ + err = prepare_func_exit(env, &env->insn_idx); + if (err) + return err; + *do_print_state = true; + return 0; + } + + err = check_return_code(env, BPF_REG_0, "R0"); + if (err) + return err; + return PROCESS_BPF_EXIT; +} + +static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state) +{ + int err; + struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx]; + u8 class = BPF_CLASS(insn->code); + + if (class == BPF_ALU || class == BPF_ALU64) { + err = check_alu_op(env, insn); + if (err) + return err; + + } else if (class == BPF_LDX) { + bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX; + + /* Check for reserved fields is already done in + * resolve_pseudo_ldimm64(). 
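The merged_type computation above pessimizes both paths: PTR_TO_MEM wins as the base type, and the PTR_UNTRUSTED / MEM_RDONLY modifiers are taken from either input. A sketch of the same rule on plain flag words (the constants are stand-ins, not the kernel's encodings):

#include <stdio.h>

#define BASE_MASK     0xff
#define PTR_TO_BTF_ID 0x01
#define PTR_TO_MEM    0x02
#define PTR_UNTRUSTED 0x100
#define MEM_RDONLY    0x200

static unsigned int merge_ptr_types(unsigned int a, unsigned int b)
{
        unsigned int m;

        /* PTR_TO_MEM wins over PTR_TO_BTF_ID as the merged base */
        m = ((a & BASE_MASK) == PTR_TO_MEM || (b & BASE_MASK) == PTR_TO_MEM)
                ? PTR_TO_MEM : PTR_TO_BTF_ID;
        m |= (a | b) & (PTR_UNTRUSTED | MEM_RDONLY); /* pessimize flags */
        return m;
}

int main(void)
{
        unsigned int t = merge_ptr_types(PTR_TO_BTF_ID,
                                         PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED);

        printf("%#x\n", t); /* PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED */
        return 0;
}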
+ */ + err = check_load_mem(env, insn, false, is_ldsx, true, "ldx"); + if (err) + return err; + } else if (class == BPF_STX) { + if (BPF_MODE(insn->code) == BPF_ATOMIC) { + err = check_atomic(env, insn); + if (err) + return err; + env->insn_idx++; + return 0; + } + + if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) { + verbose(env, "BPF_STX uses reserved fields\n"); + return -EINVAL; + } + + err = check_store_reg(env, insn, false); + if (err) + return err; + } else if (class == BPF_ST) { + enum bpf_reg_type dst_reg_type; + + if (BPF_MODE(insn->code) != BPF_MEM || + insn->src_reg != BPF_REG_0) { + verbose(env, "BPF_ST uses reserved fields\n"); + return -EINVAL; + } + /* check src operand */ + err = check_reg_arg(env, insn->dst_reg, SRC_OP); + if (err) + return err; + + dst_reg_type = cur_regs(env)[insn->dst_reg].type; + + /* check that memory (dst_reg + off) is writeable */ + err = check_mem_access(env, env->insn_idx, insn->dst_reg, + insn->off, BPF_SIZE(insn->code), + BPF_WRITE, -1, false, false); + if (err) + return err; + + err = save_aux_ptr_type(env, dst_reg_type, false); + if (err) + return err; + } else if (class == BPF_JMP || class == BPF_JMP32) { + u8 opcode = BPF_OP(insn->code); + + env->jmps_processed++; + if (opcode == BPF_CALL) { + if (BPF_SRC(insn->code) != BPF_K || + (insn->src_reg != BPF_PSEUDO_KFUNC_CALL && + insn->off != 0) || + (insn->src_reg != BPF_REG_0 && + insn->src_reg != BPF_PSEUDO_CALL && + insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || + insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) { + verbose(env, "BPF_CALL uses reserved fields\n"); + return -EINVAL; + } + + if (env->cur_state->active_locks) { + if ((insn->src_reg == BPF_REG_0 && + insn->imm != BPF_FUNC_spin_unlock) || + (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && + (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) { + verbose(env, + "function calls are not allowed while holding a lock\n"); + return -EINVAL; + } + } + if (insn->src_reg == BPF_PSEUDO_CALL) { + err = check_func_call(env, insn, &env->insn_idx); + } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + err = check_kfunc_call(env, insn, &env->insn_idx); + if (!err && is_bpf_throw_kfunc(insn)) + return process_bpf_exit_full(env, do_print_state, true); + } else { + err = check_helper_call(env, insn, &env->insn_idx); + } + if (err) + return err; + + mark_reg_scratched(env, BPF_REG_0); + } else if (opcode == BPF_JA) { + if (BPF_SRC(insn->code) != BPF_K || + insn->src_reg != BPF_REG_0 || + insn->dst_reg != BPF_REG_0 || + (class == BPF_JMP && insn->imm != 0) || + (class == BPF_JMP32 && insn->off != 0)) { + verbose(env, "BPF_JA uses reserved fields\n"); + return -EINVAL; + } + + if (class == BPF_JMP) + env->insn_idx += insn->off + 1; + else + env->insn_idx += insn->imm + 1; + return 0; + } else if (opcode == BPF_EXIT) { + if (BPF_SRC(insn->code) != BPF_K || + insn->imm != 0 || + insn->src_reg != BPF_REG_0 || + insn->dst_reg != BPF_REG_0 || + class == BPF_JMP32) { + verbose(env, "BPF_EXIT uses reserved fields\n"); + return -EINVAL; + } + return process_bpf_exit_full(env, do_print_state, false); + } else { + err = check_cond_jmp_op(env, insn, &env->insn_idx); + if (err) + return err; + } + } else if (class == BPF_LD) { + u8 mode = BPF_MODE(insn->code); + + if (mode == BPF_ABS || mode == BPF_IND) { + err = check_ld_abs(env, insn); + if (err) + return err; + + } else if (mode == BPF_IMM) { + err = check_ld_imm(env, insn); + if (err) + return err; + + env->insn_idx++; + sanitize_mark_insn_seen(env); + } else { + verbose(env, "invalid BPF_LD mode\n"); + 
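The return convention introduced by do_check_insn() is three-way: a negative errno aborts verification, 0 means continue at env->insn_idx, and the positive PROCESS_BPF_EXIT sentinel asks the caller to run exit handling. A minimal dispatcher sketch where a hypothetical step() stands in for do_check_insn():

enum { DONE = 1 }; /* plays the role of PROCESS_BPF_EXIT */

static int step(int *pc)
{
        if (*pc >= 3)
                return DONE; /* pretend we reached BPF_EXIT */
        (*pc)++;
        return 0;
}

static int run(void)
{
        int pc = 0, err;

        for (;;) {
                err = step(&pc);
                if (err < 0)
                        return err; /* verification failure */
                if (err == DONE)
                        break;      /* exit processing, pop next path */
        }
        return 0;
}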
return -EINVAL; + } + } else { + verbose(env, "unknown insn class %d\n", class); + return -EINVAL; + } + + env->insn_idx++; + return 0; +} + static int do_check(struct bpf_verifier_env *env) { bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); struct bpf_verifier_state *state = env->cur_state; struct bpf_insn *insns = env->prog->insnsi; - struct bpf_reg_state *regs; int insn_cnt = env->prog->len; bool do_print_state = false; int prev_insn_idx = -1; for (;;) { - bool exception_exit = false; struct bpf_insn *insn; - u8 class; int err; /* reset current history entry on each new instruction */ @@ -19453,7 +19942,6 @@ static int do_check(struct bpf_verifier_env *env) } insn = &insns[env->insn_idx]; - class = BPF_CLASS(insn->code); if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { verbose(env, @@ -19463,6 +19951,7 @@ static int do_check(struct bpf_verifier_env *env) } state->last_insn_idx = env->prev_insn_idx; + state->insn_idx = env->insn_idx; if (is_prune_point(env, env->insn_idx)) { err = is_state_visited(env, env->insn_idx); @@ -19484,7 +19973,7 @@ static int do_check(struct bpf_verifier_env *env) } if (is_jmp_point(env, env->insn_idx)) { - err = push_insn_history(env, state, 0, 0); + err = push_jmp_history(env, state, 0, 0); if (err) return err; } @@ -19523,215 +20012,60 @@ static int do_check(struct bpf_verifier_env *env) return err; } - regs = cur_regs(env); sanitize_mark_insn_seen(env); prev_insn_idx = env->insn_idx; - if (class == BPF_ALU || class == BPF_ALU64) { - err = check_alu_op(env, insn); - if (err) - return err; - - } else if (class == BPF_LDX) { - bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX; + /* Reduce verification complexity by stopping speculative path + * verification when a nospec is encountered. + */ + if (state->speculative && cur_aux(env)->nospec) + goto process_bpf_exit; - /* Check for reserved fields is already done in - * resolve_pseudo_ldimm64(). + err = do_check_insn(env, &do_print_state); + if (error_recoverable_with_nospec(err) && state->speculative) { + /* Prevent this speculative path from ever reaching the + * insn that would have been unsafe to execute. */ - err = check_load_mem(env, insn, false, is_ldsx, true, - "ldx"); - if (err) - return err; - } else if (class == BPF_STX) { - if (BPF_MODE(insn->code) == BPF_ATOMIC) { - err = check_atomic(env, insn); - if (err) - return err; - env->insn_idx++; - continue; - } - - if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) { - verbose(env, "BPF_STX uses reserved fields\n"); - return -EINVAL; - } - - err = check_store_reg(env, insn, false); - if (err) - return err; - } else if (class == BPF_ST) { - enum bpf_reg_type dst_reg_type; - - if (BPF_MODE(insn->code) != BPF_MEM || - insn->src_reg != BPF_REG_0) { - verbose(env, "BPF_ST uses reserved fields\n"); - return -EINVAL; - } - /* check src operand */ - err = check_reg_arg(env, insn->dst_reg, SRC_OP); - if (err) - return err; - - dst_reg_type = regs[insn->dst_reg].type; - - /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, env->insn_idx, insn->dst_reg, - insn->off, BPF_SIZE(insn->code), - BPF_WRITE, -1, false, false); - if (err) - return err; - - err = save_aux_ptr_type(env, dst_reg_type, false); + cur_aux(env)->nospec = true; + /* If it was an ADD/SUB insn, potentially remove any + * markings for alu sanitization. 
+ */ + cur_aux(env)->alu_state = 0; + goto process_bpf_exit; + } else if (err < 0) { + return err; + } else if (err == PROCESS_BPF_EXIT) { + goto process_bpf_exit; + } + WARN_ON_ONCE(err); + + if (state->speculative && cur_aux(env)->nospec_result) { + /* If we are on a path that performed a jump-op, this + * may skip a nospec patched-in after the jump. This can + * currently never happen because nospec_result is only + * used for the write-ops + * `*(size*)(dst_reg+off)=src_reg|imm32` which must + * never skip the following insn. Still, add a warning + * to document this in case nospec_result is used + * elsewhere in the future. + */ + WARN_ON_ONCE(env->insn_idx != prev_insn_idx + 1); +process_bpf_exit: + mark_verifier_state_scratched(env); + err = update_branch_counts(env, env->cur_state); if (err) return err; - } else if (class == BPF_JMP || class == BPF_JMP32) { - u8 opcode = BPF_OP(insn->code); - - env->jmps_processed++; - if (opcode == BPF_CALL) { - if (BPF_SRC(insn->code) != BPF_K || - (insn->src_reg != BPF_PSEUDO_KFUNC_CALL - && insn->off != 0) || - (insn->src_reg != BPF_REG_0 && - insn->src_reg != BPF_PSEUDO_CALL && - insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || - insn->dst_reg != BPF_REG_0 || - class == BPF_JMP32) { - verbose(env, "BPF_CALL uses reserved fields\n"); - return -EINVAL; - } - - if (env->cur_state->active_locks) { - if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) || - (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && - (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) { - verbose(env, "function calls are not allowed while holding a lock\n"); - return -EINVAL; - } - } - if (insn->src_reg == BPF_PSEUDO_CALL) { - err = check_func_call(env, insn, &env->insn_idx); - } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { - err = check_kfunc_call(env, insn, &env->insn_idx); - if (!err && is_bpf_throw_kfunc(insn)) { - exception_exit = true; - goto process_bpf_exit_full; - } - } else { - err = check_helper_call(env, insn, &env->insn_idx); - } - if (err) - return err; - - mark_reg_scratched(env, BPF_REG_0); - } else if (opcode == BPF_JA) { - if (BPF_SRC(insn->code) != BPF_K || - insn->src_reg != BPF_REG_0 || - insn->dst_reg != BPF_REG_0 || - (class == BPF_JMP && insn->imm != 0) || - (class == BPF_JMP32 && insn->off != 0)) { - verbose(env, "BPF_JA uses reserved fields\n"); - return -EINVAL; - } - - if (class == BPF_JMP) - env->insn_idx += insn->off + 1; - else - env->insn_idx += insn->imm + 1; - continue; - - } else if (opcode == BPF_EXIT) { - if (BPF_SRC(insn->code) != BPF_K || - insn->imm != 0 || - insn->src_reg != BPF_REG_0 || - insn->dst_reg != BPF_REG_0 || - class == BPF_JMP32) { - verbose(env, "BPF_EXIT uses reserved fields\n"); - return -EINVAL; - } -process_bpf_exit_full: - /* We must do check_reference_leak here before - * prepare_func_exit to handle the case when - * state->curframe > 0, it may be a callback - * function, for which reference_state must - * match caller reference state when it exits. - */ - err = check_resource_leak(env, exception_exit, !env->cur_state->curframe, - "BPF_EXIT instruction in main prog"); - if (err) - return err; - - /* The side effect of the prepare_func_exit - * which is being skipped is that it frees - * bpf_func_state. Typically, process_bpf_exit - * will only be hit with outermost exit. - * copy_verifier_state in pop_stack will handle - * freeing of any extra bpf_func_state left over - * from not processing all nested function - * exits. We also skip return code checks as - * they are not needed for exceptional exits. 
- */ - if (exception_exit) - goto process_bpf_exit; - - if (state->curframe) { - /* exit from nested function */ - err = prepare_func_exit(env, &env->insn_idx); - if (err) - return err; - do_print_state = true; - continue; - } - - err = check_return_code(env, BPF_REG_0, "R0"); - if (err) - return err; -process_bpf_exit: - mark_verifier_state_scratched(env); - update_branch_counts(env, env->cur_state); - err = pop_stack(env, &prev_insn_idx, - &env->insn_idx, pop_log); - if (err < 0) { - if (err != -ENOENT) - return err; - break; - } else { - if (verifier_bug_if(env->cur_state->loop_entry, env, - "broken loop detection")) - return -EFAULT; - do_print_state = true; - continue; - } - } else { - err = check_cond_jmp_op(env, insn, &env->insn_idx); - if (err) - return err; - } - } else if (class == BPF_LD) { - u8 mode = BPF_MODE(insn->code); - - if (mode == BPF_ABS || mode == BPF_IND) { - err = check_ld_abs(env, insn); - if (err) - return err; - - } else if (mode == BPF_IMM) { - err = check_ld_imm(env, insn); - if (err) + err = pop_stack(env, &prev_insn_idx, &env->insn_idx, + pop_log); + if (err < 0) { + if (err != -ENOENT) return err; - - env->insn_idx++; - sanitize_mark_insn_seen(env); + break; } else { - verbose(env, "invalid BPF_LD mode\n"); - return -EINVAL; + do_print_state = true; + continue; } - } else { - verbose(env, "unknown insn class %d\n", class); - return -EINVAL; } - - env->insn_idx++; } return 0; @@ -20803,7 +21137,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) -(subprogs[0].stack_depth + 8)); if (epilogue_cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); - return -EINVAL; + return -EFAULT; } else if (epilogue_cnt) { /* Save the ARG_PTR_TO_CTX for the epilogue to use */ cnt = 0; @@ -20826,13 +21160,13 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (ops->gen_prologue || env->seen_direct_write) { if (!ops->gen_prologue) { verbose(env, "bpf verifier is misconfigured\n"); - return -EINVAL; + return -EFAULT; } cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, env->prog); if (cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); - return -EINVAL; + return -EFAULT; } else if (cnt) { new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); if (!new_prog) @@ -20859,6 +21193,29 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) bpf_convert_ctx_access_t convert_ctx_access; u8 mode; + if (env->insn_aux_data[i + delta].nospec) { + WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state); + struct bpf_insn patch[] = { + BPF_ST_NOSPEC(), + *insn, + }; + + cnt = ARRAY_SIZE(patch); + new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = new_prog; + insn = new_prog->insnsi + i + delta; + /* This can not be easily merged with the + * nospec_result-case, because an insn may require a + * nospec before and after itself. Therefore also do not + * 'continue' here but potentially apply further + * patching to insn. *insn should equal patch[1] now. + */ + } + if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) || insn->code == (BPF_LDX | BPF_MEM | BPF_W) || @@ -20908,7 +21265,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } if (type == BPF_WRITE && - env->insn_aux_data[i + delta].sanitize_stack_spill) { + env->insn_aux_data[i + delta].nospec_result) { + /* nospec_result is only used to mitigate Spectre v4 and + * to limit verification-time for Spectre v1. 
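+			 * The barrier lands after the store: patch[] below is
+			 * { *insn, BPF_ST_NOSPEC() }, the same sequence the
+			 * old sanitize_stack_spill handling emitted.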
+ */ struct bpf_insn patch[] = { *insn, BPF_ST_NOSPEC(), @@ -20950,6 +21310,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) * for this case. */ case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED: + case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED: if (type == BPF_READ) { if (BPF_MODE(insn->code) == BPF_MEM) insn->code = BPF_LDX | BPF_PROBE_MEM | @@ -20989,7 +21350,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (type == BPF_WRITE) { verbose(env, "bpf verifier narrow ctx access misconfigured\n"); - return -EINVAL; + return -EFAULT; } size_code = BPF_H; @@ -21008,7 +21369,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (cnt == 0 || cnt >= INSN_BUF_SIZE || (ctx_field_size && !target_size)) { verbose(env, "bpf verifier is misconfigured\n"); - return -EINVAL; + return -EFAULT; } if (is_narrower_load && size < target_size) { @@ -21016,7 +21377,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) off, size, size_default) * 8; if (shift && cnt + 1 >= INSN_BUF_SIZE) { verbose(env, "bpf verifier narrow ctx load misconfigured\n"); - return -EINVAL; + return -EFAULT; } if (ctx_field_size <= 4) { if (shift) @@ -21781,7 +22142,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) cnt = env->ops->gen_ld_abs(insn, insn_buf); if (cnt == 0 || cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); - return -EINVAL; + return -EFAULT; } new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); @@ -22117,7 +22478,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto patch_map_ops_generic; if (cnt <= 0 || cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); - return -EINVAL; + return -EFAULT; } new_prog = bpf_patch_insn_data(env, i + delta, @@ -22477,7 +22838,7 @@ next_insn: !map_ptr->ops->map_poke_untrack || !map_ptr->ops->map_poke_run) { verbose(env, "bpf verifier is misconfigured\n"); - return -EINVAL; + return -EFAULT; } ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux); @@ -22667,7 +23028,12 @@ static void free_states(struct bpf_verifier_env *env) { struct bpf_verifier_state_list *sl; struct list_head *head, *pos, *tmp; - int i; + struct bpf_scc_info *info; + int i, j; + + free_verifier_state(env->cur_state, true); + env->cur_state = NULL; + while (!pop_stack(env, NULL, NULL, false)); list_for_each_safe(pos, tmp, &env->free_list) { sl = container_of(pos, struct bpf_verifier_state_list, node); @@ -22676,6 +23042,14 @@ static void free_states(struct bpf_verifier_env *env) } INIT_LIST_HEAD(&env->free_list); + for (i = 0; i < env->scc_cnt; ++i) { + info = env->scc_info[i]; + for (j = 0; j < info->num_visits; j++) + free_backedges(&info->visits[j]); + kvfree(info); + env->scc_info[i] = NULL; + } + if (!env->explored_states) return; @@ -22703,13 +23077,13 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) env->prev_linfo = NULL; env->pass_cnt++; - state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); + state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL_ACCOUNT); if (!state) return -ENOMEM; state->curframe = 0; state->speculative = false; state->branches = 1; - state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); + state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL_ACCOUNT); if (!state->frame[0]) { kfree(state); return -ENOMEM; @@ -22812,14 +23186,6 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) ret = do_check(env); out: - /* check for NULL is necessary, since cur_state can be 
freed inside - * do_check() under memory pressure. - */ - if (env->cur_state) { - free_verifier_state(env->cur_state, true); - env->cur_state = NULL; - } - while (!pop_stack(env, NULL, NULL, false)); if (!ret && pop_log) bpf_vlog_reset(&env->log, 0); free_states(env); @@ -22935,7 +23301,7 @@ static void print_verification_stats(struct bpf_verifier_env *env) int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, const struct bpf_ctx_arg_aux *info, u32 cnt) { - prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL); + prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL_ACCOUNT); prog->aux->ctx_arg_info_size = cnt; return prog->aux->ctx_arg_info ? 0 : -ENOMEM; @@ -23677,6 +24043,7 @@ static bool can_jump(struct bpf_insn *insn) case BPF_JSLT: case BPF_JSLE: case BPF_JCOND: + case BPF_JSET: return true; } @@ -23879,7 +24246,7 @@ static int compute_live_registers(struct bpf_verifier_env *env) * - repeat the computation while {in,out} fields changes for * any instruction. */ - state = kvcalloc(insn_cnt, sizeof(*state), GFP_KERNEL); + state = kvcalloc(insn_cnt, sizeof(*state), GFP_KERNEL_ACCOUNT); if (!state) { err = -ENOMEM; goto out; @@ -23917,6 +24284,10 @@ static int compute_live_registers(struct bpf_verifier_env *env) if (env->log.level & BPF_LOG_LEVEL2) { verbose(env, "Live regs before insn:\n"); for (i = 0; i < insn_cnt; ++i) { + if (env->insn_aux_data[i].scc) + verbose(env, "%3d ", env->insn_aux_data[i].scc); + else + verbose(env, " "); verbose(env, "%3d: ", i); for (j = BPF_REG_0; j < BPF_REG_10; ++j) if (insn_aux[i].live_regs_before & BIT(j)) @@ -23938,6 +24309,185 @@ out: return err; } +/* + * Compute strongly connected components (SCCs) on the CFG. + * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc. + * If instruction is a sole member of its SCC and there are no self edges, + * assign it SCC number of zero. + * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation. + */ +static int compute_scc(struct bpf_verifier_env *env) +{ + const u32 NOT_ON_STACK = U32_MAX; + + struct bpf_insn_aux_data *aux = env->insn_aux_data; + const u32 insn_cnt = env->prog->len; + int stack_sz, dfs_sz, err = 0; + u32 *stack, *pre, *low, *dfs; + u32 succ_cnt, i, j, t, w; + u32 next_preorder_num; + u32 next_scc_id; + bool assign_scc; + u32 succ[2]; + + next_preorder_num = 1; + next_scc_id = 1; + /* + * - 'stack' accumulates vertices in DFS order, see invariant comment below; + * - 'pre[t] == p' => preorder number of vertex 't' is 'p'; + * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n'; + * - 'dfs' DFS traversal stack, used to emulate explicit recursion. + */ + stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); + pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); + low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); + dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL_ACCOUNT); + if (!stack || !pre || !low || !dfs) { + err = -ENOMEM; + goto exit; + } + /* + * References: + * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms" + * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components" + * + * The algorithm maintains the following invariant: + * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]'; + * - then, vertex 'u' remains on stack while vertex 'v' is on stack. 
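+ *   For example, a back-edge 'u' -> 'v' that closes a loop keeps every
+ *   vertex of that loop on the stack until 'v' itself is popped.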
+ *
+ * Consequently:
+ * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u',
+ *   such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack,
+ *   and thus there is an SCC (loop) containing both 'u' and 'v'.
+ * - If 'low[v] == pre[v]', loops containing 'v' have been explored,
+ *   and 'v' can be considered the root of some SCC.
+ *
+ * Here is pseudo-code for an explicitly recursive version of the algorithm:
+ *
+ *    NOT_ON_STACK = insn_cnt + 1
+ *    pre = [0] * insn_cnt
+ *    low = [0] * insn_cnt
+ *    scc = [0] * insn_cnt
+ *    stack = []
+ *
+ *    next_preorder_num = 1
+ *    next_scc_id = 1
+ *
+ *    def recur(w):
+ *        nonlocal next_preorder_num
+ *        nonlocal next_scc_id
+ *
+ *        pre[w] = next_preorder_num
+ *        low[w] = next_preorder_num
+ *        next_preorder_num += 1
+ *        stack.append(w)
+ *        for s in successors(w):
+ *            # Note: for the classic algorithm the block below would look as:
+ *            #
+ *            # if pre[s] == 0:
+ *            #     recur(s)
+ *            #     low[w] = min(low[w], low[s])
+ *            # elif low[s] != NOT_ON_STACK:
+ *            #     low[w] = min(low[w], pre[s])
+ *            #
+ *            # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])'
+ *            # does not break the invariant and makes the iterative version of the
+ *            # algorithm simpler. See 'Algorithm #3' from [2].
+ *
+ *            # 's' not yet visited
+ *            if pre[s] == 0:
+ *                recur(s)
+ *            # if 's' is on stack, pick lowest reachable preorder number from it;
+ *            # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]',
+ *            # so 'min' would be a noop.
+ *            low[w] = min(low[w], low[s])
+ *
+ *        if low[w] == pre[w]:
+ *            # 'w' is the root of an SCC, pop all vertices
+ *            # below 'w' on stack and assign the same SCC to them.
+ *            while True:
+ *                t = stack.pop()
+ *                low[t] = NOT_ON_STACK
+ *                scc[t] = next_scc_id
+ *                if t == w:
+ *                    break
+ *            next_scc_id += 1
+ *
+ *    for i in range(0, insn_cnt):
+ *        if pre[i] == 0:
+ *            recur(i)
+ *
+ * The implementation below replaces the explicit recursion with the 'dfs' array.
+ */
+	for (i = 0; i < insn_cnt; i++) {
+		if (pre[i])
+			continue;
+		stack_sz = 0;
+		dfs_sz = 1;
+		dfs[0] = i;
+dfs_continue:
+		while (dfs_sz) {
+			w = dfs[dfs_sz - 1];
+			if (pre[w] == 0) {
+				low[w] = next_preorder_num;
+				pre[w] = next_preorder_num;
+				next_preorder_num++;
+				stack[stack_sz++] = w;
+			}
+			/* Visit 'w' successors */
+			succ_cnt = insn_successors(env->prog, w, succ);
+			for (j = 0; j < succ_cnt; ++j) {
+				if (pre[succ[j]]) {
+					low[w] = min(low[w], low[succ[j]]);
+				} else {
+					dfs[dfs_sz++] = succ[j];
+					goto dfs_continue;
+				}
+			}
+			/*
+			 * Preserve the invariant: if some vertex above in the stack
+			 * is reachable from 'w', keep 'w' on the stack.
+			 */
+			if (low[w] < pre[w]) {
+				dfs_sz--;
+				goto dfs_continue;
+			}
+			/*
+			 * Assign SCC number only if component has two or more elements,
+			 * or if component has a self reference.
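+			 * E.g. an insn that only branches to itself forms a
+			 * one-element SCC with a self edge and gets a non-zero
+			 * SCC id, while straight-line code keeps scc == 0.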
+ */ + assign_scc = stack[stack_sz - 1] != w; + for (j = 0; j < succ_cnt; ++j) { + if (succ[j] == w) { + assign_scc = true; + break; + } + } + /* Pop component elements from stack */ + do { + t = stack[--stack_sz]; + low[t] = NOT_ON_STACK; + if (assign_scc) + aux[t].scc = next_scc_id; + } while (t != w); + if (assign_scc) + next_scc_id++; + dfs_sz--; + } + } + env->scc_info = kvcalloc(next_scc_id, sizeof(*env->scc_info), GFP_KERNEL_ACCOUNT); + if (!env->scc_info) { + err = -ENOMEM; + goto exit; + } +exit: + kvfree(stack); + kvfree(pre); + kvfree(low); + kvfree(dfs); + return err; +} + int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { u64 start_time = ktime_get_ns(); @@ -23946,6 +24496,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 u32 log_true_size; bool is_priv; + BTF_TYPE_EMIT(enum bpf_features); + /* no program is valid */ if (ARRAY_SIZE(bpf_verifier_ops) == 0) return -EINVAL; @@ -23953,7 +24505,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 /* 'struct bpf_verifier_env' can be global, but since it's not small, * allocate/free it every time bpf_check() is called */ - env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); + env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL_ACCOUNT); if (!env) return -ENOMEM; @@ -24016,7 +24568,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct list_head), - GFP_USER); + GFP_KERNEL_ACCOUNT); ret = -ENOMEM; if (!env->explored_states) goto skip_full_check; @@ -24059,6 +24611,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret) goto skip_full_check; + ret = compute_scc(env); + if (ret < 0) + goto skip_full_check; + ret = compute_live_registers(env); if (ret < 0) goto skip_full_check; @@ -24143,7 +24699,7 @@ skip_full_check: /* if program passed verifier, update used_maps in bpf_prog_info */ env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt, sizeof(env->used_maps[0]), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!env->prog->aux->used_maps) { ret = -ENOMEM; @@ -24158,7 +24714,7 @@ skip_full_check: /* if program passed verifier, update used_btfs in bpf_prog_aux */ env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt, sizeof(env->used_btfs[0]), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!env->prog->aux->used_btfs) { ret = -ENOMEM; goto err_release_maps; @@ -24199,9 +24755,9 @@ err_unlock: if (!is_priv) mutex_unlock(&bpf_verifier_lock); vfree(env->insn_aux_data); - kvfree(env->insn_hist); err_free_env: kvfree(env->cfg.insn_postorder); + kvfree(env->scc_info); kvfree(env); return ret; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a723b7dc6e4e..312c6a8b55bb 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2074,6 +2074,11 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) for_each_subsys(ss, ssid) INIT_LIST_HEAD(&cgrp->e_csets[ssid]); +#ifdef CONFIG_CGROUP_BPF + for (int i = 0; i < ARRAY_SIZE(cgrp->bpf.revisions); i++) + cgrp->bpf.revisions[i] = 1; +#endif + init_waitqueue_head(&cgrp->offline_waitq); INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c index 039d1eb2f215..507b8f19a262 100644 --- a/kernel/cgroup/legacy_freezer.c +++ b/kernel/cgroup/legacy_freezer.c @@ -188,13 +188,12 @@ static void freezer_attach(struct cgroup_taskset 
*tset) if (!(freezer->state & CGROUP_FREEZING)) { __thaw_task(task); } else { - freeze_task(task); - /* clear FROZEN and propagate upwards */ while (freezer && (freezer->state & CGROUP_FROZEN)) { freezer->state &= ~CGROUP_FROZEN; freezer = parent_freezer(freezer); } + freeze_task(task); } } diff --git a/kernel/events/core.c b/kernel/events/core.c index f34c99f8ce8f..1f746469fda5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -207,6 +207,19 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, __perf_ctx_unlock(&cpuctx->ctx); } +typedef struct { + struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; +} class_perf_ctx_lock_t; + +static inline void class_perf_ctx_lock_destructor(class_perf_ctx_lock_t *_T) +{ perf_ctx_unlock(_T->cpuctx, _T->ctx); } + +static inline class_perf_ctx_lock_t +class_perf_ctx_lock_constructor(struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx) +{ perf_ctx_lock(cpuctx, ctx); return (class_perf_ctx_lock_t){ cpuctx, ctx }; } + #define TASK_TOMBSTONE ((void *)-1L) static bool is_kernel_event(struct perf_event *event) @@ -944,7 +957,13 @@ static void perf_cgroup_switch(struct task_struct *task) if (READ_ONCE(cpuctx->cgrp) == cgrp) return; - perf_ctx_lock(cpuctx, cpuctx->task_ctx); + guard(perf_ctx_lock)(cpuctx, cpuctx->task_ctx); + /* + * Re-check, could've raced vs perf_remove_from_context(). + */ + if (READ_ONCE(cpuctx->cgrp) == NULL) + return; + perf_ctx_disable(&cpuctx->ctx, true); ctx_sched_out(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP); @@ -962,7 +981,6 @@ static void perf_cgroup_switch(struct task_struct *task) ctx_sched_in(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP); perf_ctx_enable(&cpuctx->ctx, true); - perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } static int perf_cgroup_ensure_storage(struct perf_event *event, @@ -2120,18 +2138,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) if (event->group_leader == event) del_event_from_groups(event, ctx); - /* - * If event was in error state, then keep it - * that way, otherwise bogus counts will be - * returned on read(). The only way to get out - * of error state is by explicit re-enabling - * of the event - */ - if (event->state > PERF_EVENT_STATE_OFF) { - perf_cgroup_event_disable(event, ctx); - perf_event_set_state(event, PERF_EVENT_STATE_OFF); - } - ctx->generation++; event->pmu_ctx->nr_events--; } @@ -2149,8 +2155,9 @@ perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event) } static void put_event(struct perf_event *event); -static void event_sched_out(struct perf_event *event, - struct perf_event_context *ctx); +static void __event_disable(struct perf_event *event, + struct perf_event_context *ctx, + enum perf_event_state state); static void perf_put_aux_event(struct perf_event *event) { @@ -2183,8 +2190,7 @@ static void perf_put_aux_event(struct perf_event *event) * state so that we don't try to schedule it again. Note * that perf_event_enable() will clear the ERROR status. */ - event_sched_out(iter, ctx); - perf_event_set_state(event, PERF_EVENT_STATE_ERROR); + __event_disable(iter, ctx, PERF_EVENT_STATE_ERROR); } } @@ -2242,18 +2248,6 @@ static inline struct list_head *get_event_list(struct perf_event *event) &event->pmu_ctx->flexible_active; } -/* - * Events that have PERF_EV_CAP_SIBLING require being part of a group and - * cannot exist on their own, schedule them out and move them into the ERROR - * state. Also see _perf_event_enable(), it will not be able to recover - * this ERROR state. 
- */ -static inline void perf_remove_sibling_event(struct perf_event *event) -{ - event_sched_out(event, event->ctx); - perf_event_set_state(event, PERF_EVENT_STATE_ERROR); -} - static void perf_group_detach(struct perf_event *event) { struct perf_event *leader = event->group_leader; @@ -2289,8 +2283,15 @@ static void perf_group_detach(struct perf_event *event) */ list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) { + /* + * Events that have PERF_EV_CAP_SIBLING require being part of + * a group and cannot exist on their own, schedule them out + * and move them into the ERROR state. Also see + * _perf_event_enable(), it will not be able to recover this + * ERROR state. + */ if (sibling->event_caps & PERF_EV_CAP_SIBLING) - perf_remove_sibling_event(sibling); + __event_disable(sibling, ctx, PERF_EVENT_STATE_ERROR); sibling->group_leader = sibling; list_del_init(&sibling->sibling_list); @@ -2493,11 +2494,14 @@ __perf_remove_from_context(struct perf_event *event, state = PERF_EVENT_STATE_EXIT; if (flags & DETACH_REVOKE) state = PERF_EVENT_STATE_REVOKED; - if (flags & DETACH_DEAD) { - event->pending_disable = 1; + if (flags & DETACH_DEAD) state = PERF_EVENT_STATE_DEAD; - } + event_sched_out(event, ctx); + + if (event->state > PERF_EVENT_STATE_OFF) + perf_cgroup_event_disable(event, ctx); + perf_event_set_state(event, min(event->state, state)); if (flags & DETACH_GROUP) @@ -2562,6 +2566,15 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla event_function_call(event, __perf_remove_from_context, (void *)flags); } +static void __event_disable(struct perf_event *event, + struct perf_event_context *ctx, + enum perf_event_state state) +{ + event_sched_out(event, ctx); + perf_cgroup_event_disable(event, ctx); + perf_event_set_state(event, state); +} + /* * Cross CPU call to disable a performance event */ @@ -2576,13 +2589,18 @@ static void __perf_event_disable(struct perf_event *event, perf_pmu_disable(event->pmu_ctx->pmu); ctx_time_update_event(ctx, event); + /* + * When disabling a group leader, the whole group becomes ineligible + * to run, so schedule out the full group. + */ if (event == event->group_leader) group_sched_out(event, ctx); - else - event_sched_out(event, ctx); - perf_event_set_state(event, PERF_EVENT_STATE_OFF); - perf_cgroup_event_disable(event, ctx); + /* + * But only mark the leader OFF; the siblings will remain + * INACTIVE. + */ + __event_disable(event, ctx, PERF_EVENT_STATE_OFF); perf_pmu_enable(event->pmu_ctx->pmu); } @@ -2656,8 +2674,8 @@ static void perf_event_unthrottle(struct perf_event *event, bool start) static void perf_event_throttle(struct perf_event *event) { - event->pmu->stop(event, 0); event->hw.interrupts = MAX_INTERRUPTS; + event->pmu->stop(event, 0); if (event == event->group_leader) perf_log_throttle(event, 0); } @@ -7439,6 +7457,10 @@ perf_sample_ustack_size(u16 stack_size, u16 header_size, if (!regs) return 0; + /* No mm, no stack, no dump. 
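+	 * Kernel threads have no mm and thus no user stack that could
+	 * be sampled.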
*/ + if (!current->mm) + return 0; + /* * Check if we fit in with the requested stack size into the: * - TASK_SIZE @@ -8150,6 +8172,9 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) const u32 max_stack = event->attr.sample_max_stack; struct perf_callchain_entry *callchain; + if (!current->mm) + user = false; + if (!kernel && !user) return &__empty_callchain; @@ -11749,7 +11774,12 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - if (is_sampling_event(event)) { + /* + * The throttle can be triggered in the hrtimer handler. + * The HRTIMER_NORESTART should be used to stop the timer, + * rather than hrtimer_cancel(). See perf_swevent_hrtimer() + */ + if (is_sampling_event(event) && (hwc->interrupts != MAX_INTERRUPTS)) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); local64_set(&hwc->period_left, ktime_to_ns(remaining)); @@ -11804,7 +11834,8 @@ static void cpu_clock_event_start(struct perf_event *event, int flags) static void cpu_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); - cpu_clock_event_update(event); + if (flags & PERF_EF_UPDATE) + cpu_clock_event_update(event); } static int cpu_clock_event_add(struct perf_event *event, int flags) @@ -11882,7 +11913,8 @@ static void task_clock_event_start(struct perf_event *event, int flags) static void task_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); - task_clock_event_update(event, event->ctx->time); + if (flags & PERF_EF_UPDATE) + task_clock_event_update(event, event->ctx->time); } static int task_clock_event_add(struct perf_event *event, int flags) diff --git a/kernel/exit.c b/kernel/exit.c index bd743900354c..bb184a67ac73 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -940,6 +940,15 @@ void __noreturn do_exit(long code) taskstats_exit(tsk, group_dead); trace_sched_process_exit(tsk, group_dead); + /* + * Since sampling can touch ->mm, make sure to stop everything before we + * tear it down. + * + * Also flushes inherited counters to the parent - before the parent + * gets woken up by child-exit notifications. + */ + perf_event_exit_task(tsk); + exit_mm(); if (group_dead) @@ -955,14 +964,6 @@ void __noreturn do_exit(long code) exit_task_work(tsk); exit_thread(tsk); - /* - * Flush inherited counters to the parent - before the parent - * gets woken up by child-exit notifications. - * - * because of cgroup mode, must be called before cgroup_exit() - */ - perf_event_exit_task(tsk); - sched_autogroup_exit_task(tsk); cgroup_exit(tsk); diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 565f9717c6ca..90d53fb0ee9e 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -583,8 +583,8 @@ int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key, if (futex_get_value(&node, naddr)) return -EFAULT; - if (node != FUTEX_NO_NODE && - (node >= MAX_NUMNODES || !node_possible(node))) + if ((node != FUTEX_NO_NODE) && + ((unsigned int)node >= MAX_NUMNODES || !node_possible(node))) return -EINVAL; } @@ -1629,6 +1629,16 @@ again: mm->futex_phash_new = NULL; if (fph) { + if (cur && (!cur->hash_mask || cur->immutable)) { + /* + * If two threads simultaneously request the global + * hash then the first one performs the switch, + * the second one returns here. 
+ */ + free = fph; + mm->futex_phash_new = new; + return -EBUSY; + } if (cur && !new) { /* * If we have an existing hash, but do not yet have diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b0e0a7332993..2b274007e8ba 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -205,6 +205,14 @@ __irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff, void irq_startup_managed(struct irq_desc *desc) { + struct irq_data *d = irq_desc_get_irq_data(desc); + + /* + * Clear managed-shutdown flag, so we don't repeat managed-startup for + * multiple hotplugs, and cause imbalanced disable depth. + */ + irqd_clr_managed_shutdown(d); + /* * Only start it up when the disable depth is 1, so that a disable, * hotunplug, hotplug sequence does not end up enabling it during diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index f07529ae4895..755346ea9819 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -210,13 +210,6 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu) !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity)) return; - /* - * Don't restore suspended interrupts here when a system comes back - * from S3. They are reenabled via resume_device_irqs(). - */ - if (desc->istate & IRQS_SUSPENDED) - return; - if (irqd_is_managed_and_shutdown(data)) irq_startup_managed(desc); diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c index 1a3d483548e2..ae4c9cbd1b4b 100644 --- a/kernel/irq/irq_sim.c +++ b/kernel/irq/irq_sim.c @@ -202,7 +202,7 @@ struct irq_domain *irq_domain_create_sim_full(struct fwnode_handle *fwnode, void *data) { struct irq_sim_work_ctx *work_ctx __free(kfree) = - kmalloc(sizeof(*work_ctx), GFP_KERNEL); + kzalloc(sizeof(*work_ctx), GFP_KERNEL); if (!work_ctx) return ERR_PTR(-ENOMEM); diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 69b953551677..5a21dbe17950 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -164,11 +164,21 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn, } /* almost as free_reserved_page(), just don't free the page */ -static void kho_restore_page(struct page *page) +static void kho_restore_page(struct page *page, unsigned int order) { - ClearPageReserved(page); - init_page_count(page); - adjust_managed_page_count(page, 1); + unsigned int nr_pages = (1 << order); + + /* Head page gets refcount of 1. */ + set_page_count(page, 1); + + /* For higher order folios, tail pages get a page count of zero. */ + for (unsigned int i = 1; i < nr_pages; i++) + set_page_count(page + i, 0); + + if (order > 0) + prep_compound_page(page, order); + + adjust_managed_page_count(page, nr_pages); } /** @@ -186,15 +196,10 @@ struct folio *kho_restore_folio(phys_addr_t phys) return NULL; order = page->private; - if (order) { - if (order > MAX_PAGE_ORDER) - return NULL; - - prep_compound_page(page, order); - } else { - kho_restore_page(page); - } + if (order > MAX_PAGE_ORDER) + return NULL; + kho_restore_page(page, order); return page_folio(page); } EXPORT_SYMBOL_GPL(kho_restore_folio); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e8a4b720d7d2..14d4499c6fc3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3072,6 +3072,10 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) /* Misaligned rcu_head! */ WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); + /* Avoid NULL dereference if callback is NULL. 
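+	 * Without this check a NULL func would only blow up much later,
+	 * when rcu_do_batch() invokes the callback.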
*/ + if (WARN_ON_ONCE(!func)) + return; + if (debug_rcu_head_queue(head)) { /* * Probable double call_rcu(), so leak the callback. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index dce50fa57471..8988d38d46a3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8545,7 +8545,7 @@ void __init sched_init(void) init_cfs_bandwidth(&root_task_group.cfs_bandwidth, NULL); #endif /* CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_EXT_GROUP_SCHED - root_task_group.scx_weight = CGROUP_WEIGHT_DFL; + scx_tg_init(&root_task_group); #endif /* CONFIG_EXT_GROUP_SCHED */ #ifdef CONFIG_RT_GROUP_SCHED root_task_group.rt_se = (struct sched_rt_entity **)ptr; @@ -8985,7 +8985,7 @@ struct task_group *sched_create_group(struct task_group *parent) if (!alloc_rt_sched_group(tg, parent)) goto err; - scx_group_set_weight(tg, CGROUP_WEIGHT_DFL); + scx_tg_init(tg); alloc_uclamp_sched_group(tg, parent); return tg; diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 2c41c78be61e..b498d867ba21 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4092,6 +4092,11 @@ bool scx_can_stop_tick(struct rq *rq) DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem); static bool scx_cgroup_enabled; +void scx_tg_init(struct task_group *tg) +{ + tg->scx_weight = CGROUP_WEIGHT_DFL; +} + int scx_tg_online(struct task_group *tg) { struct scx_sched *sch = scx_root; @@ -4241,12 +4246,12 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight) percpu_down_read(&scx_cgroup_rwsem); - if (scx_cgroup_enabled && tg->scx_weight != weight) { - if (SCX_HAS_OP(sch, cgroup_set_weight)) - SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL, - tg_cgrp(tg), weight); - tg->scx_weight = weight; - } + if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) && + tg->scx_weight != weight) + SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL, + tg_cgrp(tg), weight); + + tg->scx_weight = weight; percpu_up_read(&scx_cgroup_rwsem); } diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 6e5072f57771..a75835c23f15 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -79,6 +79,7 @@ static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify) {} #ifdef CONFIG_CGROUP_SCHED #ifdef CONFIG_EXT_GROUP_SCHED +void scx_tg_init(struct task_group *tg); int scx_tg_online(struct task_group *tg); void scx_tg_offline(struct task_group *tg); int scx_cgroup_can_attach(struct cgroup_taskset *tset); @@ -88,6 +89,7 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset); void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight); void scx_group_set_idle(struct task_group *tg, bool idle); #else /* CONFIG_EXT_GROUP_SCHED */ +static inline void scx_tg_init(struct task_group *tg) {} static inline int scx_tg_online(struct task_group *tg) { return 0; } static inline void scx_tg_offline(struct task_group *tg) {} static inline int scx_cgroup_can_attach(struct cgroup_taskset *tset) { return 0; } diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 50e8d04ab661..2e5b89d7d866 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1406,6 +1406,15 @@ void run_posix_cpu_timers(void) lockdep_assert_irqs_disabled(); /* + * Ensure that release_task(tsk) can't happen while + * handle_posix_cpu_timers() is running. Otherwise, a concurrent + * posix_cpu_timer_del() may fail to lock_task_sighand(tsk) and + * miss timer->it.cpu.firing != 0. 
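+	 * exit_state is only set once the task has passed exit_notify(),
+	 * after which release_task() can run at any time.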
+ */ + if (tsk->exit_state) + return; + + /* * If the actual expiry is deferred to task work context and the * work is already scheduled there is no point to do anything here. */ diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 132c8be6f635..0a06ea6638fe 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1270,7 +1270,7 @@ __bpf_kfunc_start_defs(); * Return: a bpf_key pointer with a valid key pointer if the key is found, a * NULL pointer otherwise. */ -__bpf_kfunc struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags) +__bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags) { key_ref_t key_ref; struct bpf_key *bkey; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index ea8b364b6818..08141f105c95 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1437,10 +1437,8 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, INIT_LIST_HEAD(&head->list); item = kmalloc(sizeof(*item), GFP_KERNEL); - if (!item) { - kfree(head); + if (!item) goto free_now; - } item->filter = filter; list_add_tail(&item->list, &head->list); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 9234e2c39abf..14d74a7491b8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -455,10 +455,16 @@ static int graph_trace_init(struct trace_array *tr) return 0; } +static struct tracer graph_trace; + static int ftrace_graph_trace_args(struct trace_array *tr, int set) { trace_func_graph_ent_t entry; + /* Do nothing if the current tracer is not this tracer */ + if (tr->current_trace != &graph_trace) + return 0; + if (set) entry = trace_graph_entry_args; else diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 97f37b5bae66..9f9148075828 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -7767,7 +7767,8 @@ void __init workqueue_init_early(void) restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask); cpumask_copy(wq_requested_unbound_cpumask, wq_unbound_cpumask); - + cpumask_andnot(wq_isolated_cpumask, cpu_possible_mask, + housekeeping_cpumask(HK_TYPE_DOMAIN)); pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); unbound_wq_update_pwq_attrs_buf = alloc_workqueue_attrs(); diff --git a/lib/Kconfig b/lib/Kconfig index 6c1b8f184267..37db228f70a9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -716,6 +716,7 @@ config GENERIC_LIB_DEVMEM_IS_ALLOWED config PLDMFW bool + select CRC32 default n config ASN1_ENCODER diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 3e79283b617d..b0c0f8aea269 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -35,6 +35,10 @@ obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519-generic.o libcurve25519-generic-y := curve25519-fiat32.o libcurve25519-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o libcurve25519-generic-y += curve25519-generic.o +# clang versions prior to 18 may blow out the stack with KASAN +ifeq ($(call clang-min-version, 180000),) +KASAN_SANITIZE_curve25519-hacl64.o := n +endif obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o libcurve25519-y += curve25519.o @@ -62,7 +66,7 @@ libsha256-generic-y := sha256-generic.o obj-$(CONFIG_MPILIB) += mpi/ -obj-$(CONFIG_CRYPTO_SELFTESTS) += simd.o +obj-$(CONFIG_CRYPTO_SELFTESTS_FULL) += simd.o obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o libsm3-y := sm3.o diff --git a/lib/crypto/aescfb.c b/lib/crypto/aescfb.c index 437613265e14..2f09ae92ffa0 
100644 --- a/lib/crypto/aescfb.c +++ b/lib/crypto/aescfb.c @@ -106,11 +106,11 @@ MODULE_LICENSE("GPL"); */ static struct { - u8 ptext[64]; - u8 ctext[64]; + u8 ptext[64] __nonstring; + u8 ctext[64] __nonstring; - u8 key[AES_MAX_KEY_SIZE]; - u8 iv[AES_BLOCK_SIZE]; + u8 key[AES_MAX_KEY_SIZE] __nonstring; + u8 iv[AES_BLOCK_SIZE] __nonstring; int klen; int len; diff --git a/lib/crypto/aesgcm.c b/lib/crypto/aesgcm.c index 277824d6b4af..faa4dee9bb1b 100644 --- a/lib/crypto/aesgcm.c +++ b/lib/crypto/aesgcm.c @@ -205,19 +205,19 @@ MODULE_LICENSE("GPL"); * Test code below. Vectors taken from crypto/testmgr.h */ -static const u8 __initconst ctext0[16] = +static const u8 __initconst ctext0[16] __nonstring = "\x58\xe2\xfc\xce\xfa\x7e\x30\x61" "\x36\x7f\x1d\x57\xa4\xe7\x45\x5a"; static const u8 __initconst ptext1[16]; -static const u8 __initconst ctext1[32] = +static const u8 __initconst ctext1[32] __nonstring = "\x03\x88\xda\xce\x60\xb6\xa3\x92" "\xf3\x28\xc2\xb9\x71\xb2\xfe\x78" "\xab\x6e\x47\xd4\x2c\xec\x13\xbd" "\xf5\x3a\x67\xb2\x12\x57\xbd\xdf"; -static const u8 __initconst ptext2[64] = +static const u8 __initconst ptext2[64] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -227,7 +227,7 @@ static const u8 __initconst ptext2[64] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; -static const u8 __initconst ctext2[80] = +static const u8 __initconst ctext2[80] __nonstring = "\x42\x83\x1e\xc2\x21\x77\x74\x24" "\x4b\x72\x21\xb7\x84\xd0\xd4\x9c" "\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0" @@ -239,7 +239,7 @@ static const u8 __initconst ctext2[80] = "\x4d\x5c\x2a\xf3\x27\xcd\x64\xa6" "\x2c\xf3\x5a\xbd\x2b\xa6\xfa\xb4"; -static const u8 __initconst ptext3[60] = +static const u8 __initconst ptext3[60] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -249,7 +249,7 @@ static const u8 __initconst ptext3[60] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39"; -static const u8 __initconst ctext3[76] = +static const u8 __initconst ctext3[76] __nonstring = "\x42\x83\x1e\xc2\x21\x77\x74\x24" "\x4b\x72\x21\xb7\x84\xd0\xd4\x9c" "\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0" @@ -261,17 +261,17 @@ static const u8 __initconst ctext3[76] = "\x5b\xc9\x4f\xbc\x32\x21\xa5\xdb" "\x94\xfa\xe9\x5a\xe7\x12\x1a\x47"; -static const u8 __initconst ctext4[16] = +static const u8 __initconst ctext4[16] __nonstring = "\xcd\x33\xb2\x8a\xc7\x73\xf7\x4b" "\xa0\x0e\xd1\xf3\x12\x57\x24\x35"; -static const u8 __initconst ctext5[32] = +static const u8 __initconst ctext5[32] __nonstring = "\x98\xe7\x24\x7c\x07\xf0\xfe\x41" "\x1c\x26\x7e\x43\x84\xb0\xf6\x00" "\x2f\xf5\x8d\x80\x03\x39\x27\xab" "\x8e\xf4\xd4\x58\x75\x14\xf0\xfb"; -static const u8 __initconst ptext6[64] = +static const u8 __initconst ptext6[64] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -281,7 +281,7 @@ static const u8 __initconst ptext6[64] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; -static const u8 __initconst ctext6[80] = +static const u8 __initconst ctext6[80] __nonstring = "\x39\x80\xca\x0b\x3c\x00\xe8\x41" "\xeb\x06\xfa\xc4\x87\x2a\x27\x57" "\x85\x9e\x1c\xea\xa6\xef\xd9\x84" @@ -293,17 +293,17 @@ static const u8 __initconst ctext6[80] = "\x99\x24\xa7\xc8\x58\x73\x36\xbf" "\xb1\x18\x02\x4d\xb8\x67\x4a\x14"; -static const u8 __initconst ctext7[16] = +static const u8 __initconst ctext7[16] __nonstring = 
"\x53\x0f\x8a\xfb\xc7\x45\x36\xb9" "\xa9\x63\xb4\xf1\xc4\xcb\x73\x8b"; -static const u8 __initconst ctext8[32] = +static const u8 __initconst ctext8[32] __nonstring = "\xce\xa7\x40\x3d\x4d\x60\x6b\x6e" "\x07\x4e\xc5\xd3\xba\xf3\x9d\x18" "\xd0\xd1\xc8\xa7\x99\x99\x6b\xf0" "\x26\x5b\x98\xb5\xd4\x8a\xb9\x19"; -static const u8 __initconst ptext9[64] = +static const u8 __initconst ptext9[64] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -313,7 +313,7 @@ static const u8 __initconst ptext9[64] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; -static const u8 __initconst ctext9[80] = +static const u8 __initconst ctext9[80] __nonstring = "\x52\x2d\xc1\xf0\x99\x56\x7d\x07" "\xf4\x7f\x37\xa3\x2a\x84\x42\x7d" "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9" @@ -325,7 +325,7 @@ static const u8 __initconst ctext9[80] = "\xb0\x94\xda\xc5\xd9\x34\x71\xbd" "\xec\x1a\x50\x22\x70\xe3\xcc\x6c"; -static const u8 __initconst ptext10[60] = +static const u8 __initconst ptext10[60] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -335,7 +335,7 @@ static const u8 __initconst ptext10[60] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39"; -static const u8 __initconst ctext10[76] = +static const u8 __initconst ctext10[76] __nonstring = "\x52\x2d\xc1\xf0\x99\x56\x7d\x07" "\xf4\x7f\x37\xa3\x2a\x84\x42\x7d" "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9" @@ -347,7 +347,7 @@ static const u8 __initconst ctext10[76] = "\x76\xfc\x6e\xce\x0f\x4e\x17\x68" "\xcd\xdf\x88\x53\xbb\x2d\x55\x1b"; -static const u8 __initconst ptext11[60] = +static const u8 __initconst ptext11[60] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -357,7 +357,7 @@ static const u8 __initconst ptext11[60] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39"; -static const u8 __initconst ctext11[76] = +static const u8 __initconst ctext11[76] __nonstring = "\x39\x80\xca\x0b\x3c\x00\xe8\x41" "\xeb\x06\xfa\xc4\x87\x2a\x27\x57" "\x85\x9e\x1c\xea\xa6\xef\xd9\x84" @@ -369,7 +369,7 @@ static const u8 __initconst ctext11[76] = "\x25\x19\x49\x8e\x80\xf1\x47\x8f" "\x37\xba\x55\xbd\x6d\x27\x61\x8c"; -static const u8 __initconst ptext12[719] = +static const u8 __initconst ptext12[719] __nonstring = "\x42\xc1\xcc\x08\x48\x6f\x41\x3f" "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0" "\x58\x83\xf0\xc3\x70\x14\xc0\x5b" @@ -461,7 +461,7 @@ static const u8 __initconst ptext12[719] = "\x59\xfa\xfa\xaa\x44\x04\x01\xa7" "\xa4\x78\xdb\x74\x3d\x8b\xb5"; -static const u8 __initconst ctext12[735] = +static const u8 __initconst ctext12[735] __nonstring = "\x84\x0b\xdb\xd5\xb7\xa8\xfe\x20" "\xbb\xb1\x12\x7f\x41\xea\xb3\xc0" "\xa2\xb4\x37\x19\x11\x58\xb6\x0b" @@ -559,9 +559,9 @@ static struct { const u8 *ptext; const u8 *ctext; - u8 key[AES_MAX_KEY_SIZE]; - u8 iv[GCM_AES_IV_SIZE]; - u8 assoc[20]; + u8 key[AES_MAX_KEY_SIZE] __nonstring; + u8 iv[GCM_AES_IV_SIZE] __nonstring; + u8 assoc[20] __nonstring; int klen; int clen; diff --git a/lib/maple_tree.c b/lib/maple_tree.c index affe979bd14d..00524e55a21e 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5527,8 +5527,9 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) mas->store_type = mas_wr_store_type(&wr_mas); request = mas_prealloc_calc(&wr_mas, entry); if (!request) - return ret; + goto set_flag; + mas->mas_flags &= ~MA_STATE_PREALLOC; mas_node_count_gfp(mas, request, gfp); if (mas_is_err(mas)) { 
 		mas_set_alloc_req(mas, 0);
@@ -5538,6 +5539,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
 		return ret;
 	}
 
+set_flag:
 	mas->mas_flags |= MA_STATE_PREALLOC;
 	return ret;
 }
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 7582dfab7fe3..4af1c8b0775a 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -73,9 +73,9 @@ EXPORT_SYMBOL(sg_nents_for_len);
  * Should only be used casually, it (currently) scans the entire list
  * to get the last entry.
  *
- * Note that the @sgl@ pointer passed in need not be the first one,
- * the important bit is that @nents@ denotes the number of entries that
- * exist from @sgl@.
+ * Note that the @sgl pointer passed in need not be the first one,
+ * the important bit is that @nents denotes the number of entries that
+ * exist from @sgl.
  *
  **/
 struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
@@ -345,7 +345,7 @@ EXPORT_SYMBOL(__sg_alloc_table);
  * @gfp_mask:	GFP allocation mask
  *
  * Description:
- *   Allocate and initialize an sg table. If @nents@ is larger than
+ *   Allocate and initialize an sg table. If @nents is larger than
  *   SG_MAX_SINGLE_ALLOC a chained sg table will be setup.
  *
  **/
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
index 551745df011b..c93d0c56b963 100644
--- a/mm/damon/Kconfig
+++ b/mm/damon/Kconfig
@@ -4,7 +4,6 @@ menu "Data Access Monitoring"
 
 config DAMON
 	bool "DAMON: Data Access Monitoring Framework"
-	default y
 	help
 	  This builds a framework that allows kernel subsystems to monitor
 	  access frequency of each memory region. The information can be useful
diff --git a/mm/execmem.c b/mm/execmem.c
index 9720ac2dfa41..2b683e7d864d 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -254,34 +254,6 @@ out_unlock:
 	return ptr;
 }
 
-static bool execmem_cache_rox = false;
-
-void execmem_cache_make_ro(void)
-{
-	struct maple_tree *free_areas = &execmem_cache.free_areas;
-	struct maple_tree *busy_areas = &execmem_cache.busy_areas;
-	MA_STATE(mas_free, free_areas, 0, ULONG_MAX);
-	MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX);
-	struct mutex *mutex = &execmem_cache.mutex;
-	void *area;
-
-	execmem_cache_rox = true;
-
-	mutex_lock(mutex);
-
-	mas_for_each(&mas_free, area, ULONG_MAX) {
-		unsigned long pages = mas_range_len(&mas_free) >> PAGE_SHIFT;
-		set_memory_ro(mas_free.index, pages);
-	}
-
-	mas_for_each(&mas_busy, area, ULONG_MAX) {
-		unsigned long pages = mas_range_len(&mas_busy) >> PAGE_SHIFT;
-		set_memory_ro(mas_busy.index, pages);
-	}
-
-	mutex_unlock(mutex);
-}
-
 static int execmem_cache_populate(struct execmem_range *range, size_t size)
 {
 	unsigned long vm_flags = VM_ALLOW_HUGE_VMAP;
@@ -302,15 +274,9 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size)
 	/* fill memory with instructions that will trap */
 	execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true);
 
-	if (execmem_cache_rox) {
-		err = set_memory_rox((unsigned long)p, vm->nr_pages);
-		if (err)
-			goto err_free_mem;
-	} else {
-		err = set_memory_x((unsigned long)p, vm->nr_pages);
-		if (err)
-			goto err_free_mem;
-	}
+	err = set_memory_rox((unsigned long)p, vm->nr_pages);
+	if (err)
+		goto err_free_mem;
 
 	err = execmem_cache_add(p, alloc_size);
 	if (err)
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2303,13 +2303,13 @@ static void pofs_unpin(struct pages_or_folios *pofs)
 
 /*
  * Returns the number of collected folios. Return value is always >= 0.
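 * Folios that cannot be isolated right now are still counted, so the
 * caller must check this count rather than test whether the movable
 * list ends up empty.
 */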
*/ -static void collect_longterm_unpinnable_folios( +static unsigned long collect_longterm_unpinnable_folios( struct list_head *movable_folio_list, struct pages_or_folios *pofs) { + unsigned long i, collected = 0; struct folio *prev_folio = NULL; bool drain_allow = true; - unsigned long i; for (i = 0; i < pofs->nr_entries; i++) { struct folio *folio = pofs_get_folio(pofs, i); @@ -2321,6 +2321,8 @@ static void collect_longterm_unpinnable_folios( if (folio_is_longterm_pinnable(folio)) continue; + collected++; + if (folio_is_device_coherent(folio)) continue; @@ -2342,6 +2344,8 @@ static void collect_longterm_unpinnable_folios( NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } + + return collected; } /* @@ -2418,9 +2422,11 @@ static long check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs) { LIST_HEAD(movable_folio_list); + unsigned long collected; - collect_longterm_unpinnable_folios(&movable_folio_list, pofs); - if (list_empty(&movable_folio_list)) + collected = collect_longterm_unpinnable_folios(&movable_folio_list, + pofs); + if (!collected) return 0; return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs); diff --git a/mm/madvise.c b/mm/madvise.c index 5f7a66a1617e..1d44a35ae85c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -508,6 +508,7 @@ restart: pte_offset_map_lock(mm, pmd, addr, &ptl); if (!start_pte) break; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); if (!err) nr = 0; @@ -741,6 +742,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, start_pte = pte; if (!start_pte) break; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); if (!err) nr = 0; diff --git a/mm/memory.c b/mm/memory.c index 8eba595056fe..b0cda5aab398 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4315,26 +4315,6 @@ static struct folio *__alloc_swap_folio(struct vm_fault *vmf) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) -{ - struct swap_info_struct *si = swp_swap_info(entry); - pgoff_t offset = swp_offset(entry); - int i; - - /* - * While allocating a large folio and doing swap_read_folio, which is - * the case the being faulted pte doesn't have swapcache. We need to - * ensure all PTEs have no cache as well, otherwise, we might go to - * swap devices while the content is in swapcache. - */ - for (i = 0; i < max_nr; i++) { - if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) - return i; - } - - return i; -} - /* * Check if the PTEs within a range are contiguous swap entries * and have consistent swapcache, zeromap. diff --git a/mm/shmem.c b/mm/shmem.c index 0c5fb4ffa03a..3a5a65b1f41a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2259,6 +2259,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, folio = swap_cache_get_folio(swap, NULL, 0); order = xa_get_order(&mapping->i_pages, index); if (!folio) { + int nr_pages = 1 << order; bool fallback_order0 = false; /* Or update major stats only when swapin succeeds?? */ @@ -2272,9 +2273,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, * If uffd is active for the vma, we need per-page fault * fidelity to maintain the uffd semantics, then fallback * to swapin order-0 folio, as well as for zswap case. + * Any existing sub folio in the swap cache also blocks + * mTHP swapin. 
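+			 * (non_swapcache_batch() returning less than nr_pages
+			 * means at least one entry in the range already has
+			 * SWAP_HAS_CACHE set.)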
*/ if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) || - !zswap_never_enabled())) + !zswap_never_enabled() || + non_swapcache_batch(swap, nr_pages) != nr_pages)) fallback_order0 = true; /* Skip swapcache for synchronous device. */ diff --git a/mm/swap.h b/mm/swap.h index 2269eb9df0af..9096082a915e 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -106,6 +106,25 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr, return find_next_bit(sis->zeromap, end, start) - start; } +static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) +{ + struct swap_info_struct *si = swp_swap_info(entry); + pgoff_t offset = swp_offset(entry); + int i; + + /* + * While allocating a large folio and doing mTHP swapin, we need to + * ensure all entries are not cached, otherwise, the mTHP folio will + * be in conflict with the folio in swap cache. + */ + for (i = 0; i < max_nr; i++) { + if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) + return i; + } + + return i; +} + #else /* CONFIG_SWAP */ struct swap_iocb; static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug) @@ -199,6 +218,10 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr, return 0; } +static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) +{ + return 0; +} #endif /* CONFIG_SWAP */ /** diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index bc473ad21202..8253978ee0fb 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1084,8 +1084,18 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, pte_t orig_dst_pte, pte_t orig_src_pte, pmd_t *dst_pmd, pmd_t dst_pmdval, spinlock_t *dst_ptl, spinlock_t *src_ptl, - struct folio *src_folio) + struct folio *src_folio, + struct swap_info_struct *si, swp_entry_t entry) { + /* + * Check if the folio still belongs to the target swap entry after + * acquiring the lock. Folio can be freed in the swap cache while + * not locked. + */ + if (src_folio && unlikely(!folio_test_swapcache(src_folio) || + entry.val != src_folio->swap.val)) + return -EAGAIN; + double_pt_lock(dst_ptl, src_ptl); if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte, @@ -1102,6 +1112,25 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, if (src_folio) { folio_move_anon_rmap(src_folio, dst_vma); src_folio->index = linear_page_index(dst_vma, dst_addr); + } else { + /* + * Check if the swap entry is cached after acquiring the src_pte + * lock. Otherwise, we might miss a newly loaded swap cache folio. + * + * Check swap_map directly to minimize overhead, READ_ONCE is sufficient. + * We are trying to catch newly added swap cache, the only possible case is + * when a folio is swapped in and out again staying in swap cache, using the + * same entry before the PTE check above. The PTL is acquired and released + * twice, each time after updating the swap_map's flag. So holding + * the PTL here ensures we see the updated value. False positive is possible, + * e.g. SWP_SYNCHRONOUS_IO swapin may set the flag without touching the + * cache, or during the tiny synchronization window between swap cache and + * swap_map, but it will be gone very quickly, worst result is retry jitters. 
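+		 * A false positive here only costs one more -EAGAIN/retry
+		 * round trip for this PTE move.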
+ */ + if (READ_ONCE(si->swap_map[swp_offset(entry)]) & SWAP_HAS_CACHE) { + double_pt_unlock(dst_ptl, src_ptl); + return -EAGAIN; + } } orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); @@ -1412,7 +1441,7 @@ retry: } err = move_swap_pte(mm, dst_vma, dst_addr, src_addr, dst_pte, src_pte, orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, - dst_ptl, src_ptl, src_folio); + dst_ptl, src_ptl, src_folio, si, entry); } out:
diff --git a/mm/util.c b/mm/util.c index 448117da071f..0b270c43d7d1 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1131,3 +1131,43 @@ void flush_dcache_folio(struct folio *folio) } EXPORT_SYMBOL(flush_dcache_folio); #endif + +/** + * compat_vma_mmap_prepare() - Apply the file's .mmap_prepare() hook to an + * existing VMA + * @file: The file which possesses an f_op->mmap_prepare() hook + * @vma: The VMA to apply the .mmap_prepare() hook to. + * + * Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, certain + * 'wrapper' file systems invoke a nested mmap hook of an underlying file. + * + * Until all filesystems are converted to use .mmap_prepare(), we must be + * conservative and continue to invoke these 'wrapper' filesystems using the + * deprecated .mmap() hook. + * + * However, we have a problem if the underlying file system possesses an + * .mmap_prepare() hook, as we are in a different context when we invoke the + * .mmap() hook, already having a VMA to deal with. + * + * compat_vma_mmap_prepare() is a compatibility function that takes VMA state, + * establishes a struct vm_area_desc descriptor, passes it to the underlying + * .mmap_prepare() hook, and applies any changes performed by it. + * + * Once the conversion of filesystems is complete, this function will no longer + * be required and will be removed. + * + * Returns: 0 on success, or an error code. + */ +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma) +{ + struct vm_area_desc desc; + int err; + + err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc)); + if (err) + return err; + set_vma_from_desc(vma, &desc); + + return 0; +} +EXPORT_SYMBOL(compat_vma_mmap_prepare); @@ -967,26 +967,9 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( err = dup_anon_vma(next, middle, &anon_dup); } - if (err) + if (err || commit_merge(vmg)) goto abort; - err = commit_merge(vmg); - if (err) { - VM_WARN_ON(err != -ENOMEM); - - if (anon_dup) - unlink_anon_vmas(anon_dup); - - /* - * We've cleaned up any cloned anon_vma's, no VMAs have been - * modified, no harm no foul if the user requests that we not - * report this and just give up, leaving the VMAs unmerged. - */ - if (!vmg->give_up_on_oom) - vmg->state = VMA_MERGE_ERROR_NOMEM; - return NULL; - } - khugepaged_enter_vma(vmg->target, vmg->flags); vmg->state = VMA_MERGE_SUCCESS; return vmg->target; @@ -995,6 +978,9 @@ abort: vma_iter_set(vmg->vmi, start); vma_iter_load(vmg->vmi); + if (anon_dup) + unlink_anon_vmas(anon_dup); + /* * This means we have failed to clone anon_vma's correctly, but no * actual changes to VMAs have occurred, so no harm no foul - if the * @@ -3127,7 +3113,6 @@ int __vm_munmap(unsigned long start, size_t len, bool unlock) return ret; } - /* Insert vm structure into process list sorted by address * and into the inode's i_mmap tree. If vm_file is non-NULL * then i_mmap_rwsem is taken here.
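As a usage sketch for compat_vma_mmap_prepare() above (illustrative only: "wrapperfs" and its wrapperfs_backing_file() helper are hypothetical stand-ins, while call_mmap() and the f_op->mmap_prepare hook are existing interfaces), a wrapper filesystem's deprecated .mmap hook could forward to an underlying file like so:

	static int wrapperfs_mmap(struct file *file, struct vm_area_struct *vma)
	{
		/* Hypothetical helper resolving the wrapped lower file. */
		struct file *backing = wrapperfs_backing_file(file);

		/*
		 * Lower fs already converted: bridge the existing VMA state
		 * through a struct vm_area_desc.
		 */
		if (backing->f_op->mmap_prepare)
			return compat_vma_mmap_prepare(backing, vma);

		/* Lower fs not converted yet: keep the deprecated hook. */
		return call_mmap(backing, vma);
	}

The vma_to_desc()/set_vma_from_desc() pair in the next hunk supplies the state bridging this relies on.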
@@ -222,6 +222,53 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi, return 0; } + +/* + * Temporary helper functions for file systems that wrap an invocation of + * f_op->mmap() but whose underlying file system might implement + * f_op->mmap_prepare(). + */ + +static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc) +{ + desc->mm = vma->vm_mm; + desc->start = vma->vm_start; + desc->end = vma->vm_end; + + desc->pgoff = vma->vm_pgoff; + desc->file = vma->vm_file; + desc->vm_flags = vma->vm_flags; + desc->page_prot = vma->vm_page_prot; + + desc->vm_ops = NULL; + desc->private_data = NULL; + + return desc; +} + +static inline void set_vma_from_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc) +{ + /* + * Since we're invoking .mmap_prepare() despite having a partially + * established VMA, we must take care to handle setting fields + * correctly. + */ + + /* Mutable fields. Populated with initial state. */ + vma->vm_pgoff = desc->pgoff; + if (vma->vm_file != desc->file) + vma_set_file(vma, desc->file); + if (vma->vm_flags != desc->vm_flags) + vm_flags_set(vma, desc->vm_flags); + vma->vm_page_prot = desc->page_prot; + + /* User-defined fields. */ + vma->vm_ops = desc->vm_ops; + vma->vm_private_data = desc->private_data; +} + int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start,
diff --git a/net/atm/clip.c b/net/atm/clip.c index 61b5b700817d..b234dc3bcb0d 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -193,12 +193,6 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("\n"); - if (!clip_devs) { - atm_return(vcc, skb->truesize); - kfree_skb(skb); - return; - } - if (!skb) { pr_debug("removing VCC %p\n", clip_vcc); if (clip_vcc->entry) @@ -208,6 +202,11 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) return; } atm_return(vcc, skb->truesize); + if (!clip_devs) { + kfree_skb(skb); + return; + } + skb->dev = clip_vcc->entry ?
clip_vcc->entry->neigh->dev : clip_devs; /* clip_vcc->entry == NULL if we don't have an IP address yet */ if (!skb->dev) { diff --git a/net/atm/common.c b/net/atm/common.c index 9b75699992ff..d7f7976ea13a 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -635,6 +635,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) skb->dev = NULL; /* for paths shared with net_device interfaces */ if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { + atm_return_tx(vcc, skb); kfree_skb(skb); error = -EFAULT; goto out; diff --git a/net/atm/lec.c b/net/atm/lec.c index acef984f3367..afb8d3eb2185 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -124,6 +124,7 @@ static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* Device structures */ static struct net_device *dev_lec[MAX_LEC_ITF]; +static DEFINE_MUTEX(lec_mutex); #if IS_ENABLED(CONFIG_BRIDGE) static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) @@ -685,6 +686,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) int bytes_left; struct atmlec_ioc ioc_data; + lockdep_assert_held(&lec_mutex); /* Lecd must be up in this case */ bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc)); if (bytes_left != 0) @@ -710,6 +712,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { + lockdep_assert_held(&lec_mutex); if (arg < 0 || arg >= MAX_LEC_ITF) return -EINVAL; arg = array_index_nospec(arg, MAX_LEC_ITF); @@ -725,6 +728,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) int i; struct lec_priv *priv; + lockdep_assert_held(&lec_mutex); if (arg < 0) arg = 0; if (arg >= MAX_LEC_ITF) @@ -742,6 +746,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); if (register_netdev(dev_lec[i])) { free_netdev(dev_lec[i]); + dev_lec[i] = NULL; return -EINVAL; } @@ -904,7 +909,6 @@ static void *lec_itf_walk(struct lec_state *state, loff_t *l) v = (dev && netdev_priv(dev)) ? 
lec_priv_walk(state, l, netdev_priv(dev)) : NULL; if (!v && dev) { - dev_put(dev); /* Partial state reset for the next time we get called */ dev = NULL; } @@ -928,6 +932,7 @@ static void *lec_seq_start(struct seq_file *seq, loff_t *pos) { struct lec_state *state = seq->private; + mutex_lock(&lec_mutex); state->itf = 0; state->dev = NULL; state->locked = NULL; @@ -945,8 +950,9 @@ static void lec_seq_stop(struct seq_file *seq, void *v) if (state->dev) { spin_unlock_irqrestore(&state->locked->lec_arp_lock, state->flags); - dev_put(state->dev); + state->dev = NULL; } + mutex_unlock(&lec_mutex); } static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -1003,6 +1009,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return -ENOIOCTLCMD; } + mutex_lock(&lec_mutex); switch (cmd) { case ATMLEC_CTRL: err = lecd_attach(vcc, (int)arg); @@ -1017,6 +1024,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } + mutex_unlock(&lec_mutex); return err; } diff --git a/net/atm/raw.c b/net/atm/raw.c index 2b5f78a7ec3e..1e6511ec842c 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -36,7 +36,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("(%d) %d -= %d\n", vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize); - WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc)); + atm_return_tx(vcc, skb); dev_kfree_skb_any(skb); sk->sk_write_space(sk); } diff --git a/net/atm/resources.c b/net/atm/resources.c index 995d29e7fb13..b19d851e1f44 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -146,11 +146,10 @@ void atm_dev_deregister(struct atm_dev *dev) */ mutex_lock(&atm_dev_mutex); list_del(&dev->dev_list); - mutex_unlock(&atm_dev_mutex); - atm_dev_release_vccs(dev); atm_unregister_sysfs(dev); atm_proc_dev_deregister(dev); + mutex_unlock(&atm_dev_mutex); atm_dev_put(dev); } diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 1bc51e2b05a3..3f72111ba651 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -242,7 +242,7 @@ u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) return ad_len; } -u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) +u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size) { struct adv_info *adv = NULL; u8 ad_len = 0, flags = 0; @@ -286,7 +286,7 @@ u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) /* If flags would still be empty, then there is no need to * include the "Flags" AD field". 
*/ - if (flags) { + if (flags && (ad_len + eir_precalc_len(1) <= size)) { ptr[0] = 0x02; ptr[1] = EIR_FLAGS; ptr[2] = flags; @@ -316,7 +316,8 @@ skip_flags: } /* Provide Tx Power only if we can provide a valid value for it */ - if (adv_tx_power != HCI_TX_POWER_INVALID) { + if (adv_tx_power != HCI_TX_POWER_INVALID && + (ad_len + eir_precalc_len(1) <= size)) { ptr[0] = 0x02; ptr[1] = EIR_TX_POWER; ptr[2] = (u8)adv_tx_power; @@ -366,17 +367,19 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr) void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len) { - while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) { + size_t dlen; + + while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, &dlen))) { u16 value = get_unaligned_le16(eir); if (uuid == value) { if (len) - *len -= 2; + *len = dlen - 2; return &eir[2]; } - eir += *len; - eir_len -= *len; + eir += dlen; + eir_len -= dlen; } return NULL; diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h index 5c89a05e8b29..9372db83f912 100644 --- a/net/bluetooth/eir.h +++ b/net/bluetooth/eir.h @@ -9,7 +9,7 @@ void eir_create(struct hci_dev *hdev, u8 *data); -u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); +u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size); u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 99efeed6a766..4f379184df5b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1501,8 +1501,8 @@ static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos) /* This function requires the caller holds hdev->lock */ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst, - struct bt_iso_qos *qos, __u8 base_len, - __u8 *base) + __u8 sid, struct bt_iso_qos *qos, + __u8 base_len, __u8 *base) { struct hci_conn *conn; int err; @@ -1543,6 +1543,7 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst, return conn; conn->state = BT_CONNECT; + conn->sid = sid; hci_conn_hold(conn); return conn; @@ -2062,7 +2063,8 @@ static int create_big_sync(struct hci_dev *hdev, void *data) if (qos->bcast.bis) sync_interval = interval * 4; - err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->le_per_adv_data_len, + err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->sid, + conn->le_per_adv_data_len, conn->le_per_adv_data, flags, interval, interval, sync_interval); if (err) @@ -2134,7 +2136,7 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err) } } -struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, +struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, struct bt_iso_qos *qos, __u8 base_len, __u8 *base) { @@ -2156,7 +2158,7 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, base, base_len); /* We need hci_conn object using the BDADDR_ANY as dst */ - conn = hci_add_bis(hdev, dst, qos, base_len, eir); + conn = hci_add_bis(hdev, dst, sid, qos, base_len, eir); if (IS_ERR(conn)) return conn; @@ -2207,20 +2209,35 @@ static void bis_mark_per_adv(struct hci_conn *conn, void *data) } struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, - __u8 dst_type, struct bt_iso_qos *qos, + __u8 dst_type, __u8 sid, + struct bt_iso_qos *qos, __u8 base_len, __u8 *base) { struct hci_conn *conn; int err; struct iso_list_data data; - conn = hci_bind_bis(hdev, dst, qos, 
base_len, base); + conn = hci_bind_bis(hdev, dst, sid, qos, base_len, base); if (IS_ERR(conn)) return conn; if (conn->state == BT_CONNECTED) return conn; + /* Check if SID needs to be allocated then search for the first + * available. + */ + if (conn->sid == HCI_SID_INVALID) { + u8 sid; + + for (sid = 0; sid <= 0x0f; sid++) { + if (!hci_find_adv_sid(hdev, sid)) { + conn->sid = sid; + break; + } + } + } + data.big = qos->bcast.big; data.bis = qos->bcast.bis; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3b49828160b7..14d7221b8ac0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -64,7 +64,7 @@ static DEFINE_IDA(hci_index_ida); /* Get HCI device by index. * Device is held on return. */ -struct hci_dev *hci_dev_get(int index) +static struct hci_dev *__hci_dev_get(int index, int *srcu_index) { struct hci_dev *hdev = NULL, *d; @@ -77,6 +77,8 @@ struct hci_dev *hci_dev_get(int index) list_for_each_entry(d, &hci_dev_list, list) { if (d->id == index) { hdev = hci_dev_hold(d); + if (srcu_index) + *srcu_index = srcu_read_lock(&d->srcu); break; } } @@ -84,6 +86,22 @@ struct hci_dev *hci_dev_get(int index) return hdev; } +struct hci_dev *hci_dev_get(int index) +{ + return __hci_dev_get(index, NULL); +} + +static struct hci_dev *hci_dev_get_srcu(int index, int *srcu_index) +{ + return __hci_dev_get(index, srcu_index); +} + +static void hci_dev_put_srcu(struct hci_dev *hdev, int srcu_index) +{ + srcu_read_unlock(&hdev->srcu, srcu_index); + hci_dev_put(hdev); +} + /* ---- Inquiry support ---- */ bool hci_discovery_active(struct hci_dev *hdev) @@ -568,9 +586,9 @@ static int hci_dev_do_reset(struct hci_dev *hdev) int hci_dev_reset(__u16 dev) { struct hci_dev *hdev; - int err; + int err, srcu_index; - hdev = hci_dev_get(dev); + hdev = hci_dev_get_srcu(dev, &srcu_index); if (!hdev) return -ENODEV; @@ -592,7 +610,7 @@ int hci_dev_reset(__u16 dev) err = hci_dev_do_reset(hdev); done: - hci_dev_put(hdev); + hci_dev_put_srcu(hdev, srcu_index); return err; } @@ -1585,6 +1603,19 @@ struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance) } /* This function requires the caller holds hdev->lock */ +struct adv_info *hci_find_adv_sid(struct hci_dev *hdev, u8 sid) +{ + struct adv_info *adv; + + list_for_each_entry(adv, &hdev->adv_instances, list) { + if (adv->sid == sid) + return adv; + } + + return NULL; +} + +/* This function requires the caller holds hdev->lock */ struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) { struct adv_info *cur_instance; @@ -1736,7 +1767,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, } /* This function requires the caller holds hdev->lock */ -struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, +struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u8 sid, u32 flags, u8 data_len, u8 *data, u32 min_interval, u32 max_interval) { @@ -1748,6 +1779,7 @@ struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, if (IS_ERR(adv)) return adv; + adv->sid = sid; adv->periodic = true; adv->per_adv_data_len = data_len; @@ -1877,10 +1909,8 @@ void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) if (monitor->handle) idr_remove(&hdev->adv_monitors_idr, monitor->handle); - if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) { + if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) hdev->adv_monitors_cnt--; - mgmt_adv_monitor_removed(hdev, monitor->handle); - } kfree(monitor); } @@ -2421,6 +2451,11 @@ struct 
hci_dev *hci_alloc_dev_priv(int sizeof_priv) if (!hdev) return NULL; + if (init_srcu_struct(&hdev->srcu)) { + kfree(hdev); + return NULL; + } + hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); hdev->esco_type = (ESCO_HV1); hdev->link_mode = (HCI_LM_ACCEPT); @@ -2487,6 +2522,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); + mutex_init(&hdev->mgmt_pending_lock); ida_init(&hdev->unset_handle_ida); @@ -2665,6 +2701,9 @@ void hci_unregister_dev(struct hci_dev *hdev) list_del(&hdev->list); write_unlock(&hci_dev_list_lock); + synchronize_srcu(&hdev->srcu); + cleanup_srcu_struct(&hdev->srcu); + disable_work_sync(&hdev->rx_work); disable_work_sync(&hdev->cmd_work); disable_work_sync(&hdev->tx_work); @@ -3417,23 +3456,18 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) bt_dev_err(hdev, "link tx timeout"); - rcu_read_lock(); + hci_dev_lock(hdev); /* Kill stalled connections */ - list_for_each_entry_rcu(c, &h->list, list) { + list_for_each_entry(c, &h->list, list) { if (c->type == type && c->sent) { bt_dev_err(hdev, "killing stalled connection %pMR", &c->dst); - /* hci_disconnect might sleep, so, we have to release - * the RCU read lock before calling it. - */ - rcu_read_unlock(); hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); - rcu_read_lock(); } } - rcu_read_unlock(); + hci_dev_unlock(hdev); } static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type, diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 62d1ff951ebe..6687f2a4d1eb 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1261,10 +1261,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) hci_cpu_to_le24(adv->min_interval, cp.min_interval); hci_cpu_to_le24(adv->max_interval, cp.max_interval); cp.tx_power = adv->tx_power; + cp.sid = adv->sid; } else { hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval); hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval); cp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE; + cp.sid = 0x00; } secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK); @@ -1559,7 +1561,8 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance) static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) { u8 bid[3]; - u8 ad[4 + 3]; + u8 ad[HCI_MAX_EXT_AD_LENGTH]; + u8 len; /* Skip if NULL adv as instance 0x00 is used for general purpose * advertising so it cannot used for the likes of Broadcast Announcement @@ -1585,14 +1588,16 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) /* Generate Broadcast ID */ get_random_bytes(bid, sizeof(bid)); - eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); - hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL); + len = eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); + memcpy(ad + len, adv->adv_data, adv->adv_data_len); + hci_set_adv_instance_data(hdev, adv->instance, len + adv->adv_data_len, + ad, 0, NULL); return hci_update_adv_data_sync(hdev, adv->instance); } -int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, - u8 *data, u32 flags, u16 min_interval, +int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 sid, + u8 data_len, u8 *data, u32 flags, u16 min_interval, u16 max_interval, u16 sync_interval) { struct adv_info *adv = NULL; @@ -1603,9 +1608,28 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, if (instance) { adv = hci_find_adv_instance(hdev, instance); - /* 
Create an instance if that could not be found */ - if (!adv) { - adv = hci_add_per_instance(hdev, instance, flags, + if (adv) { + if (sid != HCI_SID_INVALID && adv->sid != sid) { + /* If the SIDs don't match, attempt to find by + * SID. + */ + adv = hci_find_adv_sid(hdev, sid); + if (!adv) { + bt_dev_err(hdev, + "Unable to find adv_info"); + return -EINVAL; + } + } + + /* Turn it into periodic advertising */ + adv->periodic = true; + adv->per_adv_data_len = data_len; + if (data) + memcpy(adv->per_adv_data, data, data_len); + adv->flags = flags; + } else if (!adv) { + /* Create an instance if that could not be found */ + adv = hci_add_per_instance(hdev, instance, sid, flags, data_len, data, sync_interval, sync_interval); @@ -1812,7 +1836,8 @@ static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) return 0; } - len = eir_create_adv_data(hdev, instance, pdu->data); + len = eir_create_adv_data(hdev, instance, pdu->data, + HCI_MAX_EXT_AD_LENGTH); pdu->length = len; pdu->handle = adv ? adv->handle : instance; @@ -1843,7 +1868,7 @@ static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) memset(&cp, 0, sizeof(cp)); - len = eir_create_adv_data(hdev, instance, cp.data); + len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len &&
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 6e2c752aaa8f..3c2c98eecc62 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -336,7 +336,7 @@ static int iso_connect_bis(struct sock *sk) struct hci_dev *hdev; int err; - BT_DBG("%pMR", &iso_pi(sk)->src); + BT_DBG("%pMR (SID 0x%2.2x)", &iso_pi(sk)->src, iso_pi(sk)->bc_sid); hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src, iso_pi(sk)->src_type); @@ -365,7 +365,7 @@ static int iso_connect_bis(struct sock *sk) /* Just bind if DEFER_SETUP has been set */ if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { - hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst, + hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst, iso_pi(sk)->bc_sid, &iso_pi(sk)->qos, iso_pi(sk)->base_len, iso_pi(sk)->base); if (IS_ERR(hcon)) { err = PTR_ERR(hcon); goto unlock; } } else { hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst, le_addr_type(iso_pi(sk)->dst_type), - &iso_pi(sk)->qos, iso_pi(sk)->base_len, - iso_pi(sk)->base); + iso_pi(sk)->bc_sid, &iso_pi(sk)->qos, + iso_pi(sk)->base_len, iso_pi(sk)->base); if (IS_ERR(hcon)) { err = PTR_ERR(hcon); goto unlock; } + + /* Update SID if it was not set */ + if (iso_pi(sk)->bc_sid == HCI_SID_INVALID) + iso_pi(sk)->bc_sid = hcon->sid; } conn = iso_conn_add(hcon); @@ -1337,10 +1341,13 @@ static int iso_sock_getname(struct socket *sock, struct sockaddr *addr, addr->sa_family = AF_BLUETOOTH; if (peer) { + struct hci_conn *hcon = iso_pi(sk)->conn ?
+ iso_pi(sk)->conn->hcon : NULL; + bacpy(&sa->iso_bdaddr, &iso_pi(sk)->dst); sa->iso_bdaddr_type = iso_pi(sk)->dst_type; - if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { + if (hcon && hcon->type == BIS_LINK) { sa->iso_bc->bc_sid = iso_pi(sk)->bc_sid; sa->iso_bc->bc_num_bis = iso_pi(sk)->bc_num_bis; memcpy(sa->iso_bc->bc_bis, iso_pi(sk)->bc_bis, diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a5bde5db58ef..40daa38276f3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3415,7 +3415,7 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; struct l2cap_conf_efs efs; u8 remote_efs = 0; - u16 mtu = L2CAP_DEFAULT_MTU; + u16 mtu = 0; u16 result = L2CAP_CONF_SUCCESS; u16 size; @@ -3520,6 +3520,13 @@ done: /* Configure output options and let the other side know * which ones we don't like. */ + /* If MTU is not provided in configure request, use the most recently + * explicitly or implicitly accepted value for the other direction, + * or the default value. + */ + if (mtu == 0) + mtu = chan->imtu ? chan->imtu : L2CAP_DEFAULT_MTU; + if (mtu < L2CAP_DEFAULT_MIN_MTU) result = L2CAP_CONF_UNACCEPT; else { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 14a9462fced5..d540f7b4f75f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1447,22 +1447,17 @@ static void settings_rsp(struct mgmt_pending_cmd *cmd, void *data) send_settings_rsp(cmd->sk, cmd->opcode, match->hdev); - list_del(&cmd->list); - if (match->sk == NULL) { match->sk = cmd->sk; sock_hold(match->sk); } - - mgmt_pending_free(cmd); } static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data) { u8 *status = data; - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); - mgmt_pending_remove(cmd); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, *status); } static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) @@ -1476,8 +1471,6 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) if (cmd->cmd_complete) { cmd->cmd_complete(cmd, match->mgmt_status); - mgmt_pending_remove(cmd); - return; } @@ -1486,13 +1479,13 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) static int generic_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, cmd->param, cmd->param_len); } static int addr_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, cmd->param, sizeof(struct mgmt_addr_info)); } @@ -1532,7 +1525,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data, if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); goto done; } @@ -1707,7 +1700,7 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data, if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); goto done; } @@ -1943,8 +1936,8 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) new_settings(hdev, NULL); } - 
mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, cmd_status_rsp, - &mgmt_err); + mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, + cmd_status_rsp, &mgmt_err); return; } @@ -1954,7 +1947,7 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); } - mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, settings_rsp, &match); if (changed) new_settings(hdev, match.sk); @@ -2074,12 +2067,12 @@ static void set_le_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); if (status) { - mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp, - &status); + mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, cmd_status_rsp, + &status); return; } - mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, settings_rsp, &match); new_settings(hdev, match.sk); @@ -2138,7 +2131,7 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) struct sock *sk = cmd->sk; if (status) { - mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, + mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true, cmd_status_rsp, &status); return; } @@ -2638,7 +2631,7 @@ static void mgmt_class_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), hdev->dev_class, 3); mgmt_pending_free(cmd); @@ -3427,7 +3420,7 @@ static int pairing_complete(struct mgmt_pending_cmd *cmd, u8 status) bacpy(&rp.addr.bdaddr, &conn->dst); rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type); - err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, + err = mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_PAIR_DEVICE, status, &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ @@ -5108,24 +5101,14 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk); } -void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle) +static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev, + __le16 handle) { struct mgmt_ev_adv_monitor_removed ev; - struct mgmt_pending_cmd *cmd; - struct sock *sk_skip = NULL; - struct mgmt_cp_remove_adv_monitor *cp; - - cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev); - if (cmd) { - cp = cmd->param; - - if (cp->monitor_handle) - sk_skip = cmd->sk; - } - ev.monitor_handle = cpu_to_le16(handle); + ev.monitor_handle = handle; - mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk_skip); + mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk); } static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, @@ -5196,7 +5179,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, hci_update_passive_scan(hdev); } - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); mgmt_pending_remove(cmd); @@ -5227,8 +5210,7 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, if (pending_find(MGMT_OP_SET_LE, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || - pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev) || - pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) { + 
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) { status = MGMT_STATUS_BUSY; goto unlock; } @@ -5398,8 +5380,7 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_remove_adv_monitor *cp; - if (status == -ECANCELED || - cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) + if (status == -ECANCELED) return; hci_dev_lock(hdev); @@ -5408,12 +5389,14 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, rp.monitor_handle = cp->monitor_handle; - if (!status) + if (!status) { + mgmt_adv_monitor_removed(cmd->sk, hdev, cp->monitor_handle); hci_update_passive_scan(hdev); + } - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); hci_dev_unlock(hdev); bt_dev_dbg(hdev, "remove monitor %d complete, status %d", @@ -5423,10 +5406,6 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, static int mgmt_remove_adv_monitor_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; - - if (cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) - return -ECANCELED; - struct mgmt_cp_remove_adv_monitor *cp = cmd->param; u16 handle = __le16_to_cpu(cp->monitor_handle); @@ -5445,14 +5424,13 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (pending_find(MGMT_OP_SET_LE, hdev) || - pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) { status = MGMT_STATUS_BUSY; goto unlock; } - cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len); + cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len); if (!cmd) { status = MGMT_STATUS_NO_RESOURCES; goto unlock; @@ -5462,7 +5440,7 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, mgmt_remove_adv_monitor_complete); if (err) { - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); if (err == -ENOMEM) status = MGMT_STATUS_NO_RESOURCES; @@ -5792,7 +5770,7 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err) cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev)) return; - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); @@ -6013,7 +5991,7 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); @@ -6238,7 +6216,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) u8 status = mgmt_status(err); if (status) { - mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, cmd_status_rsp, &status); return; } @@ -6248,7 +6226,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) else hci_dev_clear_flag(hdev, HCI_ADVERTISING); - mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp, + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, settings_rsp, &match); new_settings(hdev, match.sk); @@ -6592,7 +6570,7 @@ static void set_bredr_complete(struct hci_dev *hdev, 
void *data, int err) */ hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); } else { send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev); new_settings(hdev, cmd->sk); @@ -6729,7 +6707,7 @@ static void set_secure_conn_complete(struct hci_dev *hdev, void *data, int err) if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); goto done; } @@ -7176,7 +7154,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err) rp.max_tx_power = HCI_TX_POWER_INVALID; } - mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_GET_CONN_INFO, status, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -7336,7 +7314,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err) } complete: - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -8586,10 +8564,10 @@ static void add_advertising_complete(struct hci_dev *hdev, void *data, int err) rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); add_adv_complete(hdev, cmd->sk, cp->instance, err); @@ -8777,10 +8755,10 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data, hci_remove_adv_instance(hdev, cp->instance); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); } else { - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); } @@ -8927,10 +8905,10 @@ static void add_ext_adv_data_complete(struct hci_dev *hdev, void *data, int err) rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -9089,10 +9067,10 @@ static void remove_advertising_complete(struct hci_dev *hdev, void *data, rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -9364,7 +9342,7 @@ void mgmt_index_removed(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); + mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match); if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, @@ -9402,7 +9380,8 @@ void mgmt_power_on(struct hci_dev *hdev, int err) hci_update_passive_scan(hdev); } - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + 
mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp, + &match); new_settings(hdev, match.sk); @@ -9417,7 +9396,8 @@ void __mgmt_power_off(struct hci_dev *hdev) struct cmd_lookup match = { NULL, hdev }; u8 zero_cod[] = { 0, 0, 0 }; - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp, + &match); /* If the power off is because of hdev unregistration let * use the appropriate INVALID_INDEX status. Otherwise use @@ -9431,7 +9411,7 @@ void __mgmt_power_off(struct hci_dev *hdev) else match.mgmt_status = MGMT_STATUS_NOT_POWERED; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); + mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, @@ -9672,7 +9652,6 @@ static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data) device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk); cmd->cmd_complete(cmd, 0); - mgmt_pending_remove(cmd); } bool mgmt_powering_down(struct hci_dev *hdev) @@ -9728,8 +9707,8 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, struct mgmt_cp_disconnect *cp; struct mgmt_pending_cmd *cmd; - mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp, - hdev); + mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, true, + unpair_device_rsp, hdev); cmd = pending_find(MGMT_OP_DISCONNECT, hdev); if (!cmd) @@ -9922,7 +9901,7 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) if (status) { u8 mgmt_err = mgmt_status(status); - mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, + mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true, cmd_status_rsp, &mgmt_err); return; } @@ -9932,8 +9911,8 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) else changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY); - mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp, - &match); + mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true, + settings_rsp, &match); if (changed) new_settings(hdev, match.sk); @@ -9957,9 +9936,12 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, { struct cmd_lookup match = { NULL, hdev, mgmt_status(status) }; - mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match); - mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); - mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); + mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, false, sk_lookup, + &match); + mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, false, sk_lookup, + &match); + mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, false, sk_lookup, + &match); if (!status) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index 3713ff490c65..a88a07da3947 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -217,30 +217,47 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, struct hci_dev *hdev) { - struct mgmt_pending_cmd *cmd; + struct mgmt_pending_cmd *cmd, *tmp; + + mutex_lock(&hdev->mgmt_pending_lock); - list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (hci_sock_get_channel(cmd->sk) != channel) continue; - if (cmd->opcode == opcode) + + if 
(cmd->opcode == opcode) { + mutex_unlock(&hdev->mgmt_pending_lock); return cmd; + } } + mutex_unlock(&hdev->mgmt_pending_lock); + return NULL; } -void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data) { struct mgmt_pending_cmd *cmd, *tmp; + mutex_lock(&hdev->mgmt_pending_lock); + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (opcode > 0 && cmd->opcode != opcode) continue; + if (remove) + list_del(&cmd->list); + cb(cmd, data); + + if (remove) + mgmt_pending_free(cmd); } + + mutex_unlock(&hdev->mgmt_pending_lock); } struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, @@ -254,7 +271,7 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, return NULL; cmd->opcode = opcode; - cmd->index = hdev->id; + cmd->hdev = hdev; cmd->param = kmemdup(data, len, GFP_KERNEL); if (!cmd->param) { @@ -280,7 +297,9 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, if (!cmd) return NULL; + mutex_lock(&hdev->mgmt_pending_lock); list_add_tail(&cmd->list, &hdev->mgmt_pending); + mutex_unlock(&hdev->mgmt_pending_lock); return cmd; } @@ -294,7 +313,10 @@ void mgmt_pending_free(struct mgmt_pending_cmd *cmd) void mgmt_pending_remove(struct mgmt_pending_cmd *cmd) { + mutex_lock(&cmd->hdev->mgmt_pending_lock); list_del(&cmd->list); + mutex_unlock(&cmd->hdev->mgmt_pending_lock); + mgmt_pending_free(cmd); } diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h index f2ba994ab1d8..024e51dd6937 100644 --- a/net/bluetooth/mgmt_util.h +++ b/net/bluetooth/mgmt_util.h @@ -33,7 +33,7 @@ struct mgmt_mesh_tx { struct mgmt_pending_cmd { struct list_head list; u16 opcode; - int index; + struct hci_dev *hdev; void *param; size_t param_len; struct sock *sk; @@ -54,7 +54,7 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, struct hci_dev *hdev); -void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data); struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index aaf13a7d58ed..9728dbd4c66c 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -1255,7 +1255,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, headroom -= ctx->data; } - max_data_sz = 4096 - headroom - tailroom; + max_data_sz = PAGE_SIZE - headroom - tailroom; if (size > max_data_sz) { /* disallow live data mode for jumbo frames */ if (do_live) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 0224ef3dfec0..1377f31b719c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2015,10 +2015,19 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port, void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx) { + struct net_bridge *br = pmctx->port->br; + bool del = false; + #if IS_ENABLED(CONFIG_IPV6) timer_delete_sync(&pmctx->ip6_mc_router_timer); #endif timer_delete_sync(&pmctx->ip4_mc_router_timer); + + spin_lock_bh(&br->multicast_lock); + del |= br_ip6_multicast_rport_del(pmctx); + del |= br_ip4_multicast_rport_del(pmctx); + br_multicast_rport_del_notify(pmctx, del); + spin_unlock_bh(&br->multicast_lock); } int 
br_multicast_add_port(struct net_bridge_port *port) diff --git a/net/core/filter.c b/net/core/filter.c index 327ca73f9cd7..7a72f766aacf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3233,6 +3233,13 @@ static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto) +{ + skb->protocol = htons(proto); + if (skb_valid_dst(skb)) + skb_dst_drop(skb); +} + static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) { /* Caller already did skb_cow() with len as headroom, @@ -3329,7 +3336,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) } } - skb->protocol = htons(ETH_P_IPV6); + bpf_skb_change_protocol(skb, ETH_P_IPV6); skb_clear_hash(skb); return 0; @@ -3359,7 +3366,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) } } - skb->protocol = htons(ETH_P_IP); + bpf_skb_change_protocol(skb, ETH_P_IP); skb_clear_hash(skb); return 0; @@ -3550,10 +3557,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, /* Match skb->protocol to new outer l3 protocol */ if (skb->protocol == htons(ETH_P_IP) && flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) - skb->protocol = htons(ETH_P_IPV6); + bpf_skb_change_protocol(skb, ETH_P_IPV6); else if (skb->protocol == htons(ETH_P_IPV6) && flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) - skb->protocol = htons(ETH_P_IP); + bpf_skb_change_protocol(skb, ETH_P_IP); } if (skb_is_gso(skb)) { @@ -3606,10 +3613,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, /* Match skb->protocol to new outer l3 protocol */ if (skb->protocol == htons(ETH_P_IP) && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) - skb->protocol = htons(ETH_P_IPV6); + bpf_skb_change_protocol(skb, ETH_P_IPV6); else if (skb->protocol == htons(ETH_P_IPV6) && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) - skb->protocol = htons(ETH_P_IP); + bpf_skb_change_protocol(skb, ETH_P_IP); if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4ddb7490df4b..6ad84d4a2b46 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -432,6 +432,7 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->dest = htons(np->remote_port); udph->len = htons(udp_len); + udph->check = 0; if (np->ipv6) { udph->check = csum_ipv6_magic(&np->local_ip.in6, &np->remote_ip.in6, @@ -460,7 +461,6 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len) skb_reset_mac_header(skb); skb->protocol = eth->h_proto = htons(ETH_P_IPV6); } else { - udph->check = 0; udph->check = csum_tcpudp_magic(np->local_ip.ip, np->remote_ip.ip, udp_len, IPPROTO_UDP, diff --git a/net/core/selftests.c b/net/core/selftests.c index 35f807ea9952..406faf8e5f3f 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -160,8 +160,9 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev, skb->csum = 0; skb->ip_summed = CHECKSUM_PARTIAL; if (attr->tcp) { - thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr, - ihdr->daddr, 0); + int l4len = skb->len - skb_transport_offset(skb); + + thdr->check = ~tcp_v4_check(l4len, ihdr->saddr, ihdr->daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } else { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 85fc82f72d26..d6420b74ea9c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6261,9 +6261,6 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len) if (!pskb_may_pull(skb, 
write_len)) return -ENOMEM; - if (!skb_frags_readable(skb)) - return -EFAULT; - if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) return 0;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 39ec920f5de7..71c828d0bf31 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1083,7 +1083,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; - if (!xa_load(&dev->ethtool->rss_ctx, info.rss_context)) + if (info.rss_context && + !xa_load(&dev->ethtool->rss_ctx, info.rss_context)) return -EINVAL; }
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 9b83d639b5ac..5107121c5e37 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -3,6 +3,7 @@ #include <linux/tcp.h> #include <linux/rcupdate.h> #include <net/tcp.h> +#include <net/busy_poll.h> void tcp_fastopen_init_key_once(struct net *net) { @@ -279,6 +280,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, refcount_set(&req->rsk_refcnt, 2); + sk_mark_napi_id_set(child, skb); + /* Now finish processing the fastopen child socket. */ tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8ec92dec321a..12c2e6fc85c6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2479,20 +2479,33 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) { const struct sock *sk = (const struct sock *)tp; - if (tp->retrans_stamp && - tcp_tsopt_ecr_before(tp, tp->retrans_stamp)) - return true; /* got echoed TS before first retransmission */ - - /* Check if nothing was retransmitted (retrans_stamp==0), which may - * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp - * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear - * retrans_stamp even if we had retransmitted the SYN. + /* Received an echoed timestamp before the first retransmission? */ + if (tp->retrans_stamp) + return tcp_tsopt_ecr_before(tp, tp->retrans_stamp); + + /* We set tp->retrans_stamp upon the first retransmission of a loss + * recovery episode, so normally if tp->retrans_stamp is 0 then no + * retransmission has happened yet (likely due to TSQ, which can cause + * fast retransmits to be delayed). So if snd_una advanced while + * tp->retrans_stamp is 0, then apparently a packet was merely delayed, + * not lost. But there are exceptions where we retransmit but then + * clear tp->retrans_stamp, so we check for those exceptions. */ - if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */ - sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? */ - return true; /* nothing was retransmitted */ - return false; + /* (1) For non-SACK connections, tcp_is_non_sack_preventing_reopen() + * clears tp->retrans_stamp when snd_una == high_seq. + */ + if (!tcp_is_sack(tp) && !before(tp->snd_una, tp->high_seq)) + return false; + + /* (2) In TCP_SYN_SENT tcp_clean_rtx_queue() clears tp->retrans_stamp + * when FLAG_SYN_ACKED is set, even if the SYN was + * retransmitted. + */ + if (sk->sk_state == TCP_SYN_SENT) + return false; + + return true; /* tp->retrans_stamp is zero; no retransmit yet */ } /* Undo procedures. 
*/ diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 62618a058b8f..a247bb93908b 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -1207,6 +1207,10 @@ static int calipso_req_setattr(struct request_sock *req, struct ipv6_opt_hdr *old, *new; struct sock *sk = sk_to_full_sk(req_to_sk(req)); + /* sk is NULL for SYN+ACK w/ SYN Cookie */ + if (!sk) + return -ENOMEM; + if (req_inet->ipv6_opt && req_inet->ipv6_opt->hopopt) old = req_inet->ipv6_opt->hopopt; else @@ -1247,6 +1251,10 @@ static void calipso_req_delattr(struct request_sock *req) struct ipv6_txoptions *txopts; struct sock *sk = sk_to_full_sk(req_to_sk(req)); + /* sk is NULL for SYN+ACK w/ SYN Cookie */ + if (!sk) + return; + if (!req_inet->ipv6_opt || !req_inet->ipv6_opt->hopopt) return; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0143262094b0..79c8f1acf8a3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3737,6 +3737,53 @@ void fib6_nh_release_dsts(struct fib6_nh *fib6_nh) } } +static int fib6_config_validate(struct fib6_config *cfg, + struct netlink_ext_ack *extack) +{ + /* RTF_PCPU is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_PCPU) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); + goto errout; + } + + /* RTF_CACHE is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_CACHE) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); + goto errout; + } + + if (cfg->fc_type > RTN_MAX) { + NL_SET_ERR_MSG(extack, "Invalid route type"); + goto errout; + } + + if (cfg->fc_dst_len > 128) { + NL_SET_ERR_MSG(extack, "Invalid prefix length"); + goto errout; + } + +#ifdef CONFIG_IPV6_SUBTREES + if (cfg->fc_src_len > 128) { + NL_SET_ERR_MSG(extack, "Invalid source address length"); + goto errout; + } + + if (cfg->fc_nh_id && cfg->fc_src_len) { + NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); + goto errout; + } +#else + if (cfg->fc_src_len) { + NL_SET_ERR_MSG(extack, + "Specifying source address requires IPV6_SUBTREES to be enabled"); + goto errout; + } +#endif + return 0; +errout: + return -EINVAL; +} + static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) @@ -3886,6 +3933,10 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct fib6_info *rt; int err; + err = fib6_config_validate(cfg, extack); + if (err) + return err; + rt = ip6_route_info_create(cfg, gfp_flags, extack); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -4479,53 +4530,6 @@ void rt6_purge_dflt_routers(struct net *net) rcu_read_unlock(); } -static int fib6_config_validate(struct fib6_config *cfg, - struct netlink_ext_ack *extack) -{ - /* RTF_PCPU is an internal flag; can not be set by userspace */ - if (cfg->fc_flags & RTF_PCPU) { - NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); - goto errout; - } - - /* RTF_CACHE is an internal flag; can not be set by userspace */ - if (cfg->fc_flags & RTF_CACHE) { - NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); - goto errout; - } - - if (cfg->fc_type > RTN_MAX) { - NL_SET_ERR_MSG(extack, "Invalid route type"); - goto errout; - } - - if (cfg->fc_dst_len > 128) { - NL_SET_ERR_MSG(extack, "Invalid prefix length"); - goto errout; - } - -#ifdef CONFIG_IPV6_SUBTREES - if (cfg->fc_src_len > 128) { - NL_SET_ERR_MSG(extack, "Invalid source address length"); - goto errout; - } - - if (cfg->fc_nh_id && cfg->fc_src_len) { - NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); - goto errout; - } -#else - if 
(cfg->fc_src_len) { - NL_SET_ERR_MSG(extack, - "Specifying source address requires IPV6_SUBTREES to be enabled"); - goto errout; - } -#endif - return 0; -errout: - return -EINVAL; -} - static void rtmsg_to_fib6_config(struct net *net, struct in6_rtmsg *rtmsg, struct fib6_config *cfg) @@ -4563,10 +4567,6 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) switch (cmd) { case SIOCADDRT: - err = fib6_config_validate(&cfg, NULL); - if (err) - break; - /* Only do the default setting of fc_metric in route adding */ if (cfg.fc_metric == 0) cfg.fc_metric = IP6_RT_PRIO_USER; @@ -5402,6 +5402,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, int nhn = 0; int err; + err = fib6_config_validate(cfg, extack); + if (err) + return err; + replace = (cfg->fc_nlinfo.nlh && (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); @@ -5636,10 +5640,6 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; - err = fib6_config_validate(&cfg, extack); - if (err) - return err; - if (cfg.fc_metric == 0) cfg.fc_metric = IP6_RT_PRIO_USER; diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index 5b81998cb0c9..ef7c1a68d88d 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -1,10 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Portions - * Copyright (C) 2022 - 2024 Intel Corporation + * Copyright (C) 2022 - 2025 Intel Corporation */ #ifndef __MAC80211_DEBUG_H #define __MAC80211_DEBUG_H +#include <linux/once_lite.h> #include <net/cfg80211.h> #ifdef CONFIG_MAC80211_OCB_DEBUG @@ -152,6 +153,8 @@ do { \ else \ _sdata_err((link)->sdata, fmt, ##__VA_ARGS__); \ } while (0) +#define link_err_once(link, fmt, ...) \ + DO_ONCE_LITE(link_err, link, fmt, ##__VA_ARGS__) #define link_id_info(sdata, link_id, fmt, ...) 
\ do { \ if (ieee80211_vif_is_mld(&sdata->vif)) \ diff --git a/net/mac80211/link.c b/net/mac80211/link.c index d40c2bd3b50b..4f7b7d0f64f2 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -93,9 +93,6 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, if (link_id < 0) link_id = 0; - rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf); - rcu_assign_pointer(sdata->link[link_id], link); - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { struct ieee80211_sub_if_data *ap_bss; struct ieee80211_bss_conf *ap_bss_conf; @@ -145,6 +142,9 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, ieee80211_link_debugfs_add(link); } + + rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf); + rcu_assign_pointer(sdata->link[link_id], link); } void ieee80211_link_stop(struct ieee80211_link_data *link) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 09beb65d6108..e73431549ce7 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4432,6 +4432,10 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) if (!multicast && !ether_addr_equal(sdata->dev->dev_addr, hdr->addr1)) return false; + /* reject invalid/our STA address */ + if (!is_valid_ether_addr(hdr->addr2) || + ether_addr_equal(sdata->dev->dev_addr, hdr->addr2)) + return false; if (!rx->sta) { int rate_idx; if (status->encoding != RX_ENC_LEGACY) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d8d4f3d7d7f2..d58b80813bdd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * Transmit and frame generation functions. */ @@ -5016,12 +5016,25 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata, } } -static u8 __ieee80211_beacon_update_cntdwn(struct beacon_data *beacon) +static u8 __ieee80211_beacon_update_cntdwn(struct ieee80211_link_data *link, + struct beacon_data *beacon) { - beacon->cntdwn_current_counter--; + if (beacon->cntdwn_current_counter == 1) { + /* + * Channel switch handling is done by a worker thread while + * beacons get pulled from hardware timers. It's therefore + * possible that software threads are slow enough to not be + * able to complete CSA handling in a single beacon interval, + * in which case we get here. There isn't much to do about + * it, other than letting the user know that the AP isn't + * behaving correctly. 
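A brief aside on the logging primitive introduced above: link_err_once() is built on DO_ONCE_LITE from <linux/once_lite.h>, so the "beacon TX faster than countdown" message it emits fires only on the first occurrence per call site. A minimal sketch of that pattern, with a hypothetical caller rather than the mac80211 code itself:

#include <linux/once_lite.h>
#include <linux/printk.h>

/* DO_ONCE_LITE(func, ...) invokes func(...) only the first time this
 * call site is reached; later hits cost a single static check, which
 * is why the countdown warning cannot flood the log.
 */
static void demo_warn_slow_countdown(void)
{
	DO_ONCE_LITE(pr_err, "beacon TX faster than countdown completion\n");
}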
+ */ + link_err_once(link, + "beacon TX faster than countdown (channel/color switch) completion\n"); + return 0; + } - /* the counter should never reach 0 */ - WARN_ON_ONCE(!beacon->cntdwn_current_counter); + beacon->cntdwn_current_counter--; return beacon->cntdwn_current_counter; } @@ -5052,7 +5065,7 @@ u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif, unsigned int link_i if (!beacon) goto unlock; - count = __ieee80211_beacon_update_cntdwn(beacon); + count = __ieee80211_beacon_update_cntdwn(link, beacon); unlock: rcu_read_unlock(); @@ -5450,7 +5463,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, if (beacon->cntdwn_counter_offsets[0]) { if (!is_template) - __ieee80211_beacon_update_cntdwn(beacon); + __ieee80211_beacon_update_cntdwn(link, beacon); ieee80211_set_beacon_cntdwn(sdata, beacon, link); } @@ -5482,7 +5495,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, * for now we leave it consistent with overall * mac80211's behavior. */ - __ieee80211_beacon_update_cntdwn(beacon); + __ieee80211_beacon_update_cntdwn(link, beacon); ieee80211_set_beacon_cntdwn(sdata, beacon, link); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 27d414efa3fd..a125995ed252 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3884,7 +3884,7 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local, { u64 tsf = drv_get_tsf(local, sdata); u64 dtim_count = 0; - u16 beacon_int = sdata->vif.bss_conf.beacon_int * 1024; + u32 beacon_int = sdata->vif.bss_conf.beacon_int * 1024; u8 dtim_period = sdata->vif.bss_conf.dtim_period; struct ps_data *ps; u8 bcns_from_dtim; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index d536c97144e9..47d7dfd9ad09 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -81,8 +81,8 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) if (index < net->mpls.platform_labels) { struct mpls_route __rcu **platform_label = - rcu_dereference(net->mpls.platform_label); - rt = rcu_dereference(platform_label[index]); + rcu_dereference_rtnl(net->mpls.platform_label); + rt = rcu_dereference_rtnl(platform_label[index]); } return rt; } diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index ed1508a9e093..aab107727f18 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -119,22 +119,22 @@ static int nci_uart_set_driver(struct tty_struct *tty, unsigned int driver) memcpy(nu, nci_uart_drivers[driver], sizeof(struct nci_uart)); nu->tty = tty; - tty->disc_data = nu; skb_queue_head_init(&nu->tx_q); INIT_WORK(&nu->write_work, nci_uart_write_work); spin_lock_init(&nu->rx_lock); ret = nu->ops.open(nu); if (ret) { - tty->disc_data = NULL; kfree(nu); + return ret; } else if (!try_module_get(nu->owner)) { nu->ops.close(nu); - tty->disc_data = NULL; kfree(nu); return -ENOENT; } - return ret; + tty->disc_data = nu; + + return 0; } /* ------ LDISC part ------ */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e7269a3eec79..3add108340bf 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -39,16 +39,14 @@ #include "flow_netlink.h" #include "openvswitch_trace.h" -DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = { - .bh_lock = INIT_LOCAL_LOCK(bh_lock), -}; +struct ovs_pcpu_storage __percpu *ovs_pcpu_storage; /* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys' * space. Return NULL if out of key spaces. 
*/ static struct sw_flow_key *clone_key(const struct sw_flow_key *key_) { - struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage); + struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage); struct action_flow_keys *keys = &ovs_pcpu->flow_keys; int level = ovs_pcpu->exec_level; struct sw_flow_key *key = NULL; @@ -94,7 +92,7 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, const struct nlattr *actions, const int actions_len) { - struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos); + struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos); struct deferred_action *da; da = action_fifo_put(fifo); @@ -755,7 +753,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage.frag_data); + struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage->frag_data); struct vport *vport = data->vport; if (skb_cow_head(skb, data->l2_len) < 0) { @@ -807,7 +805,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb, unsigned int hlen = skb_network_offset(skb); struct ovs_frag_data *data; - data = this_cpu_ptr(&ovs_pcpu_storage.frag_data); + data = this_cpu_ptr(&ovs_pcpu_storage->frag_data); data->dst = skb->_skb_refdst; data->vport = vport; data->cb = *OVS_CB(skb); @@ -1566,16 +1564,15 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, clone = clone_flow_key ? clone_key(key) : key; if (clone) { int err = 0; - if (actions) { /* Sample action */ if (clone_flow_key) - __this_cpu_inc(ovs_pcpu_storage.exec_level); + __this_cpu_inc(ovs_pcpu_storage->exec_level); err = do_execute_actions(dp, skb, clone, actions, len); if (clone_flow_key) - __this_cpu_dec(ovs_pcpu_storage.exec_level); + __this_cpu_dec(ovs_pcpu_storage->exec_level); } else { /* Recirc action */ clone->recirc_id = recirc_id; ovs_dp_process_packet(skb, clone); @@ -1611,7 +1608,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, static void process_deferred_actions(struct datapath *dp) { - struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos); + struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos); /* Do not touch the FIFO in case there is no deferred actions. */ if (action_fifo_is_empty(fifo)) @@ -1642,7 +1639,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, { int err, level; - level = __this_cpu_inc_return(ovs_pcpu_storage.exec_level); + level = __this_cpu_inc_return(ovs_pcpu_storage->exec_level); if (unlikely(level > OVS_RECURSION_LIMIT)) { net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n", ovs_dp_name(dp)); @@ -1659,6 +1656,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, process_deferred_actions(dp); out: - __this_cpu_dec(ovs_pcpu_storage.exec_level); + __this_cpu_dec(ovs_pcpu_storage->exec_level); return err; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6a304ae2d959..b990dc83504f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -244,7 +244,7 @@ void ovs_dp_detach_port(struct vport *p) /* Must be called with rcu_read_lock. 
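The openvswitch hunks follow one conversion pattern: the statically defined per-CPU object becomes a dynamically allocated one, so every this_cpu_ptr(&ovs_pcpu_storage.field) loses its '&', and the embedded local_lock_t must now be initialized per CPU at allocation time instead of via INIT_LOCAL_LOCK(). A condensed sketch of that pattern, using illustrative demo_* names rather than the OVS symbols:

#include <linux/percpu.h>
#include <linux/local_lock.h>
#include <linux/cpumask.h>

struct demo_pcpu {
	int exec_level;
	local_lock_t bh_lock;
};

/* Replaces DEFINE_PER_CPU(struct demo_pcpu, demo_storage). */
static struct demo_pcpu __percpu *demo_storage;

static int demo_alloc(void)
{
	unsigned int cpu;

	demo_storage = alloc_percpu(struct demo_pcpu);
	if (!demo_storage)
		return -ENOMEM;

	/* The lock can no longer be initialized statically. */
	for_each_possible_cpu(cpu)
		local_lock_init(&per_cpu_ptr(demo_storage, cpu)->bh_lock);

	return 0;
}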
*/ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) { - struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage); + struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage); const struct vport *p = OVS_CB(skb)->input_vport; struct datapath *dp = p->dp; struct sw_flow *flow; @@ -299,7 +299,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) * avoided. */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) { - local_lock_nested_bh(&ovs_pcpu_storage.bh_lock); + local_lock_nested_bh(&ovs_pcpu_storage->bh_lock); ovs_pcpu->owner = current; ovs_pcpu_locked = true; } @@ -310,7 +310,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) ovs_dp_name(dp), error); if (ovs_pcpu_locked) { ovs_pcpu->owner = NULL; - local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock); + local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock); } stats_counter = &stats->n_hit; @@ -689,13 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) sf_acts = rcu_dereference(flow->sf_acts); local_bh_disable(); - local_lock_nested_bh(&ovs_pcpu_storage.bh_lock); + local_lock_nested_bh(&ovs_pcpu_storage->bh_lock); if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_write(ovs_pcpu_storage.owner, current); + this_cpu_write(ovs_pcpu_storage->owner, current); err = ovs_execute_actions(dp, packet, sf_acts, &flow->key); if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_write(ovs_pcpu_storage.owner, NULL); - local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock); + this_cpu_write(ovs_pcpu_storage->owner, NULL); + local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock); local_bh_enable(); rcu_read_unlock(); @@ -2744,6 +2744,28 @@ static struct drop_reason_list drop_reason_list_ovs = { .n_reasons = ARRAY_SIZE(ovs_drop_reasons), }; +static int __init ovs_alloc_percpu_storage(void) +{ + unsigned int cpu; + + ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage); + if (!ovs_pcpu_storage) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct ovs_pcpu_storage *ovs_pcpu; + + ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu); + local_lock_init(&ovs_pcpu->bh_lock); + } + return 0; +} + +static void ovs_free_percpu_storage(void) +{ + free_percpu(ovs_pcpu_storage); +} + static int __init dp_init(void) { int err; @@ -2753,6 +2775,10 @@ static int __init dp_init(void) pr_info("Open vSwitch switching datapath\n"); + err = ovs_alloc_percpu_storage(); + if (err) + goto error; + err = ovs_internal_dev_rtnl_link_register(); if (err) goto error; @@ -2799,6 +2825,7 @@ error_flow_exit: error_unreg_rtnl_link: ovs_internal_dev_rtnl_link_unregister(); error: + ovs_free_percpu_storage(); return err; } @@ -2813,6 +2840,7 @@ static void dp_cleanup(void) ovs_vport_exit(); ovs_flow_exit(); ovs_internal_dev_rtnl_link_unregister(); + ovs_free_percpu_storage(); } module_init(dp_init); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 1b5348b0f559..cfeb817a1889 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -220,7 +220,8 @@ struct ovs_pcpu_storage { struct task_struct *owner; local_lock_t bh_lock; }; -DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage); + +extern struct ovs_pcpu_storage __percpu *ovs_pcpu_storage; /** * enum ovs_pkt_hash_types - hash info to include with a packet diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 2c069f0181c6..037f764822b9 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -661,7 +661,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr 
*opt, for (i = q->nbands; i < oldbands; i++) { if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) list_del_init(&q->classes[i].alist); - qdisc_tree_flush_backlog(q->classes[i].qdisc); + qdisc_purge_queue(q->classes[i].qdisc); } WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index cc30f7a32f1a..9e2b9a490db2 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -211,7 +211,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i = q->bands; i < oldbands; i++) - qdisc_tree_flush_backlog(q->queues[i]); + qdisc_purge_queue(q->queues[i]); for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 339d70b4a4c5..479c42d11083 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -285,7 +285,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, q->userbits = userbits; q->limit = ctl->limit; if (child) { - qdisc_tree_flush_backlog(q->qdisc); + qdisc_purge_queue(q->qdisc); old_child = q->qdisc; q->qdisc = child; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index a8081492d671..96eb2f122973 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -310,7 +310,10 @@ drop: /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ x = q->tail->next; slot = &q->slots[x]; - q->tail->next = slot->next; + if (slot->next == x) + q->tail = NULL; /* no more active slots */ + else + q->tail->next = slot->next; q->ht[slot->hash] = SFQ_EMPTY_SLOT; goto drop; } @@ -653,6 +656,14 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); return -EINVAL; } + + if (ctl->perturb_period < 0 || + ctl->perturb_period > INT_MAX / HZ) { + NL_SET_ERR_MSG_MOD(extack, "invalid perturb period"); + return -EINVAL; + } + perturb_period = ctl->perturb_period * HZ; + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) return -EINVAL; @@ -669,14 +680,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, headdrop = q->headdrop; maxdepth = q->maxdepth; maxflows = q->maxflows; - perturb_period = q->perturb_period; quantum = q->quantum; flags = q->flags; /* update and validate configuration */ if (ctl->quantum) quantum = ctl->quantum; - perturb_period = ctl->perturb_period * HZ; if (ctl->flows) maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); if (ctl->divisor) { diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 14021b812329..2b14c81a87e5 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1328,13 +1328,15 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, stab = rtnl_dereference(q->root->stab); - oper = rtnl_dereference(q->oper_sched); + rcu_read_lock(); + oper = rcu_dereference(q->oper_sched); if (oper) taprio_update_queue_max_sdu(q, oper, stab); - admin = rtnl_dereference(q->admin_sched); + admin = rcu_dereference(q->admin_sched); if (admin) taprio_update_queue_max_sdu(q, admin, stab); + rcu_read_unlock(); break; } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index dc26b22d53c7..4c977f049670 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -452,7 +452,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { - qdisc_tree_flush_backlog(q->qdisc); + qdisc_purge_queue(q->qdisc); old = q->qdisc; q->qdisc = child; } 
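The scheduler fixes above share one shape: when a band or child qdisc is dropped during reconfiguration, qdisc_purge_queue() replaces qdisc_tree_flush_backlog(), so the old child's queued packets are actually freed and the qlen/backlog decrease is propagated to ancestor qdiscs, keeping parent counters consistent. The sfq hunk additionally validates perturb_period before the multiply by HZ, since an unchecked value could overflow the signed jiffies product. A hedged sketch of the common child-swap shape (demo name is hypothetical, not one of the patched functions):

#include <net/sch_generic.h>

static void demo_swap_child(struct Qdisc *sch, struct Qdisc **slot,
			    struct Qdisc *child)
{
	struct Qdisc *old;

	sch_tree_lock(sch);
	/* qdisc_purge_queue() drops the old child's queued packets and
	 * notifies ancestors of the qlen/backlog change, where
	 * qdisc_tree_flush_backlog() only adjusted the counters.
	 */
	qdisc_purge_queue(*slot);
	old = *slot;
	*slot = child;
	sch_tree_unlock(sch);

	qdisc_put(old);	/* release the replaced child outside the lock */
}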
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 939b6239df8a..9c93b854e809 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -638,8 +638,6 @@ EXPORT_SYMBOL_GPL(svc_destroy); static bool svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node) { - unsigned long ret; - rqstp->rq_maxpages = svc_serv_maxpages(serv); /* rq_pages' last entry is NULL for historical reasons. */ @@ -649,9 +647,7 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node) if (!rqstp->rq_pages) return false; - ret = alloc_pages_bulk_node(GFP_KERNEL, node, rqstp->rq_maxpages, - rqstp->rq_pages); - return ret == rqstp->rq_maxpages; + return true; } /* @@ -1375,7 +1371,8 @@ svc_process_common(struct svc_rqst *rqstp) case SVC_OK: break; case SVC_GARBAGE: - goto err_garbage_args; + rqstp->rq_auth_stat = rpc_autherr_badcred; + goto err_bad_auth; case SVC_SYSERR: goto err_system_err; case SVC_DENIED: @@ -1516,14 +1513,6 @@ err_bad_proc: *rqstp->rq_accept_statp = rpc_proc_unavail; goto sendit; -err_garbage_args: - svc_printk(rqstp, "failed to decode RPC header\n"); - - if (serv->sv_stats) - serv->sv_stats->rpcbadfmt++; - *rqstp->rq_accept_statp = rpc_garbage_args; - goto sendit; - err_system_err: if (serv->sv_stats) serv->sv_stats->rpcbadfmt++; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 108a4cc2e001..258d6aa4f21a 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -489,7 +489,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); b = tipc_bearer_find(net, bname); - if (!b) { + if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) { rtnl_unlock(); return -EINVAL; } @@ -500,7 +500,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); b = rtnl_dereference(tn->bearer_list[bid]); - if (!b) { + if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) { rtnl_unlock(); return -EINVAL; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2e2e9997a68e..52b155123985 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -660,6 +660,11 @@ static void unix_sock_destructor(struct sock *sk) #endif } +static unsigned int unix_skb_len(const struct sk_buff *skb) +{ + return skb->len - UNIXCB(skb).consumed; +} + static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); @@ -694,10 +699,16 @@ static void unix_release_sock(struct sock *sk, int embrion) if (skpair != NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb && !unix_skb_len(skb)) + skb = skb_peek_next(skb, &sk->sk_receive_queue); +#endif unix_state_lock(skpair); /* No more writes */ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); - if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion) + if (skb || embrion) WRITE_ONCE(skpair->sk_err, ECONNRESET); unix_state_unlock(skpair); skpair->sk_state_change(skpair); @@ -1971,7 +1982,8 @@ static void unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk, if (UNIXCB(skb).pid) return; - if (unix_may_passcred(sk) || unix_may_passcred(other)) { + if (unix_may_passcred(sk) || unix_may_passcred(other) || + !other->sk_socket) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } @@ -2660,11 +2672,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, return timeo; } -static unsigned int unix_skb_len(const struct sk_buff 
*skb) -{ - return skb->len - UNIXCB(skb).consumed; -} - struct unix_stream_read_state { int (*recv_actor)(struct sk_buff *, int, int, struct unix_stream_read_state *); @@ -2679,11 +2686,11 @@ struct unix_stream_read_state { #if IS_ENABLED(CONFIG_AF_UNIX_OOB) static int unix_stream_recv_urg(struct unix_stream_read_state *state) { + struct sk_buff *oob_skb, *read_skb = NULL; struct socket *sock = state->socket; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int chunk = 1; - struct sk_buff *oob_skb; mutex_lock(&u->iolock); unix_state_lock(sk); @@ -2698,9 +2705,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) oob_skb = u->oob_skb; - if (!(state->flags & MSG_PEEK)) + if (!(state->flags & MSG_PEEK)) { WRITE_ONCE(u->oob_skb, NULL); + if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue && + !unix_skb_len(oob_skb->prev)) { + read_skb = oob_skb->prev; + __skb_unlink(read_skb, &sk->sk_receive_queue); + } + } + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); @@ -2711,6 +2725,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) mutex_unlock(&u->iolock); + consume_skb(read_skb); + if (chunk < 0) return -EFAULT; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fd5f79266471..85f139016da2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1583,7 +1583,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, return result; error: - kfree(result); + kfree_sensitive(result); return ERR_PTR(err); } diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index bc494745f67b..8cbb660e2ec2 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -39,6 +39,7 @@ #include <linux/blk_types.h> #include <linux/blkdev.h> #include <linux/clk.h> +#include <linux/completion.h> #include <linux/configfs.h> #include <linux/cpu.h> #include <linux/cpufreq.h> diff --git a/rust/helpers/completion.c b/rust/helpers/completion.c new file mode 100644 index 000000000000..b2443262a2ae --- /dev/null +++ b/rust/helpers/completion.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/completion.h> + +void rust_helper_init_completion(struct completion *x) +{ + init_completion(x); +} diff --git a/rust/helpers/cpu.c b/rust/helpers/cpu.c new file mode 100644 index 000000000000..824e0adb19d4 --- /dev/null +++ b/rust/helpers/cpu.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/smp.h> + +unsigned int rust_helper_raw_smp_processor_id(void) +{ + return raw_smp_processor_id(); +} diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 0f1b5d115985..b15b3cddad4e 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -13,6 +13,8 @@ #include "build_assert.c" #include "build_bug.c" #include "clk.c" +#include "completion.c" +#include "cpu.c" #include "cpufreq.c" #include "cpumask.c" #include "cred.c" diff --git a/rust/kernel/cpu.rs b/rust/kernel/cpu.rs index 10c5c3b25873..b75403b0eb56 100644 --- a/rust/kernel/cpu.rs +++ b/rust/kernel/cpu.rs @@ -6,6 +6,127 @@ use crate::{bindings, device::Device, error::Result, prelude::ENODEV}; +/// Returns the maximum number of possible CPUs in the current system configuration. +#[inline] +pub fn nr_cpu_ids() -> u32 { + #[cfg(any(NR_CPUS_1, CONFIG_FORCE_NR_CPUS))] + { + bindings::NR_CPUS + } + + #[cfg(not(any(NR_CPUS_1, CONFIG_FORCE_NR_CPUS)))] + // SAFETY: `nr_cpu_ids` is a valid global provided by the kernel. 
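The accessor above mirrors the C side, where the number of possible CPU IDs is either the compile-time NR_CPUS (with CONFIG_FORCE_NR_CPUS, or when NR_CPUS is 1) or the boot-time global nr_cpu_ids; the CpuId type below uses it to bound valid IDs. A small C sketch of the invariant being encoded, with a hypothetical helper name:

#include <linux/cpumask.h>

/* Valid CPU IDs occupy the half-open range [0, nr_cpu_ids). */
static bool demo_cpu_id_valid(int cpu)
{
	return cpu >= 0 && cpu < nr_cpu_ids;
}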
+ unsafe { + bindings::nr_cpu_ids + } +} + +/// The CPU ID. +/// +/// Represents a CPU identifier as a wrapper around an [`u32`]. +/// +/// # Invariants +/// +/// The CPU ID lies within the range `[0, nr_cpu_ids())`. +/// +/// # Examples +/// +/// ``` +/// use kernel::cpu::CpuId; +/// +/// let cpu = 0; +/// +/// // SAFETY: 0 is always a valid CPU number. +/// let id = unsafe { CpuId::from_u32_unchecked(cpu) }; +/// +/// assert_eq!(id.as_u32(), cpu); +/// assert!(CpuId::from_i32(0).is_some()); +/// assert!(CpuId::from_i32(-1).is_none()); +/// ``` +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct CpuId(u32); + +impl CpuId { + /// Creates a new [`CpuId`] from the given `id` without checking bounds. + /// + /// # Safety + /// + /// The caller must ensure that `id` is a valid CPU ID (i.e., `0 <= id < nr_cpu_ids()`). + #[inline] + pub unsafe fn from_i32_unchecked(id: i32) -> Self { + debug_assert!(id >= 0); + debug_assert!((id as u32) < nr_cpu_ids()); + + // INVARIANT: The function safety guarantees `id` is a valid CPU id. + Self(id as u32) + } + + /// Creates a new [`CpuId`] from the given `id`, checking that it is valid. + pub fn from_i32(id: i32) -> Option<Self> { + if id < 0 || id as u32 >= nr_cpu_ids() { + None + } else { + // INVARIANT: `id` has just been checked as a valid CPU ID. + Some(Self(id as u32)) + } + } + + /// Creates a new [`CpuId`] from the given `id` without checking bounds. + /// + /// # Safety + /// + /// The caller must ensure that `id` is a valid CPU ID (i.e., `0 <= id < nr_cpu_ids()`). + #[inline] + pub unsafe fn from_u32_unchecked(id: u32) -> Self { + debug_assert!(id < nr_cpu_ids()); + + // Ensure the `id` fits in an [`i32`] as it's also representable that way. + debug_assert!(id <= i32::MAX as u32); + + // INVARIANT: The function safety guarantees `id` is a valid CPU id. + Self(id) + } + + /// Creates a new [`CpuId`] from the given `id`, checking that it is valid. + pub fn from_u32(id: u32) -> Option<Self> { + if id >= nr_cpu_ids() { + None + } else { + // INVARIANT: `id` has just been checked as a valid CPU ID. + Some(Self(id)) + } + } + + /// Returns CPU number. + #[inline] + pub fn as_u32(&self) -> u32 { + self.0 + } + + /// Returns the ID of the CPU the code is currently running on. + /// + /// The returned value is considered unstable because it may change + /// unexpectedly due to preemption or CPU migration. It should only be + /// used when the context ensures that the task remains on the same CPU + /// or the users could use a stale (yet valid) CPU ID. + pub fn current() -> Self { + // SAFETY: raw_smp_processor_id() always returns a valid CPU ID. + unsafe { Self::from_u32_unchecked(bindings::raw_smp_processor_id()) } + } +} + +impl From<CpuId> for u32 { + fn from(id: CpuId) -> Self { + id.as_u32() + } +} + +impl From<CpuId> for i32 { + fn from(id: CpuId) -> Self { + id.as_u32() as i32 + } +} + /// Creates a new instance of CPU's device. /// /// # Safety @@ -17,9 +138,9 @@ use crate::{bindings, device::Device, error::Result, prelude::ENODEV}; /// Callers must ensure that the CPU device is not used after it has been unregistered. /// This can be achieved, for example, by registering a CPU hotplug notifier and removing /// any references to the CPU device within the notifier's callback. -pub unsafe fn from_cpu(cpu: u32) -> Result<&'static Device> { +pub unsafe fn from_cpu(cpu: CpuId) -> Result<&'static Device> { // SAFETY: It is safe to call `get_cpu_device()` for any CPU. 
- let ptr = unsafe { bindings::get_cpu_device(cpu) }; + let ptr = unsafe { bindings::get_cpu_device(u32::from(cpu)) }; if ptr.is_null() { return Err(ENODEV); } diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index b0a9c6182aec..11b03e9d7e89 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -10,6 +10,7 @@ use crate::{ clk::Hertz, + cpu::CpuId, cpumask, device::{Bound, Device}, devres::Devres, @@ -465,8 +466,9 @@ impl Policy { /// Returns the primary CPU for the [`Policy`]. #[inline] - pub fn cpu(&self) -> u32 { - self.as_ref().cpu + pub fn cpu(&self) -> CpuId { + // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number. + unsafe { CpuId::from_u32_unchecked(self.as_ref().cpu) } } /// Returns the minimum frequency for the [`Policy`]. @@ -525,7 +527,7 @@ impl Policy { #[inline] pub fn generic_get(&self) -> Result<u32> { // SAFETY: By the type invariant, the pointer stored in `self` is valid. - Ok(unsafe { bindings::cpufreq_generic_get(self.cpu()) }) + Ok(unsafe { bindings::cpufreq_generic_get(u32::from(self.cpu())) }) } /// Provides a wrapper to the register with energy model using the OPP core. @@ -678,9 +680,9 @@ impl Policy { struct PolicyCpu<'a>(&'a mut Policy); impl<'a> PolicyCpu<'a> { - fn from_cpu(cpu: u32) -> Result<Self> { + fn from_cpu(cpu: CpuId) -> Result<Self> { // SAFETY: It is safe to call `cpufreq_cpu_get` for any valid CPU. - let ptr = from_err_ptr(unsafe { bindings::cpufreq_cpu_get(cpu) })?; + let ptr = from_err_ptr(unsafe { bindings::cpufreq_cpu_get(u32::from(cpu)) })?; Ok(Self( // SAFETY: The `ptr` is guaranteed to be valid and remains valid for the lifetime of @@ -1055,8 +1057,11 @@ impl<T: Driver> Registration<T> { impl<T: Driver> Registration<T> { /// Driver's `init` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn init_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn init_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1070,8 +1075,11 @@ impl<T: Driver> Registration<T> { /// Driver's `exit` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn exit_callback(ptr: *mut bindings::cpufreq_policy) { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn exit_callback(ptr: *mut bindings::cpufreq_policy) { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. let policy = unsafe { Policy::from_raw_mut(ptr) }; @@ -1082,8 +1090,11 @@ impl<T: Driver> Registration<T> { /// Driver's `online` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn online_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn online_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. 
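The cpufreq callback changes here are mechanical: every callback becomes unsafe extern "C" and gains an explicit # Safety contract, since these functions are only sound when invoked by the cpufreq core with valid pointers. For orientation, a sketch of the C-side ops table such callbacks are installed into (demo_* names are hypothetical stubs; only two of the many fields are shown):

#include <linux/cpufreq.h>

static int demo_init(struct cpufreq_policy *policy)
{
	return 0;	/* set up policy->min/max, clocks, etc. */
}

static unsigned int demo_get(unsigned int cpu)
{
	return 0;	/* current frequency of 'cpu' in kHz */
}

static struct cpufreq_driver demo_driver = {
	.name = "demo",
	.init = demo_init,
	.get  = demo_get,
};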
@@ -1094,8 +1105,13 @@ impl<T: Driver> Registration<T> { /// Driver's `offline` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn offline_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn offline_callback( + ptr: *mut bindings::cpufreq_policy, + ) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1106,8 +1122,13 @@ impl<T: Driver> Registration<T> { /// Driver's `suspend` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn suspend_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn suspend_callback( + ptr: *mut bindings::cpufreq_policy, + ) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1118,8 +1139,11 @@ impl<T: Driver> Registration<T> { /// Driver's `resume` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn resume_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn resume_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1130,8 +1154,11 @@ impl<T: Driver> Registration<T> { /// Driver's `ready` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn ready_callback(ptr: *mut bindings::cpufreq_policy) { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn ready_callback(ptr: *mut bindings::cpufreq_policy) { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. let policy = unsafe { Policy::from_raw_mut(ptr) }; @@ -1140,8 +1167,13 @@ impl<T: Driver> Registration<T> { /// Driver's `verify` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn verify_callback(ptr: *mut bindings::cpufreq_policy_data) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn verify_callback( + ptr: *mut bindings::cpufreq_policy_data, + ) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1152,8 +1184,13 @@ impl<T: Driver> Registration<T> { /// Driver's `setpolicy` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn setpolicy_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. 
+ unsafe extern "C" fn setpolicy_callback( + ptr: *mut bindings::cpufreq_policy, + ) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1164,8 +1201,11 @@ impl<T: Driver> Registration<T> { /// Driver's `target` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn target_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn target_callback( ptr: *mut bindings::cpufreq_policy, target_freq: u32, relation: u32, @@ -1180,8 +1220,11 @@ impl<T: Driver> Registration<T> { /// Driver's `target_index` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn target_index_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn target_index_callback( ptr: *mut bindings::cpufreq_policy, index: u32, ) -> kernel::ffi::c_int { @@ -1200,8 +1243,11 @@ impl<T: Driver> Registration<T> { /// Driver's `fast_switch` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn fast_switch_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn fast_switch_callback( ptr: *mut bindings::cpufreq_policy, target_freq: u32, ) -> kernel::ffi::c_uint { @@ -1212,21 +1258,31 @@ impl<T: Driver> Registration<T> { } /// Driver's `adjust_perf` callback. - extern "C" fn adjust_perf_callback( + /// + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + unsafe extern "C" fn adjust_perf_callback( cpu: u32, min_perf: usize, target_perf: usize, capacity: usize, ) { - if let Ok(mut policy) = PolicyCpu::from_cpu(cpu) { + // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number. + let cpu_id = unsafe { CpuId::from_u32_unchecked(cpu) }; + + if let Ok(mut policy) = PolicyCpu::from_cpu(cpu_id) { T::adjust_perf(&mut policy, min_perf, target_perf, capacity); } } /// Driver's `get_intermediate` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn get_intermediate_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn get_intermediate_callback( ptr: *mut bindings::cpufreq_policy, index: u32, ) -> kernel::ffi::c_uint { @@ -1243,8 +1299,11 @@ impl<T: Driver> Registration<T> { /// Driver's `target_intermediate` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn target_intermediate_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn target_intermediate_callback( ptr: *mut bindings::cpufreq_policy, index: u32, ) -> kernel::ffi::c_int { @@ -1262,12 +1321,24 @@ impl<T: Driver> Registration<T> { } /// Driver's `get` callback. - extern "C" fn get_callback(cpu: u32) -> kernel::ffi::c_uint { - PolicyCpu::from_cpu(cpu).map_or(0, |mut policy| T::get(&mut policy).map_or(0, |f| f)) + /// + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. 
+ unsafe extern "C" fn get_callback(cpu: u32) -> kernel::ffi::c_uint { + // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number. + let cpu_id = unsafe { CpuId::from_u32_unchecked(cpu) }; + + PolicyCpu::from_cpu(cpu_id).map_or(0, |mut policy| T::get(&mut policy).map_or(0, |f| f)) } /// Driver's `update_limit` callback. - extern "C" fn update_limits_callback(ptr: *mut bindings::cpufreq_policy) { + /// + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn update_limits_callback(ptr: *mut bindings::cpufreq_policy) { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. let policy = unsafe { Policy::from_raw_mut(ptr) }; @@ -1276,10 +1347,16 @@ impl<T: Driver> Registration<T> { /// Driver's `bios_limit` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn bios_limit_callback(cpu: i32, limit: *mut u32) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn bios_limit_callback(cpu: i32, limit: *mut u32) -> kernel::ffi::c_int { + // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number. + let cpu_id = unsafe { CpuId::from_i32_unchecked(cpu) }; + from_result(|| { - let mut policy = PolicyCpu::from_cpu(cpu as u32)?; + let mut policy = PolicyCpu::from_cpu(cpu_id)?; // SAFETY: `limit` is guaranteed by the C code to be valid. T::bios_limit(&mut policy, &mut (unsafe { *limit })).map(|()| 0) @@ -1288,8 +1365,11 @@ impl<T: Driver> Registration<T> { /// Driver's `set_boost` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn set_boost_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn set_boost_callback( ptr: *mut bindings::cpufreq_policy, state: i32, ) -> kernel::ffi::c_int { @@ -1303,8 +1383,11 @@ impl<T: Driver> Registration<T> { /// Driver's `register_em` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn register_em_callback(ptr: *mut bindings::cpufreq_policy) { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn register_em_callback(ptr: *mut bindings::cpufreq_policy) { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. let policy = unsafe { Policy::from_raw_mut(ptr) }; diff --git a/rust/kernel/cpumask.rs b/rust/kernel/cpumask.rs index c90bfac9346a..19c607709b5f 100644 --- a/rust/kernel/cpumask.rs +++ b/rust/kernel/cpumask.rs @@ -6,6 +6,7 @@ use crate::{ alloc::{AllocError, Flags}, + cpu::CpuId, prelude::*, types::Opaque, }; @@ -35,9 +36,10 @@ use core::ops::{Deref, DerefMut}; /// /// ``` /// use kernel::bindings; +/// use kernel::cpu::CpuId; /// use kernel::cpumask::Cpumask; /// -/// fn set_clear_cpu(ptr: *mut bindings::cpumask, set_cpu: u32, clear_cpu: i32) { +/// fn set_clear_cpu(ptr: *mut bindings::cpumask, set_cpu: CpuId, clear_cpu: CpuId) { /// // SAFETY: The `ptr` is valid for writing and remains valid for the lifetime of the /// // returned reference. 
/// let mask = unsafe { Cpumask::as_mut_ref(ptr) }; @@ -90,9 +92,9 @@ impl Cpumask { /// This mismatches kernel naming convention and corresponds to the C /// function `__cpumask_set_cpu()`. #[inline] - pub fn set(&mut self, cpu: u32) { + pub fn set(&mut self, cpu: CpuId) { // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `__cpumask_set_cpu`. - unsafe { bindings::__cpumask_set_cpu(cpu, self.as_raw()) }; + unsafe { bindings::__cpumask_set_cpu(u32::from(cpu), self.as_raw()) }; } /// Clear `cpu` in the cpumask. @@ -101,19 +103,19 @@ impl Cpumask { /// This mismatches kernel naming convention and corresponds to the C /// function `__cpumask_clear_cpu()`. #[inline] - pub fn clear(&mut self, cpu: i32) { + pub fn clear(&mut self, cpu: CpuId) { // SAFETY: By the type invariant, `self.as_raw` is a valid argument to // `__cpumask_clear_cpu`. - unsafe { bindings::__cpumask_clear_cpu(cpu, self.as_raw()) }; + unsafe { bindings::__cpumask_clear_cpu(i32::from(cpu), self.as_raw()) }; } /// Test `cpu` in the cpumask. /// /// Equivalent to the kernel's `cpumask_test_cpu` API. #[inline] - pub fn test(&self, cpu: i32) -> bool { + pub fn test(&self, cpu: CpuId) -> bool { // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `cpumask_test_cpu`. - unsafe { bindings::cpumask_test_cpu(cpu, self.as_raw()) } + unsafe { bindings::cpumask_test_cpu(i32::from(cpu), self.as_raw()) } } /// Set all CPUs in the cpumask. @@ -178,21 +180,40 @@ impl Cpumask { /// The following example demonstrates how to create and update a [`CpumaskVar`]. /// /// ``` +/// use kernel::cpu::CpuId; /// use kernel::cpumask::CpumaskVar; /// /// let mut mask = CpumaskVar::new_zero(GFP_KERNEL).unwrap(); /// /// assert!(mask.empty()); -/// mask.set(2); -/// assert!(mask.test(2)); -/// mask.set(3); -/// assert!(mask.test(3)); -/// assert_eq!(mask.weight(), 2); +/// let mut count = 0; +/// +/// let cpu2 = CpuId::from_u32(2); +/// if let Some(cpu) = cpu2 { +/// mask.set(cpu); +/// assert!(mask.test(cpu)); +/// count += 1; +/// } +/// +/// let cpu3 = CpuId::from_u32(3); +/// if let Some(cpu) = cpu3 { +/// mask.set(cpu); +/// assert!(mask.test(cpu)); +/// count += 1; +/// } +/// +/// assert_eq!(mask.weight(), count); /// /// let mask2 = CpumaskVar::try_clone(&mask).unwrap(); -/// assert!(mask2.test(2)); -/// assert!(mask2.test(3)); -/// assert_eq!(mask2.weight(), 2); +/// +/// if let Some(cpu) = cpu2 { +/// assert!(mask2.test(cpu)); +/// } +/// +/// if let Some(cpu) = cpu3 { +/// assert!(mask2.test(cpu)); +/// } +/// assert_eq!(mask2.weight(), count); /// ``` pub struct CpumaskVar { #[cfg(CONFIG_CPUMASK_OFFSTACK)] diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 0f79a2ec9474..57502534d985 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -12,26 +12,28 @@ use crate::{ error::{Error, Result}, ffi::c_void, prelude::*, - revocable::Revocable, - sync::Arc, + revocable::{Revocable, RevocableGuard}, + sync::{rcu, Arc, Completion}, types::ARef, }; -use core::ops::Deref; - #[pin_data] struct DevresInner<T> { dev: ARef<Device>, callback: unsafe extern "C" fn(*mut c_void), #[pin] data: Revocable<T>, + #[pin] + revoke: Completion, } /// This abstraction is meant to be used by subsystems to containerize [`Device`] bound resources to /// manage their lifetime. /// /// [`Device`] bound resources should be freed when either the resource goes out of scope or the -/// [`Device`] is unbound respectively, depending on what happens first. 
+/// [`Device`] is unbound respectively, depending on what happens first. In any case, it is always +/// guaranteed that revoking the device resource is completed before the corresponding [`Device`] +/// is unbound. /// /// To achieve that [`Devres`] registers a devres callback on creation, which is called once the /// [`Device`] is unbound, revoking access to the encapsulated resource (see also [`Revocable`]). @@ -102,6 +104,7 @@ impl<T> DevresInner<T> { dev: dev.into(), callback: Self::devres_callback, data <- Revocable::new(data), + revoke <- Completion::new(), }), flags, )?; @@ -130,26 +133,28 @@ impl<T> DevresInner<T> { self as _ } - fn remove_action(this: &Arc<Self>) { + fn remove_action(this: &Arc<Self>) -> bool { // SAFETY: // - `self.inner.dev` is a valid `Device`, // - the `action` and `data` pointers are the exact same ones as given to devm_add_action() // previously, // - `self` is always valid, even if the action has been released already. - let ret = unsafe { + let success = unsafe { bindings::devm_remove_action_nowarn( this.dev.as_raw(), Some(this.callback), this.as_ptr() as _, ) - }; + } == 0; - if ret == 0 { + if success { // SAFETY: We leaked an `Arc` reference to devm_add_action() in `DevresInner::new`; if // devm_remove_action_nowarn() was successful we can (and have to) claim back ownership // of this reference. let _ = unsafe { Arc::from_raw(this.as_ptr()) }; } + + success } #[allow(clippy::missing_safety_doc)] @@ -161,7 +166,12 @@ impl<T> DevresInner<T> { // `DevresInner::new`. let inner = unsafe { Arc::from_raw(ptr) }; - inner.data.revoke(); + if !inner.data.revoke() { + // If `revoke()` returns false, it means that `Devres::drop` already started revoking + // `inner.data` for us. Hence we have to wait until `Devres::drop()` signals that it + // completed revoking `inner.data`. + inner.revoke.wait_for_completion(); + } } } @@ -218,20 +228,36 @@ impl<T> Devres<T> { // SAFETY: `dev` being the same device as the device this `Devres` has been created for // proves that `self.0.data` hasn't been revoked and is guaranteed to not be revoked as // long as `dev` lives; `dev` lives at least as long as `self`. - Ok(unsafe { self.deref().access() }) + Ok(unsafe { self.0.data.access() }) } -} -impl<T> Deref for Devres<T> { - type Target = Revocable<T>; + /// [`Devres`] accessor for [`Revocable::try_access`]. + pub fn try_access(&self) -> Option<RevocableGuard<'_, T>> { + self.0.data.try_access() + } + + /// [`Devres`] accessor for [`Revocable::try_access_with`]. + pub fn try_access_with<R, F: FnOnce(&T) -> R>(&self, f: F) -> Option<R> { + self.0.data.try_access_with(f) + } - fn deref(&self) -> &Self::Target { - &self.0.data + /// [`Devres`] accessor for [`Revocable::try_access_with_guard`]. + pub fn try_access_with_guard<'a>(&'a self, guard: &'a rcu::Guard) -> Option<&'a T> { + self.0.data.try_access_with_guard(guard) } } impl<T> Drop for Devres<T> { fn drop(&mut self) { - DevresInner::remove_action(&self.0); + // SAFETY: When `drop` runs, it is guaranteed that nobody is accessing the revocable data + // anymore, hence it is safe not to wait for the grace period to finish. + if unsafe { self.0.data.revoke_nosync() } { + // We revoked `self.0.data` before the devres action did, hence try to remove it. + if !DevresInner::remove_action(&self.0) { + // We could not remove the devres action, which means that it now runs concurrently, + // hence signal that `self.0.data` has been revoked successfully. 
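The Devres rework here hinges on a completion: whichever of Drop and the devres callback loses the race to revoke the data waits until the winner signals that revocation finished, so the resource is provably gone before the device completes unbinding. A minimal C sketch of that completion API, the same primitive the new Rust Completion type wraps (demo names are illustrative):

#include <linux/completion.h>

static DECLARE_COMPLETION(demo_revoked);

/* Loser of the revoke race: block until revocation has finished. */
static void demo_wait_side(void)
{
	wait_for_completion(&demo_revoked);	/* uninterruptible, no timeout */
}

/* Winner of the revoke race: tear down, then signal permanently. */
static void demo_signal_side(void)
{
	/* ... free the resource ... */
	complete_all(&demo_revoked);	/* wakes current and future waiters */
}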
+ self.0.revoke.complete_all(); + } + } } } diff --git a/rust/kernel/revocable.rs b/rust/kernel/revocable.rs index db4aa46bb121..06a3cdfce344 100644 --- a/rust/kernel/revocable.rs +++ b/rust/kernel/revocable.rs @@ -154,8 +154,10 @@ impl<T> Revocable<T> { /// # Safety /// /// Callers must ensure that there are no more concurrent users of the revocable object. - unsafe fn revoke_internal<const SYNC: bool>(&self) { - if self.is_available.swap(false, Ordering::Relaxed) { + unsafe fn revoke_internal<const SYNC: bool>(&self) -> bool { + let revoke = self.is_available.swap(false, Ordering::Relaxed); + + if revoke { if SYNC { // SAFETY: Just an FFI call, there are no further requirements. unsafe { bindings::synchronize_rcu() }; @@ -165,6 +167,8 @@ impl<T> Revocable<T> { // `compare_exchange` above that takes `is_available` from `true` to `false`. unsafe { drop_in_place(self.data.get()) }; } + + revoke } /// Revokes access to and drops the wrapped object. @@ -172,10 +176,13 @@ impl<T> Revocable<T> { /// Access to the object is revoked immediately to new callers of [`Revocable::try_access`], /// expecting that there are no concurrent users of the object. /// + /// Returns `true` if `&self` has been revoked with this call, `false` if it was revoked + /// already. + /// /// # Safety /// /// Callers must ensure that there are no more concurrent users of the revocable object. - pub unsafe fn revoke_nosync(&self) { + pub unsafe fn revoke_nosync(&self) -> bool { // SAFETY: By the safety requirement of this function, the caller ensures that nobody is // accessing the data anymore and hence we don't have to wait for the grace period to // finish. @@ -189,7 +196,10 @@ impl<T> Revocable<T> { /// If there are concurrent users of the object (i.e., ones that called /// [`Revocable::try_access`] beforehand and still haven't dropped the returned guard), this /// function waits for the concurrent access to complete before dropping the wrapped object. - pub fn revoke(&self) { + /// + /// Returns `true` if `&self` has been revoked with this call, `false` if it was revoked + /// already. + pub fn revoke(&self) -> bool { // SAFETY: By passing `true` we ask `revoke_internal` to wait for the grace period to // finish. unsafe { self.revoke_internal::<true>() } diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index 36a719015583..c23a12639924 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -10,6 +10,7 @@ use crate::types::Opaque; use pin_init; mod arc; +pub mod completion; mod condvar; pub mod lock; mod locked_by; @@ -17,6 +18,7 @@ pub mod poll; pub mod rcu; pub use arc::{Arc, ArcBorrow, UniqueArc}; +pub use completion::Completion; pub use condvar::{new_condvar, CondVar, CondVarTimeoutResult}; pub use lock::global::{global_lock, GlobalGuard, GlobalLock, GlobalLockBackend, GlobalLockedBy}; pub use lock::mutex::{new_mutex, Mutex, MutexGuard}; diff --git a/rust/kernel/sync/completion.rs b/rust/kernel/sync/completion.rs new file mode 100644 index 000000000000..c50012a940a3 --- /dev/null +++ b/rust/kernel/sync/completion.rs @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Completion support. +//! +//! Reference: <https://docs.kernel.org/scheduler/completion.html> +//! +//! C header: [`include/linux/completion.h`](srctree/include/linux/completion.h) + +use crate::{bindings, prelude::*, types::Opaque}; + +/// Synchronization primitive to signal when a certain task has been completed. 
+/// +/// The [`Completion`] synchronization primitive signals when a certain task has been completed by +/// waking up other tasks that have been queued up to wait for the [`Completion`] to be completed. +/// +/// # Examples +/// +/// ``` +/// use kernel::sync::{Arc, Completion}; +/// use kernel::workqueue::{self, impl_has_work, new_work, Work, WorkItem}; +/// +/// #[pin_data] +/// struct MyTask { +/// #[pin] +/// work: Work<MyTask>, +/// #[pin] +/// done: Completion, +/// } +/// +/// impl_has_work! { +/// impl HasWork<Self> for MyTask { self.work } +/// } +/// +/// impl MyTask { +/// fn new() -> Result<Arc<Self>> { +/// let this = Arc::pin_init(pin_init!(MyTask { +/// work <- new_work!("MyTask::work"), +/// done <- Completion::new(), +/// }), GFP_KERNEL)?; +/// +/// let _ = workqueue::system().enqueue(this.clone()); +/// +/// Ok(this) +/// } +/// +/// fn wait_for_completion(&self) { +/// self.done.wait_for_completion(); +/// +/// pr_info!("Completion: task complete\n"); +/// } +/// } +/// +/// impl WorkItem for MyTask { +/// type Pointer = Arc<MyTask>; +/// +/// fn run(this: Arc<MyTask>) { +/// // process this task +/// this.done.complete_all(); +/// } +/// } +/// +/// let task = MyTask::new()?; +/// task.wait_for_completion(); +/// # Ok::<(), Error>(()) +/// ``` +#[pin_data] +pub struct Completion { + #[pin] + inner: Opaque<bindings::completion>, +} + +// SAFETY: `Completion` is safe to be send to any task. +unsafe impl Send for Completion {} + +// SAFETY: `Completion` is safe to be accessed concurrently. +unsafe impl Sync for Completion {} + +impl Completion { + /// Create an initializer for a new [`Completion`]. + pub fn new() -> impl PinInit<Self> { + pin_init!(Self { + inner <- Opaque::ffi_init(|slot: *mut bindings::completion| { + // SAFETY: `slot` is a valid pointer to an uninitialized `struct completion`. + unsafe { bindings::init_completion(slot) }; + }), + }) + } + + fn as_raw(&self) -> *mut bindings::completion { + self.inner.get() + } + + /// Signal all tasks waiting on this completion. + /// + /// This method wakes up all tasks waiting on this completion; after this operation the + /// completion is permanently done, i.e. signals all current and future waiters. + pub fn complete_all(&self) { + // SAFETY: `self.as_raw()` is a pointer to a valid `struct completion`. + unsafe { bindings::complete_all(self.as_raw()) }; + } + + /// Wait for completion of a task. + /// + /// This method waits for the completion of a task; it is not interruptible and there is no + /// timeout. + /// + /// See also [`Completion::complete_all`]. + pub fn wait_for_completion(&self) { + // SAFETY: `self.as_raw()` is a pointer to a valid `struct completion`. + unsafe { bindings::wait_for_completion(self.as_raw()) }; + } +} diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 9df3dcd2fa39..36e1290cd079 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -517,7 +517,7 @@ macro_rules! impl_has_hr_timer { ) -> *mut Self { // SAFETY: As per the safety requirement of this function, `ptr` // is pointing inside a `$timer_type`. 
- unsafe { ::kernel::container_of!(ptr, $timer_type, $field).cast_mut() } + unsafe { ::kernel::container_of!(ptr, $timer_type, $field) } } } } diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h index 7dd03ffe0c5c..d9c06d2cb1df 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.h +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -216,24 +216,14 @@ int cache_get(struct cache *cache, unsigned long key); void cache_init(struct cache *cache); void cache_free(struct cache *cache); -static inline void __cache_mark_expanded(struct cache *cache, uintptr_t addr) -{ - cache_set(cache, addr, 1); -} - -static inline bool __cache_was_expanded(struct cache *cache, uintptr_t addr) -{ - return cache_get(cache, addr) == 1; -} - static inline void cache_mark_expanded(struct cache *cache, void *addr) { - __cache_mark_expanded(cache, (uintptr_t)addr); + cache_set(cache, (unsigned long)addr, 1); } static inline bool cache_was_expanded(struct cache *cache, void *addr) { - return __cache_was_expanded(cache, (uintptr_t)addr); + return cache_get(cache, (unsigned long)addr) == 1; } /* diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c index 39ce1770e463..7bd459ea6c59 100644 --- a/scripts/gendwarfksyms/types.c +++ b/scripts/gendwarfksyms/types.c @@ -333,37 +333,11 @@ static void calculate_version(struct version *version, cache_free(&expansion_cache); } -static void __type_expand(struct die *cache, struct type_expansion *type, - bool recursive); - -static void type_expand_child(struct die *cache, struct type_expansion *type, - bool recursive) -{ - struct type_expansion child; - char *name; - - name = get_type_name(cache); - if (!name) { - __type_expand(cache, type, recursive); - return; - } - - if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) { - __cache_mark_expanded(&expansion_cache, cache->addr); - type_expansion_init(&child); - __type_expand(cache, &child, true); - type_map_add(name, &child); - type_expansion_free(&child); - } - - type_expansion_append(type, name, name); -} - -static void __type_expand(struct die *cache, struct type_expansion *type, - bool recursive) +static void __type_expand(struct die *cache, struct type_expansion *type) { struct die_fragment *df; struct die *child; + char *name; list_for_each_entry(df, &cache->fragments, list) { switch (df->type) { @@ -379,7 +353,12 @@ static void __type_expand(struct die *cache, struct type_expansion *type, error("unknown child: %" PRIxPTR, df->data.addr); - type_expand_child(child, type, recursive); + name = get_type_name(child); + if (name) + type_expansion_append(type, name, name); + else + __type_expand(child, type); + break; case FRAGMENT_LINEBREAK: /* @@ -397,12 +376,17 @@ static void __type_expand(struct die *cache, struct type_expansion *type, } } -static void type_expand(struct die *cache, struct type_expansion *type, - bool recursive) +static void type_expand(const char *name, struct die *cache, + struct type_expansion *type) { + const char *override; + type_expansion_init(type); - __type_expand(cache, type, recursive); - cache_free(&expansion_cache); + + if (stable && kabi_get_type_string(name, &override)) + type_parse(name, override, type); + else + __type_expand(cache, type); } static void type_parse(const char *name, const char *str, @@ -416,8 +400,6 @@ static void type_parse(const char *name, const char *str, if (!*str) error("empty type string override for '%s'", name); - type_expansion_init(type); - for (pos = 0; str[pos]; ++pos) { bool empty; char marker = ' '; 
@@ -478,7 +460,6 @@ static void type_parse(const char *name, const char *str, static void expand_type(struct die *cache, void *arg) { struct type_expansion type; - const char *override; char *name; if (cache->mapped) @@ -504,11 +485,7 @@ static void expand_type(struct die *cache, void *arg) debug("%s", name); - if (stable && kabi_get_type_string(name, &override)) - type_parse(name, override, &type); - else - type_expand(cache, &type, true); - + type_expand(name, cache, &type); type_map_add(name, &type); type_expansion_free(&type); free(name); @@ -518,7 +495,6 @@ static void expand_symbol(struct symbol *sym, void *arg) { struct type_expansion type; struct version version; - const char *override; struct die *cache; /* @@ -532,10 +508,7 @@ static void expand_symbol(struct symbol *sym, void *arg) if (__die_map_get(sym->die_addr, DIE_SYMBOL, &cache)) return; /* We'll warn about missing CRCs later. */ - if (stable && kabi_get_type_string(sym->name, &override)) - type_parse(sym->name, override, &type); - else - type_expand(cache, &type, false); + type_expand(sym->name, cache, &type); /* If the symbol already has a version, don't calculate it again. */ if (sym->state != SYMBOL_PROCESSED) { diff --git a/scripts/misc-check b/scripts/misc-check index a74450e799d1..84f08da17b2c 100755 --- a/scripts/misc-check +++ b/scripts/misc-check @@ -62,6 +62,15 @@ check_unnecessary_include_linux_export_h () { xargs -r printf "%s: warning: EXPORT_SYMBOL() is not used, but #include <linux/export.h> is present\n" >&2 } -check_tracked_ignored_files -check_missing_include_linux_export_h -check_unnecessary_include_linux_export_h +case "${KBUILD_EXTRA_WARN}" in +*1*) + check_tracked_ignored_files + ;; +esac + +case "${KBUILD_EXTRA_WARN}" in +*2*) + check_missing_include_linux_export_h + check_unnecessary_include_linux_export_h + ;; +esac diff --git a/security/keys/gc.c b/security/keys/gc.c index f27223ea4578..748e83818a76 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -218,8 +218,8 @@ continue_scanning: key = rb_entry(cursor, struct key, serial_node); cursor = rb_next(cursor); - if (test_bit(KEY_FLAG_FINAL_PUT, &key->flags)) { - smp_mb(); /* Clobber key->user after FINAL_PUT seen. */ + if (!test_bit_acquire(KEY_FLAG_USER_ALIVE, &key->flags)) { + /* Clobber key->user after final put seen. */ goto found_unreferenced_key; } diff --git a/security/keys/key.c b/security/keys/key.c index 7198cd2ac3a3..3bbdde778631 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -298,6 +298,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->restrict_link = restrict_link; key->last_used_at = ktime_get_real_seconds(); + key->flags |= 1 << KEY_FLAG_USER_ALIVE; if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; if (flags & KEY_ALLOC_BUILT_IN) @@ -658,8 +659,8 @@ void key_put(struct key *key) key->user->qnbytes -= key->quotalen; spin_unlock_irqrestore(&key->user->lock, flags); } - smp_mb(); /* key->user before FINAL_PUT set. */ - set_bit(KEY_FLAG_FINAL_PUT, &key->flags); + /* Mark key as safe for GC after key->user done. */ + clear_bit_unlock(KEY_FLAG_USER_ALIVE, &key->flags); schedule_work(&key_gc_work); } } diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 7becf3808818..d185754c2786 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1909,11 +1909,17 @@ retry: goto out_unlock; } /* Obtain the sid for the context. 
*/ - rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid); - if (rc == -ESTALE) { - rcu_read_unlock(); - context_destroy(&newcontext); - goto retry; + if (context_equal(scontext, &newcontext)) + *out_sid = ssid; + else if (context_equal(tcontext, &newcontext)) + *out_sid = tsid; + else { + rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + context_destroy(&newcontext); + goto retry; + } } out_unlock: rcu_read_unlock(); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 90ec4ef1b082..61d56b0c2be1 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -94,7 +94,7 @@ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, ctx->ctx_doi = XFRM_SC_DOI_LSM; ctx->ctx_alg = XFRM_SC_ALG_SELINUX; - ctx->ctx_len = str_len; + ctx->ctx_len = str_len + 1; memcpy(ctx->ctx_str, &uctx[1], str_len); ctx->ctx_str[str_len] = '\0'; rc = security_context_to_sid(ctx->ctx_str, str_len, diff --git a/sound/isa/sb/sb16_main.c b/sound/isa/sb/sb16_main.c index 74db11525003..5a083eecaa6b 100644 --- a/sound/isa/sb/sb16_main.c +++ b/sound/isa/sb/sb16_main.c @@ -703,6 +703,9 @@ static int snd_sb16_dma_control_put(struct snd_kcontrol *kcontrol, struct snd_ct unsigned char nval, oval; int change; + if (chip->mode & (SB_MODE_PLAYBACK | SB_MODE_CAPTURE)) + return -EBUSY; + nval = ucontrol->value.enumerated.item[0]; if (nval > 2) return -EINVAL; @@ -711,6 +714,10 @@ static int snd_sb16_dma_control_put(struct snd_kcontrol *kcontrol, struct snd_ct change = nval != oval; snd_sb16_set_dma_mode(chip, nval); spin_unlock_irqrestore(&chip->reg_lock, flags); + if (change) { + snd_dma_disable(chip->dma8); + snd_dma_disable(chip->dma16); + } return change; } diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c index 713d36ea40cb..d8dd84d41c87 100644 --- a/sound/pci/ctxfi/xfi.c +++ b/sound/pci/ctxfi/xfi.c @@ -98,8 +98,8 @@ ct_card_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) if (err < 0) goto error; - strcpy(card->driver, "SB-XFi"); - strcpy(card->shortname, "Creative X-Fi"); + strscpy(card->driver, "SB-XFi"); + strscpy(card->shortname, "Creative X-Fi"); snprintf(card->longname, sizeof(card->longname), "%s %s %s", card->shortname, atc->chip_name, atc->model_name); diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e5210ed48ddf..439cf1bda6e6 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2283,6 +2283,8 @@ static const struct snd_pci_quirk power_save_denylist[] = { SND_PCI_QUIRK(0x1734, 0x1232, "KONTRON SinglePC", 0), /* Dell ALC3271 */ SND_PCI_QUIRK(0x1028, 0x0962, "Dell ALC3271", 0), + /* https://bugzilla.kernel.org/show_bug.cgi?id=220210 */ + SND_PCI_QUIRK(0x17aa, 0x5079, "Lenovo Thinkpad E15", 0), {} }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index cd0d7ba7320e..2e1618494c20 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8030,6 +8030,9 @@ enum { ALC294_FIXUP_ASUS_CS35L41_SPI_2, ALC274_FIXUP_HP_AIO_BIND_DACS, ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2, + ALC285_FIXUP_ASUS_GA605K_HEADSET_MIC, + ALC285_FIXUP_ASUS_GA605K_I2C_SPEAKER2_TO_DAC1, + ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -10414,6 +10417,26 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc274_fixup_hp_aio_bind_dacs, }, + [ALC285_FIXUP_ASUS_GA605K_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 
0x03a11050 }, + { 0x1b, 0x03a11c30 }, + { } + }, + .chained = true, + .chain_id = ALC285_FIXUP_ASUS_GA605K_I2C_SPEAKER2_TO_DAC1 + }, + [ALC285_FIXUP_ASUS_GA605K_I2C_SPEAKER2_TO_DAC1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_speaker2_to_dac1, + }, + [ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_limit_int_mic_boost, + .chained = true, + .chain_id = ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE, + }, }; static const struct hda_quirk alc269_fixup_tbl[] = { @@ -10509,6 +10532,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0872, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB), + SND_PCI_QUIRK(0x1028, 0x0879, "Dell Latitude 5420 Rugged", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x08ad, "Dell WYSE AIO", ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x08ae, "Dell WYSE NB", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), @@ -10787,6 +10811,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8b97, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8bb3, "HP Slim OMEN", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8bb4, "HP Slim OMEN", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8bc8, "HP Victus 15-fa1xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bcd, "HP Omen 16-xd0xxx", ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bdd, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8bde, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), @@ -10840,6 +10865,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c9c, "HP Victus 16-s1xxx (MB 8C9C)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), @@ -10904,6 +10930,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1032, "ASUS VivoBook X513EA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1034, "ASUS GU605C", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x1054, "ASUS G614FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), @@ -10932,6 +10960,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1314, "ASUS GA605K", 
ALC285_FIXUP_ASUS_GA605K_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650PY/PZ/PV/PU/PYV/PZV/PIV/PVV", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), @@ -11384,6 +11413,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0228, "Infinix ZERO BOOK 13", ALC269VB_FIXUP_INFINIX_ZERO_BOOK_13), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), + SND_PCI_QUIRK(0x2782, 0x1407, "Positivo P15X", ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC), SND_PCI_QUIRK(0x2782, 0x1701, "Infinix Y4 Max", ALC269VC_FIXUP_INFINIX_Y4_MAX), SND_PCI_QUIRK(0x2782, 0x1705, "MEDION E15433", ALC269VC_FIXUP_INFINIX_Y4_MAX), SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 7e62445e02c1..98022e5fd428 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -454,6 +454,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 17 D7VF"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Alienware"), DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m17 R5 AMD"), } @@ -518,6 +525,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "Victus by HP Gaming Laptop 15-fb2xxx"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), DMI_MATCH(DMI_BOARD_NAME, "8A42"), } }, diff --git a/sound/soc/apple/Kconfig b/sound/soc/apple/Kconfig index 793f7782e0d7..e9c777cdb6e3 100644 --- a/sound/soc/apple/Kconfig +++ b/sound/soc/apple/Kconfig @@ -2,7 +2,6 @@ config SND_SOC_APPLE_MCA tristate "Apple Silicon MCA driver" depends on ARCH_APPLE || COMPILE_TEST select SND_DMAENGINE_PCM - default ARCH_APPLE help This option enables an ASoC platform driver for MCA peripherals found on Apple Silicon SoCs. 
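As context for the DMI quirk entries added to yc_acp_quirk_table above: the driver resolves this table at probe time through the generic DMI helper, taking the matched entry's driver_data as the machine-specific card configuration. A minimal sketch, assuming yc_acp_quirk_table is in scope — the yc_lookup_quirk() wrapper is illustrative, not code from this patch:

#include <linux/dmi.h>
#include <sound/soc.h>

/*
 * Illustrative lookup: dmi_first_match() walks the table and returns
 * the first entry whose DMI_MATCH() conditions all hold on the running
 * machine, or NULL when no quirk applies. Each entry's driver_data
 * points at the per-machine snd_soc_card (here &acp6x_card).
 */
static struct snd_soc_card *yc_lookup_quirk(void)
{
	const struct dmi_system_id *match;

	match = dmi_first_match(yc_acp_quirk_table);
	return match ? match->driver_data : NULL;
}

Adding a new machine therefore only requires a new table entry with the right DMI_BOARD_VENDOR/DMI_PRODUCT_NAME strings, which is exactly what the hunks above do.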
diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index 13f602f51bf3..fa9693af3722 100644 --- a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -238,16 +238,15 @@ static const struct regmap_bus cs35l56_regmap_bus_sdw = { .val_format_endian_default = REGMAP_ENDIAN_BIG, }; -static int cs35l56_sdw_set_cal_index(struct cs35l56_private *cs35l56) +static int cs35l56_sdw_get_unique_id(struct cs35l56_private *cs35l56) { int ret; - /* SoundWire UniqueId is used to index the calibration array */ ret = sdw_read_no_pm(cs35l56->sdw_peripheral, SDW_SCP_DEVID_0); if (ret < 0) return ret; - cs35l56->base.cal_index = ret & 0xf; + cs35l56->sdw_unique_id = ret & 0xf; return 0; } @@ -259,11 +258,13 @@ static void cs35l56_sdw_init(struct sdw_slave *peripheral) pm_runtime_get_noresume(cs35l56->base.dev); - if (cs35l56->base.cal_index < 0) { - ret = cs35l56_sdw_set_cal_index(cs35l56); - if (ret < 0) - goto out; - } + ret = cs35l56_sdw_get_unique_id(cs35l56); + if (ret) + goto out; + + /* SoundWire UniqueId is used to index the calibration array */ + if (cs35l56->base.cal_index < 0) + cs35l56->base.cal_index = cs35l56->sdw_unique_id; ret = cs35l56_init(cs35l56); if (ret < 0) { @@ -587,6 +588,7 @@ static int cs35l56_sdw_probe(struct sdw_slave *peripheral, const struct sdw_devi cs35l56->base.dev = dev; cs35l56->sdw_peripheral = peripheral; + cs35l56->sdw_link_num = peripheral->bus->link_id; INIT_WORK(&cs35l56->sdw_irq_work, cs35l56_sdw_irq_work); dev_set_drvdata(dev, cs35l56); diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index c78e4746e428..1b42586794ad 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -706,17 +706,41 @@ static int cs35l56_write_cal(struct cs35l56_private *cs35l56) return ret; } -static void cs35l56_reinit_patch(struct cs35l56_private *cs35l56) +static int cs35l56_dsp_download_and_power_up(struct cs35l56_private *cs35l56, + bool load_firmware) { int ret; - /* Use wm_adsp to load and apply the firmware patch and coefficient files */ - ret = wm_adsp_power_up(&cs35l56->dsp, true); + /* + * Abort the first load if it didn't find the suffixed bins and + * we have an alternate fallback suffix. + */ + cs35l56->dsp.bin_mandatory = (load_firmware && cs35l56->fallback_fw_suffix); + + ret = wm_adsp_power_up(&cs35l56->dsp, load_firmware); + if ((ret == -ENOENT) && cs35l56->dsp.bin_mandatory) { + cs35l56->dsp.fwf_suffix = cs35l56->fallback_fw_suffix; + cs35l56->fallback_fw_suffix = NULL; + cs35l56->dsp.bin_mandatory = false; + ret = wm_adsp_power_up(&cs35l56->dsp, load_firmware); + } + if (ret) { - dev_dbg(cs35l56->base.dev, "%s: wm_adsp_power_up ret %d\n", __func__, ret); - return; + dev_dbg(cs35l56->base.dev, "wm_adsp_power_up ret %d\n", ret); + return ret; } + return 0; +} + +static void cs35l56_reinit_patch(struct cs35l56_private *cs35l56) +{ + int ret; + + ret = cs35l56_dsp_download_and_power_up(cs35l56, true); + if (ret) + return; + cs35l56_write_cal(cs35l56); /* Always REINIT after applying patch or coefficients */ @@ -750,11 +774,9 @@ static void cs35l56_patch(struct cs35l56_private *cs35l56, bool firmware_missing * but only if firmware is missing. If firmware is already patched just * power-up wm_adsp without downloading firmware. 
*/ - ret = wm_adsp_power_up(&cs35l56->dsp, !!firmware_missing); - if (ret) { - dev_dbg(cs35l56->base.dev, "%s: wm_adsp_power_up ret %d\n", __func__, ret); + ret = cs35l56_dsp_download_and_power_up(cs35l56, firmware_missing); + if (ret) goto err; - } mutex_lock(&cs35l56->base.irq_lock); @@ -853,6 +875,34 @@ err: pm_runtime_put_autosuspend(cs35l56->base.dev); } +static int cs35l56_set_fw_suffix(struct cs35l56_private *cs35l56) +{ + if (cs35l56->dsp.fwf_suffix) + return 0; + + if (!cs35l56->sdw_peripheral) + return 0; + + cs35l56->dsp.fwf_suffix = devm_kasprintf(cs35l56->base.dev, GFP_KERNEL, + "l%uu%u", + cs35l56->sdw_link_num, + cs35l56->sdw_unique_id); + if (!cs35l56->dsp.fwf_suffix) + return -ENOMEM; + + /* + * There are published firmware files for L56 B0 silicon using + * the ALSA prefix as the filename suffix. Default to trying these + * first, with the new name as an alternate. + */ + if ((cs35l56->base.type == 0x56) && (cs35l56->base.rev == 0xb0)) { + cs35l56->fallback_fw_suffix = cs35l56->dsp.fwf_suffix; + cs35l56->dsp.fwf_suffix = cs35l56->component->name_prefix; + } + + return 0; +} + static int cs35l56_component_probe(struct snd_soc_component *component) { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); @@ -892,6 +942,10 @@ static int cs35l56_component_probe(struct snd_soc_component *component) return -ENOMEM; cs35l56->component = component; + ret = cs35l56_set_fw_suffix(cs35l56); + if (ret) + return ret; + wm_adsp2_component_probe(&cs35l56->dsp, component); debugfs_create_bool("init_done", 0444, debugfs_root, &cs35l56->base.init_done); diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h index 200f695efca3..bd77a57249d7 100644 --- a/sound/soc/codecs/cs35l56.h +++ b/sound/soc/codecs/cs35l56.h @@ -38,6 +38,7 @@ struct cs35l56_private { struct snd_soc_component *component; struct regulator_bulk_data supplies[CS35L56_NUM_BULK_SUPPLIES]; struct sdw_slave *sdw_peripheral; + const char *fallback_fw_suffix; struct work_struct sdw_irq_work; bool sdw_irq_no_unmask; bool soft_resetting; @@ -52,6 +53,8 @@ struct cs35l56_private { bool tdm_mode; bool sysclk_set; u8 old_sdw_clock_scale; + u8 sdw_link_num; + u8 sdw_unique_id; }; extern const struct dev_pm_ops cs35l56_pm_ops_i2c_spi; diff --git a/sound/soc/codecs/cs48l32.c b/sound/soc/codecs/cs48l32.c index 90a795230d27..9bdd48aab42a 100644 --- a/sound/soc/codecs/cs48l32.c +++ b/sound/soc/codecs/cs48l32.c @@ -2162,6 +2162,10 @@ static int cs48l32_hw_params(struct snd_pcm_substream *substream, n_slots_multiple = 1; sclk_target = snd_soc_tdm_params_to_bclk(params, slotw, n_slots, n_slots_multiple); + if (sclk_target < 0) { + cs48l32_asp_err(dai, "Invalid parameters\n"); + return sclk_target; + } for (i = 0; i < ARRAY_SIZE(cs48l32_sclk_rates); i++) { if ((cs48l32_sclk_rates[i].freq >= sclk_target) && diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index 066d92b54312..78c4e68f6002 100644 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -1079,8 +1079,7 @@ static void es8326_init(struct snd_soc_component *component) regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00); regmap_write(es8326->regmap, ES8326_INTOUT_IO, es8326->interrupt_clk); - regmap_write(es8326->regmap, ES8326_SDINOUT1_IO, - (ES8326_IO_DMIC_CLK << ES8326_SDINOUT1_SHIFT)); + regmap_write(es8326->regmap, ES8326_SDINOUT1_IO, ES8326_IO_INPUT); regmap_write(es8326->regmap, ES8326_SDINOUT23_IO, ES8326_IO_INPUT); regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x00); diff --git 
a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 3c580faab3b7..8a1d5cc75d6c 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -783,16 +783,19 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, char **coeff_filename) { const char *system_name = dsp->system_name; - const char *asoc_component_prefix = dsp->component->name_prefix; + const char *suffix = dsp->component->name_prefix; int ret = 0; - if (system_name && asoc_component_prefix) { + if (dsp->fwf_suffix) + suffix = dsp->fwf_suffix; + + if (system_name && suffix) { if (!wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename, cirrus_dir, system_name, - asoc_component_prefix, "wmfw")) { + suffix, "wmfw")) { wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, cirrus_dir, system_name, - asoc_component_prefix, "bin"); + suffix, "bin"); return 0; } } @@ -801,10 +804,10 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, if (!wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename, cirrus_dir, system_name, NULL, "wmfw")) { - if (asoc_component_prefix) + if (suffix) wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, cirrus_dir, system_name, - asoc_component_prefix, "bin"); + suffix, "bin"); if (!*coeff_firmware) wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, @@ -816,10 +819,10 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, /* Check system-specific bin without wmfw before falling back to generic */ if (dsp->wmfw_optional && system_name) { - if (asoc_component_prefix) + if (suffix) wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, cirrus_dir, system_name, - asoc_component_prefix, "bin"); + suffix, "bin"); if (!*coeff_firmware) wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, @@ -850,7 +853,7 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n", cirrus_dir, dsp->part, dsp->fwf_name ? 
dsp->fwf_name : dsp->cs_dsp.name, - wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix); + wm_adsp_fw[dsp->fw].file, system_name, suffix); return -ENOENT; } @@ -997,11 +1000,17 @@ int wm_adsp_power_up(struct wm_adsp *dsp, bool load_firmware) return ret; } + if (dsp->bin_mandatory && !coeff_firmware) { + ret = -ENOENT; + goto err; + } + ret = cs_dsp_power_up(&dsp->cs_dsp, wmfw_firmware, wmfw_filename, coeff_firmware, coeff_filename, wm_adsp_fw_text[dsp->fw]); +err: wm_adsp_release_firmware_files(dsp, wmfw_firmware, wmfw_filename, coeff_firmware, coeff_filename); diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index edc5b02ae765..25210d404bf1 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -29,12 +29,14 @@ struct wm_adsp { const char *part; const char *fwf_name; const char *system_name; + const char *fwf_suffix; struct snd_soc_component *component; unsigned int sys_config_size; int fw; bool wmfw_optional; + bool bin_mandatory; struct work_struct boot_work; int (*control_add)(struct wm_adsp *dsp, struct cs_dsp_coeff_ctl *cs_ctl); diff --git a/sound/soc/intel/common/sof-function-topology-lib.c b/sound/soc/intel/common/sof-function-topology-lib.c index 90fe7aa3df1c..3cc81dcf047e 100644 --- a/sound/soc/intel/common/sof-function-topology-lib.c +++ b/sound/soc/intel/common/sof-function-topology-lib.c @@ -73,7 +73,8 @@ int sof_sdw_get_tplg_files(struct snd_soc_card *card, const struct snd_soc_acpi_ break; default: dev_warn(card->dev, - "only -2ch and -4ch are supported for dmic\n"); + "unsupported number of dmics: %d\n", + mach_params.dmic_num); continue; } tplg_dev = TPLG_DEVICE_INTEL_PCH_DMIC; diff --git a/sound/soc/loongson/loongson_i2s.c b/sound/soc/loongson/loongson_i2s.c index e8852a30f213..e336656e13eb 100644 --- a/sound/soc/loongson/loongson_i2s.c +++ b/sound/soc/loongson/loongson_i2s.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/platform_device.h> #include <linux/delay.h> +#include <linux/export.h> #include <linux/pm_runtime.h> #include <linux/dma-mapping.h> #include <sound/soc.h> diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index b7060b746356..d75e7292240b 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -1205,6 +1205,8 @@ static int is_sdca_endpoint_present(struct device *dev, int i; dlc = kzalloc(sizeof(*dlc), GFP_KERNEL); + if (!dlc) + return -ENOMEM; adr_end = &adr_dev->endpoints[end_index]; dai_info = &codec_info->dais[adr_end->num]; diff --git a/sound/soc/sof/imx/imx8.c b/sound/soc/sof/imx/imx8.c index a40a8047873e..b73dd91bd529 100644 --- a/sound/soc/sof/imx/imx8.c +++ b/sound/soc/sof/imx/imx8.c @@ -40,6 +40,19 @@ struct imx8m_chip_data { struct reset_control *run_stall; }; +static int imx8_shutdown(struct snd_sof_dev *sdev) +{ + /* + * Force the DSP to stall. After the firmware image is loaded, + * the stall will be removed during run() by a matching + * imx_sc_pm_cpu_start() call. + */ + imx_sc_pm_cpu_start(get_chip_pdata(sdev), IMX_SC_R_DSP, false, + RESET_VECTOR_VADDR); + + return 0; +} + /* * DSP control. 
*/ @@ -281,11 +294,13 @@ static int imx8_ops_init(struct snd_sof_dev *sdev) static const struct imx_chip_ops imx8_chip_ops = { .probe = imx8_probe, .core_kick = imx8_run, + .core_shutdown = imx8_shutdown, }; static const struct imx_chip_ops imx8x_chip_ops = { .probe = imx8_probe, .core_kick = imx8x_run, + .core_shutdown = imx8_shutdown, }; static const struct imx_chip_ops imx8m_chip_ops = { diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 0e9b5431a47f..faac7df1fbcf 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -383,6 +383,13 @@ static const struct usbmix_name_map ms_usb_link_map[] = { { 0 } /* terminator */ }; +/* KTMicro USB */ +static struct usbmix_name_map s31b2_0022_map[] = { + { 23, "Speaker Playback" }, + { 18, "Headphone Playback" }, + { 0 } +}; + /* ASUS ROG Zenith II with Realtek ALC1220-VB */ static const struct usbmix_name_map asus_zenith_ii_map[] = { { 19, NULL, 12 }, /* FU, Input Gain Pad - broken response, disabled */ @@ -692,6 +699,11 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x045e, 0x083c), .map = ms_usb_link_map, }, + { + /* KTMicro USB */ + .id = USB_ID(0X31b2, 0x0022), + .map = s31b2_0022_map, + }, { 0 } /* terminator */ }; diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index af9d9acaf997..ed5f3892674c 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -431,10 +431,11 @@ enum { /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 -#define KVM_ARM_VCPU_PMU_V3_IRQ 0 -#define KVM_ARM_VCPU_PMU_V3_INIT 1 -#define KVM_ARM_VCPU_PMU_V3_FILTER 2 -#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/tools/arch/x86/include/asm/amd/ibs.h b/tools/arch/x86/include/asm/amd/ibs.h index 300b6e0765b2..cbce54fec7b9 100644 --- a/tools/arch/x86/include/asm/amd/ibs.h +++ b/tools/arch/x86/include/asm/amd/ibs.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_IBS_H +#define _ASM_X86_AMD_IBS_H + /* * From PPR Vol 1 for AMD Family 19h Model 01h B1 * 55898 Rev 0.35 - Feb 5, 2021 @@ -151,3 +154,5 @@ struct perf_ibs_data { }; u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; }; + +#endif /* _ASM_X86_AMD_IBS_H */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index e02be2962a01..ee176236c2be 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -336,7 +336,7 @@ #define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ #define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/ +#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/ #define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ #define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" 
Virtualized Speculative Store Bypass Disable */ @@ -379,6 +379,7 @@ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ #define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ +#define X86_FEATURE_BUS_LOCK_THRESHOLD (15*32+29) /* Bus lock threshold */ #define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ @@ -447,6 +448,7 @@ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ #define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ #define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ +#define X86_FEATURE_ALLOWED_SEV_FEATURES (19*32+27) /* Allowed SEV Features */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ #define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ @@ -458,6 +460,7 @@ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ +#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ @@ -482,7 +485,8 @@ #define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ -#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+ 9) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ +#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ /* * BUG word(s) @@ -535,6 +539,8 @@ #define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */ #define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ -#define X86_BUG_ITS X86_BUG( 1*32+ 6) /* "its" CPU is affected by Indirect Target Selection */ -#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 7) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ +#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ +#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ +#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ + #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e7d2f460fcc6..b7dded3c8113 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -533,7 +533,7 @@ #define MSR_HWP_CAPABILITIES 0x00000771 #define MSR_HWP_REQUEST_PKG 0x00000772 #define MSR_HWP_INTERRUPT 0x00000773 -#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_REQUEST 0x00000774 #define MSR_HWP_STATUS 0x00000777 /* CPUID.6.EAX */ @@ -550,16 +550,16 @@ 
#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) /* IA32_HWP_REQUEST */ -#define HWP_MIN_PERF(x) (x & 0xff) -#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) -#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((u64)x & 0xff) << 24) #define HWP_EPP_PERFORMANCE 0x00 #define HWP_EPP_BALANCE_PERFORMANCE 0x80 #define HWP_EPP_BALANCE_POWERSAVE 0xC0 #define HWP_EPP_POWERSAVE 0xFF -#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) -#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) +#define HWP_ACTIVITY_WINDOW(x) ((u64)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((u64)(x & 0x1) << 42) /* IA32_HWP_STATUS */ #define HWP_GUARANTEED_CHANGE(x) (x & 0x1) @@ -602,7 +602,11 @@ /* V6 PMON MSR range */ #define MSR_IA32_PMC_V6_GP0_CTR 0x1900 #define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902 +#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903 #define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982 +#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983 #define MSR_IA32_PMC_V6_STEP 4 /* KeyID partitioning between MKTME and TDX */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index b663d916f162..6f3499507c5e 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -441,6 +441,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) +#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 @@ -931,4 +932,74 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SNP_VM 4 #define KVM_X86_TDX_VM 5 +/* Trust Domain eXtension sub-ioctl() commands. */ +enum kvm_tdx_cmd_id { + KVM_TDX_CAPABILITIES = 0, + KVM_TDX_INIT_VM, + KVM_TDX_INIT_VCPU, + KVM_TDX_INIT_MEM_REGION, + KVM_TDX_FINALIZE_VM, + KVM_TDX_GET_CPUID, + + KVM_TDX_CMD_NR_MAX, +}; + +struct kvm_tdx_cmd { + /* enum kvm_tdx_cmd_id */ + __u32 id; + /* flags for sub-command. If sub-command doesn't use this, set zero. */ + __u32 flags; + /* + * data for each sub-command. An immediate or a pointer to the actual + * data in process virtual address. If sub-command doesn't use it, + * set zero. + */ + __u64 data; + /* + * Auxiliary error code. The sub-command may return TDX SEAMCALL + * status code in addition to -Exxx. + */ + __u64 hw_error; +}; + +struct kvm_tdx_capabilities { + __u64 supported_attrs; + __u64 supported_xfam; + __u64 reserved[254]; + + /* Configurable CPUID bits for userspace */ + struct kvm_cpuid2 cpuid; +}; + +struct kvm_tdx_init_vm { + __u64 attributes; + __u64 xfam; + __u64 mrconfigid[6]; /* sha384 digest */ + __u64 mrowner[6]; /* sha384 digest */ + __u64 mrownerconfig[6]; /* sha384 digest */ + + /* The total space for TD_PARAMS before the CPUIDs is 256 bytes */ + __u64 reserved[12]; + + /* + * Call KVM_TDX_INIT_VM before vcpu creation, thus before + * KVM_SET_CPUID2. + * This configuration supersedes KVM_SET_CPUID2s for VCPUs because the + * TDX module directly virtualizes those CPUIDs without VMM. The user + * space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with + * those values. 
If it doesn't, KVM may have wrong idea of vCPUIDs of + * the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX + * module doesn't virtualize. + */ + struct kvm_cpuid2 cpuid; +}; + +#define KVM_TDX_MEASURE_MEMORY_REGION _BITULL(0) + +struct kvm_tdx_init_mem_region { + __u64 source_addr; + __u64 gpa; + __u64 nr_pages; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index ec1321248dac..9c640a521a67 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -95,6 +95,7 @@ #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_BUS_LOCK 0x0a5 #define SVM_EXIT_IDLE_HLT 0x0a6 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 @@ -225,6 +226,7 @@ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_BUS_LOCK, "buslock" }, \ { SVM_EXIT_IDLE_HLT, "idle-halt" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index a5faf6d88f1b..f0f4a4cf84a7 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -34,6 +34,7 @@ #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 #define EXIT_REASON_SIPI_SIGNAL 4 +#define EXIT_REASON_OTHER_SMI 6 #define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -92,6 +93,7 @@ #define EXIT_REASON_TPAUSE 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 +#define EXIT_REASON_TDCALL 77 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -155,7 +157,8 @@ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ { EXIT_REASON_TPAUSE, "TPAUSE" }, \ { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \ - { EXIT_REASON_NOTIFY, "NOTIFY" } + { EXIT_REASON_NOTIFY, "NOTIFY" }, \ + { EXIT_REASON_TDCALL, "TDCALL" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 59cf6f9065aa..ccc3d923fc1e 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -40,6 +40,7 @@ SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy) +SYM_PIC_ALIAS(memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_START_LOCAL(memcpy_orig) diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index d66b710d628f..fb5a03cf5ab7 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -42,6 +42,7 @@ SYM_FUNC_END(__memset) EXPORT_SYMBOL(__memset) SYM_FUNC_ALIAS_MEMFUNC(memset, __memset) +SYM_PIC_ALIAS(memset) EXPORT_SYMBOL(memset) SYM_FUNC_START_LOCAL(memset_orig) diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c index 1baee9e2aba9..5ab8f80e2834 100644 --- a/tools/bpf/bpf_jit_disasm.c +++ b/tools/bpf/bpf_jit_disasm.c @@ -45,6 +45,8 @@ static void get_exec_path(char *tpath, size_t size) assert(path); len = readlink(path, tpath, size); + if (len < 0) + len = 0; tpath[len] = 0; free(path); diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 6b14cbfa58aa..946612029dee 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -905,7 +905,8 @@ static int do_dump(int argc, char **argv) return -1; } - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = 
map_parse_fd_and_info(&argc, &argv, &info, &len, + BPF_F_RDONLY); if (fd < 0) return -1; @@ -1118,10 +1119,13 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, [BPF_OBJ_PROG] = "prog", [BPF_OBJ_MAP] = "map", }; + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts_ro); __u32 btf_id, id = 0; int err; int fd; + opts_ro.open_flags = BPF_F_RDONLY; + while (true) { switch (type) { case BPF_OBJ_PROG: @@ -1151,7 +1155,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, fd = bpf_prog_get_fd_by_id(id); break; case BPF_OBJ_MAP: - fd = bpf_map_get_fd_by_id(id); + fd = bpf_map_get_fd_by_id_opts(id, &opts_ro); break; default: err = -1; diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index ecfa790adc13..b07317d2842f 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -4,6 +4,7 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif +#include <assert.h> #include <ctype.h> #include <errno.h> #include <fcntl.h> @@ -193,7 +194,8 @@ int mount_tracefs(const char *target) return err; } -int open_obj_pinned(const char *path, bool quiet) +int open_obj_pinned(const char *path, bool quiet, + const struct bpf_obj_get_opts *opts) { char *pname; int fd = -1; @@ -205,7 +207,7 @@ int open_obj_pinned(const char *path, bool quiet) goto out_ret; } - fd = bpf_obj_get(pname); + fd = bpf_obj_get_opts(pname, opts); if (fd < 0) { if (!quiet) p_err("bpf obj get (%s): %s", pname, @@ -221,12 +223,13 @@ out_ret: return fd; } -int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type) +int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type, + const struct bpf_obj_get_opts *opts) { enum bpf_obj_type type; int fd; - fd = open_obj_pinned(path, false); + fd = open_obj_pinned(path, false, opts); if (fd < 0) return -1; @@ -555,7 +558,7 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, if (typeflag != FTW_F) goto out_ret; - fd = open_obj_pinned(fpath, true); + fd = open_obj_pinned(fpath, true, NULL); if (fd < 0) goto out_ret; @@ -928,7 +931,7 @@ int prog_parse_fds(int *argc, char ***argv, int **fds) path = **argv; NEXT_ARGP(); - (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG); + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG, NULL); if ((*fds)[0] < 0) return -1; return 1; @@ -965,7 +968,8 @@ exit_free: return fd; } -static int map_fd_by_name(char *name, int **fds) +static int map_fd_by_name(char *name, int **fds, + const struct bpf_get_fd_by_id_opts *opts) { unsigned int id = 0; int fd, nb_fds = 0; @@ -973,6 +977,7 @@ static int map_fd_by_name(char *name, int **fds) int err; while (true) { + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts_ro); struct bpf_map_info info = {}; __u32 len = sizeof(info); @@ -985,7 +990,9 @@ static int map_fd_by_name(char *name, int **fds) return nb_fds; } - fd = bpf_map_get_fd_by_id(id); + /* Request a read-only fd to query the map info */ + opts_ro.open_flags = BPF_F_RDONLY; + fd = bpf_map_get_fd_by_id_opts(id, &opts_ro); if (fd < 0) { p_err("can't get map by id (%u): %s", id, strerror(errno)); @@ -1004,6 +1011,19 @@ static int map_fd_by_name(char *name, int **fds) continue; } + /* Get an fd with the requested options, if they differ + * from the read-only options used to get the fd above. 
+ */ + if (memcmp(opts, &opts_ro, sizeof(opts_ro))) { + close(fd); + fd = bpf_map_get_fd_by_id_opts(id, opts); + if (fd < 0) { + p_err("can't get map by id (%u): %s", id, + strerror(errno)); + goto err_close_fds; + } + } + if (nb_fds > 0) { tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); if (!tmp) { @@ -1023,8 +1043,13 @@ err_close_fds: return -1; } -int map_parse_fds(int *argc, char ***argv, int **fds) +int map_parse_fds(int *argc, char ***argv, int **fds, __u32 open_flags) { + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts); + + assert((open_flags & ~BPF_F_RDONLY) == 0); + opts.open_flags = open_flags; + if (is_prefix(**argv, "id")) { unsigned int id; char *endptr; @@ -1038,7 +1063,7 @@ int map_parse_fds(int *argc, char ***argv, int **fds) } NEXT_ARGP(); - (*fds)[0] = bpf_map_get_fd_by_id(id); + (*fds)[0] = bpf_map_get_fd_by_id_opts(id, &opts); if ((*fds)[0] < 0) { p_err("get map by id (%u): %s", id, strerror(errno)); return -1; @@ -1056,16 +1081,18 @@ int map_parse_fds(int *argc, char ***argv, int **fds) } NEXT_ARGP(); - return map_fd_by_name(name, fds); + return map_fd_by_name(name, fds, &opts); } else if (is_prefix(**argv, "pinned")) { char *path; + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); + get_opts.file_flags = open_flags; NEXT_ARGP(); path = **argv; NEXT_ARGP(); - (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP); + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP, &get_opts); if ((*fds)[0] < 0) return -1; return 1; @@ -1075,7 +1102,7 @@ int map_parse_fds(int *argc, char ***argv, int **fds) return -1; } -int map_parse_fd(int *argc, char ***argv) +int map_parse_fd(int *argc, char ***argv, __u32 open_flags) { int *fds = NULL; int nb_fds, fd; @@ -1085,7 +1112,7 @@ int map_parse_fd(int *argc, char ***argv) p_err("mem alloc failed"); return -1; } - nb_fds = map_parse_fds(argc, argv, &fds); + nb_fds = map_parse_fds(argc, argv, &fds, open_flags); if (nb_fds != 1) { if (nb_fds > 1) { p_err("several maps match this handle"); @@ -1103,12 +1130,12 @@ exit_free: } int map_parse_fd_and_info(int *argc, char ***argv, struct bpf_map_info *info, - __u32 *info_len) + __u32 *info_len, __u32 open_flags) { int err; int fd; - fd = map_parse_fd(argc, argv); + fd = map_parse_fd(argc, argv, open_flags); if (fd < 0) return -1; diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 5c39c2ed36a2..df5f0d1e07e8 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -37,7 +37,7 @@ static int do_pin(int argc, char **argv) return -1; } - map_fd = map_parse_fd(&argc, &argv); + map_fd = map_parse_fd(&argc, &argv, BPF_F_RDONLY); if (map_fd < 0) return -1; diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 3535afc80a49..a773e05d5ade 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -117,7 +117,7 @@ static int link_parse_fd(int *argc, char ***argv) path = **argv; NEXT_ARGP(); - return open_obj_pinned_any(path, BPF_OBJ_LINK); + return open_obj_pinned_any(path, BPF_OBJ_LINK, NULL); } p_err("expected 'id' or 'pinned', got: '%s'?", **argv); @@ -485,6 +485,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_RAW_TRACEPOINT: jsonw_string_field(json_wtr, "tp_name", u64_to_ptr(info->raw_tracepoint.tp_name)); + jsonw_uint_field(json_wtr, "cookie", info->raw_tracepoint.cookie); break; case BPF_LINK_TYPE_TRACING: err = get_prog_info(info->prog_id, &prog_info); @@ -502,6 +503,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) json_wtr); jsonw_uint_field(json_wtr, "target_obj_id", 
info->tracing.target_obj_id); jsonw_uint_field(json_wtr, "target_btf_id", info->tracing.target_btf_id); + jsonw_uint_field(json_wtr, "cookie", info->tracing.cookie); break; case BPF_LINK_TYPE_CGROUP: jsonw_lluint_field(json_wtr, "cgroup_id", @@ -879,6 +881,8 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_RAW_TRACEPOINT: printf("\n\ttp '%s' ", (const char *)u64_to_ptr(info->raw_tracepoint.tp_name)); + if (info->raw_tracepoint.cookie) + printf("cookie %llu ", info->raw_tracepoint.cookie); break; case BPF_LINK_TYPE_TRACING: err = get_prog_info(info->prog_id, &prog_info); @@ -897,6 +901,8 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) printf("\n\ttarget_obj_id %u target_btf_id %u ", info->tracing.target_obj_id, info->tracing.target_btf_id); + if (info->tracing.cookie) + printf("\n\tcookie %llu ", info->tracing.cookie); break; case BPF_LINK_TYPE_CGROUP: printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index cd5963cb6058..2b7f2bd3a7db 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -534,9 +534,9 @@ int main(int argc, char **argv) usage(); if (version_requested) - return do_version(argc, argv); - - ret = cmd_select(commands, argc, argv, do_help); + ret = do_version(argc, argv); + else + ret = cmd_select(commands, argc, argv, do_help); if (json_output) jsonw_destroy(&json_wtr); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 9eb764fe4cc8..6db704fda5c0 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -15,6 +15,7 @@ #include <bpf/hashmap.h> #include <bpf/libbpf.h> +#include <bpf/bpf.h> #include "json_writer.h" @@ -140,8 +141,10 @@ void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, int get_fd_type(int fd); const char *get_fd_type_name(enum bpf_obj_type type); char *get_fdinfo(int fd, const char *key); -int open_obj_pinned(const char *path, bool quiet); -int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type); +int open_obj_pinned(const char *path, bool quiet, + const struct bpf_obj_get_opts *opts); +int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type, + const struct bpf_obj_get_opts *opts); int mount_bpffs_for_file(const char *file_name); int create_and_mount_bpffs_dir(const char *dir_name); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); @@ -167,10 +170,10 @@ int do_iter(int argc, char **argv) __weak; int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); int prog_parse_fds(int *argc, char ***argv, int **fds); -int map_parse_fd(int *argc, char ***argv); -int map_parse_fds(int *argc, char ***argv, int **fds); +int map_parse_fd(int *argc, char ***argv, __u32 open_flags); +int map_parse_fds(int *argc, char ***argv, int **fds, __u32 open_flags); int map_parse_fd_and_info(int *argc, char ***argv, struct bpf_map_info *info, - __u32 *info_len); + __u32 *info_len, __u32 open_flags); struct bpf_prog_linfo; #if defined(HAVE_LLVM_SUPPORT) || defined(HAVE_LIBBFD_SUPPORT) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 81cc668b4b05..c9de44a45778 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -337,9 +337,9 @@ static void fill_per_cpu_value(struct bpf_map_info *info, void *value) memcpy(value + i * step, value, info->value_size); } -static int parse_elem(char **argv, struct bpf_map_info *info, - void *key, void 
*value, __u32 key_size, __u32 value_size, - __u32 *flags, __u32 **value_fd) +static int parse_elem(char **argv, struct bpf_map_info *info, void *key, + void *value, __u32 key_size, __u32 value_size, + __u32 *flags, __u32 **value_fd, __u32 open_flags) { if (!*argv) { if (!key && !value) @@ -362,7 +362,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, return -1; return parse_elem(argv, info, NULL, value, key_size, value_size, - flags, value_fd); + flags, value_fd, open_flags); } else if (is_prefix(*argv, "value")) { int fd; @@ -388,7 +388,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, return -1; } - fd = map_parse_fd(&argc, &argv); + fd = map_parse_fd(&argc, &argv, open_flags); if (fd < 0) return -1; @@ -424,7 +424,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, } return parse_elem(argv, info, key, NULL, key_size, value_size, - flags, NULL); + flags, NULL, open_flags); } else if (is_prefix(*argv, "any") || is_prefix(*argv, "noexist") || is_prefix(*argv, "exist")) { if (!flags) { @@ -440,7 +440,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, *flags = BPF_EXIST; return parse_elem(argv + 1, info, key, value, key_size, - value_size, NULL, value_fd); + value_size, NULL, value_fd, open_flags); } p_err("expected key or value, got: %s", *argv); @@ -639,7 +639,7 @@ static int do_show_subset(int argc, char **argv) p_err("mem alloc failed"); return -1; } - nb_fds = map_parse_fds(&argc, &argv, &fds); + nb_fds = map_parse_fds(&argc, &argv, &fds, BPF_F_RDONLY); if (nb_fds < 1) goto exit_free; @@ -672,12 +672,15 @@ exit_free: static int do_show(int argc, char **argv) { + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts); struct bpf_map_info info = {}; __u32 len = sizeof(info); __u32 id = 0; int err; int fd; + opts.open_flags = BPF_F_RDONLY; + if (show_pinned) { map_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); @@ -707,7 +710,7 @@ static int do_show(int argc, char **argv) break; } - fd = bpf_map_get_fd_by_id(id); + fd = bpf_map_get_fd_by_id_opts(id, &opts); if (fd < 0) { if (errno == ENOENT) continue; @@ -909,7 +912,7 @@ static int do_dump(int argc, char **argv) p_err("mem alloc failed"); return -1; } - nb_fds = map_parse_fds(&argc, &argv, &fds); + nb_fds = map_parse_fds(&argc, &argv, &fds, BPF_F_RDONLY); if (nb_fds < 1) goto exit_free; @@ -997,7 +1000,7 @@ static int do_update(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 0); if (fd < 0) return -1; @@ -1006,7 +1009,7 @@ static int do_update(int argc, char **argv) goto exit_free; err = parse_elem(argv, &info, key, value, info.key_size, - info.value_size, &flags, &value_fd); + info.value_size, &flags, &value_fd, 0); if (err) goto exit_free; @@ -1076,7 +1079,7 @@ static int do_lookup(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, BPF_F_RDONLY); if (fd < 0) return -1; @@ -1084,7 +1087,8 @@ static int do_lookup(int argc, char **argv) if (err) goto exit_free; - err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL, + BPF_F_RDONLY); if (err) goto exit_free; @@ -1127,7 +1131,7 @@ static int do_getnext(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 
BPF_F_RDONLY); if (fd < 0) return -1; @@ -1140,8 +1144,8 @@ static int do_getnext(int argc, char **argv) } if (argc) { - err = parse_elem(argv, &info, key, NULL, info.key_size, 0, - NULL, NULL); + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, + NULL, BPF_F_RDONLY); if (err) goto exit_free; } else { @@ -1198,7 +1202,7 @@ static int do_delete(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 0); if (fd < 0) return -1; @@ -1209,7 +1213,8 @@ static int do_delete(int argc, char **argv) goto exit_free; } - err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL, + 0); if (err) goto exit_free; @@ -1226,11 +1231,16 @@ exit_free: return err; } +static int map_parse_read_only_fd(int *argc, char ***argv) +{ + return map_parse_fd(argc, argv, BPF_F_RDONLY); +} + static int do_pin(int argc, char **argv) { int err; - err = do_pin_any(argc, argv, map_parse_fd); + err = do_pin_any(argc, argv, map_parse_read_only_fd); if (!err && json_output) jsonw_null(json_wtr); return err; @@ -1319,7 +1329,7 @@ offload_dev: if (!REQ_ARGS(2)) usage(); inner_map_fd = map_parse_fd_and_info(&argc, &argv, - &info, &len); + &info, &len, BPF_F_RDONLY); if (inner_map_fd < 0) return -1; attr.inner_map_fd = inner_map_fd; @@ -1368,7 +1378,7 @@ static int do_pop_dequeue(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 0); if (fd < 0) return -1; @@ -1407,7 +1417,7 @@ static int do_freeze(int argc, char **argv) if (!REQ_ARGS(2)) return -1; - fd = map_parse_fd(&argc, &argv); + fd = map_parse_fd(&argc, &argv, 0); if (fd < 0) return -1; diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 552b4ca40c27..bcb767e2d673 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -128,7 +128,8 @@ int do_event_pipe(int argc, char **argv) int err, map_fd; map_info_len = sizeof(map_info); - map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len); + map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len, + 0); if (map_fd < 0) return -1; diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 64f958f437b0..cfc6f944f7c3 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -366,17 +366,18 @@ static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_netdev_t *netinfo = cookie; struct ifinfomsg *ifinfo = msg; + struct ip_devname_ifindex *tmp; if (netinfo->filter_idx > 0 && netinfo->filter_idx != ifinfo->ifi_index) return 0; if (netinfo->used_len == netinfo->array_len) { - netinfo->devices = realloc(netinfo->devices, - (netinfo->array_len + 16) * - sizeof(struct ip_devname_ifindex)); - if (!netinfo->devices) + tmp = realloc(netinfo->devices, + (netinfo->array_len + 16) * sizeof(struct ip_devname_ifindex)); + if (!tmp) return -ENOMEM; + netinfo->devices = tmp; netinfo->array_len += 16; } netinfo->devices[netinfo->used_len].ifindex = ifinfo->ifi_index; @@ -395,6 +396,7 @@ static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_tcinfo_t *tcinfo = cookie; struct tcmsg *info = msg; + struct tc_kind_handle *tmp; if (tcinfo->is_qdisc) { /* skip clsact qdisc */ @@ -406,11 +408,12 @@ static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr 
**tb) } if (tcinfo->used_len == tcinfo->array_len) { - tcinfo->handle_array = realloc(tcinfo->handle_array, + tmp = realloc(tcinfo->handle_array, (tcinfo->array_len + 16) * sizeof(struct tc_kind_handle)); - if (!tcinfo->handle_array) + if (!tmp) return -ENOMEM; + tcinfo->handle_array = tmp; tcinfo->array_len += 16; } tcinfo->handle_array[tcinfo->used_len].handle = info->tcm_handle; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 96eea8a67225..deeaa5c1ed7d 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1062,7 +1062,7 @@ static int parse_attach_detach_args(int argc, char **argv, int *progfd, if (!REQ_ARGS(2)) return -EINVAL; - *mapfd = map_parse_fd(&argc, &argv); + *mapfd = map_parse_fd(&argc, &argv, 0); if (*mapfd < 0) return *mapfd; @@ -1608,7 +1608,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) } NEXT_ARG(); - fd = map_parse_fd(&argc, &argv); + fd = map_parse_fd(&argc, &argv, 0); if (fd < 0) goto err_free_reuse_maps; diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index afbddea3a39c..ce1b556dfa90 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -17,7 +17,7 @@ endif # Overrides for the prepare step libraries. HOST_OVERRIDES := AR="$(HOSTAR)" CC="$(HOSTCC)" LD="$(HOSTLD)" ARCH="$(HOSTARCH)" \ - CROSS_COMPILE="" EXTRA_CFLAGS="$(HOSTCFLAGS)" + CROSS_COMPILE="" CLANG_CROSS_FLAGS="" EXTRA_CFLAGS="$(HOSTCFLAGS)" RM ?= rm HOSTCC ?= gcc diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 14fd0ca9a6cd..7ad056219115 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -12,6 +12,7 @@ #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) #define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) #define BITS_PER_BYTE 8 +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) /* * Create a contiguous bitmask starting at bit position @l and ending at @@ -19,16 +20,68 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #if !defined(__ASSEMBLY__) + +/* + * Missing asm support + * + * GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(), + * something not available in asm. Nevertheless, fixed width integers is a C + * concept. Assembly code can rely on the long and long long versions instead. + */ + #include <linux/build_bug.h> #include <linux/compiler.h> +#include <linux/overflow.h> + #define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) -#else + +/* + * Generate a mask for the specified type @t. Additional checks are made to + * guarantee the value returned fits in that type, relying on + * -Wshift-count-overflow compiler check to detect incompatible arguments. + * For example, all these create build errors or warnings: + * + * - GENMASK(15, 20): wrong argument order + * - GENMASK(72, 15): doesn't fit unsigned long + * - GENMASK_U32(33, 15): doesn't fit in a u32 + */ +#define GENMASK_TYPE(t, h, l) \ + ((t)(GENMASK_INPUT_CHECK(h, l) + \ + (type_max(t) << (l) & \ + type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) + +#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) +#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) +#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) +#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) + +/* + * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). 
The + * following examples generate compiler warnings due to -Wshift-count-overflow: + * + * - BIT_U8(8) + * - BIT_U32(-1) + * - BIT_U32(40) + */ +#define BIT_INPUT_CHECK(type, nr) \ + BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type))) + +#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr))) + +#define BIT_U8(nr) BIT_TYPE(u8, nr) +#define BIT_U16(nr) BIT_TYPE(u16, nr) +#define BIT_U32(nr) BIT_TYPE(u32, nr) +#define BIT_U64(nr) BIT_TYPE(u64, nr) + +#else /* defined(__ASSEMBLY__) */ + /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ #define GENMASK_INPUT_CHECK(h, l) 0 -#endif + +#endif /* !defined(__ASSEMBLY__) */ #define GENMASK(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index b4898ff085de..ab2aa97bd8ce 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -4,17 +4,17 @@ #include <linux/compiler.h> -#ifdef __CHECKER__ -#define BUILD_BUG_ON_ZERO(e) (0) -#else /* __CHECKER__ */ /* * Force a compilation error if condition is true, but also produce a * result (of value 0 and type int), so the expression can be used * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). + * + * Take an error message as an optional second argument. If omitted, + * default to the stringification of the tested expression. */ -#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) -#endif /* __CHECKER__ */ +#define BUILD_BUG_ON_ZERO(e, ...) \ + __BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true") /* Force a compilation error if a constant expression is not a power of 2 */ #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index d627e66a04a6..33411ca0cc90 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -244,6 +244,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __asm__ ("" : "=r" (var) : "0" (var)) #endif +#ifndef __BUILD_BUG_ON_ZERO_MSG +#if defined(__clang__) +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)(sizeof(struct { int:(-!!(e)); }))) +#else +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);})) +#endif +#endif + #endif /* __ASSEMBLY__ */ #endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 7fba37b94401..e63a71d3c607 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -905,13 +905,17 @@ struct drm_syncobj_destroy { }; #define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1) #define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1) struct drm_syncobj_handle { __u32 handle; __u32 flags; __s32 fd; __u32 pad; + + __u64 point; }; struct drm_syncobj_transfer { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0b4a2f124d11..719ba230032f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1794,6 +1794,13 @@ union bpf_attr { }; __u64 expected_revision; } netkit; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } cgroup; }; } link_create; @@ -2403,7 +2410,7 @@ union bpf_attr { * into it. 
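The fixed-width GENMASK_U*()/BIT_U*() helpers above lend themselves to a quick illustration. A minimal user-space sketch, assuming a typedef'd u32; GENMASK_U32() is simplified here (the real macro also folds in GENMASK_INPUT_CHECK() and type_max() from overflow.h), but the shift arithmetic matches the patch:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t u32;

    #define BITS_PER_TYPE(t) (sizeof(t) * 8)

    /* Simplified stand-in for GENMASK_TYPE(u32, h, l); the kernel
     * version additionally rejects h < l at compile time via
     * BUILD_BUG_ON_ZERO() and relies on -Wshift-count-overflow to
     * catch masks that cannot fit in the target type. */
    #define GENMASK_U32(h, l) \
            ((u32)((~(u32)0 << (l)) & \
                   (~(u32)0 >> (BITS_PER_TYPE(u32) - 1 - (h)))))

    int main(void)
    {
            printf("0x%08x\n", GENMASK_U32(15, 8));  /* 0x0000ff00 */
            /* GENMASK_U32(33, 15) would warn: shift count out of range,
             * which is exactly the diagnostic the patch relies on. */
            return 0;
    }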
An example is available in file * *samples/bpf/trace_output_user.c* in the Linux kernel source * tree (the eBPF program counterpart is in - * *samples/bpf/trace_output_kern.c*). + * *samples/bpf/trace_output.bpf.c*). * * **bpf_perf_event_output**\ () achieves better performance * than **bpf_trace_printk**\ () for sharing data with user @@ -6653,11 +6660,15 @@ struct bpf_link_info { struct { __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */ __u32 tp_name_len; /* in/out: tp_name buffer len */ + __u32 :32; + __u64 cookie; } raw_tracepoint; struct { __u32 attach_type; __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */ __u32 target_btf_id; /* BTF type id inside the object */ + __u32 :32; + __u64 cookie; } tracing; struct { __u64 cgroup_id; diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7a8f4c290187..3aff99f2696a 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -119,7 +119,7 @@ struct fscrypt_key_specifier { */ struct fscrypt_provisioning_key_payload { __u32 type; - __u32 __reserved; + __u32 flags; __u8 raw[]; }; @@ -128,7 +128,9 @@ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; __u32 key_id; - __u32 __reserved[8]; +#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001 + __u32 flags; + __u32 __reserved[7]; __u8 raw[]; }; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index b6ae8ad8934b..d00b85cb168c 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -375,6 +375,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_WAKEUP 4 #define KVM_SYSTEM_EVENT_SUSPEND 5 #define KVM_SYSTEM_EVENT_SEV_TERM 6 +#define KVM_SYSTEM_EVENT_TDX_FATAL 7 __u32 type; __u32 ndata; union { @@ -930,6 +931,9 @@ struct kvm_enable_cap { #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_ARM_EL2 240 +#define KVM_CAP_ARM_EL2_E2H0 241 +#define KVM_CAP_RISCV_MP_STATE_RESET 242 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index f78ee3670dd5..1686861aae20 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -182,8 +182,12 @@ struct statx { /* File offset alignment for direct I/O reads */ __u32 stx_dio_read_offset_align; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ /* 0x100 */ }; diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a9c3e33d0f8a..6eb421ccf91b 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -837,6 +837,50 @@ int bpf_link_create(int prog_fd, int target_fd, if (!OPTS_ZEROED(opts, netkit)) return libbpf_err(-EINVAL); break; + case BPF_CGROUP_INET_INGRESS: + case BPF_CGROUP_INET_EGRESS: + case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_INET_SOCK_RELEASE: + case BPF_CGROUP_INET4_BIND: + case BPF_CGROUP_INET6_BIND: + case BPF_CGROUP_INET4_POST_BIND: + case BPF_CGROUP_INET6_POST_BIND: + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: + case BPF_CGROUP_INET4_GETPEERNAME: + case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: + case BPF_CGROUP_INET4_GETSOCKNAME: + case BPF_CGROUP_INET6_GETSOCKNAME: + case 
BPF_CGROUP_UNIX_GETSOCKNAME: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: + case BPF_CGROUP_SOCK_OPS: + case BPF_CGROUP_DEVICE: + case BPF_CGROUP_SYSCTL: + case BPF_CGROUP_GETSOCKOPT: + case BPF_CGROUP_SETSOCKOPT: + case BPF_LSM_CGROUP: + relative_fd = OPTS_GET(opts, cgroup.relative_fd, 0); + relative_id = OPTS_GET(opts, cgroup.relative_id, 0); + if (relative_fd && relative_id) + return libbpf_err(-EINVAL); + if (relative_id) { + attr.link_create.cgroup.relative_id = relative_id; + attr.link_create.flags |= BPF_F_ID; + } else { + attr.link_create.cgroup.relative_fd = relative_fd; + } + attr.link_create.cgroup.expected_revision = + OPTS_GET(opts, cgroup.expected_revision, 0); + if (!OPTS_ZEROED(opts, cgroup)) + return libbpf_err(-EINVAL); + break; default: if (!OPTS_ZEROED(opts, flags)) return libbpf_err(-EINVAL); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 777627d33d25..1342564214c8 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -438,6 +438,11 @@ struct bpf_link_create_opts { __u32 relative_id; __u64 expected_revision; } netkit; + struct { + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + } cgroup; }; size_t :0; }; diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index f1d495dc66bb..37682908cb0f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1384,12 +1384,12 @@ static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) fd = open(path, O_RDONLY); if (fd < 0) - return libbpf_err_ptr(-errno); + return ERR_PTR(-errno); if (fstat(fd, &st) < 0) { err = -errno; close(fd); - return libbpf_err_ptr(err); + return ERR_PTR(err); } data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); @@ -1397,7 +1397,7 @@ static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) close(fd); if (data == MAP_FAILED) - return libbpf_err_ptr(err); + return ERR_PTR(err); btf = btf_new(data, st.st_size, base_btf, true); if (IS_ERR(btf)) diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 4392451d634b..ccfd905f03df 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -326,9 +326,10 @@ struct btf_dump_type_data_opts { bool compact; /* no newlines/indentation */ bool skip_names; /* skip member/type names */ bool emit_zeroes; /* show 0-valued fields */ + bool emit_strings; /* print char arrays as strings */ size_t :0; }; -#define btf_dump_type_data_opts__last_field emit_zeroes +#define btf_dump_type_data_opts__last_field emit_strings LIBBPF_API int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 460c3e57fadb..f09f25eccf3c 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -68,6 +68,7 @@ struct btf_dump_data { bool compact; bool skip_names; bool emit_zeroes; + bool emit_strings; __u8 indent_lvl; /* base indent level */ char indent_str[BTF_DATA_INDENT_STR_LEN]; /* below are used during iteration */ @@ -226,6 +227,9 @@ static void btf_dump_free_names(struct hashmap *map) size_t bkt; struct hashmap_entry *cur; + if (!map) + return; + hashmap__for_each_entry(map, cur, bkt) free((void *)cur->pkey); @@ -2028,6 +2032,52 @@ static int btf_dump_var_data(struct btf_dump *d, return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0); } +static int btf_dump_string_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const 
struct btf_array *array = btf_array(t); + const char *chars = data; + __u32 i; + + /* Make sure it is a NUL-terminated string. */ + for (i = 0; i < array->nelems; i++) { + if ((void *)(chars + i) >= d->typed_dump->data_end) + return -E2BIG; + if (chars[i] == '\0') + break; + } + if (i == array->nelems) { + /* The caller will print this as a regular array. */ + return -EINVAL; + } + + btf_dump_data_pfx(d); + btf_dump_printf(d, "\""); + + for (i = 0; i < array->nelems; i++) { + char c = chars[i]; + + if (c == '\0') { + /* + * When printing character arrays as strings, NUL bytes + * are always treated as string terminators; they are + * never printed. + */ + break; + } + if (isprint(c)) + btf_dump_printf(d, "%c", c); + else + btf_dump_printf(d, "\\x%02x", (__u8)c); + } + + btf_dump_printf(d, "\""); + + return 0; +} + static int btf_dump_array_data(struct btf_dump *d, const struct btf_type *t, __u32 id, @@ -2055,8 +2105,13 @@ static int btf_dump_array_data(struct btf_dump *d, * char arrays, so if size is 1 and element is * printable as a char, we'll do that. */ - if (elem_size == 1) + if (elem_size == 1) { + if (d->typed_dump->emit_strings && + btf_dump_string_data(d, t, id, data) == 0) { + return 0; + } d->typed_dump->is_array_char = true; + } } /* note that we increment depth before calling btf_dump_print() below; @@ -2544,6 +2599,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, d->typed_dump->compact = OPTS_GET(opts, compact, false); d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false); + d->typed_dump->emit_strings = OPTS_GET(opts, emit_strings, false); ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e9c641a2fb20..aee36402f0a3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -597,7 +597,7 @@ struct extern_desc { int sym_idx; int btf_id; int sec_btf_id; - const char *name; + char *name; char *essent_name; bool is_set; bool is_weak; @@ -4259,7 +4259,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return ext->btf_id; } t = btf__type_by_id(obj->btf, ext->btf_id); - ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off)); + if (!ext->name) + return -ENOMEM; ext->sym_idx = i; ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; @@ -9138,8 +9140,10 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->btf_custom_path); zfree(&obj->kconfig); - for (i = 0; i < obj->nr_extern; i++) + for (i = 0; i < obj->nr_extern; i++) { + zfree(&obj->externs[i].name); zfree(&obj->externs[i].essent_name); + } zfree(&obj->externs); obj->nr_extern = 0; @@ -12838,6 +12842,34 @@ struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifi } struct bpf_link * +bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd, + const struct bpf_cgroup_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); + __u32 relative_id; + int relative_fd; + + if (!OPTS_VALID(opts, bpf_cgroup_opts)) + return libbpf_err_ptr(-EINVAL); + + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + + if (relative_fd && relative_id) { + pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link_create_opts.cgroup.expected_revision = OPTS_GET(opts, expected_revision, 0); + 
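A hedged sketch of how the new emit_strings option is meant to be used from the API side; the appending callback mirrors the selftest added later in this series, and array_type_id is assumed to identify a char[] type in the loaded BTF:

    #include <stdarg.h>
    #include <stdio.h>
    #include <string.h>
    #include <bpf/btf.h>

    /* Append each formatted fragment to a fixed buffer (sketch only:
     * a real caller should track remaining space properly). */
    static void printf_cb(void *ctx, const char *fmt, va_list args)
    {
            char *buf = ctx;
            size_t len = strlen(buf);

            vsnprintf(buf + len, 256 - len, fmt, args);
    }

    void dump_as_string(struct btf *btf, __u32 array_type_id,
                        const void *data, size_t data_sz)
    {
            char out[256] = "";
            DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts,
                    .compact = true,
                    .emit_strings = true,   /* new option from this change */
            );
            struct btf_dump *d = btf_dump__new(btf, printf_cb, out, NULL);

            if (!d)
                    return;
            /* Prints e.g. (char[4])"foo"; falls back to the array form
             * when the data is not NUL-terminated within the array. */
            btf_dump__dump_type_data(d, array_type_id, data, data_sz, &opts);
            puts(out);
            btf_dump__free(d);
    }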
link_create_opts.cgroup.relative_fd = relative_fd; + link_create_opts.cgroup.relative_id = relative_id; + link_create_opts.flags = OPTS_GET(opts, flags, 0); + + return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", &link_create_opts); +} + +struct bpf_link * bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, const struct bpf_tcx_opts *opts) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 1137e7d2e1b5..d1cf813a057b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -877,6 +877,21 @@ LIBBPF_API struct bpf_link * bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, const struct bpf_netkit_opts *opts); +struct bpf_cgroup_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + __u32 flags; + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + size_t :0; +}; +#define bpf_cgroup_opts__last_field expected_revision + +LIBBPF_API struct bpf_link * +bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd, + const struct bpf_cgroup_opts *opts); + struct bpf_map; LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 1205f9a4fe04..c7fc0bde5648 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -437,6 +437,7 @@ LIBBPF_1.6.0 { bpf_linker__add_fd; bpf_linker__new_fd; bpf_object__prepare; + bpf_program__attach_cgroup_opts; bpf_program__func_info; bpf_program__func_info_cnt; bpf_program__line_info; diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 4e4a52742b01..3373b9d45ac4 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -59,7 +59,7 @@ * * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y); * - * USDT is identified by it's <provider-name>:<probe-name> pair of names. Each + * USDT is identified by its <provider-name>:<probe-name> pair of names. Each * individual USDT has a fixed number of arguments (3 in the above example) * and specifies values of each argument as if it was a function call. * @@ -81,7 +81,7 @@ * NOP instruction that kernel can replace with an interrupt instruction to * trigger instrumentation code (BPF program for all that we care about). * - * Semaphore above is and optional feature. It records an address of a 2-byte + * Semaphore above is an optional feature. It records an address of a 2-byte * refcount variable (normally in '.probes' ELF section) used for signaling if * there is anything that is attached to USDT. This is useful for user * applications if, for example, they need to prepare some arguments that are @@ -121,7 +121,7 @@ * a uprobe BPF program (which for kernel, at least currently, is just a kprobe * program, so BPF_PROG_TYPE_KPROBE program type). With the only difference * that uprobe is usually attached at the function entry, while USDT will - * normally will be somewhere inside the function. But it should always be + * normally be somewhere inside the function. But it should always be * pointing to NOP instruction, which makes such uprobes the fastest uprobe * kind. * @@ -151,7 +151,7 @@ * libbpf sets to spec ID during attach time, or, if kernel is too old to * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such * case. The latter means that some modes of operation can't be supported - * without BPF cookie. Such mode is attaching to shared library "generically", + * without BPF cookie. 
Such a mode is attaching to shared library "generically", * without specifying target process. In such case, it's impossible to * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode * is not supported without BPF cookie support. @@ -185,7 +185,7 @@ * as even if USDT spec string is the same, USDT cookie value can be * different. It was deemed excessive to try to deduplicate across independent * USDT attachments by taking into account USDT spec string *and* USDT cookie - * value, which would complicated spec ID accounting significantly for little + * value, which would complicate spec ID accounting significantly for little * gain. */ diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 55b59f6c79b8..61deb5923067 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -231,14 +231,7 @@ class NlMsg: self.extack['unknown'].append(extack) if attr_space: - # We don't have the ability to parse nests yet, so only do global - if 'miss-type' in self.extack and 'miss-nest' not in self.extack: - miss_type = self.extack['miss-type'] - if miss_type in attr_space.attrs_by_val: - spec = attr_space.attrs_by_val[miss_type] - self.extack['miss-type'] = spec['name'] - if 'doc' in spec: - self.extack['miss-type-doc'] = spec['doc'] + self.annotate_extack(attr_space) def _decode_policy(self, raw): policy = {} @@ -264,6 +257,18 @@ class NlMsg: policy['mask'] = attr.as_scalar('u64') return policy + def annotate_extack(self, attr_space): + """ Make extack more human friendly with attribute information """ + + # We don't have the ability to parse nests yet, so only do global + if 'miss-type' in self.extack and 'miss-nest' not in self.extack: + miss_type = self.extack['miss-type'] + if miss_type in attr_space.attrs_by_val: + spec = attr_space.attrs_by_val[miss_type] + self.extack['miss-type'] = spec['name'] + if 'doc' in spec: + self.extack['miss-type-doc'] = spec['doc'] + def cmd(self): return self.nl_type @@ -277,12 +282,12 @@ class NlMsg: class NlMsgs: - def __init__(self, data, attr_space=None): + def __init__(self, data): self.msgs = [] offset = 0 while offset < len(data): - msg = NlMsg(data, offset, attr_space=attr_space) + msg = NlMsg(data, offset) offset += msg.nl_len self.msgs.append(msg) @@ -1034,12 +1039,13 @@ class YnlFamily(SpecFamily): op_rsp = [] while not done: reply = self.sock.recv(self._recv_size) - nms = NlMsgs(reply, attr_space=op.attr_set) + nms = NlMsgs(reply) self._recv_dbg_print(reply, nms) for nl_msg in nms: if nl_msg.nl_seq in reqs_by_seq: (op, vals, req_msg, req_flags) = reqs_by_seq[nl_msg.nl_seq] if nl_msg.extack: + nl_msg.annotate_extack(op.attr_set) self._decode_extack(req_msg, op, nl_msg.extack, vals) else: op = None diff --git a/tools/perf/Documentation/perf-amd-ibs.txt b/tools/perf/Documentation/perf-amd-ibs.txt index 55f80beae037..548549935760 100644 --- a/tools/perf/Documentation/perf-amd-ibs.txt +++ b/tools/perf/Documentation/perf-amd-ibs.txt @@ -171,23 +171,48 @@ Below is a simple example of the perf mem tool. # perf mem report A normal perf mem report output will provide detailed memory access profile. -However, it can also be aggregated based on output fields. 
For example: - - # perf mem report -F mem,sample,snoop - Samples: 3M of event 'ibs_op//', Event count (approx.): 23524876 - Memory access Samples Snoop - N/A 1903343 N/A - L1 hit 1056754 N/A - L2 hit 75231 N/A - L3 hit 9496 HitM - L3 hit 2270 N/A - RAM hit 8710 N/A - Remote node, same socket RAM hit 3241 N/A - Remote core, same node Any cache hit 1572 HitM - Remote core, same node Any cache hit 514 N/A - Remote node, same socket Any cache hit 1216 HitM - Remote node, same socket Any cache hit 350 N/A - Uncached hit 18 N/A +New output fields will show related access info together. For example: + + # perf mem report -F overhead,cache,snoop,comm + ... + # Samples: 92K of event 'ibs_op//' + # Total weight : 531104 + # + # ---------- Cache ----------- --- Snoop ---- + # Overhead L1 L2 L1-buf Other HitM Other Command + # ........ ............................ .............. .......... + # + 76.07% 5.8% 35.7% 0.0% 34.6% 23.3% 52.8% cc1 + 5.79% 0.2% 0.0% 0.0% 5.6% 0.1% 5.7% make + 5.78% 0.1% 4.4% 0.0% 1.2% 0.5% 5.3% gcc + 5.33% 0.3% 3.9% 0.0% 1.1% 0.2% 5.2% as + 5.00% 0.1% 3.8% 0.0% 1.0% 0.3% 4.7% sh + 1.56% 0.1% 0.1% 0.0% 1.4% 0.6% 0.9% ld + 0.28% 0.1% 0.0% 0.0% 0.2% 0.1% 0.2% pkg-config + 0.09% 0.0% 0.0% 0.0% 0.1% 0.0% 0.1% git + 0.03% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% rm + ... + +Also, it can be aggregated based on various memory access info using the +sort keys. For example: + + # perf mem report -s mem,snoop + ... + # Samples: 92K of event 'ibs_op//' + # Total weight : 531104 + # Sort order : mem,snoop + # + # Overhead Samples Memory access Snoop + # ........ ............ ....................................... ............ + # + 47.99% 1509 L2 hit N/A + 25.08% 338 core, same node Any cache hit HitM + 10.24% 54374 N/A N/A + 6.77% 35938 L1 hit N/A + 6.39% 101 core, same node Any cache hit N/A + 3.50% 69 RAM hit N/A + 0.03% 158 LFB/MAB hit N/A + 0.00% 2 Uncached hit N/A Please refer to their man page for more detail. diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 965e73d37772..4d164836d094 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -119,6 +119,22 @@ REPORT OPTIONS And the default sort keys are changed to local_weight, mem, sym, dso, symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat. +-F:: +--fields=:: + Specify output field - multiple keys can be specified in CSV format. + Please see linkperf:perf-report[1] for details. + + In addition to the default fields, 'perf mem report' will provide the + following fields to break down sample periods. + + - op: operation in the sample instruction (load, store, prefetch, ...) + - cache: location in CPU cache (L1, L2, ...) where the sample hit + - mem: location in memory or other places the sample hit + - dtlb: location in Data TLB (L1, L2) where the sample hit + - snoop: snoop result for the sampled data access + + Please take a look at the OUTPUT FIELD SELECTION section for caveats. + -T:: --type-profile:: Show data-type profile result instead of code symbols. This requires @@ -156,6 +172,40 @@ but one sample with weight 180 and the other with weight 20: 90% [k] memcpy 10% [.] strcmp +OUTPUT FIELD SELECTION +---------------------- +"perf mem report" adds a number of new output fields specific to data source +information in the sample. Some of them have the same name with the existing +sort keys ("mem" and "snoop"). So unlike other fields and sort keys, they'll +behave differently when it's used by -F/--fields or -s/--sort. 
+ +Using those two as output fields will aggregate samples altogether and show +breakdown. + + $ perf mem report -F mem,snoop + ... + # ------ Memory ------- --- Snoop ---- + # RAM Uncach Other HitM Other + # ..................... .............. + # + 3.5% 0.0% 96.5% 25.1% 74.9% + +But using the same name for sort keys will aggregate samples for each type +separately. + + $ perf mem report -s mem,snoop + # Overhead Samples Memory access Snoop + # ........ ............ ....................................... ............ + # + 47.99% 1509 L2 hit N/A + 25.08% 338 core, same node Any cache hit HitM + 10.24% 54374 N/A N/A + 6.77% 35938 L1 hit N/A + 6.39% 101 core, same node Any cache hit N/A + 3.50% 69 RAM hit N/A + 0.03% 158 LFB/MAB hit N/A + 0.00% 2 Uncached hit N/A + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-arm-spe[1] diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index fdf133c9520f..d2d6d7f3ea33 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -18,7 +18,6 @@ #include <stdlib.h> #include <linux/compiler.h> #include <linux/kernel.h> -#include <linux/prctl.h> #include <linux/zalloc.h> #include <sys/time.h> #include <sys/mman.h> diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c index 26382e4d8d4c..4c4fee107e59 100644 --- a/tools/perf/bench/futex.c +++ b/tools/perf/bench/futex.c @@ -2,11 +2,18 @@ #include <err.h> #include <stdio.h> #include <stdlib.h> -#include <linux/prctl.h> #include <sys/prctl.h> #include "futex.h" +#ifndef PR_FUTEX_HASH +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 +#endif // PR_FUTEX_HASH + void futex_set_nbuckets_param(struct bench_futex_parameters *params) { unsigned long flags; diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index e9fab20e9330..8085e4d1d8af 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -186,7 +186,7 @@ done # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' -check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' +check arch/x86/include/asm/amd/ibs.h '-I "^#include .*/msr-index.h"' check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' diff --git a/tools/perf/tests/shell/stat+event_uniquifying.sh b/tools/perf/tests/shell/stat+event_uniquifying.sh index 5ec35c52b7d9..bf54bd6c3e2e 100755 --- a/tools/perf/tests/shell/stat+event_uniquifying.sh +++ b/tools/perf/tests/shell/stat+event_uniquifying.sh @@ -9,7 +9,8 @@ perf_tool=perf err=0 test_event_uniquifying() { - # We use `clockticks` to verify the uniquify behavior. + # We use `clockticks` in `uncore_imc` to verify the uniquify behavior. + pmu="uncore_imc" event="clockticks" # If the `-A` option is added, the event should be uniquified. 
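The PR_FUTEX_HASH fallback above can be exercised directly. A minimal sketch using the patch's constants; the exact prctl() argument layout is an assumption based on how the bench code drives it:

    #include <stdio.h>
    #include <sys/prctl.h>

    /* Same fallback as the futex.c hunk above, for older headers. */
    #ifndef PR_FUTEX_HASH
    #define PR_FUTEX_HASH                   78
    # define PR_FUTEX_HASH_SET_SLOTS        1
    # define PR_FUTEX_HASH_GET_SLOTS        2
    #endif

    int main(void)
    {
            /* Request 16 private futex hash slots for this process. */
            if (prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, 16, 0, 0))
                    perror("PR_FUTEX_HASH_SET_SLOTS");

            printf("slots: %ld\n",
                   (long)prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS, 0, 0, 0));
            return 0;
    }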
@@ -43,11 +44,18 @@ test_event_uniquifying() { echo "stat event uniquifying test" uniquified_event_array=() + # Skip if the machine does not have `uncore_imc` device. + if ! ${perf_tool} list pmu | grep -q ${pmu}; then + echo "Target does not support PMU ${pmu} [Skipped]" + err=2 + return + fi + # Check how many uniquified events. while IFS= read -r line; do uniquified_event=$(echo "$line" | awk '{print $1}') uniquified_event_array+=("${uniquified_event}") - done < <(${perf_tool} list -v ${event} | grep "\[Kernel PMU event\]") + done < <(${perf_tool} list -v ${event} | grep ${pmu}) perf_command="${perf_tool} stat -e $event -A -o ${stat_output} -- true" $perf_command diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c index 1d5759d08141..3a2a8438f9af 100644 --- a/tools/perf/tests/tests-scripts.c +++ b/tools/perf/tests/tests-scripts.c @@ -260,6 +260,7 @@ static void append_scripts_in_dir(int dir_fd, continue; /* Skip scripts that have a separate driver. */ fd = openat(dir_fd, ent->d_name, O_PATH); append_scripts_in_dir(fd, result, result_sz); + close(fd); } for (i = 0; i < n_dirs; i++) /* Clean up */ zfree(&entlist[i]); diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index c3322eb3d686..3b262487ec06 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } -static inline size_t msg_data_left(struct msghdr *msg) +static inline size_t msg_data_left(const struct msghdr *msg) { return iov_iter_count(&msg->msg_iter); } diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index e762e1af650c..0098b0ce8ccb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -361,6 +361,7 @@ typedef int __bitwise __kernel_rwf_t; #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) #define PAGE_IS_SOFT_DIRTY (1 << 7) +#define PAGE_IS_GUARD (1 << 8) /* * struct page_region - Page region with flags diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 15c18ef4eb11..43dec6eed559 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -364,4 +364,11 @@ struct prctl_mm_map { # define PR_TIMER_CREATE_RESTORE_IDS_ON 1 # define PR_TIMER_CREATE_RESTORE_IDS_GET 2 +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index f78ee3670dd5..1686861aae20 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -182,8 +182,12 @@ struct statx { /* File offset alignment for direct I/O reads */ __u32 stx_dio_read_offset_align; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ /* 0x100 */ }; diff 
--git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 178b00205fe6..89979ca23c3f 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -132,4 +132,8 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#ifndef SYM_PIC_ALIAS +#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + #endif /* PERF_LINUX_LINKAGE_H_ */ diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index a786cbfb0ff5..83aaf7cda635 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -268,6 +268,7 @@ bool is_event_supported(u8 type, u64 config) ret = evsel__open(evsel, NULL, tmap) >= 0; } + evsel__close(evsel); evsel__delete(evsel); } diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index be8dfac14076..c43db1c41205 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -73,6 +73,7 @@ sbindir ?= /usr/sbin mandir ?= /usr/man libdir ?= /usr/lib libexecdir ?= /usr/libexec +unitdir ?= /usr/lib/systemd/system includedir ?= /usr/include localedir ?= /usr/share/locale docdir ?= /usr/share/doc/packages/cpupower @@ -309,9 +310,9 @@ install-tools: $(OUTPUT)cpupower $(INSTALL_DATA) cpupower-service.conf '$(DESTDIR)${confdir}' $(INSTALL) -d $(DESTDIR)${libexecdir} $(INSTALL_PROGRAM) cpupower.sh '$(DESTDIR)${libexecdir}/cpupower' - $(INSTALL) -d $(DESTDIR)${libdir}/systemd/system - sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${libdir}/systemd/system/cpupower.service' - $(SETPERM_DATA) '$(DESTDIR)${libdir}/systemd/system/cpupower.service' + $(INSTALL) -d $(DESTDIR)${unitdir} + sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${unitdir}/cpupower.service' + $(SETPERM_DATA) '$(DESTDIR)${unitdir}/cpupower.service' install-man: $(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1 @@ -348,7 +349,7 @@ uninstall: - rm -f $(DESTDIR)${bindir}/utils/cpupower - rm -f $(DESTDIR)${confdir}cpupower-service.conf - rm -f $(DESTDIR)${libexecdir}/cpupower - - rm -f $(DESTDIR)${libdir}/systemd/system/cpupower.service + - rm -f $(DESTDIR)${unitdir}/cpupower.service - rm -f $(DESTDIR)${mandir}/man1/cpupower.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1 diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index e2a2c46c008b..3d8378972d26 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -21,7 +21,6 @@ test_lirc_mode2_user flow_dissector_load test_tcpnotify_user test_libbpf -test_sysctl xdping test_cpp *.d diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index cf5ed3bee573..4863106034df 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -73,7 +73,7 @@ endif # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \ test_sockmap \ - test_tcpnotify_user test_sysctl \ + test_tcpnotify_user \ test_progs-no_alu32 TEST_INST_SUBDIRS := no_alu32 @@ -109,6 +109,7 @@ TEST_PROGS := test_kmod.sh \ test_xdping.sh \ test_bpftool_build.sh \ test_bpftool.sh \ + test_bpftool_map.sh \ test_bpftool_metadata.sh \ test_doc_build.sh \ test_xsk.sh \ @@ -220,7 +221,7 @@ ifeq ($(VMLINUX_BTF),) $(error Cannot find a vmlinux for 
VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") endif -# Define simple and short `make test_progs`, `make test_sysctl`, etc targets +# Define simple and short `make test_progs`, `make test_maps`, etc targets # to build individual tests. # NOTE: Semicolon at the end is critical to override lib.mk's default static # rule for binaries. @@ -329,7 +330,6 @@ NETWORK_HELPERS := $(OUTPUT)/network_helpers.o $(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS) $(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tag: $(TESTING_HELPERS) $(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS) $(OUTPUT)/xdping: $(TESTING_HELPERS) @@ -841,6 +841,11 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ +# This works around GCC warning about snprintf truncating strings like: +# +# char a[PATH_MAX], b[PATH_MAX]; +# snprintf(a, "%s/foo", b); // triggers -Wformat-truncation +$(OUTPUT)/veristat.o: CFLAGS += -Wno-format-truncation $(OUTPUT)/veristat.o: $(BPFOBJ) $(OUTPUT)/veristat: $(OUTPUT)/veristat.o $(call msg,BINARY,,$@) diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 8215c9b3115e..9386dfe8b884 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -69,7 +69,7 @@ extern int bpf_get_file_xattr(struct file *file, const char *name, struct bpf_dynptr *value_ptr) __ksym; extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym; -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; extern void bpf_key_put(struct bpf_key *key) __ksym; extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el new file mode 100644 index 000000000000..9acf389dc4ce --- /dev/null +++ b/tools/testing/selftests/bpf/config.ppc64el @@ -0,0 +1,93 @@ +CONFIG_ALTIVEC=y +CONFIG_AUDIT=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BONDING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPF_PRELOAD=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1" +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_CPUSETS=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_FS=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DEVTMPFS=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_FRAME_POINTER=y +CONFIG_FRAME_WARN=1280 +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HVC_CONSOLE=y +CONFIG_INET=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_JUMP_LABEL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KPROBES=y +CONFIG_MEMCG=y +CONFIG_NAMESPACES=y +CONFIG_NET_ACT_BPF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=y +CONFIG_NET=y 
+CONFIG_NO_HZ_IDLE=y +CONFIG_NONPORTABLE=y +CONFIG_NR_CPUS=256 +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCI=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PPC64=y +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y +CONFIG_PPC_PSERIES=y +CONFIG_PPC_RADIX_MMU=y +CONFIG_PRINTK_TIME=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_RT_GROUP_SCHED=y +CONFIG_SECTION_MISMATCH_WARN_ONLY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SMP=y +CONFIG_SOC_VIRT=y +CONFIG_SYSVIPC=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_THREAD_SHIFT=14 +CONFIG_TLS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UPROBES=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VLAN_8021Q=y +CONFIG_VSOCKETS_LOOPBACK=y +CONFIG_VSX=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 6befa870434b..4a0670c056ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -489,10 +489,28 @@ cleanup: bpf_link__destroy(link); } +static int verify_tracing_link_info(int fd, u64 cookie) +{ + struct bpf_link_info info; + int err; + u32 len = sizeof(info); + + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_TRACING, "link_type")) + return -1; + + ASSERT_EQ(info.tracing.cookie, cookie, "tracing_cookie"); + + return 0; +} + static void tracing_subtest(struct test_bpf_cookie *skel) { __u64 cookie; - int prog_fd; + int prog_fd, err; int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1; LIBBPF_OPTS(bpf_test_run_opts, opts); LIBBPF_OPTS(bpf_link_create_opts, link_opts); @@ -507,6 +525,10 @@ static void tracing_subtest(struct test_bpf_cookie *skel) if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create")) goto cleanup; + err = verify_tracing_link_info(fentry_fd, cookie); + if (!ASSERT_OK(err, "verify_tracing_link_info")) + goto cleanup; + cookie = 0x20000000000000L; prog_fd = bpf_program__fd(skel->progs.fexit_test1); link_opts.tracing.cookie = cookie; @@ -635,10 +657,29 @@ cleanup: bpf_link__destroy(link); } +static int verify_raw_tp_link_info(int fd, u64 cookie) +{ + struct bpf_link_info info; + int err; + u32 len = sizeof(info); + + memset(&info, 0, sizeof(info)); + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type")) + return -1; + + ASSERT_EQ(info.raw_tracepoint.cookie, cookie, "raw_tp_cookie"); + + return 0; +} + static void raw_tp_subtest(struct test_bpf_cookie *skel) { __u64 cookie; - int prog_fd, link_fd = -1; + int err, prog_fd, link_fd = -1; struct bpf_link *link = NULL; LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts); LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts); @@ -656,6 +697,11 @@ static void raw_tp_subtest(struct test_bpf_cookie *skel) goto cleanup; usleep(1); /* trigger */ + + err = verify_raw_tp_link_info(link_fd, cookie); + if (!ASSERT_OK(err, "verify_raw_tp_link_info")) + goto cleanup; + close(link_fd); /* detach */ link_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c index fe2c502e5089..ecc3d47919ad 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c @@ -78,7 
+78,7 @@ static int test_setup_uffd(void *fault_addr) } uffd_register.range.start = (unsigned long)fault_addr; - uffd_register.range.len = 4096; + uffd_register.range.len = getpagesize(); uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) { close(uffd); diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index c0a776feec23..82903585c870 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -879,6 +879,122 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, "static int bpf_cgrp_storage_busy = (int)2", 2); } +struct btf_dump_string_ctx { + struct btf *btf; + struct btf_dump *d; + char *str; + struct btf_dump_type_data_opts *opts; + int array_id; +}; + +static int btf_dump_one_string(struct btf_dump_string_ctx *ctx, + char *ptr, size_t ptr_sz, + const char *expected_val) +{ + size_t type_sz; + int ret; + + ctx->str[0] = '\0'; + type_sz = btf__resolve_size(ctx->btf, ctx->array_id); + ret = btf_dump__dump_type_data(ctx->d, ctx->array_id, ptr, ptr_sz, ctx->opts); + if (type_sz <= ptr_sz) { + if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz")) + return -EINVAL; + } + if (!ASSERT_STREQ(ctx->str, expected_val, "ensure expected/actual match")) + return -EFAULT; + return 0; +} + +static void btf_dump_strings(struct btf_dump_string_ctx *ctx) +{ + struct btf_dump_type_data_opts *opts = ctx->opts; + + opts->emit_strings = true; + + opts->compact = true; + opts->emit_zeroes = false; + + opts->skip_names = false; + btf_dump_one_string(ctx, "foo", 4, "(char[4])\"foo\""); + + opts->skip_names = true; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* This should have no effect. */ + opts->emit_zeroes = false; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* This should have no effect. */ + opts->compact = false; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* Non-printable characters come out as hex. */ + btf_dump_one_string(ctx, "fo\xff", 4, "\"fo\\xff\""); + btf_dump_one_string(ctx, "fo\x7", 4, "\"fo\\x07\""); + + /* + * Strings that are too long for the specified type ("char[4]") + * should fall back to the current behavior. + */ + opts->compact = true; + btf_dump_one_string(ctx, "abcde", 6, "['a','b','c','d',]"); + + /* + * Strings that are too short for the specified type ("char[4]") + * should work normally. + */ + btf_dump_one_string(ctx, "ab", 3, "\"ab\""); + + /* Non-NUL-terminated arrays don't get printed as strings. */ + char food[4] = { 'f', 'o', 'o', 'd' }; + char bye[3] = { 'b', 'y', 'e' }; + + btf_dump_one_string(ctx, food, 4, "['f','o','o','d',]"); + btf_dump_one_string(ctx, bye, 3, "['b','y','e',]"); + + /* The embedded NUL should terminate the string. */ + char embed[4] = { 'f', 'o', '\0', 'd' }; + + btf_dump_one_string(ctx, embed, 4, "\"fo\""); +} + +static void test_btf_dump_string_data(void) +{ + struct test_ctx t = {}; + char str[STRSIZE]; + struct btf_dump *d; + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); + struct btf_dump_string_ctx ctx; + int char_id, int_id, array_id; + + if (test_ctx__init(&t)) + return; + + d = btf_dump__new(t.btf, btf_dump_snprintf, str, NULL); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) + return; + + /* Generate BTF for a four-element char array. 
*/ + char_id = btf__add_int(t.btf, "char", 1, BTF_INT_CHAR); + ASSERT_EQ(char_id, 1, "char_id"); + int_id = btf__add_int(t.btf, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(int_id, 2, "int_id"); + array_id = btf__add_array(t.btf, int_id, char_id, 4); + ASSERT_EQ(array_id, 3, "array_id"); + + ctx.btf = t.btf; + ctx.d = d; + ctx.str = str; + ctx.opts = &opts; + ctx.array_id = array_id; + + btf_dump_strings(&ctx); + + btf_dump__free(d); + test_ctx__free(&t); +} + static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str, const char *name, const char *expected_val, void *data, size_t data_sz) @@ -970,6 +1086,8 @@ void test_btf_dump() { test_btf_dump_struct_data(btf, d, str); if (test__start_subtest("btf_dump: var_data")) test_btf_dump_var_data(btf, d, str); + if (test__start_subtest("btf_dump: string_data")) + test_btf_dump_string_data(); btf_dump__free(d); btf__free(btf); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c new file mode 100644 index 000000000000..bb60704a3ef9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "cgroup_mprog.skel.h" + +static void assert_mprog_count(int cg, int atype, int expected) +{ + __u32 count = 0, attach_flags = 0; + int err; + + err = bpf_prog_query(cg, atype, 0, &attach_flags, + NULL, &count); + ASSERT_EQ(count, expected, "count"); + ASSERT_EQ(err, 0, "prog_query"); +} + +static void test_prog_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct cgroup_mprog *skel; + __u32 prog_ids[10]; + int cg, err; + + cg = test__join_cgroup("/prog_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /prog_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE, + .expected_revision = 2, + ); + + /* ordering: [fd2, fd1] */ + err = bpf_prog_attach_opts(fd2, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + /* ordering: [fd2, fd3, fd1] */ + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + err = 
bpf_prog_attach_opts(fd4, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + /* retrieve optq.prog_cnt */ + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + /* optq.prog_cnt will be used in below query */ + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + +cleanup4: + optd.expected_revision = 5; + err = bpf_prog_detach_opts(fd4, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 3); + +cleanup3: + LIBBPF_OPTS_RESET(optd); + err = bpf_prog_detach_opts(fd3, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 2); + + /* Check revision after two detach operations */ + err = bpf_prog_query_opts(cg, atype, &optq); + ASSERT_OK(err, "prog_query"); + ASSERT_EQ(optq.revision, 7, "revision"); + +cleanup2: + err = bpf_prog_detach_opts(fd2, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_link_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_cgroup_opts, opta); + LIBBPF_OPTS(bpf_cgroup_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + struct bpf_link *link1, *link2, *link3, *link4; + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct cgroup_mprog *skel; + __u32 prog_ids[10]; + int cg, err; + + cg = test__join_cgroup("/link_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /link_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta); + if (!ASSERT_OK_PTR(link1, "link_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE | BPF_F_LINK, + .relative_id = id_from_link_fd(bpf_link__fd(link1)), + .expected_revision = 2, + ); + + /* ordering: [fd2, fd1] */ + link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta); + if (!ASSERT_OK_PTR(link2, "link_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER | BPF_F_LINK, + .relative_fd = bpf_link__fd(link2), + .expected_revision = 3, + ); + + /* ordering: [fd2, fd3, fd1] */ + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_OK_PTR(link3, "link_attach")) + 
goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta); + if (!ASSERT_OK_PTR(link4, "link_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + /* retrieve optq.prog_cnt */ + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + /* optq.prog_cnt will be used in below query */ + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + +cleanup4: + bpf_link__destroy(link4); + assert_mprog_count(cg, atype, 3); + +cleanup3: + bpf_link__destroy(link3); + assert_mprog_count(cg, atype, 2); + + /* Check revision after two detach operations */ + err = bpf_prog_query_opts(cg, atype, &optq); + ASSERT_OK(err, "prog_query"); + ASSERT_EQ(optq.revision, 7, "revision"); + +cleanup2: + bpf_link__destroy(link2); + assert_mprog_count(cg, atype, 1); + +cleanup1: + bpf_link__destroy(link1); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_preorder_prog_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + __u32 fd1, fd2, fd3, fd4; + struct cgroup_mprog *skel; + int cg, err; + + cg = test__join_cgroup("/preorder_prog_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_prog_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER, + .expected_revision = 2, + ); + + /* ordering: [fd1, fd2] */ + err = bpf_prog_attach_opts(fd2, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, -EINVAL, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_PREORDER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + /* ordering: [fd1, fd2, fd3] */ + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 4, 
+ ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + err = bpf_prog_attach_opts(fd4, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + err = bpf_prog_detach_opts(fd4, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 3); + +cleanup3: + err = bpf_prog_detach_opts(fd3, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 2); + +cleanup2: + err = bpf_prog_detach_opts(fd2, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_preorder_link_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_cgroup_opts, opta); + struct bpf_link *link1, *link2, *link3, *link4; + struct cgroup_mprog *skel; + __u32 fd2; + int cg; + + cg = test__join_cgroup("/preorder_link_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_link_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta); + if (!ASSERT_OK_PTR(link1, "link_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_PREORDER, + .expected_revision = 2, + ); + + /* ordering: [fd1, fd2] */ + link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta); + if (!ASSERT_OK_PTR(link2, "link_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_ERR_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER | BPF_F_PREORDER | BPF_F_LINK, + .relative_fd = bpf_link__fd(link2), + .expected_revision = 3, + ); + + /* ordering: [fd1, fd2, fd3] */ + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_OK_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta); + if (!ASSERT_OK_PTR(link4, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + bpf_link__destroy(link4); + assert_mprog_count(cg, atype, 3); + +cleanup3: + bpf_link__destroy(link3); + assert_mprog_count(cg, atype, 2); + +cleanup2: + bpf_link__destroy(link2); + assert_mprog_count(cg, atype, 1); + +cleanup1: + bpf_link__destroy(link1); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_invalid_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + __u32 fd1, fd2, id2; + struct cgroup_mprog *skel; + int cg, err; + + cg = test__join_cgroup("/invalid_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /invalid_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + 
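Condensed from the attach/detach tests around this point: a sketch of ordering two cgroup programs with the multi-prog flags, where fd_a and fd_b are assumed to be loaded program fds and cg a cgroup fd:

    #include <bpf/bpf.h>

    int attach_before(int fd_a, int fd_b, int cg, enum bpf_attach_type atype)
    {
            LIBBPF_OPTS(bpf_prog_attach_opts, opts,
                    .flags = BPF_F_ALLOW_MULTI,
            );
            int err;

            err = bpf_prog_attach_opts(fd_a, cg, atype, &opts);  /* [A] */
            if (err)
                    return err;

            /* BPF_F_BEFORE + relative_fd places B ahead of A: [B, A]. */
            LIBBPF_OPTS_RESET(opts,
                    .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
                    .relative_fd = fd_a,
            );
            return bpf_prog_attach_opts(fd_b, cg, atype, &opts);
    }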
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + + id2 = id_from_prog_fd(fd2); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_ID, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_ID, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_LINK, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE | BPF_F_AFTER, + .replace_prog_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 1); +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +void test_cgroup_mprog_opts(void) +{ + if (test__start_subtest("prog_attach_detach")) + test_prog_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("link_attach_detach")) + test_link_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("preorder_prog_attach_detach")) + test_preorder_prog_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("preorder_link_attach_detach")) + test_preorder_link_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("invalid_attach_detach")) + test_invalid_attach_detach(BPF_CGROUP_GETSOCKOPT); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c new file mode 100644 index 000000000000..a36d2e968bc5 --- /dev/null +++ 
b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "cgroup_preorder.skel.h" + +static int run_getsockopt_test(int cg_parent, int sock_fd, bool has_relative_fd) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opts); + enum bpf_attach_type prog_p_atype, prog_p2_atype; + int prog_p_fd, prog_p2_fd; + struct cgroup_preorder *skel = NULL; + struct bpf_program *prog; + __u8 *result, buf; + socklen_t optlen = 1; + int err = 0; + + skel = cgroup_preorder__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load")) + return 0; + + LIBBPF_OPTS_RESET(opts); + opts.flags = BPF_F_ALLOW_MULTI; + prog = skel->progs.parent; + prog_p_fd = bpf_program__fd(prog); + prog_p_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent")) + goto close_skel; + + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE; + if (has_relative_fd) + opts.relative_fd = prog_p_fd; + prog = skel->progs.parent_2; + prog_p2_fd = bpf_program__fd(prog); + prog_p2_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2")) + goto detach_parent; + + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt")) + goto detach_parent_2; + + result = skel->bss->result; + ASSERT_TRUE(result[0] == 4 && result[1] == 3, "result values"); + +detach_parent_2: + ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype), + "bpf_prog_detach2-parent_2"); +detach_parent: + ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype), + "bpf_prog_detach2-parent"); +close_skel: + cgroup_preorder__destroy(skel); + return err; +} + +void test_cgroup_mprog_ordering(void) +{ + int cg_parent = -1, sock_fd = -1; + + cg_parent = test__join_cgroup("/parent"); + if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent")) + goto out; + + sock_fd = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(sock_fd, 0, "socket")) + goto out; + + ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, false), "getsockopt_test_1"); + ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, true), "getsockopt_test_2"); + +out: + close(sock_fd); + close(cg_parent); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c index 9add890c2d37..241b2c8c6e0f 100644 --- a/tools/testing/selftests/bpf/prog_tests/fd_array.c +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c @@ -312,7 +312,7 @@ static void check_fd_array_cnt__referenced_btfs(void) /* btf should still exist when original file descriptor is closed */ err = get_btf_id_by_fd(extra_fds[0], &btf_id); - if (!ASSERT_GE(err, 0, "get_btf_id_by_fd")) + if (!ASSERT_EQ(err, 0, "get_btf_id_by_fd")) goto cleanup; Close(extra_fds[0]); diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index 169ce689b97c..d6f14a232002 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -7,6 +7,10 @@ #include "test_log_buf.skel.h" #include "bpf_util.h" +#if !defined(__clang__) +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + static size_t libbpf_log_pos; static char libbpf_log_buf[1024 * 1024]; 
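[For context: the result[0] == 4 && result[1] == 3 check in the ordering test above depends on how the cgroup_preorder programs tag their slot in a shared array. The skeleton source is not part of this diff; the following is an assumed shape for illustration only (the real program lives in progs/cgroup_preorder.c and also has child/child_2 variants).]

// SPDX-License-Identifier: GPL-2.0
/* Assumed shape: each prog appends its own tag to a shared result
 * array so the test can assert execution order.
 */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

__u8 result[4];
__u32 idx;

SEC("cgroup/getsockopt")
int parent(struct bpf_sockopt *ctx)
{
	__u32 i = __sync_fetch_and_add(&idx, 1);

	if (i < sizeof(result))
		result[i] = 3;	/* tag for "parent" */
	return 1;
}

SEC("cgroup/getsockopt")
int parent_2(struct bpf_sockopt *ctx)
{
	__u32 i = __sync_fetch_and_add(&idx, 1);

	if (i < sizeof(result))
		result[i] = 4;	/* tag for "parent_2" */
	return 1;
}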
static bool libbpf_log_error; diff --git a/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c new file mode 100644 index 000000000000..40d4f687bd9c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <test_progs.h> +#include "mem_rdonly_untrusted.skel.h" + +void test_mem_rdonly_untrusted(void) +{ + RUN_TESTS(mem_rdonly_untrusted); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index da430df45aa4..d1e4cb28a72c 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -97,7 +97,7 @@ static void ringbuf_write_subtest(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->maps.ringbuf.max_entries = 0x4000; + skel->maps.ringbuf.max_entries = 0x40000; err = test_ringbuf_write_lskel__load(skel); if (!ASSERT_OK(err, "skel_load")) @@ -108,7 +108,7 @@ static void ringbuf_write_subtest(void) mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos")) goto cleanup; - *mmap_ptr = 0x3000; + *mmap_ptr = 0x30000; ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 1d98eee7a2c3..f1bdccc7e4e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -924,6 +924,8 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map) goto close; n = xsend(c1, buf, sizeof(buf), 0); + if (n == -1) + goto close; if (n < sizeof(buf)) FAIL("incomplete write"); diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c new file mode 100644 index 000000000000..35af8044d059 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include <test_progs.h> +#include "string_kfuncs_success.skel.h" +#include "string_kfuncs_failure1.skel.h" +#include "string_kfuncs_failure2.skel.h" +#include <sys/mman.h> + +static const char * const test_cases[] = { + "strcmp", + "strchr", + "strchrnul", + "strnchr", + "strrchr", + "strlen", + "strnlen", + "strspn_str", + "strspn_accept", + "strcspn_str", + "strcspn_reject", + "strstr", + "strnstr", +}; + +void run_too_long_tests(void) +{ + struct string_kfuncs_failure2 *skel; + struct bpf_program *prog; + char test_name[256]; + int err, i; + + skel = string_kfuncs_failure2__open_and_load(); + if (!ASSERT_OK_PTR(skel, "string_kfuncs_failure2__open_and_load")) + return; + + memset(skel->bss->long_str, 'a', sizeof(skel->bss->long_str)); + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + sprintf(test_name, "test_%s_too_long", test_cases[i]); + if (!test__start_subtest(test_name)) + continue; + + prog = bpf_object__find_program_by_name(skel->obj, test_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts); + if (!ASSERT_OK(err, "bpf_prog_test_run")) + goto cleanup; + + ASSERT_EQ(topts.retval, -E2BIG, "reading too long string fails with -E2BIG"); + } + +cleanup: + 
string_kfuncs_failure2__destroy(skel); +} + +void test_string_kfuncs(void) +{ + RUN_TESTS(string_kfuncs_success); + RUN_TESTS(string_kfuncs_failure1); + + run_too_long_tests(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 66a900327f91..0ab36503c3b2 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1195,7 +1195,7 @@ static void test_tailcall_hierarchy_count(const char *which, bool test_fentry, bool test_fexit, bool test_fentry_entry) { - int err, map_fd, prog_fd, main_data_fd, fentry_data_fd, fexit_data_fd, i, val; + int err, map_fd, prog_fd, main_data_fd, fentry_data_fd = 0, fexit_data_fd = 0, i, val; struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL; struct bpf_link *fentry_link = NULL, *fexit_link = NULL; struct bpf_program *prog, *fentry_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h index 924d0e25320c..d52a62af77bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h @@ -8,34 +8,6 @@ # define loopback 1 #endif -static inline __u32 id_from_prog_fd(int fd) -{ - struct bpf_prog_info prog_info = {}; - __u32 prog_info_len = sizeof(prog_info); - int err; - - err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len); - if (!ASSERT_OK(err, "id_from_prog_fd")) - return 0; - - ASSERT_NEQ(prog_info.id, 0, "prog_info.id"); - return prog_info.id; -} - -static inline __u32 id_from_link_fd(int fd) -{ - struct bpf_link_info link_info = {}; - __u32 link_info_len = sizeof(link_info); - int err; - - err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len); - if (!ASSERT_OK(err, "id_from_link_fd")) - return 0; - - ASSERT_NEQ(link_info.id, 0, "link_info.id"); - return link_info.id; -} - static inline __u32 ifindex_from_link_fd(int fd) { struct bpf_link_info link_info = {}; diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c index bcdbd27f22f0..273dd41ca09e 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c @@ -1,22 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <fcntl.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include <linux/filter.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include <bpf/bpf_endian.h> -#include "bpf_util.h" +#include "test_progs.h" #include "cgroup_helpers.h" -#include "testing_helpers.h" #define CG_PATH "/foo" #define MAX_INSNS 512 @@ -1608,26 +1594,19 @@ static int run_tests(int cgfd) return fails ? 
-1 : 0; } -int main(int argc, char **argv) +void test_sysctl(void) { - int cgfd = -1; - int err = 0; + int cgfd; cgfd = cgroup_setup_and_join(CG_PATH); - if (cgfd < 0) - goto err; + if (!ASSERT_OK_FD(cgfd, "create_cgroup")) + goto out; - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (!ASSERT_OK(run_tests(cgfd), "run_tests")) + goto out; - if (run_tests(cgfd)) - goto err; - - goto out; -err: - err = -1; out: close(cgfd); cleanup_cgroup_environment(); - return err; + return; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c index 47b56c258f3f..367f47e4a936 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c +++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c @@ -60,13 +60,19 @@ static void test_set_global_vars_succeeds(void) " -G \"var_s8 = -128\" "\ " -G \"var_u8 = 255\" "\ " -G \"var_ea = EA2\" "\ - " -G \"var_eb = EB2\" "\ - " -G \"var_ec = EC2\" "\ + " -G \"var_eb = EB2\" "\ + " -G \"var_ec=EC2\" "\ " -G \"var_b = 1\" "\ - " -G \"struct1.struct2.u.var_u8 = 170\" "\ + " -G \"struct1[2].struct2[1][2].u.var_u8[2]=170\" "\ " -G \"union1.struct3.var_u8_l = 0xaa\" "\ " -G \"union1.struct3.var_u8_h = 0xaa\" "\ - "-vl2 > %s", fix->veristat, fix->tmpfile); + " -G \"arr[3]= 171\" " \ + " -G \"arr[EA2] =172\" " \ + " -G \"enum_arr[EC2]=EA3\" " \ + " -G \"three_d[31][7][EA2]=173\"" \ + " -G \"struct1[2].struct2[1][2].u.mat[5][3]=174\" " \ + " -G \"struct11 [ 7 ] [ 5 ] .struct2[0][1].u.mat[3][0] = 175\" " \ + " -vl2 > %s", fix->veristat, fix->tmpfile); read(fix->fd, fix->output, fix->sz); __CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001"); @@ -81,8 +87,14 @@ static void test_set_global_vars_succeeds(void) __CHECK_STR("_w=12 ", "var_eb = EB2"); __CHECK_STR("_w=13 ", "var_ec = EC2"); __CHECK_STR("_w=1 ", "var_b = 1"); - __CHECK_STR("_w=170 ", "struct1.struct2.u.var_u8 = 170"); + __CHECK_STR("_w=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170"); __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa"); + __CHECK_STR("_w=171 ", "arr[3]= 171"); + __CHECK_STR("_w=172 ", "arr[EA2] =172"); + __CHECK_STR("_w=10 ", "enum_arr[EC2]=EA3"); + __CHECK_STR("_w=173 ", "three_d[31][7][EA2]=173"); + __CHECK_STR("_w=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174"); + __CHECK_STR("_w=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175"); out: teardown_fixture(fix); @@ -129,6 +141,95 @@ out: teardown_fixture(fix); } +static void test_unsupported_ptr_array_type(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"ptr_arr[0] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Can't set ptr_arr[0]. Only ints and enums are supported", "ptr_arr"); + +out: + teardown_fixture(fix); +} + +static void test_array_out_of_bounds(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr[99] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Array index 99 is out of bounds", "arr[99]"); + +out: + teardown_fixture(fix); +} + +static void test_array_index_not_found(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr[EG2] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Can't resolve enum value EG2", "arr[EG2]"); + +out: + teardown_fixture(fix); +} + +static void test_array_index_for_non_array(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"var_b[0] = 1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Array index is not expected for var_b", "var_b[0] = 1"); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"union1.struct3[0].var_u8_l=1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Array index is not expected for struct3", "union1.struct3[0].var_u8_l=1"); + +out: + teardown_fixture(fix); +} + +static void test_no_array_index_for_array(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr = 1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Can't set arr. Only ints and enums are supported", "arr = 1"); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"struct1[0].struct2.u.var_u8[2]=1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Can't resolve field u for non-composite type", "struct1[0].struct2.u.var_u8[2]=1"); + +out: + teardown_fixture(fix); +} + void test_veristat(void) { if (test__start_subtest("set_global_vars_succeeds")) @@ -139,6 +240,22 @@ void test_veristat(void) if (test__start_subtest("set_global_vars_from_file_succeeds")) test_set_global_vars_from_file_succeeds(); + + if (test__start_subtest("test_unsupported_ptr_array_type")) + test_unsupported_ptr_array_type(); + + if (test__start_subtest("test_array_out_of_bounds")) + test_array_out_of_bounds(); + + if (test__start_subtest("test_array_index_not_found")) + test_array_index_not_found(); + + if (test__start_subtest("test_array_index_for_non_array")) + test_array_index_for_non_array(); + + if (test__start_subtest("test_no_array_index_for_array")) + test_no_array_index_for_array(); + } #undef __CHECK_STR diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 495d66414b57..9057e983cc54 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -270,8 +270,16 @@ */ trigger_300_usdts(); - /* we'll reuse usdt_100 BPF program for usdt_300 test */ bpf_link__destroy(skel->links.usdt_100); + + bss->usdt_100_called = 0; + bss->usdt_100_sum = 0; + + /* If built with arm64/clang, there will be far fewer specs + * for usdt_300 call sites.
+ */ +#if !defined(__aarch64__) || !defined(__clang__) + /* we'll reuse usdt_100 BPF program for usdt_300 test */ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe", "test", "usdt_300", NULL); err = -errno; @@ -282,13 +290,11 @@ static void subtest_multispec_usdt(void) /* let's check that there are no "dangling" BPF programs attached due * to partial success of the above test:usdt_300 attachment */ - bss->usdt_100_called = 0; - bss->usdt_100_sum = 0; - f300(777); /* this is 301st instance of usdt_300 */ ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called"); ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum"); +#endif /* This time we have USDT with 400 inlined invocations, but arg specs * should be the same across all sites, so libbpf will only need to diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index d424e7ecbd12..9fd3ae987321 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -21,8 +21,7 @@ #include "../progs/test_user_ringbuf.h" static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ; -static const long c_ringbuf_size = 1 << 12; /* 1 small page */ -static const long c_max_entries = c_ringbuf_size / c_sample_size; +static long c_ringbuf_size, c_max_entries; static void drain_current_samples(void) { @@ -424,7 +423,9 @@ static void test_user_ringbuf_loop(void) uint32_t remaining_samples = total_samples; int err; - BUILD_BUG_ON(total_samples <= c_max_entries); + if (!ASSERT_LT(c_max_entries, total_samples, "compare_c_max_entries")) + return; + err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (err) return; @@ -686,6 +687,9 @@ void test_user_ringbuf(void) { int i; + c_ringbuf_size = getpagesize(); /* 1 page */ + c_max_entries = c_ringbuf_size / c_sample_size; + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { if (!test__start_subtest(success_tests[i].test_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index b2b2d85dbb1b..43264347e7d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -37,21 +37,26 @@ static void test_xdp_adjust_tail_shrink(void) bpf_object__close(obj); } -static void test_xdp_adjust_tail_grow(void) +static void test_xdp_adjust_tail_grow(bool is_64k_pagesize) { const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; struct bpf_object *obj; - char buf[4096]; /* avoid segfault: large buf to hold grow results */ + char buf[8192]; /* avoid segfault: large buf to hold grow results */ __u32 expect_sz; int err, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, - .data_size_in = sizeof(pkt_v4), .data_out = buf, .data_size_out = sizeof(buf), .repeat = 1, ); + /* topts.data_size_in as a special signal to bpf prog */ + if (is_64k_pagesize) + topts.data_size_in = sizeof(pkt_v4) - 1; + else + topts.data_size_in = sizeof(pkt_v4); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; @@ -208,7 +213,7 @@ out: bpf_object__close(obj); } -static void test_xdp_adjust_frags_tail_grow(void) +static void test_xdp_adjust_frags_tail_grow_4k(void) { const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; __u32 exp_size; @@ -246,14 +251,20 @@ static void test_xdp_adjust_frags_tail_grow(void) ASSERT_EQ(topts.retval, 
XDP_TX, "9Kb+10b retval"); ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size"); - for (i = 0; i < 9000; i++) - ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); + for (i = 0; i < 9000; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); + } - for (i = 9000; i < 9010; i++) - ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); + for (i = 9000; i < 9010; i++) { + if (buf[i] != 0) + ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); + } - for (i = 9010; i < 16384; i++) - ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched"); + for (i = 9010; i < 16384; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched"); + } /* Test a too large grow */ memset(buf, 1, 16384); @@ -273,16 +284,93 @@ out: bpf_object__close(obj); } +static void test_xdp_adjust_frags_tail_grow_64k(void) +{ + const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; + __u32 exp_size; + struct bpf_program *prog; + struct bpf_object *obj; + int err, i, prog_fd; + __u8 *buf; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + obj = bpf_object__open(file); + if (libbpf_get_error(obj)) + return; + + prog = bpf_object__next_program(obj, NULL); + if (bpf_object__load(obj)) + goto out; + + prog_fd = bpf_program__fd(prog); + + buf = malloc(262144); + if (!ASSERT_OK_PTR(buf, "alloc buf 256Kb")) + goto out; + + /* Test case add 10 bytes to last frag */ + memset(buf, 1, 262144); + exp_size = 90000 + 10; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 90000; + topts.data_size_out = 262144; + err = bpf_prog_test_run_opts(prog_fd, &topts); + + ASSERT_OK(err, "90Kb+10b"); + ASSERT_EQ(topts.retval, XDP_TX, "90Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size"); + + for (i = 0; i < 90000; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "90Kb+10b-old"); + } + + for (i = 90000; i < 90010; i++) { + if (buf[i] != 0) + ASSERT_EQ(buf[i], 0, "90Kb+10b-new"); + } + + for (i = 90010; i < 262144; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "90Kb+10b-untouched"); + } + + /* Test a too large grow */ + memset(buf, 1, 262144); + exp_size = 90001; + + topts.data_in = topts.data_out = buf; + topts.data_size_in = 90001; + topts.data_size_out = 262144; + err = bpf_prog_test_run_opts(prog_fd, &topts); + + ASSERT_OK(err, "90Kb+10b"); + ASSERT_EQ(topts.retval, XDP_DROP, "90Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size"); + + free(buf); +out: + bpf_object__close(obj); +} + void test_xdp_adjust_tail(void) { + int page_size = getpagesize(); + if (test__start_subtest("xdp_adjust_tail_shrink")) test_xdp_adjust_tail_shrink(); if (test__start_subtest("xdp_adjust_tail_grow")) - test_xdp_adjust_tail_grow(); + test_xdp_adjust_tail_grow(page_size == 65536); if (test__start_subtest("xdp_adjust_tail_grow2")) test_xdp_adjust_tail_grow2(); if (test__start_subtest("xdp_adjust_frags_tail_shrink")) test_xdp_adjust_frags_tail_shrink(); - if (test__start_subtest("xdp_adjust_frags_tail_grow")) - test_xdp_adjust_frags_tail_grow(); + if (test__start_subtest("xdp_adjust_frags_tail_grow")) { + if (page_size == 65536) + test_xdp_adjust_frags_tail_grow_64k(); + else + test_xdp_adjust_frags_tail_grow_4k(); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 7dac044664ac..dd34b0cc4b4e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -66,16 +66,25 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd) #else #define MAX_PKT_SIZE 3408 #endif + +#define PAGE_SIZE_4K 
4096 +#define PAGE_SIZE_64K 65536 + static void test_max_pkt_size(int fd) { - char data[MAX_PKT_SIZE + 1] = {}; + char data[PAGE_SIZE_64K + 1] = {}; int err; DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = MAX_PKT_SIZE, .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); + + if (getpagesize() == PAGE_SIZE_64K) + opts.data_size_in = MAX_PKT_SIZE + PAGE_SIZE_64K - PAGE_SIZE_4K; + else + opts.data_size_in = MAX_PKT_SIZE; + err = bpf_prog_test_run_opts(fd, &opts); ASSERT_OK(err, "prog_run_max_size"); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c new file mode 100644 index 000000000000..2f20485e0de3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +__u32 value_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_map_values(struct bpf_iter__bpf_map_elem *ctx) +{ + __u32 value = 0; + + if (ctx->value == (void *)0) + return 0; + + bpf_probe_read_kernel(&value, sizeof(value), ctx->value); + value_sum += value; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 6e208e24ba3b..20dce508d8e0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -83,9 +83,11 @@ * expect return value to match passed parameter: * - a decimal number * - a hexadecimal number, when starts from 0x - * - literal INT_MIN - * - literal POINTER_VALUE (see definition below) - * - literal TEST_DATA_LEN (see definition below) + * - a macro which expands to one of the above + * - literal _INT_MIN (expands to INT_MIN) + * In addition, two special macros are defined below: + * - POINTER_VALUE + * - TEST_DATA_LEN * __retval_unpriv Same, but load program in unprivileged mode. 
* * __description Text to be used instead of a program name for display @@ -125,8 +127,8 @@ #define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv"))) #define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) #define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag))) -#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="#val))) -#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val))) +#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="XSTR(val)))) +#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="XSTR(val)))) #define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary"))) #define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv"))) #define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path))) @@ -155,7 +157,7 @@ #define __imm_insn(name, expr) [name]"i"(*(long *)&(expr)) /* Magic constants used with __retval() */ -#define POINTER_VALUE 0xcafe4all +#define POINTER_VALUE 0xbadcafe #define TEST_DATA_LEN 64 #ifndef __used @@ -231,4 +233,8 @@ #define CAN_USE_LOAD_ACQ_STORE_REL #endif +#if defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) +#define SPEC_V1 +#endif + #endif diff --git a/tools/testing/selftests/bpf/progs/cgroup_mprog.c b/tools/testing/selftests/bpf/progs/cgroup_mprog.c new file mode 100644 index 000000000000..6a0ea02c4de2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_mprog.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +SEC("cgroup/getsockopt") +int getsockopt_1(struct bpf_sockopt *ctx) +{ + return 1; +} + +SEC("cgroup/getsockopt") +int getsockopt_2(struct bpf_sockopt *ctx) +{ + return 1; +} + +SEC("cgroup/getsockopt") +int getsockopt_3(struct bpf_sockopt *ctx) +{ + return 1; +} + +SEC("cgroup/getsockopt") +int getsockopt_4(struct bpf_sockopt *ctx) +{ + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c index f3d79aecbf93..6884ab99a421 100644 --- a/tools/testing/selftests/bpf/progs/compute_live_registers.c +++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c @@ -240,6 +240,22 @@ __naked void if2(void) ::: __clobber_all); } +/* Verifier misses that r2 is alive if jset is not handled properly */ +SEC("socket") +__log_level(2) +__msg("2: 012....... (45) if r1 & 0x7 goto pc+1") +__naked void if3_jset_bug(void) +{ + asm volatile ( + "r0 = 1;" + "r2 = 2;" + "if r1 & 0x7 goto +1;" + "exit;" + "r0 = r2;" + "exit;" + ::: __clobber_all); +} + SEC("socket") __log_level(2) __msg("0: .......... (b7) r1 = 0") diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 76adf4a8f2da..7dd92a303bf6 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1649,4 +1649,281 @@ int clean_live_states(const void *ctx) return 0; } +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state(void) +{ + /* This is equivalent to C program below. 
+ * + * r8 = bpf_get_prandom_u32(); + * r6 = -32; + * bpf_iter_num_new(&fp[-8], 0, 10); + * if (unlikely(bpf_get_prandom_u32())) + * r6 = -31; + * while (bpf_iter_num_next(&fp[-8])) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "r7 = 0;" + "r6 = -32;" + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "loop_%=:" + "call noop;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + "change_r6_%=:" + "r6 = -31;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int noop(void) +{ + asm volatile ( + "r0 = 0;" + "exit;" + ); +} + +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state2(void) +{ + /* This is equivalent to C program below. + * + * r8 = bpf_get_prandom_u32(); + * r6 = -32; + * bpf_iter_num_new(&fp[-8], 0, 10); + * if (unlikely(bpf_get_prandom_u32())) { + * r6 = -31; + * jump_into_loop: + * goto +0; + * goto loop; + * } + * if (unlikely(bpf_get_prandom_u32())) + * goto jump_into_loop; + * loop: + * while (bpf_iter_num_next(&fp[-8])) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "r7 = 0;" + "r6 = -32;" + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto jump_into_loop_%=;" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + "change_r6_%=:" + "r6 = -31;" + "jump_into_loop_%=: " + "goto +0;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state3(void) +{ + /* + * bpf_iter_num_new(&fp[-8], 0, 10) + * loop1(-32, &fp[-8]) + * loop1_wrapper(&fp[-8]) + * bpf_iter_num_destroy(&fp[-8]) + */ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + /* call #1 */ + "r1 = -32;" + "r2 = r10;" + "r2 += -8;" + "call loop1;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + /* call #2 */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r1 = r10;" + "r1 
+= -8;" + "call loop1_wrapper;" + /* return */ + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int loop1(void) +{ + /* + * int loop1(num, iter) { + * r6 = num; + * r7 = iter; + * while (bpf_iter_num_next(r7)) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * return 0 + * } + */ + asm volatile ( + "r6 = r1;" + "r7 = r2;" + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "loop_%=:" + "r1 = r7;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_next), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int loop1_wrapper(void) +{ + /* + * int loop1_wrapper(iter) { + * r6 = -32; + * r7 = iter; + * if (unlikely(bpf_get_prandom_u32())) + * r6 = -31; + * loop1(r6, r7); + * return 0; + * } + */ + asm volatile ( + "r6 = -32;" + "r7 = r1;" + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "loop_%=:" + "r1 = r6;" + "r2 = r7;" + "call loop1;" + "r0 = 0;" + "exit;" + "change_r6_%=:" + "r6 = -31;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_next), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c new file mode 100644 index 000000000000..00604755e698 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" + +SEC("socket") +__success +__retval(0) +int ldx_is_ok_bad_addr(void *ctx) +{ + char *p; + + if (!bpf_core_enum_value_exists(enum bpf_features, BPF_FEAT_RDONLY_CAST_TO_VOID)) + return 42; + + p = bpf_rdonly_cast(0, 0); + return p[0x7fff]; +} + +SEC("socket") +__success +__retval(1) +int ldx_is_ok_good_addr(void *ctx) +{ + int v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + return *p; +} + +SEC("socket") +__success +int offset_not_tracked(void *ctx) +{ + int *p, i, s; + + p = bpf_rdonly_cast(0, 0); + s = 0; + bpf_for(i, 0, 1000 * 1000 * 1000) { + p++; + s += *p; + } + return s; +} + +SEC("socket") +__failure +__msg("cannot write into rdonly_untrusted_mem") +int stx_not_ok(void *ctx) +{ + int v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + *p = 1; + return 0; +} + +SEC("socket") +__failure +__msg("cannot write into rdonly_untrusted_mem") +int atomic_not_ok(void *ctx) +{ + int v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + __sync_fetch_and_add(p, 1); + return 0; +} + +SEC("socket") +__failure +__msg("cannot write into rdonly_untrusted_mem") +int atomic_rmw_not_ok(void *ctx) +{ + long v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + return __sync_val_compare_and_swap(p, 0, 42); +} + +SEC("socket") +__failure +__msg("invalid access to memory, mem_size=0 off=0 size=4") +__msg("R1 min value is outside of the allowed memory range") +int kfunc_param_not_ok(void *ctx) +{ + int *p; + + p = bpf_rdonly_cast(0, 0); + bpf_kfunc_trusted_num_test(p); + return 0; +} + +SEC("?fentry.s/" 
SYS_PREFIX "sys_getpgid") +__failure +__msg("R1 type=rdonly_untrusted_mem expected=") +int helper_param_not_ok(void *ctx) +{ + char *p; + + p = bpf_rdonly_cast(0, 0); + /* + * Any helper with ARG_CONST_SIZE_OR_ZERO constraint will do, + * the most permissive constraint + */ + bpf_copy_from_user(p, 0, (void *)42); + return 0; +} + +static __noinline u64 *get_some_addr(void) +{ + if (bpf_get_prandom_u32()) + return bpf_rdonly_cast(0, bpf_core_type_id_kernel(struct sock)); + else + return bpf_rdonly_cast(0, 0); +} + +SEC("socket") +__success +__retval(0) +int mixed_mem_type(void *ctx) +{ + u64 *p; + + /* Try to avoid compiler hoisting load to if branches by using __noinline func. */ + p = get_some_addr(); + return *p; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c index a3620c15c136..49fe93d7e059 100644 --- a/tools/testing/selftests/bpf/progs/rbtree.c +++ b/tools/testing/selftests/bpf/progs/rbtree.c @@ -61,19 +61,19 @@ static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock) } m->key = 1; - bpf_spin_lock(&glock); - bpf_rbtree_add(&groot, &n->node, less); - bpf_rbtree_add(&groot, &m->node, less); - bpf_spin_unlock(&glock); + bpf_spin_lock(lock); + bpf_rbtree_add(root, &n->node, less); + bpf_rbtree_add(root, &m->node, less); + bpf_spin_unlock(lock); n = bpf_obj_new(typeof(*n)); if (!n) return 3; n->key = 3; - bpf_spin_lock(&glock); - bpf_rbtree_add(&groot, &n->node, less); - bpf_spin_unlock(&glock); + bpf_spin_lock(lock); + bpf_rbtree_add(root, &n->node, less); + bpf_spin_unlock(lock); return 0; } diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 43637ee2cdcd..3a868a199349 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -16,10 +16,11 @@ struct { __type(value, long); } map_a SEC(".maps"); -__u32 user_data, key_serial, target_pid; +__u32 user_data, target_pid; +__s32 key_serial; __u64 flags, task_storage_val, cgroup_id; -struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; void bpf_key_put(struct bpf_key *key) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/security_bpf_map.c b/tools/testing/selftests/bpf/progs/security_bpf_map.c new file mode 100644 index 000000000000..7216b3450e96 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/security_bpf_map.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +#define EPERM 1 /* Operation not permitted */ + +/* From include/linux/fs.h.
*/ +#define FMODE_WRITE 0x2 + +struct map; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1); +} prot_status_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 3); +} prot_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 3); +} not_prot_map SEC(".maps"); + +SEC("fmod_ret/security_bpf_map") +int BPF_PROG(fmod_bpf_map, struct bpf_map *map, int fmode) +{ + __u32 key = 0; + __u32 *status_ptr = bpf_map_lookup_elem(&prot_status_map, &key); + + if (!status_ptr || !*status_ptr) + return 0; + + if (map == &prot_map) { + /* Allow read-only access */ + if (fmode & FMODE_WRITE) + return -EPERM; + } + + return 0; +} + +/* + * This program keeps references to maps. This is needed to prevent + * optimizing them out. + */ +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(fentry_dummy1, int a) +{ + __u32 key = 0; + __u32 val1 = a; + __u32 val2 = a + 1; + + bpf_map_update_elem(&prot_map, &key, &val1, BPF_ANY); + bpf_map_update_elem(¬_prot_map, &key, &val2, BPF_ANY); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c index 90f5656c3991..ebaef28b2cb3 100644 --- a/tools/testing/selftests/bpf/progs/set_global_vars.c +++ b/tools/testing/selftests/bpf/progs/set_global_vars.c @@ -7,22 +7,30 @@ char _license[] SEC("license") = "GPL"; -enum Enum { EA1 = 0, EA2 = 11 }; +typedef __s32 s32; +typedef s32 i32; +typedef __u8 u8; + +enum Enum { EA1 = 0, EA2 = 11, EA3 = 10 }; enum Enumu64 {EB1 = 0llu, EB2 = 12llu }; enum Enums64 { EC1 = 0ll, EC2 = 13ll }; const volatile __s64 var_s64 = -1; const volatile __u64 var_u64 = 0; -const volatile __s32 var_s32 = -1; +const volatile i32 var_s32 = -1; const volatile __u32 var_u32 = 0; const volatile __s16 var_s16 = -1; const volatile __u16 var_u16 = 0; const volatile __s8 var_s8 = -1; -const volatile __u8 var_u8 = 0; +const volatile u8 var_u8 = 0; const volatile enum Enum var_ea = EA1; const volatile enum Enumu64 var_eb = EB1; const volatile enum Enums64 var_ec = EC1; const volatile bool var_b = false; +const volatile i32 arr[32]; +const volatile enum Enum enum_arr[32]; +const volatile i32 three_d[47][19][17]; +const volatile i32 *ptr_arr[32]; struct Struct { int:16; @@ -35,34 +43,38 @@ struct Struct { volatile struct { const int:1; union { - const volatile __u8 var_u8; + const volatile u8 var_u8[3]; const volatile __s16 filler3; const int:1; + s32 mat[7][5]; } u; }; - } struct2; + } struct2[2][4]; }; const volatile __u32 stru = 0; /* same prefix as below */ -const volatile struct Struct struct1 = {.struct2 = {.u = {.var_u8 = 1}}}; +const volatile struct Struct struct1[3]; +const volatile struct Struct struct11[11][7]; -union Union { - __u16 var_u16; - struct Struct3 { - struct { - __u8 var_u8_l; - }; +struct Struct3 { + struct { + u8 var_u8_l; + }; + struct { struct { - struct { - __u8 var_u8_h; - }; + u8 var_u8_h; }; - } struct3; + }; }; -const volatile union Union union1 = {.var_u16 = -1}; +typedef struct Struct3 Struct3_t; -char arr[4] = {0}; +union Union { + __u16 var_u16; + Struct3_t struct3; +}; + +const volatile union Union union1 = {.var_u16 = -1}; SEC("socket") int test_set_globals(void *ctx) @@ -81,8 +93,14 @@ int test_set_globals(void *ctx) a = var_eb; a = var_ec; a = var_b; - a = struct1.struct2.u.var_u8; + a = struct1[2].struct2[1][2].u.var_u8[2]; a = 
union1.var_u16; + a = arr[3]; + a = arr[EA2]; + a = enum_arr[EC2]; + a = three_d[31][7][EA2]; + a = struct1[2].struct2[1][2].u.mat[5][3]; + a = struct11[7][5].struct2[0][1].u.mat[3][0]; return a; } diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c new file mode 100644 index 000000000000..53af438bd998 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <linux/limits.h> +#include "bpf_misc.h" +#include "errno.h" + +char *user_ptr = (char *)1; +char *invalid_kern_ptr = (char *)-1; + +/* + * When passing userspace pointers, the error code differs based on arch: + * -ERANGE on arches with non-overlapping address spaces + * -EFAULT on other arches + */ +#if defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_loongarch) || \ + defined(__TARGET_ARCH_powerpc) || defined(__TARGET_ARCH_x86) +#define USER_PTR_ERR -ERANGE +#else +#define USER_PTR_ERR -EFAULT +#endif + +/* + * On s390, __get_kernel_nofault (used in string kfuncs) returns 0 for NULL and + * user_ptr (instead of causing an exception) so the below two groups of tests + * are not applicable. + */ +#ifndef __TARGET_ARCH_s390 + +/* Passing NULL to string kfuncs (treated as a userspace ptr) */ +SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strrchr_null(void *ctx) { return bpf_strrchr(NULL, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strlen_null(void *ctx) { return bpf_strlen(NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnlen_null(void *ctx) { return bpf_strnlen(NULL, 1); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null1(void *ctx) { return bpf_strspn(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null2(void *ctx) { return bpf_strspn("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null1(void *ctx) { return bpf_strcspn(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null2(void *ctx) { return bpf_strcspn("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null1(void *ctx) { return bpf_strstr(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null2(void *ctx) { return bpf_strstr("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null1(void *ctx) { return bpf_strnstr(NULL, "hello", 1); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return bpf_strnstr("hello", NULL, 1); } + +/* Passing userspace ptr to string kfuncs */ +SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); 
} +SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strrchr_user_ptr(void *ctx) { return bpf_strrchr(user_ptr, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strlen_user_ptr(void *ctx) { return bpf_strlen(user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnlen_user_ptr(void *ctx) { return bpf_strnlen(user_ptr, 1); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr1(void *ctx) { return bpf_strspn(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr2(void *ctx) { return bpf_strspn("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr1(void *ctx) { return bpf_strcspn(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr2(void *ctx) { return bpf_strcspn("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr1(void *ctx) { return bpf_strstr(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr2(void *ctx) { return bpf_strstr("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr1(void *ctx) { return bpf_strnstr(user_ptr, "hello", 1); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { return bpf_strnstr("hello", user_ptr, 1); } + +#endif /* __TARGET_ARCH_s390 */ + +/* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */ +SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strrchr_pagefault(void *ctx) { return bpf_strrchr(invalid_kern_ptr, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strlen_pagefault(void *ctx) { return bpf_strlen(invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strnlen_pagefault(void *ctx) { return bpf_strnlen(invalid_kern_ptr, 1); } +SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault1(void *ctx) { return bpf_strspn(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault2(void *ctx) { return bpf_strspn("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault1(void *ctx) { return bpf_strcspn(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault2(void *ctx) { return bpf_strcspn("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault1(void *ctx) { return bpf_strstr(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault2(void *ctx) { return bpf_strstr("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault1(void *ctx) { return bpf_strnstr(invalid_kern_ptr, "hello", 1); } +SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault2(void *ctx) { return 
bpf_strnstr("hello", invalid_kern_ptr, 1); } + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c new file mode 100644 index 000000000000..89fb4669b0e9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <linux/limits.h> + +char long_str[XATTR_SIZE_MAX + 1]; + +SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); } +SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); } +SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); } +SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); } +SEC("syscall") int test_strrchr_too_long(void *ctx) { return bpf_strrchr(long_str, 'b'); } +SEC("syscall") int test_strlen_too_long(void *ctx) { return bpf_strlen(long_str); } +SEC("syscall") int test_strnlen_too_long(void *ctx) { return bpf_strnlen(long_str, sizeof(long_str)); } +SEC("syscall") int test_strspn_str_too_long(void *ctx) { return bpf_strspn(long_str, "a"); } +SEC("syscall") int test_strspn_accept_too_long(void *ctx) { return bpf_strspn("b", long_str); } +SEC("syscall") int test_strcspn_str_too_long(void *ctx) { return bpf_strcspn(long_str, "b"); } +SEC("syscall") int test_strcspn_reject_too_long(void *ctx) { return bpf_strcspn("b", long_str); } +SEC("syscall") int test_strstr_too_long(void *ctx) { return bpf_strstr(long_str, "hello"); } +SEC("syscall") int test_strnstr_too_long(void *ctx) { return bpf_strnstr(long_str, "hello", sizeof(long_str)); } + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c new file mode 100644 index 000000000000..46697f381878 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "errno.h" + +char str[] = "hello world"; + +#define __test(retval) SEC("syscall") __success __retval(retval) + +/* Functional tests */ +__test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); } +__test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); } +__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); } +__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); } +__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); } +__test(1) int test_strchrnul_found(void *ctx) { return bpf_strchrnul(str, 'e'); } +__test(11) int test_strchrnul_notfound(void *ctx) { return bpf_strchrnul(str, 'x'); } +__test(1) int test_strnchr_found(void *ctx) { return bpf_strnchr(str, 5, 'e'); } +__test(11) int test_strnchr_null(void *ctx) { return bpf_strnchr(str, 12, '\0'); } +__test(-ENOENT) int test_strnchr_notfound(void *ctx) { return bpf_strnchr(str, 5, 'w'); } +__test(9) int test_strrchr_found(void *ctx) { return bpf_strrchr(str, 'l'); } +__test(11) int test_strrchr_null(void *ctx) { return bpf_strrchr(str, '\0'); } +__test(-ENOENT) int test_strrchr_notfound(void *ctx) { return bpf_strrchr(str, 'x'); } +__test(11) int 
test_strlen(void *ctx) { return bpf_strlen(str); } +__test(11) int test_strnlen(void *ctx) { return bpf_strnlen(str, 12); } +__test(5) int test_strspn(void *ctx) { return bpf_strspn(str, "ehlo"); } +__test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); } +__test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); } +__test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); } +__test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); } +__test(0) int test_strnstr_found(void *ctx) { return bpf_strnstr(str, "hello", 6); } +__test(-ENOENT) int test_strnstr_notfound(void *ctx) { return bpf_strnstr(str, "hi", 10); } +__test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); } + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c index a3f220ba7025..ee65bad0436d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_map_resize.c +++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c @@ -32,6 +32,16 @@ int my_int_last SEC(".data.array_not_last"); int percpu_arr[1] SEC(".data.percpu_arr"); +/* at least one extern is included, to ensure that a specific + * regression is tested whereby resizing resulted in a free-after-use + * bug after type information is invalidated by the resize operation. + * + * There isn't a particularly good API to test for this specific condition, + * but by having externs for the resizing tests it will cover this path. + */ +extern int LINUX_KERNEL_VERSION __kconfig; +long version_sink; + SEC("tp/syscalls/sys_enter_getpid") int bss_array_sum(void *ctx) { @@ -44,6 +54,9 @@ int bss_array_sum(void *ctx) for (size_t i = 0; i < bss_array_len; ++i) sum += array[i]; + /* see above; ensure this is not optimized out */ + version_sink = LINUX_KERNEL_VERSION; + return 0; } @@ -59,6 +72,9 @@ int data_array_sum(void *ctx) for (size_t i = 0; i < data_array_len; ++i) sum += my_array[i]; + /* see above; ensure this is not optimized out */ + version_sink = LINUX_KERNEL_VERSION; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c index cdbbb12f1491..1f7e1e59b073 100644 --- a/tools/testing/selftests/bpf/progs/test_lookup_key.c +++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c @@ -14,11 +14,11 @@ char _license[] SEC("license") = "GPL"; __u32 monitored_pid; -__u32 key_serial; +__s32 key_serial; __u32 key_id; __u64 flags; -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; extern void bpf_key_put(struct bpf_key *key) __ksym; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c index 350513c0e4c9..f063a0013f85 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c @@ -26,11 +26,11 @@ int test_ringbuf_write(void *ctx) if (cur_pid != pid) return 0; - sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + sample1 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0); if (!sample1) return 0; /* first one can pass */ - sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + sample2 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0); if (!sample2) { bpf_ringbuf_discard(sample1, 0); 
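[Aside: the 0x3000 -> 0x30000 bump here pairs with the max_entries change in prog_tests/ringbuf.c above, keeping the test working on 64K-page kernels. A small standalone check of the sizing invariant, inferred from the constants rather than from any documented ringbuf contract.]

#include <assert.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	long ring_sz = 0x40000;	/* maps.ringbuf.max_entries */
	long resv = 0x30000;	/* bpf_ringbuf_reserve() size */

	assert(ring_sz % page == 0);			/* whole pages, incl. 64K */
	assert((ring_sz & (ring_sz - 1)) == 0);		/* power of two */
	assert(resv < ring_sz);				/* first reserve fits */
	assert(2 * resv > ring_sz);			/* second reserve must fail */
	return 0;
}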
__sync_fetch_and_add(&discarded, 1); diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c index 8ef6b39335b6..34b30e2603f0 100644 --- a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c +++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c @@ -40,7 +40,7 @@ char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; __u32 monitored_pid; char sig[MAX_SIG_SIZE]; __u32 sig_size; -__u32 user_keyring_serial; +__s32 user_keyring_serial; SEC("lsm.s/file_open") int BPF_PROG(test_file_open, struct file *f) diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c index 2796dd8545eb..1c7941a4ad00 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c @@ -1,8 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 ByteDance */ -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> +#ifndef PAGE_SIZE +#define PAGE_SIZE __PAGE_SIZE +#endif +#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2) + struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); __uint(max_entries, 1); @@ -31,7 +36,7 @@ int prog_skb_verdict(struct __sk_buff *skb) change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0); return SK_PASS; } else if (data[0] == 'E') { /* Error */ - change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0); return SK_PASS; } return SK_PASS; diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c index 28edafe803f0..fcba8299f0bc 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c +++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> -#include <linux/if_ether.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <linux/pkt_cls.h> + +#ifndef PAGE_SIZE +#define PAGE_SIZE __PAGE_SIZE +#endif +#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2) long change_tail_ret = 1; @@ -94,7 +94,7 @@ int change_tail(struct __sk_buff *skb) bpf_skb_change_tail(skb, len, 0); return TCX_PASS; } else if (payload[0] == 'E') { /* Error */ - change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0); return TCX_PASS; } else if (payload[0] == 'Z') { /* Zero */ change_tail_ret = bpf_skb_change_tail(skb, 0, 0); diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c index e96d09e11115..ff8d755548b9 100644 --- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c @@ -17,7 +17,7 @@ #define MAX_SIG_SIZE 1024 __u32 monitored_pid; -__u32 user_keyring_serial; +__s32 user_keyring_serial; __u64 system_keyring_id; struct data { diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c index dc74d8cf9e3f..5904f45cfbc4 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -19,7 +19,9 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) /* Data length determine test case */ if (data_len == 54) { /* 
sizeof(pkt_v4) */ - offset = 4096; /* test too large offset */ + offset = 4096; /* test too large offset, 4k page size */ + } else if (data_len == 53) { /* sizeof(pkt_v4) - 1 */ + offset = 65536; /* test too large offset, 64k page size */ } else if (data_len == 74) { /* sizeof(pkt_v6) */ offset = 40; } else if (data_len == 64) { @@ -31,6 +33,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) offset = 10; } else if (data_len == 9001) { offset = 4096; + } else if (data_len == 90000) { + offset = 10; /* test a small offset, 64k page size */ + } else if (data_len == 90001) { + offset = 65536; /* test too large offset, 64k page size */ } else { return XDP_ABORTED; /* No matching test */ } diff --git a/tools/testing/selftests/bpf/progs/verifier_and.c b/tools/testing/selftests/bpf/progs/verifier_and.c index e97e518516b6..2b4fdca162be 100644 --- a/tools/testing/selftests/bpf/progs/verifier_and.c +++ b/tools/testing/selftests/bpf/progs/verifier_and.c @@ -85,8 +85,14 @@ l0_%=: r0 = r0; \ SEC("socket") __description("check known subreg with unknown reg") -__success __failure_unpriv __msg_unpriv("R1 !read_ok") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if w0 < 0x1 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R1 !read_ok'` */ +__xlated_unpriv("goto pc-1") /* `r1 = *(u32*)(r1 + 512)`, sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void known_subreg_with_unknown_reg(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 0eb33bb801b5..e52a24e15b34 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -620,8 +620,14 @@ l1_%=: exit; \ SEC("socket") __description("bounds check mixed 32bit and 64bit arithmetic. test1") -__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("exit") +#endif __naked void _32bit_and_64bit_arithmetic_test1(void) { asm volatile (" \ @@ -643,8 +649,14 @@ l1_%=: exit; \ SEC("socket") __description("bounds check mixed 32bit and 64bit arithmetic. 
test2") -__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("exit") +#endif __naked void _32bit_and_64bit_arithmetic_test2(void) { asm volatile (" \ @@ -691,9 +703,14 @@ l0_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg = 0, reg xor 1") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r1 != 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg_0_reg_xor_1(void) { asm volatile (" \ @@ -719,9 +736,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg32 = 0, reg32 xor 1") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if w1 != 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg32_0_reg32_xor_1(void) { asm volatile (" \ @@ -747,9 +769,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg = 2, reg xor 3") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r1 > 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg_2_reg_xor_3(void) { asm volatile (" \ @@ -829,9 +856,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg > 0, reg xor 3") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r1 >= 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg_0_reg_xor_3(void) { asm volatile (" \ @@ -858,9 +890,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg32 > 0, reg32 xor 3") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if w1 >= 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg32_0_reg32_xor_3(void) { asm volatile (" \ @@ -1334,4 +1371,165 @@ __naked void mult_sign_ovf(void) __imm(bpf_skb_store_bytes) : __clobber_all); } + +SEC("socket") +__description("64-bit addition, all outcomes overflow") +__success __log_level(2) +__msg("5: (0f) r3 += r3 {{.*}} R3_w=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)") +__retval(0) +__naked void add64_full_overflow(void) 
+{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r4 = r0;" + "r3 = 0xa000000000000000 ll;" + "r3 |= r4;" + "r3 += r3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit addition, partial overflow, result in unbounded reg") +__success __log_level(2) +__msg("4: (0f) r3 += r3 {{.*}} R3_w=scalar()") +__retval(0) +__naked void add64_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r4 = r0;" + "r3 = 2;" + "r3 |= r4;" + "r3 += r3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit addition overflow, all outcomes overflow") +__success __log_level(2) +__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void add32_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w4 = w0;" + "w3 = 0xa0000000;" + "w3 |= w4;" + "w3 += w3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit addition, partial overflow, result in unbounded u32 bounds") +__success __log_level(2) +__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void add32_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w4 = w0;" + "w3 = 2;" + "w3 |= w4;" + "w3 += w3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit subtraction, all outcomes underflow") +__success __log_level(2) +__msg("6: (1f) r3 -= r1 {{.*}} R3_w=scalar(umin=1,umax=0x8000000000000000)") +__retval(0) +__naked void sub64_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r1 = r0;" + "r2 = 0x8000000000000000 ll;" + "r1 |= r2;" + "r3 = 0;" + "r3 -= r1;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit subtraction, partial overflow, result in unbounded reg") +__success __log_level(2) +__msg("3: (1f) r3 -= r2 {{.*}} R3_w=scalar()") +__retval(0) +__naked void sub64_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r3 = r0;" + "r2 = 1;" + "r3 -= r2;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit subtraction overflow, all outcomes underflow") +__success __log_level(2) +__msg("5: (1c) w3 -= w1 {{.*}} R3_w=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void sub32_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w1 = w0;" + "w2 = 0x80000000;" + "w1 |= w2;" + "w3 = 0;" + "w3 -= w1;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit subtraction, partial overflow, result in unbounded u32 bounds") +__success __log_level(2) +__msg("3: (1c) w3 -= w2 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void sub32_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w3 = w0;" + "w2 = 1;" + "w3 -= w2;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c index 
c506afbdd936..260a6df264e3 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c @@ -159,13 +159,16 @@ __failure_unpriv __naked void deducing_bounds_from_const_10(void) { asm volatile (" \ + r6 = r1; \ r0 = 0; \ if r0 s<= 0 goto l0_%=; \ -l0_%=: /* Marks reg as unknown. */ \ - r0 = -r0; \ - r0 -= r1; \ +l0_%=: /* Marks r0 as unknown. */ \ + call %[bpf_get_prandom_u32]; \ + r0 -= r6; \ exit; \ -" ::: __clobber_all); +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c index 458984da804c..34e0c012ee76 100644 --- a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c +++ b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c @@ -77,7 +77,7 @@ l0_%=: exit; \ SEC("tc") __description("MOD32 overflow, check 1") -__success __retval(INT_MIN) +__success __retval(_INT_MIN) __naked void mod32_overflow_check_1(void) { asm volatile (" \ @@ -92,7 +92,7 @@ __naked void mod32_overflow_check_1(void) SEC("tc") __description("MOD32 overflow, check 2") -__success __retval(INT_MIN) +__success __retval(_INT_MIN) __naked void mod32_overflow_check_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c index 7d088ba99ea5..16b761e510f0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c @@ -139,4 +139,122 @@ __naked void on_the_inner_map_pointer(void) : __clobber_all); } +SEC("socket") +__description("map_ptr is never null") +__success +__naked void map_ptr_is_never_null(void) +{ + asm volatile (" \ + r0 = 0; \ + r1 = %[map_in_map] ll; \ + if r1 != 0 goto l0_%=; \ + r10 = 42; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map) + : __clobber_all); +} + +SEC("socket") +__description("map_ptr is never null inner") +__success +__naked void map_ptr_is_never_null_inner(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u32*)(r10 - 4) = r1; \ + r2 = r10; \ + r2 += -4; \ + r1 = %[map_in_map] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + if r0 != 0 goto l0_%=; \ + r10 = 42; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map) + : __clobber_all); +} + +SEC("socket") +__description("map_ptr is never null inner spill fill") +__success +__naked void map_ptr_is_never_null_inner_spill_fill(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u32*)(r10 - 4) = r1; \ + r2 = r10; \ + r2 += -4; \ + r1 = %[map_in_map] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 != 0 goto l0_%=; \ + exit; \ +l0_%=: *(u64 *)(r10 -16) = r0; \ + r1 = *(u64 *)(r10 -16); \ + if r1 == 0 goto l1_%=; \ + exit; \ +l1_%=: r10 = 42; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map) + : __clobber_all); +} + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 64 * 1024); + }); +} rb_in_map SEC(".maps"); + +struct rb_ctx { + void *rb; + struct bpf_dynptr dptr; +}; + +static __always_inline struct rb_ctx __rb_event_reserve(__u32 sz) +{ + struct rb_ctx rb_ctx = {}; + void *rb; + __u32 cpu = bpf_get_smp_processor_id(); + __u32 rb_slot = cpu & 1; + + rb = 
bpf_map_lookup_elem(&rb_in_map, &rb_slot); + if (!rb) + return rb_ctx; + + rb_ctx.rb = rb; + bpf_ringbuf_reserve_dynptr(rb, sz, 0, &rb_ctx.dptr); + + return rb_ctx; +} + +static __noinline void __rb_event_submit(struct rb_ctx *ctx) +{ + if (!ctx->rb) + return; + + /* If the verifier (incorrectly) concludes that ctx->rb can be + * NULL at this point, we'll get "BPF_EXIT instruction in main + * prog would lead to reference leak" error + */ + bpf_ringbuf_submit_dynptr(&ctx->dptr, 0); +} + +SEC("socket") +int map_ptr_is_never_null_rb(void *ctx) +{ + struct rb_ctx event_ctx = __rb_event_reserve(256); + __rb_event_submit(&event_ctx); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index 994bbc346d25..a4d8814eb5ed 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -245,7 +245,13 @@ l0_%=: \ SEC("socket") __description("MOV32SX, S8, var_off not u32_max, positive after s8 extension") __success __retval(0) -__failure_unpriv __msg_unpriv("frame pointer is read only") +__success_unpriv +#ifdef SPEC_V1 +__xlated_unpriv("w0 = 0") +__xlated_unpriv("exit") +__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */ +__xlated_unpriv("goto pc-1") +#endif __naked void mov64sx_s32_varoff_2(void) { asm volatile (" \ @@ -267,7 +273,13 @@ l0_%=: \ SEC("socket") __description("MOV32SX, S8, var_off not u32_max, negative after s8 extension") __success __retval(0) -__failure_unpriv __msg_unpriv("frame pointer is read only") +__success_unpriv +#ifdef SPEC_V1 +__xlated_unpriv("w0 = 0") +__xlated_unpriv("exit") +__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */ +__xlated_unpriv("goto pc-1") +#endif __naked void mov64sx_s32_varoff_3(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 9fe5d255ee37..73fee2aec698 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -231,4 +231,74 @@ __naked void bpf_cond_op_not_r10(void) ::: __clobber_all); } +SEC("lsm.s/socket_connect") +__success __log_level(2) +__msg("0: (b7) r0 = 1 ; R0_w=1") +__msg("1: (84) w0 = -w0 ; R0_w=0xffffffff") +__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0") +__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1") +__naked int bpf_neg_2(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -1 is allowed + */ + asm volatile ( + "r0 = 1;" + "w0 = -w0;" + "exit;" + ::: __clobber_all); +} + +SEC("lsm.s/socket_connect") +__failure __msg("At program exit the register R0 has") +__naked int bpf_neg_3(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -10000 is not allowed. 
+ */ + asm volatile ( + "r0 = 10000;" + "w0 = -w0;" + "exit;" + ::: __clobber_all); +} + +SEC("lsm.s/socket_connect") +__success __log_level(2) +__msg("0: (b7) r0 = 1 ; R0_w=1") +__msg("1: (87) r0 = -r0 ; R0_w=-1") +__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0") +__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1") +__naked int bpf_neg_4(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -1 is allowed + */ + asm volatile ( + "r0 = 1;" + "r0 = -r0;" + "exit;" + ::: __clobber_all); +} + +SEC("lsm.s/socket_connect") +__failure __msg("At program exit the register R0 has") +__naked int bpf_neg_5(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -10000 is not allowed. + */ + asm volatile ( + "r0 = 10000;" + "r0 = -r0;" + "exit;" + ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c index 683a882b3e6d..910365201f68 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c +++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c @@ -27,7 +27,7 @@ struct bpf_key {} __attribute__((preserve_access_index)); extern void bpf_key_put(struct bpf_key *key) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; /* BTF FUNC records are not generated for kfuncs referenced * from inline assembly. These records are necessary for diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c index a4a5e2071604..4470541b5e71 100644 --- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c +++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c @@ -572,8 +572,14 @@ l0_%=: exit; \ SEC("socket") __description("alu32: mov u32 const") -__success __failure_unpriv __msg_unpriv("R7 invalid mem access 'scalar'") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r0 == 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R7 invalid mem access 'scalar'` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("exit") +#endif __naked void alu32_mov_u32_const(void) { asm volatile (" \ @@ -619,12 +625,11 @@ __naked void pass_pointer_to_tail_call(void) SEC("socket") __description("unpriv: cmp map pointer with zero") -__success __failure_unpriv __msg_unpriv("R1 pointer comparison") +__success __success_unpriv __retval(0) __naked void cmp_map_pointer_with_zero(void) { asm volatile (" \ - r1 = 0; \ r1 = %[map_hash_8b] ll; \ if r1 == 0 goto l0_%=; \ l0_%=: r0 = 0; \ @@ -635,6 +640,22 @@ l0_%=: r0 = 0; \ } SEC("socket") +__description("unpriv: cmp map pointer with const") +__success __failure_unpriv __msg_unpriv("R1 pointer comparison prohibited") +__retval(0) +__naked void cmp_map_pointer_with_const(void) +{ + asm volatile (" \ + r1 = %[map_hash_8b] ll; \ + if r1 == 0x0000beef goto l0_%=; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") __description("unpriv: write into frame pointer") __failure __msg("frame pointer is read only") __failure_unpriv @@ -723,4 +744,61 @@ l0_%=: r0 = 0; \ " ::: __clobber_all); } +SEC("socket") 
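+/* Gadget summary, restating the inline comments in the test body below: + * under misprediction of the first bounds check, r6 transiently holds the + * attacker-controlled scalar from r9; the dependent load through r6 and the + * map lookup keyed on the leaked bit would then encode one bit of kernel + * memory into the cache. The verifier-inserted nospec forces the check to + * resolve first, cutting this path. + */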
+__description("unpriv: Spectre v1 path-based type confusion of scalar as stack-ptr") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r0 != 0x1 goto pc+2") +/* This nospec prevents the exploit because it forces the mispredicted (not + * taken) `if r0 != 0x0 goto l0_%=` to resolve before using r6 as a pointer. + * This causes the CPU to realize that `r6 = r9` should have never executed. It + * ensures that r6 always contains a readable stack slot ptr when the insn after + * the nospec executes. + */ +__xlated_unpriv("nospec") +__xlated_unpriv("r9 = *(u8 *)(r6 +0)") +#endif +__naked void unpriv_spec_v1_type_confusion(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l2_%=; \ + /* r0: pointer to a map array entry */ \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + /* r1, r2: prepared call args */ \ + r6 = r10; \ + r6 += -8; \ + /* r6: pointer to readable stack slot */ \ + r9 = 0xffffc900; \ + r9 <<= 32; \ + /* r9: scalar controlled by attacker */ \ + r0 = *(u64 *)(r0 + 0); /* cache miss */ \ + if r0 != 0x0 goto l0_%=; \ + r6 = r9; \ +l0_%=: if r0 != 0x1 goto l1_%=; \ + r9 = *(u8 *)(r6 + 0); \ +l1_%=: /* leak r9 */ \ + r9 &= 1; \ + r9 <<= 9; \ + *(u64*)(r10 - 8) = r9; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l2_%=; \ + /* leak secret into is_cached(map[0|512]): */ \ + r0 = *(u64 *)(r0 + 0); \ +l2_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c index 5ba6e53571c8..af7938ce56cb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c +++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c @@ -231,6 +231,10 @@ __retval(1) __naked void ptr_unknown_vs_unknown_lt(void) { asm volatile (" \ + r8 = r1; \ + call %[bpf_get_prandom_u32]; \ + r9 = r0; \ + r1 = r8; \ r0 = *(u32*)(r1 + %[__sk_buff_len]); \ r1 = 0; \ *(u64*)(r10 - 8) = r1; \ @@ -245,11 +249,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \ r4 = *(u8*)(r0 + 0); \ if r4 == 1 goto l3_%=; \ r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x3; \ goto l4_%=; \ l3_%=: r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x7; \ l4_%=: r1 += r0; \ r0 = *(u8*)(r1 + 0); \ @@ -259,7 +263,8 @@ l2_%=: r0 = 1; \ : __imm(bpf_map_lookup_elem), __imm_addr(map_array_48b), __imm_addr(map_hash_16b), - __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)) + __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)), + __imm(bpf_get_prandom_u32) : __clobber_all); } @@ -271,6 +276,10 @@ __retval(1) __naked void ptr_unknown_vs_unknown_gt(void) { asm volatile (" \ + r8 = r1; \ + call %[bpf_get_prandom_u32]; \ + r9 = r0; \ + r1 = r8; \ r0 = *(u32*)(r1 + %[__sk_buff_len]); \ r1 = 0; \ *(u64*)(r10 - 8) = r1; \ @@ -285,11 +294,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \ r4 = *(u8*)(r0 + 0); \ if r4 == 1 goto l3_%=; \ r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x7; \ goto l4_%=; \ l3_%=: r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x3; \ l4_%=: r1 += r0; \ r0 = *(u8*)(r1 + 0); \ @@ -299,7 +308,8 @@ l2_%=: r0 = 1; \ : __imm(bpf_map_lookup_elem), __imm_addr(map_array_48b), __imm_addr(map_hash_16b), - __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)) + __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)), + 
__imm(bpf_get_prandom_u32) : __clobber_all); } @@ -398,7 +408,8 @@ l2_%=: r0 = 1; \ SEC("socket") __description("map access: mixing value pointer and scalar, 1") -__success __failure_unpriv __msg_unpriv("R2 pointer comparison prohibited") +__success __failure_unpriv +__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root") __retval(0) __naked void value_pointer_and_scalar_1(void) { @@ -433,6 +444,7 @@ l2_%=: /* common instruction */ \ l3_%=: /* branch B */ \ r0 = 0x13371337; \ /* verifier follows fall-through */ \ + /* unpriv: nospec (inserted to prevent `R2 pointer comparison prohibited`) */\ if r2 != 0x100000 goto l4_%=; \ r0 = 0; \ exit; \ @@ -450,7 +462,8 @@ l4_%=: /* fake-dead code; targeted from branch A to \ SEC("socket") __description("map access: mixing value pointer and scalar, 2") -__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'") +__success __failure_unpriv +__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root") __retval(0) __naked void value_pointer_and_scalar_2(void) { @@ -492,6 +505,7 @@ l4_%=: /* fake-dead code; targeted from branch A to \ * prevent dead code sanitization, rejected \ * via branch B however \ */ \ + /* unpriv: nospec (inserted to prevent `R0 invalid mem access 'scalar'`) */\ r0 = *(u8*)(r0 + 0); \ r0 = 0; \ exit; \ @@ -1296,9 +1310,13 @@ l0_%=: r0 = 1; \ SEC("socket") __description("map access: value_ptr -= unknown scalar, 2") -__success __failure_unpriv -__msg_unpriv("R0 pointer arithmetic of map value goes out of range") +__success __success_unpriv __retval(1) +#ifdef SPEC_V1 +__xlated_unpriv("r1 &= 7") +__xlated_unpriv("nospec") /* inserted to prevent `R0 pointer arithmetic of map value goes out of range` */ +__xlated_unpriv("r0 -= r1") +#endif __naked void value_ptr_unknown_scalar_2_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c index a7c0a553aa50..3e2d76ee8050 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c @@ -2,6 +2,7 @@ /* Copyright (c) 2024 Google LLC. */ #include <vmlinux.h> +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -82,4 +83,21 @@ int BPF_PROG(path_d_path_from_file_argument, struct file *file) return 0; } +SEC("lsm.s/inode_rename") +__success +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct inode *inode = new_dentry->d_inode; + ino_t ino; + + if (!inode) + return 0; + ino = inode->i_ino; + if (ino == 0) + return -EACCES; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c index d6d3f4fcb24c..4b392c6c8fc4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c @@ -2,6 +2,7 @@ /* Copyright (c) 2024 Google LLC. 
*/ #include <vmlinux.h> +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <linux/limits.h> @@ -158,4 +159,18 @@ int BPF_PROG(path_d_path_kfunc_non_lsm, struct path *path, struct file *f) return 0; } +SEC("lsm.s/inode_rename") +__failure __msg("invalid mem access 'trusted_ptr_or_null_'") +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct inode *inode = new_dentry->d_inode; + ino_t ino; + + ino = inode->i_ino; + if (ino == 0) + return -EACCES; + return 0; +} char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_bpftool_map.sh b/tools/testing/selftests/bpf/test_bpftool_map.sh new file mode 100755 index 000000000000..515b1df0501e --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool_map.sh @@ -0,0 +1,398 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +TESTNAME="bpftool_map" +BPF_FILE="security_bpf_map.bpf.o" +BPF_ITER_FILE="bpf_iter_map_elem.bpf.o" +PROTECTED_MAP_NAME="prot_map" +NOT_PROTECTED_MAP_NAME="not_prot_map" +BPF_FS_TMP_PARENT="/tmp" +BPF_FS_PARENT=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) +BPF_FS_PARENT=${BPF_FS_PARENT:-$BPF_FS_TMP_PARENT} +# bpftool will mount bpf file system under BPF_DIR if it is not mounted +# under BPF_FS_PARENT. +BPF_DIR="$BPF_FS_PARENT/test_$TESTNAME" +SCRIPT_DIR=$(dirname $(realpath "$0")) +BPF_FILE_PATH="$SCRIPT_DIR/$BPF_FILE" +BPF_ITER_FILE_PATH="$SCRIPT_DIR/$BPF_ITER_FILE" +BPFTOOL_PATH="bpftool" +# Assume the script is located under tools/testing/selftests/bpf/ +KDIR_ROOT_DIR=$(realpath "$SCRIPT_DIR"/../../../../) + +_cleanup() +{ + set +eu + + # If BPF_DIR is a mount point this will not remove the mount point itself. + [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2> /dev/null + + # Unmount if BPF filesystem was temporarily created. + if [ "$BPF_FS_PARENT" = "$BPF_FS_TMP_PARENT" ]; then + # A loop and recursive unmount are required as bpftool might + # create multiple mounts. For example, a bind mount of the directory + # to itself. The bind mount is created to change mount propagation + # flags on an actual mount point. + max_attempts=3 + attempt=0 + while mountpoint -q "$BPF_DIR" && [ $attempt -lt $max_attempts ]; do + umount -R "$BPF_DIR" 2>/dev/null + attempt=$((attempt+1)) + done + + # The directory still exists. Remove it now. + [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2>/dev/null + fi +} + +cleanup_skip() +{ + echo "selftests: $TESTNAME [SKIP]" + _cleanup + + exit $ksft_skip +} + +cleanup() +{ + if [ "$?" = 0 ]; then + echo "selftests: $TESTNAME [PASS]" + else + echo "selftests: $TESTNAME [FAILED]" + fi + _cleanup +} + +check_root_privileges() { + if [ $(id -u) -ne 0 ]; then + echo "Need root privileges" + exit $ksft_skip + fi +} + +# Function to verify bpftool path. +# Parameters: +# $1: bpftool path +verify_bpftool_path() { + local bpftool_path="$1" + if ! "$bpftool_path" version > /dev/null 2>&1; then + echo "Could not run test without bpftool" + exit $ksft_skip + fi +} + +# Function to verify BTF support. +# The test requires BTF support for fmod_ret programs. +verify_btf_support() { + if [ ! -f /sys/kernel/btf/vmlinux ]; then + echo "Could not run test without BTF support" + exit $ksft_skip + fi +} + +# Function to initialize map entries with keys [0..2] and values set to 0. 
+# Parameters: +# $1: Map name +# $2: bpftool path +initialize_map_entries() { + local map_name="$1" + local bpftool_path="$2" + + for key in 0 1 2; do + "$bpftool_path" map update name "$map_name" key $key 0 0 0 value 0 0 0 $key + done +} + +# Test read access to the map. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key +access_for_read() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key="$4" + + # Test read access to the map. + if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then + echo " Read access to $key in $map_name failed" + exit 1 + fi + + # Test read access to map's BTF data. + if ! "$bpftool_path" btf dump map "$name_cmd" "$map_name" 1>/dev/null; then + echo " Read access to $map_name for BTF data failed" + exit 1 + fi +} + +# Test write access to the map. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key +# $5: Whether write should succeed (true/false) +access_for_write() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key="$4" + local write_should_succeed="$5" + local value="1 1 1 1" + + if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \ + $value 2>/dev/null; then + if [ "$write_should_succeed" = "false" ]; then + echo " Write access to $key in $map_name succeeded but should have failed" + exit 1 + fi + else + if [ "$write_should_succeed" = "true" ]; then + echo " Write access to $key in $map_name failed but should have succeeded" + exit 1 + fi + fi +} + +# Test entry deletion for the map. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key +# $5: Whether write should succeed (true/false) +access_for_deletion() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key="$4" + local write_should_succeed="$5" + local value="1 1 1 1" + + # Test deletion by key for the map. + # Before deleting, check the key exists. + if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then + echo " Key $key does not exist in $map_name" + exit 1 + fi + + # Delete by key. + if "$bpftool_path" map delete "$name_cmd" "$map_name" key $key 2>/dev/null; then + if [ "$write_should_succeed" = "false" ]; then + echo " Deletion for $key in $map_name succeeded but should have failed" + exit 1 + fi + else + if [ "$write_should_succeed" = "true" ]; then + echo " Deletion for $key in $map_name failed but should have succeeded" + exit 1 + fi + fi + + # After deleting, check the entry existence according to the expected status. + if "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then + if [ "$write_should_succeed" = "true" ]; then + echo " Key $key for $map_name was not deleted but should have been deleted" + exit 1 + fi + else + if [ "$write_should_succeed" = "false" ]; then + echo " Key $key for $map_name was deleted but should not have been deleted" + exit 1 + fi + fi + + # Test creation of map's deleted entry, if deletion was successful. + # Otherwise, the entry exists. 
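+	# Either way, the re-insert below must agree with write_should_succeed: + # a protected map rejects the update, while an unprotected map simply + # recreates the entry that was just deleted.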
+ if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \ + $value 2>/dev/null; then + if [ "$write_should_succeed" = "false" ]; then + echo " Write access to $key in $map_name succeeded after deletion attempt but should have failed" + exit 1 + fi + else + if [ "$write_should_succeed" = "true" ]; then + echo " Write access to $key in $map_name failed after deletion attempt but should have succeeded" + exit 1 + fi + fi +} + +# Test map elements iterator. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: BPF_DIR +# $5: bpf iterator object file path +iterate_map_elem() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local bpf_dir="$4" + local bpf_file="$5" + local pin_path="$bpf_dir/map_iterator" + + "$bpftool_path" iter pin "$bpf_file" "$pin_path" map "$name_cmd" "$map_name" + if [ ! -f "$pin_path" ]; then + echo " Failed to pin iterator to $pin_path" + exit 1 + fi + + cat "$pin_path" 1>/dev/null + rm "$pin_path" 2>/dev/null +} + +# Function to test map access with configurable write expectations +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key for rw +# $5: key to delete +# $6: Whether write should succeed (true/false) +# $7: BPF_DIR +# $8: bpf iterator object file path +access_map() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key_for_rw="$4" + local key_to_del="$5" + local write_should_succeed="$6" + local bpf_dir="$7" + local bpf_iter_file_path="$8" + + access_for_read "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" + access_for_write "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" \ + "$write_should_succeed" + access_for_deletion "$name_cmd" "$map_name" "$bpftool_path" "$key_to_del" \ + "$write_should_succeed" + iterate_map_elem "$name_cmd" "$map_name" "$bpftool_path" "$bpf_dir" \ + "$bpf_iter_file_path" +} + +# Function to test map access with configurable write expectations +# Parameters: +# $1: Map name +# $2: bpftool path +# $3: BPF_DIR +# $4: Whether write should succeed (true/false) +# $5: bpf iterator object file path +test_map_access() { + local map_name="$1" + local bpftool_path="$2" + local bpf_dir="$3" + local pin_path="$bpf_dir/${map_name}_pinned" + local write_should_succeed="$4" + local bpf_iter_file_path="$5" + + # Test access to the map by name. + access_map "name" "$map_name" "$bpftool_path" "0 0 0 0" "1 0 0 0" \ + "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path" + + # Pin the map to the BPF filesystem + "$bpftool_path" map pin name "$map_name" "$pin_path" + if [ ! -e "$pin_path" ]; then + echo " Failed to pin $map_name" + exit 1 + fi + + # Test access to the pinned map. + access_map "pinned" "$pin_path" "$bpftool_path" "0 0 0 0" "2 0 0 0" \ + "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path" +} + +# Function to test map creation and map-of-maps +# Parameters: +# $1: bpftool path +# $2: BPF_DIR +test_map_creation_and_map_of_maps() { + local bpftool_path="$1" + local bpf_dir="$2" + local outer_map_name="outer_map_tt" + local inner_map_name="inner_map_tt" + + "$bpftool_path" map create "$bpf_dir/$inner_map_name" type array key 4 \ + value 4 entries 4 name "$inner_map_name" + if [ ! -f "$bpf_dir/$inner_map_name" ]; then + echo " Failed to create inner map file at $bpf_dir/$outer_map_name" + return 1 + fi + + "$bpftool_path" map create "$bpf_dir/$outer_map_name" type hash_of_maps \ + key 4 value 4 entries 2 name "$outer_map_name" inner_map name "$inner_map_name" + if [ ! 
-f "$bpf_dir/$outer_map_name" ]; then + echo " Failed to create outer map file at $bpf_dir/$outer_map_name" + return 1 + fi + + # Add entries to the outer map by name and by pinned path. + "$bpftool_path" map update pinned "$bpf_dir/$outer_map_name" key 0 0 0 0 \ + value pinned "$bpf_dir/$inner_map_name" + "$bpftool_path" map update name "$outer_map_name" key 1 0 0 0 value \ + name "$inner_map_name" + + # The outer map should be full by now. + # The following map update command is expected to fail. + if "$bpftool_path" map update name "$outer_map_name" key 2 0 0 0 value name \ + "$inner_map_name" 2>/dev/null; then + echo " Update for $outer_map_name succeeded but should have failed" + exit 1 + fi +} + +# Function to test map access with the btf list command +# Parameters: +# $1: bpftool path +test_map_access_with_btf_list() { + local bpftool_path="$1" + + # The btf list command iterates over maps for + # loaded BPF programs. + if ! "$bpftool_path" btf list 1>/dev/null; then + echo " Failed to access btf data" + exit 1 + fi +} + +set -eu + +trap cleanup_skip EXIT + +check_root_privileges + +verify_bpftool_path "$BPFTOOL_PATH" + +verify_btf_support + +trap cleanup EXIT + +# Load and attach the BPF programs to control maps access. +"$BPFTOOL_PATH" prog loadall "$BPF_FILE_PATH" "$BPF_DIR" autoattach + +initialize_map_entries "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" +initialize_map_entries "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" + +# Activate the map protection mechanism. Protection status is controlled +# by a value stored in the prot_status_map at index 0. +"$BPFTOOL_PATH" map update name prot_status_map key 0 0 0 0 value 1 0 0 0 + +# Test protected map (write should fail). +test_map_access "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "false" \ + "$BPF_ITER_FILE_PATH" + +# Test not protected map (write should succeed). +test_map_access "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "true" \ + "$BPF_ITER_FILE_PATH" + +test_map_creation_and_map_of_maps "$BPFTOOL_PATH" "$BPF_DIR" + +test_map_access_with_btf_list "$BPFTOOL_PATH" + +exit 0 diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 9551d8d5f8f9..78423cf89e01 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -40,7 +40,7 @@ #define TEST_TAG_LOAD_MODE_PFX "comment:load_mode=" /* Warning: duplicated in bpf_misc.h */ -#define POINTER_VALUE 0xcafe4all +#define POINTER_VALUE 0xbadcafe #define TEST_DATA_LEN 64 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS @@ -318,20 +318,14 @@ static int parse_caps(const char *str, __u64 *val, const char *name) static int parse_retval(const char *str, int *val, const char *name) { - struct { - char *name; - int val; - } named_values[] = { - { "INT_MIN" , INT_MIN }, - { "POINTER_VALUE", POINTER_VALUE }, - { "TEST_DATA_LEN", TEST_DATA_LEN }, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(named_values); ++i) { - if (strcmp(str, named_values[i].name) != 0) - continue; - *val = named_values[i].val; + /* + * INT_MIN is defined as (-INT_MAX -1), i.e. it doesn't expand to a + * single int and cannot be parsed with strtol, so we handle it + * separately here. In addition, it expands to different expressions in + * different compilers so we use a prefixed _INT_MIN instead. 
+ */ + if (strcmp(str, "_INT_MIN") == 0) { + *val = INT_MIN; return 0; } @@ -1103,9 +1097,9 @@ void run_subtest(struct test_loader *tester, } } - do_prog_test_run(bpf_program__fd(tprog), &retval, - bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false); - if (retval != subspec->retval && subspec->retval != POINTER_VALUE) { + err = do_prog_test_run(bpf_program__fd(tprog), &retval, + bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false); + if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) { PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval); goto tobj_cleanup; } diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index fda7589c5023..0921939532c6 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -138,6 +138,18 @@ static int sched_next_online(int pid, int *next_to_try) return ret; } +/* Derive target_free from map_size, same as bpf_common_lru_populate */ +static unsigned int __tgt_size(unsigned int map_size) +{ + return (map_size / nr_cpus) / 2; +} + +/* Inverse of how bpf_common_lru_populate derives target_free from map_size. */ +static unsigned int __map_size(unsigned int tgt_free) +{ + return tgt_free * nr_cpus * 2; +} + /* Size of the LRU map is 2 * Add key=1 (+1 key) * Add key=2 (+1 key) @@ -231,11 +243,11 @@ static void test_lru_sanity0(int map_type, int map_flags) printf("Pass\n"); } -/* Size of the LRU map is 1.5*tgt_free - * Insert 1 to tgt_free (+tgt_free keys) - * Lookup 1 to tgt_free/2 - * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys) - * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU +/* Verify that unreferenced elements are recycled before referenced ones. + * Insert elements. + * Reference a subset of these. + * Insert more, enough to trigger recycling. + * Verify that unreferenced are recycled. 
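+ * In terms of the helpers above: the map holds __map_size(tgt_free) + + * batch_size entries and each insert pass adds __map_size(tgt_free) keys, + * so the second pass overflows the map; the unreferenced keys + * batch_size+1..__map_size(tgt_free) get recycled while the looked-up + * keys 1..batch_size survive.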
*/ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) { @@ -257,7 +269,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); - map_size = tgt_free + batch_size; + map_size = __map_size(tgt_free) + batch_size; lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); @@ -266,13 +278,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to tgt_free (+tgt_free keys) */ - end_key = 1 + tgt_free; + /* Insert map_size - batch_size keys */ + end_key = 1 + __map_size(tgt_free); for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Lookup 1 to tgt_free/2 */ + /* Lookup 1 to batch_size */ end_key = 1 + batch_size; for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); @@ -280,12 +292,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) BPF_NOEXIST)); } - /* Insert 1+tgt_free to 2*tgt_free - * => 1+tgt_free/2 to LOCALFREE_TARGET will be + /* Insert another map_size - batch_size keys + * Map will contain 1 to batch_size plus these latest, i.e., + * => previous 1+batch_size to map_size - batch_size will have been * removed by LRU */ - key = 1 + tgt_free; - end_key = key + tgt_free; + key = 1 + __map_size(tgt_free); + end_key = key + __map_size(tgt_free); for (; key < end_key; key++) { assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -301,17 +314,8 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) printf("Pass\n"); } -/* Size of the LRU map 1.5 * tgt_free - * Insert 1 to tgt_free (+tgt_free keys) - * Update 1 to tgt_free/2 - * => The original 1 to tgt_free/2 will be removed due to - * the LRU shrink process - * Re-insert 1 to tgt_free/2 again and do a lookup immeidately - * Insert 1+tgt_free to tgt_free*3/2 - * Insert 1+tgt_free*3/2 to tgt_free*5/2 - * => Key 1+tgt_free to tgt_free*3/2 - * will be removed from LRU because it has never - * been lookup and ref bit is not set +/* Verify that insertions exceeding map size will recycle the oldest. + * Verify that unreferenced elements are recycled before referenced. */ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) { @@ -334,7 +338,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); - map_size = tgt_free + batch_size; + map_size = __map_size(tgt_free) + batch_size; lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); @@ -343,8 +347,8 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to tgt_free (+tgt_free keys) */ - end_key = 1 + tgt_free; + /* Insert map_size - batch_size keys */ + end_key = 1 + __map_size(tgt_free); for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -357,8 +361,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) * shrink the inactive list to get tgt_free * number of free nodes. * - * Hence, the oldest key 1 to tgt_free/2 - * are removed from the LRU list. + * Hence, the oldest key is removed from the LRU list. 
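+ * (The tgt_free mentioned here tracks the kernel's target_free, which + * bpf_common_lru_populate derives from the map size as in the + * __tgt_size() helper above.)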
*/ key = 1; if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { @@ -370,8 +373,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) BPF_EXIST)); } - /* Re-insert 1 to tgt_free/2 again and do a lookup - * immeidately. + /* Re-insert 1 to batch_size again and do a lookup immediately. */ end_key = 1 + batch_size; value[0] = 4321; @@ -387,17 +389,18 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1+tgt_free to tgt_free*3/2 */ - end_key = 1 + tgt_free + batch_size; - for (key = 1 + tgt_free; key < end_key; key++) + /* Insert batch_size new elements */ + key = 1 + __map_size(tgt_free); + end_key = key + batch_size; + for (; key < end_key; key++) /* These newly added but not referenced keys will be * gone during the next LRU shrink. */ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */ - end_key = key + tgt_free; + /* Insert map_size - batch_size elements */ + end_key += __map_size(tgt_free); for (; key < end_key; key++) { assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -413,12 +416,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) printf("Pass\n"); } -/* Size of the LRU map is 2*tgt_free - * It is to test the active/inactive list rotation - * Insert 1 to 2*tgt_free (+2*tgt_free keys) - * Lookup key 1 to tgt_free*3/2 - * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys) - * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU +/* Test the active/inactive list rotation + * + * Fill the whole map, deplete the free list. + * Reference all except the last lru->target_free elements. + * Insert lru->target_free new elements. This triggers one shrink. + * Verify that the non-referenced elements are replaced. 
*/ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) { @@ -437,8 +440,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) assert(sched_next_online(0, &next_cpu) != -1); - batch_size = tgt_free / 2; - assert(batch_size * 2 == tgt_free); + batch_size = __tgt_size(tgt_free); map_size = tgt_free * 2; lru_map_fd = create_map(map_type, map_flags, map_size); @@ -449,23 +451,21 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */ - end_key = 1 + (2 * tgt_free); + /* Fill the map */ + end_key = 1 + map_size; for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Lookup key 1 to tgt_free*3/2 */ - end_key = tgt_free + batch_size; + /* Reference all but the last batch_size */ + end_key = 1 + map_size - batch_size; for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); } - /* Add 1+2*tgt_free to tgt_free*5/2 - * (+tgt_free/2 keys) - */ + /* Insert new batch_size: replaces the non-referenced elements */ key = 2 * tgt_free + 1; end_key = key + batch_size; for (; key < end_key; key++) { @@ -500,7 +500,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free) lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free * nr_cpus); else - lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free); + lru_map_fd = create_map(map_type, map_flags, + 3 * __map_size(tgt_free)); assert(lru_map_fd != -1); expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 870694f2a359..df2222a1806f 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -460,6 +460,34 @@ static inline void *u64_to_ptr(__u64 ptr) return (void *) (unsigned long) ptr; } +static inline __u32 id_from_prog_fd(int fd) +{ + struct bpf_prog_info prog_info = {}; + __u32 prog_info_len = sizeof(prog_info); + int err; + + err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len); + if (!ASSERT_OK(err, "id_from_prog_fd")) + return 0; + + ASSERT_NEQ(prog_info.id, 0, "prog_info.id"); + return prog_info.id; +} + +static inline __u32 id_from_link_fd(int fd) +{ + struct bpf_link_info link_info = {}; + __u32 link_info_len = sizeof(link_info); + int err; + + err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len); + if (!ASSERT_OK(err, "id_from_link_fd")) + return 0; + + ASSERT_NEQ(link_info.id, 0, "link_info.id"); + return link_info.id; +} + int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c index 220f6a963813..f997d7ec8fd0 100644 --- a/tools/testing/selftests/bpf/unpriv_helpers.c +++ b/tools/testing/selftests/bpf/unpriv_helpers.c @@ -1,15 +1,76 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> +#include <limits.h> #include <stdbool.h> #include <stdlib.h> #include <stdio.h> #include <string.h> +#include <sys/utsname.h> #include <unistd.h> #include <fcntl.h> +#include <zlib.h> #include "unpriv_helpers.h" -static bool get_mitigations_off(void) +static gzFile open_config(void) +{ + struct utsname 
uts; + char buf[PATH_MAX]; + gzFile config; + + if (uname(&uts)) { + perror("uname"); + goto config_gz; + } + + snprintf(buf, sizeof(buf), "/boot/config-%s", uts.release); + config = gzopen(buf, "rb"); + if (config) + return config; + fprintf(stderr, "gzopen %s: %s\n", buf, strerror(errno)); + +config_gz: + config = gzopen("/proc/config.gz", "rb"); + if (!config) + perror("gzopen /proc/config.gz"); + return config; +} + +static int config_contains(const char *pat) +{ + const char *msg; + char buf[1024]; + gzFile config; + int n, err; + + config = open_config(); + if (!config) + return -1; + + for (;;) { + if (!gzgets(config, buf, sizeof(buf))) { + msg = gzerror(config, &err); + if (err == Z_OK) + break; /* plain EOF: pattern not found */ + if (err == Z_ERRNO) + perror("gzgets /proc/config.gz"); + else + fprintf(stderr, "gzgets /proc/config.gz: %s\n", msg); + gzclose(config); + return -1; + } + n = strlen(buf); + if (n && buf[n - 1] == '\n') + buf[n - 1] = 0; + if (strcmp(buf, pat) == 0) { + gzclose(config); + return 1; + } + } + gzclose(config); + return 0; +} + +static bool cmdline_contains(const char *pat) { char cmdline[4096], *c; int fd, ret = false; @@ -27,7 +88,7 @@ static bool get_mitigations_off(void) cmdline[sizeof(cmdline) - 1] = '\0'; for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) { - if (strncmp(c, "mitigations=off", strlen(c))) + if (strncmp(c, pat, strlen(c))) continue; ret = true; break; @@ -37,8 +98,21 @@ out: return ret; } +static int get_mitigations_off(void) +{ + int enabled_in_config; + + if (cmdline_contains("mitigations=off")) + return 1; + enabled_in_config = config_contains("CONFIG_CPU_MITIGATIONS=y"); + if (enabled_in_config < 0) + return -1; + return !enabled_in_config; +} + bool get_unpriv_disabled(void) { + int mitigations_off; bool disabled; char buf[2]; FILE *fd; @@ -52,5 +126,19 @@ bool get_unpriv_disabled(void) disabled = true; } - return disabled ? true : get_mitigations_off(); + if (disabled) + return true; + + /* + * Some unpriv tests rely on spectre mitigations being on. + * If mitigations are off or status can't be determined, + * assume that unpriv tests are disabled. 
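+	 * Detection order in get_mitigations_off(): a "mitigations=off" + * kernel command line wins; otherwise CONFIG_CPU_MITIGATIONS is + * looked up in /boot/config-$(uname -r) or /proc/config.gz, and a + * negative return means the status could not be determined from + * either source.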
+ */ + mitigations_off = get_mitigations_off(); + if (mitigations_off < 0) { + fprintf(stderr, + "Can't determine if mitigations are enabled, disabling unpriv tests."); + return true; + } + return mitigations_off; } diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c index ee454327e5c6..77207b498c6f 100644 --- a/tools/testing/selftests/bpf/verifier/dead_code.c +++ b/tools/testing/selftests/bpf/verifier/dead_code.c @@ -2,14 +2,13 @@ "dead code: start", .insns = { BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_JMP_IMM(BPF_JA, 0, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 7), BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 7, }, diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index 43776f6f92f4..91d83e9cb148 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -84,11 +84,10 @@ BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 0x10, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x10, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -149,11 +148,10 @@ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, 0x10, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, 0xf, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -214,11 +212,10 @@ BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, 0x10, 1), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x10, 1), BPF_EXIT_INSN(), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -283,11 +280,10 @@ BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -354,11 +350,10 @@ BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -425,11 +420,10 @@ BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -496,11 +490,10 @@ BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSLT, 
BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -567,11 +560,10 @@ BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -638,11 +630,10 @@ BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, -2, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -709,11 +700,10 @@ BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSLE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -780,11 +770,10 @@ BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c index 11fc68da735e..e901eefd774a 100644 --- a/tools/testing/selftests/bpf/verifier/jset.c +++ b/tools/testing/selftests/bpf/verifier/jset.c @@ -78,12 +78,11 @@ .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .retval = 1, .result = ACCEPT, }, @@ -136,13 +135,12 @@ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2), BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -154,16 +152,16 @@ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff), BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3), BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ 
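The recurring edit in these test tables: programs whose dead or speculatively reachable path loads through an uninitialized register were previously expected to fail unprivileged verification (REJECT plus .errstr_unpriv); now that the verifier inserts a speculation barrier (nospec) at the offending instruction for unprivileged loads, the programs are accepted in both modes, so only a comment marking the barrier site remains. A hypothetical minimal entry in this table format, assuming the struct fields used throughout these files:

{
	"dead code: jump over uninitialized-pointer load",
	.insns = {
	/* the load below is never executed architecturally,
	 * only (possibly) speculatively
	 */
	BPF_JMP_IMM(BPF_JA, 0, 0, 1),
	/* unpriv: nospec (inserted to prevent "R9 !read_ok") */
	BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
	},
	.result = ACCEPT,
	.retval = 0,
},
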
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index b2bb20b00952..1e9f61f9fd0a 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -49,6 +49,7 @@ enum stat_id { STACK, PROG_TYPE, ATTACH_TYPE, + MEMORY_PEAK, FILE_NAME, PROG_NAME, @@ -155,13 +156,27 @@ struct filter { bool abs; }; -struct var_preset { - char *name; +struct rvalue { enum { INTEGRAL, ENUMERATOR } type; union { long long ivalue; char *svalue; }; +}; + +struct field_access { + enum { FIELD_NAME, ARRAY_INDEX } type; + union { + char *name; + struct rvalue index; + }; +}; + +struct var_preset { + struct field_access *atoms; + int atom_count; + char *full_name; + struct rvalue value; bool applied; }; @@ -208,6 +223,9 @@ static struct env { int top_src_lines; struct var_preset *presets; int npresets; + char orig_cgroup[PATH_MAX]; + char stat_cgroup[PATH_MAX]; + int memory_peak_fd; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -219,6 +237,22 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va return vfprintf(stderr, format, args); } +#define log_errno(fmt, ...) log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__) + +__printf(3, 4) +static int log_errno_aux(const char *file, int line, const char *fmt, ...) +{ + int err = -errno; + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "%s:%d: ", file, line); + vfprintf(stderr, fmt, ap); + fprintf(stderr, " failed with error '%s'.\n", strerror(errno)); + va_end(ap); + return err; +} + #ifndef VERISTAT_VERSION #define VERISTAT_VERSION "<kernel>" #endif @@ -344,6 +378,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) fprintf(stderr, "invalid top N specifier: %s\n", arg); argp_usage(state); } + break; case 'C': env.comparison_mode = true; break; @@ -734,13 +769,13 @@ cleanup: } static const struct stat_specs default_csv_output_spec = { - .spec_cnt = 14, + .spec_cnt = 15, .ids = { FILE_NAME, PROG_NAME, VERDICT, DURATION, TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE, - STACK, + STACK, MEMORY_PEAK, }, }; @@ -781,6 +816,7 @@ static struct stat_def { [STACK] = {"Stack depth", {"stack_depth", "stack"}, }, [PROG_TYPE] = { "Program type", {"prog_type"}, }, [ATTACH_TYPE] = { "Attach type", {"attach_type", }, }, + [MEMORY_PEAK] = { "Peak memory (MiB)", {"mem_peak", }, }, }; static bool parse_stat_id_var(const char *name, size_t len, int *id, @@ -1182,6 +1218,7 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch case BPF_MAP_TYPE_TASK_STORAGE: case BPF_MAP_TYPE_INODE_STORAGE: case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_CGRP_STORAGE: break; case BPF_MAP_TYPE_STRUCT_OPS: mask_unrelated_struct_ops_progs(obj, map, prog); @@ -1278,16 +1315,243 @@ static int max_verifier_log_size(void) return log_size; } +static bool output_stat_enabled(int id) +{ + int i; + + for (i = 0; i < env.output_spec.spec_cnt; i++) + if (env.output_spec.ids[i] == id) + return true; + return false; +} + +__printf(2, 3) +static int write_one_line(const char *file, const char *fmt, ...) 
+{ + int err, saved_errno; + va_list ap; + FILE *f; + + f = fopen(file, "w"); + if (!f) + return -1; + + va_start(ap, fmt); + errno = 0; + err = vfprintf(f, fmt, ap); + saved_errno = errno; + va_end(ap); + fclose(f); + errno = saved_errno; + return err < 0 ? -1 : 0; +} + +__scanf(3, 4) +static int scanf_one_line(const char *file, int fields_expected, const char *fmt, ...) +{ + int res = 0, saved_errno = 0; + char *line = NULL; + size_t line_len; + va_list ap; + FILE *f; + + f = fopen(file, "r"); + if (!f) + return -1; + + va_start(ap, fmt); + while (getline(&line, &line_len, f) > 0) { + res = vsscanf(line, fmt, ap); + if (res == fields_expected) + goto out; + } + if (ferror(f)) { + saved_errno = errno; + res = -1; + } + +out: + va_end(ap); + free(line); + fclose(f); + errno = saved_errno; + return res; +} + +static void destroy_stat_cgroup(void) +{ + char buf[PATH_MAX]; + int err; + + close(env.memory_peak_fd); + + if (env.orig_cgroup[0]) { + snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.orig_cgroup); + err = write_one_line(buf, "%d\n", getpid()); + if (err < 0) + log_errno("moving self to original cgroup %s\n", env.orig_cgroup); + } + + if (env.stat_cgroup[0]) { + err = rmdir(env.stat_cgroup); + if (err < 0) + log_errno("deletion of cgroup %s", env.stat_cgroup); + } + + env.memory_peak_fd = -1; + env.orig_cgroup[0] = 0; + env.stat_cgroup[0] = 0; +} + +/* + * Creates a cgroup at /sys/fs/cgroup/veristat-accounting-<pid>, + * moves current process to this cgroup. + */ +static void create_stat_cgroup(void) +{ + char cgroup_fs_mount[4096]; + char buf[4096]; + int err; + + env.memory_peak_fd = -1; + + if (!output_stat_enabled(MEMORY_PEAK)) + return; + + err = scanf_one_line("/proc/self/mounts", 2, "%*s %4095s cgroup2 %s", + cgroup_fs_mount, buf); + if (err != 2) { + if (err < 0) + log_errno("reading /proc/self/mounts"); + else if (!env.quiet) + fprintf(stderr, "Can't find cgroupfs v2 mount point.\n"); + goto err_out; + } + + /* cgroup-v2.rst promises the line "0::<group>" for cgroups v2 */ + err = scanf_one_line("/proc/self/cgroup", 1, "0::%4095s", buf); + if (err != 1) { + if (err < 0) + log_errno("reading /proc/self/cgroup"); + else if (!env.quiet) + fprintf(stderr, "Can't infer veristat process cgroup."); + goto err_out; + } + + snprintf(env.orig_cgroup, sizeof(env.orig_cgroup), "%s/%s", cgroup_fs_mount, buf); + + snprintf(buf, sizeof(buf), "%s/veristat-accounting-%d", cgroup_fs_mount, getpid()); + err = mkdir(buf, 0777); + if (err < 0) { + log_errno("creation of cgroup %s", buf); + goto err_out; + } + strcpy(env.stat_cgroup, buf); + + snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.stat_cgroup); + err = write_one_line(buf, "%d\n", getpid()); + if (err < 0) { + log_errno("entering cgroup %s", buf); + goto err_out; + } + + snprintf(buf, sizeof(buf), "%s/memory.peak", env.stat_cgroup); + env.memory_peak_fd = open(buf, O_RDWR | O_APPEND); + if (env.memory_peak_fd < 0) { + log_errno("opening %s", buf); + goto err_out; + } + + return; + +err_out: + if (!env.quiet) + fprintf(stderr, "Memory usage metric unavailable.\n"); + destroy_stat_cgroup(); +} + +/* Current value of /sys/fs/cgroup/veristat-accounting-<pid>/memory.peak */ +static long cgroup_memory_peak(void) +{ + long err, memory_peak; + char buf[32]; + + if (env.memory_peak_fd < 0) + return -1; + + err = pread(env.memory_peak_fd, buf, sizeof(buf) - 1, 0); + if (err <= 0) { + log_errno("pread(%s/memory.peak)", env.stat_cgroup); + return -1; + } + + buf[err] = 0; + errno = 0; + memory_peak = strtoll(buf, NULL, 10); + if (errno) { + 
log_errno("%s/memory.peak:strtoll(%s)", env.stat_cgroup, buf); + return -1; + } + + return memory_peak; +} + +static int reset_stat_cgroup(void) +{ + char buf[] = "r\n"; + int err; + + if (env.memory_peak_fd < 0) + return -1; + + err = pwrite(env.memory_peak_fd, buf, sizeof(buf), 0); + if (err <= 0) { + log_errno("pwrite(%s/memory.peak)", env.stat_cgroup); + return -1; + } + return 0; +} + +static int parse_rvalue(const char *val, struct rvalue *rvalue) +{ + long long value; + char *val_end; + + if (val[0] == '-' || isdigit(val[0])) { + /* must be a number */ + errno = 0; + value = strtoll(val, &val_end, 0); + if (errno == ERANGE) { + errno = 0; + value = strtoull(val, &val_end, 0); + } + if (errno || *val_end != '\0') { + fprintf(stderr, "Failed to parse value '%s'\n", val); + return -EINVAL; + } + rvalue->ivalue = value; + rvalue->type = INTEGRAL; + } else { + /* if not a number, consider it enum value */ + rvalue->svalue = strdup(val); + if (!rvalue->svalue) + return -ENOMEM; + rvalue->type = ENUMERATOR; + } + return 0; +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *base_filename = basename(strdupa(filename)); const char *prog_name = bpf_program__name(prog); + long mem_peak_a, mem_peak_b, mem_peak = -1; char *buf; int buf_sz, log_level; struct verif_stats *stats; struct bpf_prog_info info; __u32 info_len = sizeof(info); - int err = 0; + int err = 0, cgroup_err; void *tmp; int fd; @@ -1332,7 +1596,15 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_reg_invariants) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); - err = bpf_object__load(obj); + err = bpf_object__prepare(obj); + if (!err) { + cgroup_err = reset_stat_cgroup(); + mem_peak_a = cgroup_memory_peak(); + err = bpf_object__load(obj); + mem_peak_b = cgroup_memory_peak(); + if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0) + mem_peak = mem_peak_b - mem_peak_a; + } env.progs_processed++; stats->file_name = strdup(base_filename); @@ -1341,6 +1613,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf stats->stats[SIZE] = bpf_program__insn_cnt(prog); stats->stats[PROG_TYPE] = bpf_program__type(prog); stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog); + stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / (1024 * 1024); memset(&info, 0, info_len); fd = bpf_program__fd(prog); @@ -1361,15 +1634,70 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf free(buf); return 0; -}; +} + +static int append_preset_atom(struct var_preset *preset, char *value, bool is_index) +{ + struct field_access *tmp; + int i = preset->atom_count; + int err; + + tmp = reallocarray(preset->atoms, i + 1, sizeof(*preset->atoms)); + if (!tmp) + return -ENOMEM; + + preset->atoms = tmp; + preset->atom_count++; + + if (is_index) { + preset->atoms[i].type = ARRAY_INDEX; + err = parse_rvalue(value, &preset->atoms[i].index); + if (err) + return err; + } else { + preset->atoms[i].type = FIELD_NAME; + preset->atoms[i].name = strdup(value); + if (!preset->atoms[i].name) + return -ENOMEM; + } + return 0; +} + +static int parse_var_atoms(const char *full_var, struct var_preset *preset) +{ + char expr[256], var[256], *name, *saveptr; + int n, len, off; + + snprintf(expr, sizeof(expr), "%s", full_var); + preset->atom_count = 0; + while ((name = strtok_r(preset->atom_count ? 
NULL : expr, ".", &saveptr))) { + len = strlen(name); + /* parse variable name */ + if (sscanf(name, "%[a-zA-Z0-9_] %n", var, &off) != 1) { + fprintf(stderr, "Can't parse %s\n", name); + return -EINVAL; + } + append_preset_atom(preset, var, false); + + /* parse optional array indexes */ + while (off < len) { + if (sscanf(name + off, " [ %[a-zA-Z0-9_] ] %n", var, &n) != 1) { + fprintf(stderr, "Can't parse %s as index\n", name + off); + return -EINVAL; + } + append_preset_atom(preset, var, true); + off += n; + } + } + return 0; +} static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr) { void *tmp; struct var_preset *cur; - char var[256], val[256], *val_end; - long long value; - int n; + char var[256], val[256]; + int n, err, i; tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets)); if (!tmp) @@ -1379,37 +1707,29 @@ static int append_var_preset(struct var_preset **presets, int *cnt, const char * memset(cur, 0, sizeof(*cur)); (*cnt)++; - if (sscanf(expr, "%s = %s %n", var, val, &n) != 2 || n != strlen(expr)) { + if (sscanf(expr, " %[][a-zA-Z0-9_. ] = %s %n", var, val, &n) != 2 || n != strlen(expr)) { fprintf(stderr, "Failed to parse expression '%s'\n", expr); return -EINVAL; } - - if (val[0] == '-' || isdigit(val[0])) { - /* must be a number */ - errno = 0; - value = strtoll(val, &val_end, 0); - if (errno == ERANGE) { - errno = 0; - value = strtoull(val, &val_end, 0); - } - if (errno || *val_end != '\0') { - fprintf(stderr, "Failed to parse value '%s'\n", val); - return -EINVAL; - } - cur->ivalue = value; - cur->type = INTEGRAL; - } else { - /* if not a number, consider it enum value */ - cur->svalue = strdup(val); - if (!cur->svalue) - return -ENOMEM; - cur->type = ENUMERATOR; + /* Remove trailing spaces from var, as scanf may add those */ + for (i = strlen(var) - 1; i > 0; --i) { + if (!isspace(var[i])) + break; + var[i] = '\0'; } - cur->name = strdup(var); - if (!cur->name) + err = parse_rvalue(val, &cur->value); + if (err) + return err; + + cur->full_name = strdup(var); + if (!cur->full_name) return -ENOMEM; + err = parse_var_atoms(var, cur); + if (err) + return err; + return 0; } @@ -1486,22 +1806,96 @@ static bool is_preset_supported(const struct btf_type *t) return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t); } -const int btf_find_member(const struct btf *btf, - const struct btf_type *parent_type, - __u32 parent_offset, - const char *member_name, - int *member_tid, - __u32 *member_offset) +static int find_enum_value(const struct btf *btf, const char *name, long long *value) +{ + const struct btf_type *t; + int cnt, i; + long long lvalue; + + cnt = btf__type_cnt(btf); + for (i = 1; i != cnt; ++i) { + t = btf__type_by_id(btf, i); + + if (!btf_is_any_enum(t)) + continue; + + if (enum_value_from_name(btf, t, name, &lvalue) == 0) { + *value = lvalue; + return 0; + } + } + return -ESRCH; +} + +static int resolve_rvalue(struct btf *btf, const struct rvalue *rvalue, long long *result) +{ + int err = 0; + + switch (rvalue->type) { + case INTEGRAL: + *result = rvalue->ivalue; + return 0; + case ENUMERATOR: + err = find_enum_value(btf, rvalue->svalue, result); + if (err) { + fprintf(stderr, "Can't resolve enum value %s\n", rvalue->svalue); + return err; + } + return 0; + default: + fprintf(stderr, "Unknown rvalue type\n"); + return -EOPNOTSUPP; + } + return 0; +} + +static int adjust_var_secinfo_array(struct btf *btf, int tid, struct field_access *atom, + const char *array_name, struct btf_var_secinfo *sinfo) +{ + const struct btf_type *t; + struct 
btf_array *barr; + long long idx; + int err; + + tid = btf__resolve_type(btf, tid); + t = btf__type_by_id(btf, tid); + if (!btf_is_array(t)) { + fprintf(stderr, "Array index is not expected for %s\n", + array_name); + return -EINVAL; + } + barr = btf_array(t); + err = resolve_rvalue(btf, &atom->index, &idx); + if (err) + return err; + if (idx < 0 || idx >= barr->nelems) { + fprintf(stderr, "Array index %lld is out of bounds [0, %u]: %s\n", + idx, barr->nelems, array_name); + return -EINVAL; + } + sinfo->size = btf__resolve_size(btf, barr->type); + sinfo->offset += sinfo->size * idx; + sinfo->type = btf__resolve_type(btf, barr->type); + return 0; +} + +static int adjust_var_secinfo_member(const struct btf *btf, + const struct btf_type *parent_type, + __u32 parent_offset, + const char *member_name, + struct btf_var_secinfo *sinfo) { int i; - if (!btf_is_composite(parent_type)) + if (!btf_is_composite(parent_type)) { + fprintf(stderr, "Can't resolve field %s for non-composite type\n", member_name); return -EINVAL; + } for (i = 0; i < btf_vlen(parent_type); ++i) { const struct btf_member *member; const struct btf_type *member_type; - int tid; + int tid, off; member = btf_members(parent_type) + i; tid = btf__resolve_type(btf, member->type); @@ -1509,6 +1903,7 @@ const int btf_find_member(const struct btf *btf, return -EINVAL; member_type = btf__type_by_id(btf, tid); + off = parent_offset + member->offset; if (member->name_off) { const char *name = btf__name_by_offset(btf, member->name_off); @@ -1518,15 +1913,16 @@ const int btf_find_member(const struct btf *btf, name); return -EINVAL; } - *member_offset = parent_offset + member->offset; - *member_tid = tid; + sinfo->offset += off / 8; + sinfo->type = tid; + sinfo->size = member_type->size; return 0; } } else if (btf_is_composite(member_type)) { int err; - err = btf_find_member(btf, member_type, parent_offset + member->offset, - member_name, member_tid, member_offset); + err = adjust_var_secinfo_member(btf, member_type, off, + member_name, sinfo); if (!err) return 0; } @@ -1536,30 +1932,41 @@ const int btf_find_member(const struct btf *btf, } static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t, - struct btf_var_secinfo *sinfo, const char *var) + struct btf_var_secinfo *sinfo, struct var_preset *preset) { - char expr[256], *saveptr; - const struct btf_type *base_type, *member_type; - int err, member_tid; - char *name; - __u32 member_offset = 0; + const struct btf_type *base_type; + const char *prev_name; + int err, i; + int tid; - base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); - snprintf(expr, sizeof(expr), "%s", var); - strtok_r(expr, ".", &saveptr); + assert(preset->atom_count > 0); + assert(preset->atoms[0].type == FIELD_NAME); - while ((name = strtok_r(NULL, ".", &saveptr))) { - err = btf_find_member(btf, base_type, 0, name, &member_tid, &member_offset); - if (err) { - fprintf(stderr, "Could not find member %s for variable %s\n", name, var); - return err; + tid = btf__resolve_type(btf, t->type); + base_type = btf__type_by_id(btf, tid); + prev_name = preset->atoms[0].name; + + for (i = 1; i < preset->atom_count; ++i) { + struct field_access *atom = preset->atoms + i; + + switch (atom->type) { + case ARRAY_INDEX: + err = adjust_var_secinfo_array(btf, tid, atom, prev_name, sinfo); + break; + case FIELD_NAME: + err = adjust_var_secinfo_member(btf, base_type, 0, atom->name, sinfo); + prev_name = atom->name; + break; + default: + fprintf(stderr, "Unknown field_access type\n"); + return -EOPNOTSUPP; } - 
member_type = btf__type_by_id(btf, member_tid); - sinfo->offset += member_offset / 8; - sinfo->size = member_type->size; - sinfo->type = member_tid; - base_type = member_type; + if (err) + return err; + base_type = btf__type_by_id(btf, sinfo->type); + tid = sinfo->type; } + return 0; } @@ -1569,7 +1976,7 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, { const struct btf_type *base_type; void *ptr; - long long value = preset->ivalue; + long long value = preset->value.ivalue; size_t size; base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type)); @@ -1578,22 +1985,23 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, return -EINVAL; } if (!is_preset_supported(base_type)) { - fprintf(stderr, "Setting value for type %s is not supported\n", - btf__name_by_offset(btf, base_type->name_off)); + fprintf(stderr, "Can't set %s. Only ints and enums are supported\n", + preset->full_name); return -EINVAL; } - if (preset->type == ENUMERATOR) { + if (preset->value.type == ENUMERATOR) { if (btf_is_any_enum(base_type)) { - if (enum_value_from_name(btf, base_type, preset->svalue, &value)) { + if (enum_value_from_name(btf, base_type, preset->value.svalue, &value)) { fprintf(stderr, "Failed to find integer value for enum element %s\n", - preset->svalue); + preset->value.svalue); return -EINVAL; } } else { fprintf(stderr, "Value %s is not supported for type %s\n", - preset->svalue, btf__name_by_offset(btf, base_type->name_off)); + preset->value.svalue, + btf__name_by_offset(btf, base_type->name_off)); return -EINVAL; } } @@ -1660,20 +2068,16 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i for (j = 0; j < n; ++j, ++sinfo) { const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); const char *var_name; - int var_len; if (!btf_is_var(var_type)) continue; var_name = btf__name_by_offset(btf, var_type->name_off); - var_len = strlen(var_name); for (k = 0; k < npresets; ++k) { struct btf_var_secinfo tmp_sinfo; - if (strncmp(var_name, presets[k].name, var_len) != 0 || - (presets[k].name[var_len] != '\0' && - presets[k].name[var_len] != '.')) + if (strcmp(var_name, presets[k].atoms[0].name) != 0) continue; if (presets[k].applied) { @@ -1683,7 +2087,7 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i } tmp_sinfo = *sinfo; err = adjust_var_secinfo(btf, var_type, - &tmp_sinfo, presets[k].name); + &tmp_sinfo, presets + k); if (err) return err; @@ -1698,7 +2102,8 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i for (i = 0; i < npresets; ++i) { if (!presets[i].applied) { fprintf(stderr, "Global variable preset %s has not been applied\n", - presets[i].name); + presets[i].full_name); + err = -EINVAL; } presets[i].applied = false; } @@ -1824,6 +2229,7 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, case TOTAL_STATES: case PEAK_STATES: case MAX_STATES_PER_INSN: + case MEMORY_PEAK: case MARK_READ_MAX_LEN: { long v1 = s1->stats[id]; long v2 = s2->stats[id]; @@ -2053,6 +2459,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, case STACK: case SIZE: case JITED_SIZE: + case MEMORY_PEAK: *val = s ? 
s->stats[id] : 0; break; default: @@ -2139,6 +2546,7 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats case MARK_READ_MAX_LEN: case SIZE: case JITED_SIZE: + case MEMORY_PEAK: case STACK: { long val; int err, n; @@ -2776,7 +3184,7 @@ static void output_prog_stats(void) static int handle_verif_mode(void) { - int i, err; + int i, err = 0; if (env.filename_cnt == 0) { fprintf(stderr, "Please provide path to BPF object file!\n\n"); @@ -2784,11 +3192,12 @@ static int handle_verif_mode(void) return -EINVAL; } + create_stat_cgroup(); for (i = 0; i < env.filename_cnt; i++) { err = process_obj(env.filenames[i]); if (err) { fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); - return err; + goto out; } } @@ -2796,7 +3205,9 @@ static int handle_verif_mode(void) output_prog_stats(); - return 0; +out: + destroy_stat_cgroup(); + return err; } static int handle_replay_mode(void) @@ -2826,7 +3237,7 @@ static int handle_replay_mode(void) int main(int argc, char **argv) { - int err = 0, i; + int err = 0, i, j; if (argp_parse(&argp, argc, argv, 0, NULL, NULL)) return 1; @@ -2885,9 +3296,19 @@ int main(int argc, char **argv) } free(env.deny_filters); for (i = 0; i < env.npresets; ++i) { - free(env.presets[i].name); - if (env.presets[i].type == ENUMERATOR) - free(env.presets[i].svalue); + free(env.presets[i].full_name); + for (j = 0; j < env.presets[i].atom_count; ++j) { + switch (env.presets[i].atoms[j].type) { + case FIELD_NAME: + free(env.presets[i].atoms[j].name); + break; + case ARRAY_INDEX: + if (env.presets[i].atoms[j].index.type == ENUMERATOR) + free(env.presets[i].atoms[j].index.svalue); + break; + } + } + free(env.presets[i].atoms); } free(env.presets); return -err; diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 79505d294c44..2f869daf8a06 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -43,6 +43,15 @@ riscv64) BZIMAGE="arch/riscv/boot/Image" ARCH="riscv" ;; +ppc64el) + QEMU_BINARY=qemu-system-ppc64 + QEMU_CONSOLE="hvc0" + # KVM could not be tested for powerpc, therefore not enabled for now. + HOST_FLAGS=(-machine pseries -cpu POWER9) + CROSS_FLAGS=(-machine pseries -cpu POWER9) + BZIMAGE="vmlinux" + ARCH="powerpc" + ;; *) echo "Unsupported architecture" exit 1 diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index ca60ae325c22..7bb552f8b182 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -747,6 +747,62 @@ def test_rss_ntuple_addition(cfg): 'noise' : (0,) }) +def test_rss_default_context_rule(cfg): + """ + Allocate a port, direct this port to context 0, then create a new RSS + context and steer all TCP traffic to it (context 1). Verify that: + * Traffic to the specific port continues to use queues of the main + context (0/1). + * Traffic to any other TCP port is redirected to the new context + (queues 2/3). 
+ """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except Exception as exc: + raise KsftSkipEx("Not enough queues for the test") from exc + + # Use queues 0 and 1 for the main context + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + # Create a new RSS context that uses queues 2 and 3 + ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # Generic low-priority rule: redirect all TCP traffic to the new context. + # Give it an explicit higher location number (lower priority). + flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1" + ethtool(f"-N {cfg.ifname} {flow_generic}") + defer(ethtool, f"-N {cfg.ifname} delete 1") + + # Specific high-priority rule for a random port that should stay on context 0. + # Assign loc 0 so it is evaluated before the generic rule. + port_main = rand_port() + flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0" + ethtool(f"-N {cfg.ifname} {flow_main}") + defer(ethtool, f"-N {cfg.ifname} delete 0") + + _ntuple_rule_check(cfg, 1, ctx_id) + + # Verify that traffic matching the specific rule still goes to queues 0/1 + _send_traffic_check(cfg, port_main, "context 0", + { 'target': (0, 1), + 'empty' : (2, 3) }) + + # And that traffic for any other port is steered to the new context + port_other = rand_port() + _send_traffic_check(cfg, port_other, f"context {ctx_id}", + { 'target': (2, 3), + 'noise' : (0, 1) }) + + def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: cfg.context_cnt = None @@ -760,7 +816,8 @@ def main() -> None: test_rss_context_overlap, test_rss_context_overlap2, test_rss_context_out_of_order, test_rss_context4_create_with_cfg, test_flow_add_context_missing, - test_delete_rss_context_busy, test_rss_ntuple_addition], + test_delete_rss_context_busy, test_rss_ntuple_addition, + test_rss_default_context_rule], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py index f439c434ba36..648ff50bc1c3 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -38,7 +38,7 @@ def test_rss_input_xfrm(cfg, ipver): raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") input_xfrm = cfg.ethnl.rss_get( - {'header': {'dev-name': cfg.ifname}}).get('input_xfrm') + {'header': {'dev-name': cfg.ifname}}).get('input-xfrm') # Check for symmetric xor/or-xor if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2): diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh index 1bb46ec435d4..7f32b5600925 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0-only -source ../../../net/lib.sh +lib_dir=$(dirname $0)/../../../net +source $lib_dir/lib.sh NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c 
index 20a9d3ecf743..a9ecfb2d3932 100644 --- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) struct futex32_numa *futex_numa; int mem_size, i; void *futex_ptr; - char c; + int c; while ((c = getopt(argc, argv, "chv:")) != -1) { switch (c) { @@ -210,6 +210,10 @@ int main(int argc, char *argv[]) ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask, sizeof(nodemask) * 8, 0); if (ret == 0) { + ret = numa_set_mempolicy_home_node(futex_ptr, mem_size, i, 0); + if (ret != 0) + ksft_exit_fail_msg("Failed to set home node: %m, %d\n", errno); + ksft_print_msg("Node %d test\n", i); futex_numa->futex = 0; futex_numa->numa = FUTEX_NO_NODE; @@ -220,8 +224,8 @@ int main(int argc, char *argv[]) if (0) test_futex_mpol(futex_numa, 0); if (futex_numa->numa != i) { - ksft_test_result_fail("Returned NUMA node is %d expected %d\n", - futex_numa->numa, i); + ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n", + futex_numa->numa, i); } } } diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index 2dca18fefedc..24a92dc94eb8 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -130,7 +130,7 @@ int main(int argc, char *argv[]) pthread_mutexattr_t mutex_attr_pi; int use_global_hash = 0; int ret; - char c; + int c; while ((c = getopt(argc, argv, "cghv:")) != -1) { switch (c) { diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index a36a7e2db434..4e71740a098b 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -22,7 +22,8 @@ #include "gic.h" #include "vgic.h" -static const uint64_t CVAL_MAX = ~0ULL; +/* Depends on counter width. */ +static uint64_t CVAL_MAX; /* tval is a signed 32-bit int. */ static const int32_t TVAL_MAX = INT32_MAX; static const int32_t TVAL_MIN = INT32_MIN; @@ -30,8 +31,8 @@ static const int32_t TVAL_MIN = INT32_MIN; /* After how much time we say there is no IRQ. */ static const uint32_t TIMEOUT_NO_IRQ_US = 50000; -/* A nice counter value to use as the starting one for most tests. */ -static const uint64_t DEF_CNT = (CVAL_MAX / 2); +/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */ +static uint64_t DEF_CNT; /* Number of runs. 
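Both futex changes above replace "char c" with "int c" for the getopt() loop. getopt() returns an int with -1 as its end-of-options sentinel; on ABIs where plain char is unsigned (arm, powerpc, s390), (char)-1 stores as 255 and the != -1 comparison can never be true, so the loop would not terminate. A minimal sketch of the corrected pattern (hypothetical option letters):

#include <stdio.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	int c;	/* must be int, not char: getopt() returns int, -1 is the sentinel */

	while ((c = getopt(argc, argv, "cv:")) != -1) {
		switch (c) {
		case 'c':
			printf("flag -c\n");
			break;
		case 'v':
			printf("value %s\n", optarg);
			break;
		default:
			return 1;
		}
	}
	return 0;
}
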
*/ static const uint32_t NR_TEST_ITERS_DEF = 5; @@ -191,8 +192,8 @@ static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, { atomic_set(&shared_data.handled, 0); atomic_set(&shared_data.spurious, 0); - timer_set_ctl(timer, ctl); timer_set_tval(timer, tval_cycles); + timer_set_ctl(timer, ctl); } static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl, @@ -732,12 +733,6 @@ static void test_move_counters_ahead_of_timers(enum arch_timer timer) test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, wm); } - - for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { - sleep_method_t sm = sleep_method[i]; - - test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm); - } } /* @@ -849,17 +844,17 @@ static void guest_code(enum arch_timer timer) GUEST_DONE(); } +static cpu_set_t default_cpuset; + static uint32_t next_pcpu(void) { uint32_t max = get_nprocs(); uint32_t cur = sched_getcpu(); uint32_t next = cur; - cpu_set_t cpuset; + cpu_set_t cpuset = default_cpuset; TEST_ASSERT(max > 1, "Need at least two physical cpus"); - sched_getaffinity(0, sizeof(cpuset), &cpuset); - do { next = (next + 1) % CPU_SETSIZE; } while (!CPU_ISSET(next, &cpuset)); @@ -959,6 +954,8 @@ static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } +static int gic_fd; + static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, enum arch_timer timer) { @@ -973,8 +970,18 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, vcpu_args_set(*vcpu, 1, timer); test_init_timer_irq(*vm, *vcpu); - vgic_v3_setup(*vm, 1, 64); + gic_fd = vgic_v3_setup(*vm, 1, 64); + __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); + sync_global_to_guest(*vm, test_args); + sync_global_to_guest(*vm, CVAL_MAX); + sync_global_to_guest(*vm, DEF_CNT); +} + +static void test_vm_cleanup(struct kvm_vm *vm) +{ + close(gic_fd); + kvm_vm_free(vm); } static void test_print_help(char *name) @@ -986,7 +993,7 @@ static void test_print_help(char *name) pr_info("\t-b: Test both physical and virtual timers (default: true)\n"); pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n", LONG_WAIT_TEST_MS); - pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n", + pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n", WAIT_TEST_MS); pr_info("\t-p: Test physical timer (default: true)\n"); pr_info("\t-v: Test virtual timer (default: true)\n"); @@ -1035,6 +1042,17 @@ static bool parse_args(int argc, char *argv[]) return false; } +static void set_counter_defaults(void) +{ + const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600; + uint64_t freq = read_sysreg(CNTFRQ_EL0); + uint64_t width = ilog2(MIN_ROLLOVER_SECS * freq); + + width = clamp(width, 56, 64); + CVAL_MAX = GENMASK_ULL(width - 1, 0); + DEF_CNT = CVAL_MAX / 2; +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; @@ -1046,16 +1064,19 @@ int main(int argc, char *argv[]) if (!parse_args(argc, argv)) exit(KSFT_SKIP); + sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset); + set_counter_defaults(); + if (test_args.test_virtual) { test_vm_create(&vm, &vcpu, VIRTUAL); test_run(vm, vcpu); - kvm_vm_free(vm); + test_vm_cleanup(vm); } if (test_args.test_physical) { test_vm_create(&vm, &vcpu, PHYSICAL); test_run(vm, vcpu); - kvm_vm_free(vm); + test_vm_cleanup(vm); } return 0; diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config index a28baa536332..deba93379c80 100644 --- 
a/tools/testing/selftests/mm/config +++ b/tools/testing/selftests/mm/config @@ -8,3 +8,6 @@ CONFIG_GUP_TEST=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ANON_VMA_NAME=y +CONFIG_FTRACE=y +CONFIG_PROFILING=y +CONFIG_UPROBES=y diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 8a97ac5176a4..29047d2e0c49 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -298,8 +298,11 @@ static void run_with_memfd(test_fn fn, const char *desc) log_test_start("%s ... with memfd", desc); fd = memfd_create("test", 0); - if (fd < 0) + if (fd < 0) { ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); + return; + } fn(fd, pagesize); close(fd); @@ -366,6 +369,8 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc, fd = memfd_create("test", flags); if (fd < 0) { ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); + return; } fn(fd, hugetlbsize); diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index bbae66fc5038..cc26480098ae 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -470,7 +470,9 @@ TEST_F(merge, handle_uprobe_upon_merged_vma) ASSERT_GE(fd, 0); ASSERT_EQ(ftruncate(fd, page_size), 0); - ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0); + if (read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) != 0) { + SKIP(goto out, "Failed to read uprobe sysfs file, skipping"); + } memset(&attr, 0, attr_sz); attr.size = attr_sz; @@ -491,6 +493,7 @@ TEST_F(merge, handle_uprobe_upon_merged_vma) ASSERT_NE(mremap(ptr2, page_size, page_size, MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED); +out: close(fd); remove(probe_file); } diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings index a953c96aa16e..e2206265f67c 100644 --- a/tools/testing/selftests/mm/settings +++ b/tools/testing/selftests/mm/settings @@ -1 +1 @@ -timeout=180 +timeout=900 diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 532bb732bc6d..c6dd2a335cf4 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -50,6 +50,7 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap +tfo timestamping tls toeplitz diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ea84b88bcb30..332f387615d7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -27,6 +27,7 @@ TEST_PROGS += amt.sh TEST_PROGS += unicast_extensions.sh TEST_PROGS += udpgro_fwd.sh TEST_PROGS += udpgro_frglist.sh +TEST_PROGS += nat6to4.sh TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh @@ -109,6 +110,8 @@ TEST_GEN_PROGS += proc_net_pktgen TEST_PROGS += lwt_dst_cache_ref_loop.sh TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off +TEST_GEN_FILES += tfo +TEST_PROGS += tfo_passive.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 3ed3882a93b8..b5f474969917 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -210,7 +210,7 @@ static void __sendpair(struct __test_metadata *_metadata, static void __recvpair(struct __test_metadata *_metadata, 
FIXTURE_DATA(msg_oob) *self, const char *expected_buf, int expected_len, - int buf_len, int flags) + int buf_len, int flags, bool is_sender) { int i, ret[2], recv_errno[2], expected_errno = 0; char recv_buf[2][BUF_SZ] = {}; @@ -221,7 +221,9 @@ static void __recvpair(struct __test_metadata *_metadata, errno = 0; for (i = 0; i < 2; i++) { - ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + int index = is_sender ? i * 2 : i * 2 + 1; + + ret[i] = recv(self->fd[index], recv_buf[i], buf_len, flags); recv_errno[i] = errno; } @@ -308,6 +310,20 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, ASSERT_EQ(answ[0], answ[1]); } +static void __resetpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const FIXTURE_VARIANT(msg_oob) *variant, + bool reset) +{ + int i; + + for (i = 0; i < 2; i++) + close(self->fd[i * 2 + 1]); + + __recvpair(_metadata, self, "", reset ? -ECONNRESET : 0, 1, + variant->peek ? MSG_PEEK : 0, true); +} + #define sendpair(buf, len, flags) \ __sendpair(_metadata, self, buf, len, flags) @@ -316,9 +332,10 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, if (variant->peek) \ __recvpair(_metadata, self, \ expected_buf, expected_len, \ - buf_len, (flags) | MSG_PEEK); \ + buf_len, (flags) | MSG_PEEK, false); \ __recvpair(_metadata, self, \ - expected_buf, expected_len, buf_len, flags); \ + expected_buf, expected_len, \ + buf_len, flags, false); \ } while (0) #define epollpair(oob_remaining) \ @@ -330,6 +347,9 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, #define setinlinepair() \ __setinlinepair(_metadata, self) +#define resetpair(reset) \ + __resetpair(_metadata, self, variant, reset) + #define tcp_incompliant \ for (self->tcp_compliant = false; \ self->tcp_compliant == false; \ @@ -344,6 +364,21 @@ TEST_F(msg_oob, non_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(true); +} + +TEST_F(msg_oob, non_oob_no_reset) +{ + sendpair("x", 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob) @@ -355,6 +390,19 @@ TEST_F(msg_oob, oob) recvpair("x", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } +} + +TEST_F(msg_oob, oob_reset) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + resetpair(true); } TEST_F(msg_oob, oob_drop) @@ -370,6 +418,8 @@ TEST_F(msg_oob, oob_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead) @@ -385,6 +435,10 @@ TEST_F(msg_oob, oob_ahead) recvpair("hell", 4, 4, 0); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. 
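The new resetpair() helper closes one end of each socket pair and checks whether the surviving end observes a clean EOF (recv() returning 0) or a reset (-1/ECONNRESET); unread out-of-band data pending at close is what turns teardown into a reset in these cases. A minimal sketch of the baseline it builds on (hedged: plain data as below yields EOF; the selftests assert ECONNRESET only for the OOB cases annotated in the tests themselves):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	char buf[8];
	ssize_t n;
	int sk[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sk))
		return 1;
	if (send(sk[0], "x", 1, 0) != 1)
		return 1;
	close(sk[0]);				/* writer goes away */
	n = recv(sk[1], buf, sizeof(buf), 0);	/* drains "x" */
	n = recv(sk[1], buf, sizeof(buf), 0);	/* then clean EOF: returns 0 */
	printf("recv: %zd (%s)\n", n, n < 0 ? strerror(errno) : "EOF");
	close(sk[1]);
	return 0;
}
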
*/ + } } TEST_F(msg_oob, oob_break) @@ -403,6 +457,8 @@ TEST_F(msg_oob, oob_break) recvpair("", -EAGAIN, 1, 0); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead_break) @@ -426,6 +482,8 @@ TEST_F(msg_oob, oob_ahead_break) recvpair("world", 5, 5, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_break_drop) @@ -449,6 +507,8 @@ TEST_F(msg_oob, oob_break_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_break) @@ -476,6 +536,8 @@ TEST_F(msg_oob, ex_oob_break) recvpair("ld", 2, 2, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop) @@ -498,6 +560,8 @@ TEST_F(msg_oob, ex_oob_drop) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop_2) @@ -523,6 +587,8 @@ TEST_F(msg_oob, ex_oob_drop_2) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_oob) @@ -546,6 +612,54 @@ TEST_F(msg_oob, ex_oob_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); +} + +TEST_F(msg_oob, ex_oob_ex_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } +} + +TEST_F(msg_oob, ex_oob_ex_oob_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("z", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); } TEST_F(msg_oob, ex_oob_ahead_break) @@ -576,6 +690,10 @@ TEST_F(msg_oob, ex_oob_ahead_break) recvpair("d", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, ex_oob_siocatmark) @@ -595,6 +713,8 @@ TEST_F(msg_oob, ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. 
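The siocatmarkpair() checks used throughout assert the value of the SIOCATMARK ioctl, which reports whether the stream's read pointer currently sits at the out-of-band mark. A minimal sketch of that probe (assumes a kernel with AF_UNIX MSG_OOB support, i.e. CONFIG_AF_UNIX_OOB):

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sk[2], at_mark = 0;
	char buf[8];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sk))
		return 1;
	send(sk[0], "hello", 5, 0);
	send(sk[0], "x", 1, MSG_OOB);
	recv(sk[1], buf, 5, 0);			/* consume "hello"; reads stop at the mark */
	ioctl(sk[1], SIOCATMARK, &at_mark);	/* next byte is the OOB byte */
	printf("SIOCATMARK: %d\n", at_mark);	/* expected: 1 */
	close(sk[0]);
	close(sk[1]);
	return 0;
}
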
*/ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_F(msg_oob, inline_oob) @@ -612,6 +732,8 @@ TEST_F(msg_oob, inline_oob) recvpair("x", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_break) @@ -633,6 +755,8 @@ TEST_F(msg_oob, inline_oob_break) recvpair("o", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_ahead_break) @@ -661,6 +785,8 @@ TEST_F(msg_oob, inline_oob_ahead_break) epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_break) @@ -686,6 +812,8 @@ TEST_F(msg_oob, inline_ex_oob_break) recvpair("rld", 3, 3, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_no_drop) @@ -707,6 +835,8 @@ TEST_F(msg_oob, inline_ex_oob_no_drop) recvpair("y", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_drop) @@ -731,6 +861,8 @@ TEST_F(msg_oob, inline_ex_oob_drop) epollpair(false); siocatmarkpair(false); } + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_siocatmark) @@ -752,6 +884,8 @@ TEST_F(msg_oob, inline_ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/nat6to4.sh b/tools/testing/selftests/net/nat6to4.sh new file mode 100755 index 000000000000..0ee859b622a4 --- /dev/null +++ b/tools/testing/selftests/net/nat6to4.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NS="ns-peer-$(mktemp -u XXXXXX)" + +ip netns add "${NS}" +ip -netns "${NS}" link set lo up +ip -netns "${NS}" route add default via 127.0.0.2 dev lo + +tc -n "${NS}" qdisc add dev lo ingress +tc -n "${NS}" filter add dev lo ingress prio 4 protocol ip \ + bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action + +ip netns exec "${NS}" \ + bash -c 'echo 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abc | socat - UDP4-DATAGRAM:224.1.0.1:6666,ip-multicast-loop=1' diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c new file mode 100644 index 000000000000..eb3cac5e583c --- /dev/null +++ b/tools/testing/selftests/net/tfo.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <netinet/tcp.h> +#include <errno.h> + +static int cfg_server; +static int cfg_client; +static int cfg_port = 8000; +static struct sockaddr_in6 cfg_addr; +static char *cfg_outfile; + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +static void run_server(void) +{ + unsigned long qlen = 32; + int fd, opt, connfd; + socklen_t len; + char buf[64]; + FILE 
*outfile; + + outfile = fopen(cfg_outfile, "w"); + if (!outfile) + error(1, errno, "fopen() outfile"); + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, errno, "socket()"); + + opt = 1; + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) + error(1, errno, "setsockopt(SO_REUSEADDR)"); + + if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0) + error(1, errno, "setsockopt(TCP_FASTOPEN)"); + + if (bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0) + error(1, errno, "bind()"); + + if (listen(fd, 5) < 0) + error(1, errno, "listen()"); + + len = sizeof(cfg_addr); + connfd = accept(fd, (struct sockaddr *)&cfg_addr, &len); + if (connfd < 0) + error(1, errno, "accept()"); + + len = sizeof(opt); + if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0) + error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)"); + + read(connfd, buf, 64); + fprintf(outfile, "%d\n", opt); + + fclose(outfile); + close(connfd); + close(fd); +} + +static void run_client(void) +{ + int fd; + char *msg = "Hello, world!"; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, errno, "socket()"); + + sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)); + + close(fd); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s (-s|-c) -h<server_ip> -p<port> -o<outfile> ", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + struct sockaddr_in6 *addr6 = (void *) &cfg_addr; + char *addr = NULL; + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + + while ((c = getopt(argc, argv, "sch:p:o:")) != -1) { + switch (c) { + case 's': + if (cfg_client) + error(1, 0, "Pass one of -s or -c"); + cfg_server = 1; + break; + case 'c': + if (cfg_server) + error(1, 0, "Pass one of -s or -c"); + cfg_client = 1; + break; + case 'h': + addr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'o': + cfg_outfile = strdup(optarg); + if (!cfg_outfile) + error(1, 0, "outfile invalid"); + break; + } + } + + if (cfg_server && addr) + error(1, 0, "Server cannot have -h specified"); + + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + addr6->sin6_addr = in6addr_any; + if (addr) { + ret = parse_address(addr, cfg_port, addr6); + if (ret) + error(1, 0, "Client address parse error: %s", addr); + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (cfg_server) + run_server(); + else if (cfg_client) + run_client(); + + return 0; +} diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh new file mode 100755 index 000000000000..80bf11fdc046 --- /dev/null +++ b/tools/testing/selftests/net/tfo_passive.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source lib.sh + +NSIM_SV_ID=$((256 + RANDOM % 256)) +NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID +NSIM_CL_ID=$((512 + RANDOM % 256)) +NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +SERVER_IP=192.168.1.1 +CLIENT_IP=192.168.1.2 +SERVER_PORT=48675 + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! 
\ + -path $NSIM_SV_SYS/net -exec basename {} \;) + NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_CL_SYS/net -exec basename {} \;) + + ip link set $NSIM_SV_NAME netns nssv + ip link set $NSIM_CL_NAME netns nscl + + ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME + ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME + + ip netns exec nssv ip link set dev $NSIM_SV_NAME up + ip netns exec nscl ip link set dev $NSIM_CL_NAME up + + # Enable passive TFO + ip netns exec nssv sysctl -w net.ipv4.tcp_fastopen=519 > /dev/null + + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +### +### Code start +### + +modprobe netdevsim + +# linking + +echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_SV_FD=$((256 + RANDOM % 256)) +exec {NSIM_SV_FD}</var/run/netns/nssv +NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex) + +NSIM_CL_FD=$((256 + RANDOM % 256)) +exec {NSIM_CL_FD}</var/run/netns/nscl +NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex) + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \ + $NSIM_DEV_SYS_LINK + +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +out_file=$(mktemp) + +timeout -k 1s 30s ip netns exec nssv ./tfo \ + -s \ + -p ${SERVER_PORT} \ + -o ${out_file}& + +wait_local_port_listen nssv ${SERVER_PORT} tcp + +ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT} + +wait + +res=$(cat $out_file) +rm $out_file + +if [ $res -eq 0 ]; then + echo "got invalid NAPI ID from passive TFO socket" + cleanup_ns + exit 1 +fi + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit 0 diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index f703fcfe9f7c..83148875a12c 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ - test_vsyscall mov_ss_trap \ + test_vsyscall mov_ss_trap sigtrap_loop \ syscall_arg_fault fsgsbase_restore sigaltstack TARGETS_C_BOTHBITS += nx_stack TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ diff --git a/tools/testing/selftests/x86/sigtrap_loop.c b/tools/testing/selftests/x86/sigtrap_loop.c new file mode 100644 index 000000000000..9d065479e89f --- /dev/null +++ b/tools/testing/selftests/x86/sigtrap_loop.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Intel Corporation + */ +#define _GNU_SOURCE + +#include <err.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ucontext.h> + +#ifdef __x86_64__ +# define REG_IP REG_RIP +#else +# define REG_IP REG_EIP +#endif + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + + return; +} + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + static unsigned int 
loop_count_on_same_ip; + static unsigned long last_trap_ip; + + if (last_trap_ip == ctx->uc_mcontext.gregs[REG_IP]) { + printf("\tTrapped at %016lx\n", last_trap_ip); + + /* + * If the same IP is hit more than 10 times in a row, it is + * _considered_ an infinite loop. + */ + if (++loop_count_on_same_ip > 10) { + printf("[FAIL]\tDetected SIGTRAP infinite loop\n"); + exit(1); + } + + return; + } + + loop_count_on_same_ip = 0; + last_trap_ip = ctx->uc_mcontext.gregs[REG_IP]; + printf("\tTrapped at %016lx\n", last_trap_ip); +} + +int main(int argc, char *argv[]) +{ + sethandler(SIGTRAP, sigtrap, 0); + + /* + * Set the Trap Flag (TF) to single-step the test code, therefore to + * trigger a SIGTRAP signal after each instruction until the TF is + * cleared. + * + * Because the arithmetic flags are not significant here, the TF is + * set by pushing 0x302 onto the stack and then popping it into the + * flags register. + * + * Four instructions in the following asm code are executed with the + * TF set, thus the SIGTRAP handler is expected to run four times. + */ + printf("[RUN]\tSIGTRAP infinite loop detection\n"); + asm volatile( +#ifdef __x86_64__ + /* + * Avoid clobbering the redzone + * + * Equivalent to "sub $128, %rsp", however -128 can be encoded + * in a single byte immediate while 128 uses 4 bytes. + */ + "add $-128, %rsp\n\t" +#endif + "push $0x302\n\t" + "popf\n\t" + "nop\n\t" + "nop\n\t" + "push $0x202\n\t" + "popf\n\t" +#ifdef __x86_64__ + "sub $-128, %rsp\n\t" +#endif + ); + + printf("[OK]\tNo SIGTRAP infinite loop detected\n"); + return 0; +} diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 4505b1c31be1..14718ca23a05 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -159,6 +159,14 @@ typedef __bitwise unsigned int vm_fault_t; #define ASSERT_EXCLUSIVE_WRITER(x) +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + struct kref { refcount_t refcount; }; @@ -1468,4 +1476,12 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) (void)vma; } +static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) +{ + /* Changing an anonymous vma with this is illegal */ + get_file(file); + swap(vma->vm_file, file); + fput(file); +} + #endif /* __MM_VMA_INTERNAL_H */ |
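
The vma_set_file() stub above deliberately orders the operations as get_file() before swap() before fput(): the new file is pinned before the old reference is dropped, so the sequence stays safe even when the VMA already holds the same file (the refcount never dips to zero in between). A toy userspace model of that ordering (illustrative only; struct file, get_file, and fput here are stand-ins, not the kernel API; swap() is the macro defined above):

#include <stdio.h>

#define swap(a, b) \
	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

struct file { int refcnt; };

static void get_file(struct file *f) { f->refcnt++; }
static void fput(struct file *f)     { f->refcnt--; }

static void set_file(struct file **slot, struct file *file)
{
	get_file(file);		/* pin the incoming file first */
	swap(*slot, file);	/* *slot now holds the new file */
	fput(file);		/* drop the displaced reference last */
}

int main(void)
{
	struct file a = { .refcnt = 1 }, b = { .refcnt = 1 };
	struct file *slot = &a;

	set_file(&slot, &b);
	printf("a=%d b=%d slot==&b:%d\n", a.refcnt, b.refcnt, slot == &b);
	set_file(&slot, &b);	/* old == new: stays pinned throughout */
	printf("b=%d\n", b.refcnt);
	return 0;
}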