diff options
1012 files changed, 12187 insertions, 8653 deletions
@@ -416,6 +416,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com> Kenneth Westfield <quic_kwestfie@quicinc.com> <kwestfie@codeaurora.org> Kiran Gunda <quic_kgunda@quicinc.com> <kgunda@codeaurora.org> Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com> +Kirill A. Shutemov <kas@kernel.org> <kirill.shutemov@linux.intel.com> Kishon Vijay Abraham I <kishon@kernel.org> <kishon@ti.com> Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@linaro.org> Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@somainline.org> @@ -693,6 +694,10 @@ Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de> Senthilkumar N L <quic_snlakshm@quicinc.com> <snlakshm@codeaurora.org> Serge Hallyn <sergeh@kernel.org> <serge.hallyn@canonical.com> Serge Hallyn <sergeh@kernel.org> <serue@us.ibm.com> +Sergey Senozhatsky <senozhatsky@chromium.org> <sergey.senozhatsky.work@gmail.com> +Sergey Senozhatsky <senozhatsky@chromium.org> <sergey.senozhatsky@gmail.com> +Sergey Senozhatsky <senozhatsky@chromium.org> <sergey.senozhatsky@mail.by> +Sergey Senozhatsky <senozhatsky@chromium.org> <senozhatsky@google.com> Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com> Shakeel Butt <shakeel.butt@linux.dev> <shakeelb@google.com> Shannon Nelson <sln@onemain.com> <shannon.nelson@amd.com> @@ -1397,6 +1397,10 @@ N: Thomas Gleixner E: tglx@linutronix.de D: NAND flash hardware support, JFFS2 on NAND flash +N: Jérôme Glisse +E: jglisse@redhat.com +D: HMM - Heterogeneous Memory Management + N: Richard E. Gooch E: rgooch@atnf.csiro.au D: parent process death signal to children diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index bf85f4de6862..ab8cd337f43a 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -584,6 +584,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 /sys/devices/system/cpu/vulnerabilities/srbds + /sys/devices/system/cpu/vulnerabilities/tsa /sys/devices/system/cpu/vulnerabilities/tsx_async_abort Date: January 2018 Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> diff --git a/Documentation/ABI/testing/sysfs-fs-erofs b/Documentation/ABI/testing/sysfs-fs-erofs index bf3b6299c15e..76d9808ed581 100644 --- a/Documentation/ABI/testing/sysfs-fs-erofs +++ b/Documentation/ABI/testing/sysfs-fs-erofs @@ -5,7 +5,7 @@ Description: Shows all enabled kernel features. Supported features: zero_padding, compr_cfgs, big_pcluster, chunked_file, device_table, compr_head2, sb_chksum, ztailpacking, - dedupe, fragments. + dedupe, fragments, 48bit, metabox. What: /sys/fs/erofs/<disk>/sync_decompress Date: November 2021 @@ -35,3 +35,11 @@ Description: Used to set or show hardware accelerators in effect and multiple accelerators are separated by '\n'. Supported accelerator(s): qat_deflate. Disable all accelerators with an empty string (echo > accel). + +What: /sys/fs/erofs/<disk>/dir_ra_bytes +Date: July 2025 +Contact: "Chao Yu" <chao@kernel.org> +Description: Used to set or show readahead bytes during readdir(), by + default the value is 16384. + + - 0: disable readahead. diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 0cc35a14afbe..bd98ea3175ec 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1732,12 +1732,6 @@ The following nested keys are defined. numa_hint_faults (npn) Number of NUMA hinting faults. - numa_task_migrated (npn) - Number of task migration by NUMA balancing. - - numa_task_swapped (npn) - Number of task swap by NUMA balancing. - pgdemote_kswapd Number of pages demoted by kswapd. diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst index 1302fd1b55e8..6dba18dbb9ab 100644 --- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -157,9 +157,7 @@ This is achieved by using the otherwise unused and obsolete VERW instruction in combination with a microcode update. The microcode clears the affected CPU buffers when the VERW instruction is executed. -Kernel reuses the MDS function to invoke the buffer clearing: - - mds_clear_cpu_buffers() +Kernel does the buffer clearing with x86_clear_cpu_buffers(). On MDS affected CPUs, the kernel already invokes CPU buffer clear on kernel/userspace, hypervisor/guest and C-state (idle) transitions. No diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f1f2c0874da9..07e22ba5bfe3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7488,6 +7488,19 @@ having this key zero'ed is acceptable. E.g. in testing scenarios. + tsa= [X86] Control mitigation for Transient Scheduler + Attacks on AMD CPUs. Search the following in your + favourite search engine for more details: + + "Technical guidance for mitigating transient scheduler + attacks". + + off - disable the mitigation + on - enable the mitigation (default) + user - mitigate only user/kernel transitions + vm - mitigate only guest/host transitions + + tsc= Disable clocksource stability checks for TSC. Format: <string> [x86] reliable: mark tsc clocksource as reliable, this diff --git a/Documentation/arch/x86/mds.rst b/Documentation/arch/x86/mds.rst index 5a2e6c0ef04a..3518671e1a85 100644 --- a/Documentation/arch/x86/mds.rst +++ b/Documentation/arch/x86/mds.rst @@ -93,7 +93,7 @@ enters a C-state. The kernel provides a function to invoke the buffer clearing: - mds_clear_cpu_buffers() + x86_clear_cpu_buffers() Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. Other than CFLAGS.ZF, this macro doesn't clobber any registers. @@ -185,9 +185,9 @@ Mitigation points idle clearing would be a window dressing exercise and is therefore not activated. - The invocation is controlled by the static key mds_idle_clear which is - switched depending on the chosen mitigation mode and the SMT state of - the system. + The invocation is controlled by the static key cpu_buf_idle_clear which is + switched depending on the chosen mitigation mode and the SMT state of the + system. The buffer clear is only invoked before entering the C-State to prevent that stale data from the idling CPU from spilling to the Hyper-Thread diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml index 2985c8c717d7..5403242545ab 100644 --- a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml +++ b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml @@ -52,6 +52,9 @@ properties: '#clock-cells': const: 1 + '#reset-cells': + const: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml index 29f12d650442..1a5209139e13 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml @@ -223,12 +223,6 @@ allOf: - required: - pwms - - oneOf: - - required: - - interrupts - - required: - - io-backends - - if: properties: compatible: diff --git a/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml b/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml index d1a6103fc37a..f3242dc0e7e6 100644 --- a/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml +++ b/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml @@ -21,7 +21,7 @@ properties: vlogic-supply: true interrupts: - minItems: 1 + maxItems: 1 description: Interrupt mapping for the trigger interrupt from the internal oscillator. diff --git a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml index b3d6db922693..1aa5775ba2bc 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml @@ -110,7 +110,7 @@ examples: reg = <0x01cb4000 0x1000>; interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_BUS_CSI>, - <&ccu CLK_CSI1_SCLK>, + <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI>; clock-names = "bus", "mod", diff --git a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml index a61a76bb611c..3ea4a4290f23 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml @@ -79,7 +79,7 @@ examples: reg = <0x01cb8000 0x1000>; interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_BUS_CSI>, - <&ccu CLK_CSI1_SCLK>, + <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI>; clock-names = "bus", "mod", "ram"; resets = <&ccu RST_BUS_CSI>; diff --git a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml index 54e15ab8a7f5..627b28e94354 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml @@ -103,7 +103,7 @@ examples: reg = <0x01cb1000 0x1000>; interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_BUS_CSI>, - <&ccu CLK_CSI1_SCLK>; + <&ccu CLK_CSI_SCLK>; clock-names = "bus", "mod"; resets = <&ccu RST_BUS_CSI>; diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml index 7b6a2fde8175..19934d5c24e5 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml @@ -23,7 +23,7 @@ properties: - allwinner,sun20i-d1-emac - allwinner,sun50i-h6-emac - allwinner,sun50i-h616-emac0 - - allwinner,sun55i-a523-emac0 + - allwinner,sun55i-a523-gmac0 - const: allwinner,sun50i-a64-emac reg: diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index a5734bdd1cc7..bc5a389c0c98 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1258,3 +1258,21 @@ iterator needed. Instead of a cloned mount tree, the new interface returns an array of struct path, one for each mount collect_mounts() would've created. These struct path point to locations in the caller's namespace that would be roots of the cloned mounts. + +--- + +**mandatory** + +If your filesystem sets the default dentry_operations, use set_default_d_op() +rather than manually setting sb->s_d_op. + +--- + +**mandatory** + +d_set_d_op() is no longer exported (or public, for that matter); _if_ +your filesystem really needed that, make use of d_splice_alias_ops() +to have them set. Better yet, think hard whether you need different +->d_op for different dentries - if not, just use set_default_d_op() +at mount time and be done with that. Currently procfs is the only +thing that really needs ->d_op varying between dentries. diff --git a/Documentation/hwmon/ina238.rst b/Documentation/hwmon/ina238.rst index d1b93cf8627f..9a24da4786a4 100644 --- a/Documentation/hwmon/ina238.rst +++ b/Documentation/hwmon/ina238.rst @@ -65,7 +65,7 @@ Additional sysfs entries for sq52206 ------------------------------------ ======================= ======================================================= -energy1_input Energy measurement (mJ) +energy1_input Energy measurement (uJ) power1_input_highest Peak Power (uW) ======================= ======================================================= diff --git a/Documentation/netlink/specs/ovpn.yaml b/Documentation/netlink/specs/ovpn.yaml index 096c51f0c69a..ba76426a542d 100644 --- a/Documentation/netlink/specs/ovpn.yaml +++ b/Documentation/netlink/specs/ovpn.yaml @@ -161,6 +161,66 @@ attribute-sets: type: uint doc: Number of packets transmitted at the transport level - + name: peer-new-input + subset-of: peer + attributes: + - + name: id + - + name: remote-ipv4 + - + name: remote-ipv6 + - + name: remote-ipv6-scope-id + - + name: remote-port + - + name: socket + - + name: vpn-ipv4 + - + name: vpn-ipv6 + - + name: local-ipv4 + - + name: local-ipv6 + - + name: keepalive-interval + - + name: keepalive-timeout + - + name: peer-set-input + subset-of: peer + attributes: + - + name: id + - + name: remote-ipv4 + - + name: remote-ipv6 + - + name: remote-ipv6-scope-id + - + name: remote-port + - + name: vpn-ipv4 + - + name: vpn-ipv6 + - + name: local-ipv4 + - + name: local-ipv6 + - + name: keepalive-interval + - + name: keepalive-timeout + - + name: peer-del-input + subset-of: peer + attributes: + - + name: id + - name: keyconf attributes: - @@ -216,6 +276,33 @@ attribute-sets: obtain the actual cipher IV checks: exact-len: nonce-tail-size + + - + name: keyconf-get + subset-of: keyconf + attributes: + - + name: peer-id + - + name: slot + - + name: key-id + - + name: cipher-alg + - + name: keyconf-swap-input + subset-of: keyconf + attributes: + - + name: peer-id + - + name: keyconf-del-input + subset-of: keyconf + attributes: + - + name: peer-id + - + name: slot - name: ovpn attributes: @@ -235,12 +322,66 @@ attribute-sets: type: nest doc: Peer specific cipher configuration nested-attributes: keyconf + - + name: ovpn-peer-new-input + subset-of: ovpn + attributes: + - + name: ifindex + - + name: peer + nested-attributes: peer-new-input + - + name: ovpn-peer-set-input + subset-of: ovpn + attributes: + - + name: ifindex + - + name: peer + nested-attributes: peer-set-input + - + name: ovpn-peer-del-input + subset-of: ovpn + attributes: + - + name: ifindex + - + name: peer + nested-attributes: peer-del-input + - + name: ovpn-keyconf-get + subset-of: ovpn + attributes: + - + name: ifindex + - + name: keyconf + nested-attributes: keyconf-get + - + name: ovpn-keyconf-swap-input + subset-of: ovpn + attributes: + - + name: ifindex + - + name: keyconf + nested-attributes: keyconf-swap-input + - + name: ovpn-keyconf-del-input + subset-of: ovpn + attributes: + - + name: ifindex + - + name: keyconf + nested-attributes: keyconf-del-input operations: list: - name: peer-new - attribute-set: ovpn + attribute-set: ovpn-peer-new-input flags: [ admin-perm ] doc: Add a remote peer do: @@ -252,7 +393,7 @@ operations: - peer - name: peer-set - attribute-set: ovpn + attribute-set: ovpn-peer-set-input flags: [ admin-perm ] doc: modify a remote peer do: @@ -286,7 +427,7 @@ operations: - peer - name: peer-del - attribute-set: ovpn + attribute-set: ovpn-peer-del-input flags: [ admin-perm ] doc: Delete existing remote peer do: @@ -316,7 +457,7 @@ operations: - keyconf - name: key-get - attribute-set: ovpn + attribute-set: ovpn-keyconf-get flags: [ admin-perm ] doc: Retrieve non-sensitive data about peer key and cipher do: @@ -331,7 +472,7 @@ operations: - keyconf - name: key-swap - attribute-set: ovpn + attribute-set: ovpn-keyconf-swap-input flags: [ admin-perm ] doc: Swap primary and secondary session keys for a specific peer do: @@ -350,7 +491,7 @@ operations: mcgrp: peers - name: key-del - attribute-set: ovpn + attribute-set: ovpn-keyconf-del-input flags: [ admin-perm ] doc: Delete cipher key for a specific peer do: diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9abf93ee5f65..544fb11351d9 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -2008,6 +2008,13 @@ If the KVM_CAP_VM_TSC_CONTROL capability is advertised, this can also be used as a vm ioctl to set the initial tsc frequency of subsequently created vCPUs. +For TSC protected Confidential Computing (CoCo) VMs where TSC frequency +is configured once at VM scope and remains unchanged during VM's +lifetime, the vm ioctl should be used to configure the TSC frequency +and the vcpu ioctl is not supported. + +Example of such CoCo VMs: TDX guests. + 4.56 KVM_GET_TSC_KHZ -------------------- @@ -7196,6 +7203,10 @@ The valid value for 'flags' is: u64 leaf; u64 r11, r12, r13, r14; } get_tdvmcall_info; + struct { + u64 ret; + u64 vector; + } setup_event_notify; }; } tdx; @@ -7210,21 +7221,24 @@ number from register R11. The remaining field of the union provide the inputs and outputs of the TDVMCALL. Currently the following values of ``nr`` are defined: -* ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote -signed by a service hosting TD-Quoting Enclave operating on the host. -Parameters and return value are in the ``get_quote`` field of the union. -The ``gpa`` field and ``size`` specify the guest physical address -(without the shared bit set) and the size of a shared-memory buffer, in -which the TDX guest passes a TD Report. The ``ret`` field represents -the return value of the GetQuote request. When the request has been -queued successfully, the TDX guest can poll the status field in the -shared-memory area to check whether the Quote generation is completed or -not. When completed, the generated Quote is returned via the same buffer. - -* ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support -status of TDVMCALLs. The output values for the given leaf should be -placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info`` -field of the union. + * ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote + signed by a service hosting TD-Quoting Enclave operating on the host. + Parameters and return value are in the ``get_quote`` field of the union. + The ``gpa`` field and ``size`` specify the guest physical address + (without the shared bit set) and the size of a shared-memory buffer, in + which the TDX guest passes a TD Report. The ``ret`` field represents + the return value of the GetQuote request. When the request has been + queued successfully, the TDX guest can poll the status field in the + shared-memory area to check whether the Quote generation is completed or + not. When completed, the generated Quote is returned via the same buffer. + + * ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support + status of TDVMCALLs. The output values for the given leaf should be + placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info`` + field of the union. + + * ``TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT``: the guest has requested to + set up a notification interrupt for vector ``vector``. KVM may add support for more values in the future that may cause a userspace exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case, diff --git a/Documentation/virt/kvm/review-checklist.rst b/Documentation/virt/kvm/review-checklist.rst index dc01aea4057b..debac54e14e7 100644 --- a/Documentation/virt/kvm/review-checklist.rst +++ b/Documentation/virt/kvm/review-checklist.rst @@ -7,7 +7,7 @@ Review checklist for kvm patches 1. The patch must follow Documentation/process/coding-style.rst and Documentation/process/submitting-patches.rst. -2. Patches should be against kvm.git master branch. +2. Patches should be against kvm.git master or next branches. 3. If the patch introduces or modifies a new userspace API: - the API must be documented in Documentation/virt/kvm/api.rst @@ -18,10 +18,10 @@ Review checklist for kvm patches 5. New features must default to off (userspace should explicitly request them). Performance improvements can and should default to on. -6. New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2 +6. New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2, + or its equivalent for non-x86 architectures -7. Emulator changes should be accompanied by unit tests for qemu-kvm.git - kvm/test directory. +7. The feature should be testable (see below). 8. Changes should be vendor neutral when possible. Changes to common code are better than duplicating changes to vendor code. @@ -36,6 +36,87 @@ Review checklist for kvm patches 11. New guest visible features must either be documented in a hardware manual or be accompanied by documentation. -12. Features must be robust against reset and kexec - for example, shared - host/guest memory must be unshared to prevent the host from writing to - guest memory that the guest has not reserved for this purpose. +Testing of KVM code +------------------- + +All features contributed to KVM, and in many cases bugfixes too, should be +accompanied by some kind of tests and/or enablement in open source guests +and VMMs. KVM is covered by multiple test suites: + +*Selftests* + These are low level tests that allow granular testing of kernel APIs. + This includes API failure scenarios, invoking APIs after specific + guest instructions, and testing multiple calls to ``KVM_CREATE_VM`` + within a single test. They are included in the kernel tree at + ``tools/testing/selftests/kvm``. + +``kvm-unit-tests`` + A collection of small guests that test CPU and emulated device features + from a guest's perspective. They run under QEMU or ``kvmtool``, and + are generally not KVM-specific: they can be run with any accelerator + that QEMU support or even on bare metal, making it possible to compare + behavior across hypervisors and processor families. + +Functional test suites + Various sets of functional tests exist, such as QEMU's ``tests/functional`` + suite and `avocado-vt <https://avocado-vt.readthedocs.io/en/latest/>`__. + These typically involve running a full operating system in a virtual + machine. + +The best testing approach depends on the feature's complexity and +operation. Here are some examples and guidelines: + +New instructions (no new registers or APIs) + The corresponding CPU features (if applicable) should be made available + in QEMU. If the instructions require emulation support or other code in + KVM, it is worth adding coverage to ``kvm-unit-tests`` or selftests; + the latter can be a better choice if the instructions relate to an API + that already has good selftest coverage. + +New hardware features (new registers, no new APIs) + These should be tested via ``kvm-unit-tests``; this more or less implies + supporting them in QEMU and/or ``kvmtool``. In some cases selftests + can be used instead, similar to the previous case, or specifically to + test corner cases in guest state save/restore. + +Bug fixes and performance improvements + These usually do not introduce new APIs, but it's worth sharing + any benchmarks and tests that will validate your contribution, + ideally in the form of regression tests. Tests and benchmarks + can be included in either ``kvm-unit-tests`` or selftests, depending + on the specifics of your change. Selftests are especially useful for + regression tests because they are included directly in Linux's tree. + +Large scale internal changes + While it's difficult to provide a single policy, you should ensure that + the changed code is covered by either ``kvm-unit-tests`` or selftests. + In some cases the affected code is run for any guests and functional + tests suffice. Explain your testing process in the cover letter, + as that can help identify gaps in existing test suites. + +New APIs + It is important to demonstrate your use case. This can be as simple as + explaining that the feature is already in use on bare metal, or it can be + a proof-of-concept implementation in userspace. The latter need not be + open source, though that is of course preferrable for easier testing. + Selftests should test corner cases of the APIs, and should also cover + basic host and guest operation if no open source VMM uses the feature. + +Bigger features, usually spanning host and guest + These should be supported by Linux guests, with limited exceptions for + Hyper-V features that are testable on Windows guests. It is strongly + suggested that the feature be usable with an open source host VMM, such + as at least one of QEMU or crosvm, and guest firmware. Selftests should + test at least API error cases. Guest operation can be covered by + either selftests of ``kvm-unit-tests`` (this is especially important for + paravirtualized and Windows-only features). Strong selftest coverage + can also be a replacement for implementation in an open source VMM, + but this is generally not recommended. + +Following the above suggestions for testing in selftests and +``kvm-unit-tests`` will make it easier for the maintainers to review +and accept your code. In fact, even before you contribute your changes +upstream it will make it easier for you to develop for KVM. + +Of course, the KVM maintainers reserve the right to require more tests, +though they may also waive the requirement from time to time. diff --git a/Documentation/virt/kvm/x86/intel-tdx.rst b/Documentation/virt/kvm/x86/intel-tdx.rst index 76bdd95334d6..5efac62c92c7 100644 --- a/Documentation/virt/kvm/x86/intel-tdx.rst +++ b/Documentation/virt/kvm/x86/intel-tdx.rst @@ -79,7 +79,20 @@ to be configured to the TDX guest. struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + /* TDG.VP.VMCALL hypercalls executed in kernel and forwarded to + * userspace, respectively + */ + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + + /* TDG.VP.VMCALL instruction executions subfunctions executed in kernel + * and forwarded to userspace, respectively + */ + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/MAINTAINERS b/MAINTAINERS index fad6cb025a19..c0b444e5fd5a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4181,6 +4181,7 @@ F: include/linux/cpumask_types.h F: include/linux/find.h F: include/linux/nodemask.h F: include/linux/nodemask_types.h +F: include/uapi/linux/bits.h F: include/vdso/bits.h F: lib/bitmap-str.c F: lib/bitmap.c @@ -4193,6 +4194,7 @@ F: tools/include/linux/bitfield.h F: tools/include/linux/bitmap.h F: tools/include/linux/bits.h F: tools/include/linux/find.h +F: tools/include/uapi/linux/bits.h F: tools/include/vdso/bits.h F: tools/lib/bitmap.c F: tools/lib/find_bit.c @@ -5568,6 +5570,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git F: drivers/char/ F: drivers/misc/ F: include/linux/miscdevice.h +F: rust/kernel/miscdevice.rs F: samples/rust/rust_misc_device.rs X: drivers/char/agp/ X: drivers/char/hw_random/ @@ -10504,7 +10507,7 @@ S: Maintained F: block/partitions/efi.* HABANALABS PCI DRIVER -M: Ofir Bitton <obitton@habana.ai> +M: Yaron Avizrat <yaron.avizrat@intel.com> L: dri-devel@lists.freedesktop.org S: Supported C: irc://irc.oftc.net/dri-devel @@ -11006,7 +11009,8 @@ F: Documentation/ABI/testing/debugfs-hisi-zip F: drivers/crypto/hisilicon/zip/ HMM - Heterogeneous Memory Management -M: Jérôme Glisse <jglisse@redhat.com> +M: Jason Gunthorpe <jgg@nvidia.com> +M: Leon Romanovsky <leonro@nvidia.com> L: linux-mm@kvack.org S: Maintained F: Documentation/mm/hmm.rst @@ -12185,9 +12189,8 @@ F: drivers/dma/idxd/* F: include/uapi/linux/idxd.h INTEL IN FIELD SCAN (IFS) DEVICE -M: Jithu Joseph <jithu.joseph@intel.com> +M: Tony Luck <tony.luck@intel.com> R: Ashok Raj <ashok.raj.linux@gmail.com> -R: Tony Luck <tony.luck@intel.com> S: Maintained F: drivers/platform/x86/intel/ifs F: include/trace/events/intel_ifs.h @@ -12527,8 +12530,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi.git F: drivers/net/wireless/intel/iwlwifi/ INTEL WMI SLIM BOOTLOADER (SBL) FIRMWARE UPDATE DRIVER -M: Jithu Joseph <jithu.joseph@intel.com> -S: Maintained +S: Orphan W: https://slimbootloader.github.io/security/firmware-update.html F: drivers/platform/x86/intel/wmi/sbl-fw-update.c @@ -16822,8 +16824,8 @@ F: include/dt-bindings/clock/mobileye,eyeq5-clk.h MODULE SUPPORT M: Luis Chamberlain <mcgrof@kernel.org> M: Petr Pavlu <petr.pavlu@suse.com> +M: Daniel Gomez <da.gomez@kernel.org> R: Sami Tolvanen <samitolvanen@google.com> -R: Daniel Gomez <da.gomez@samsung.com> L: linux-modules@vger.kernel.org L: linux-kernel@vger.kernel.org S: Maintained @@ -17222,10 +17224,10 @@ F: drivers/rtc/rtc-ntxec.c F: include/linux/mfd/ntxec.h NETRONOME ETHERNET DRIVERS -M: Louis Peens <louis.peens@corigine.com> R: Jakub Kicinski <kuba@kernel.org> +R: Simon Horman <horms@kernel.org> L: oss-drivers@corigine.com -S: Maintained +S: Odd Fixes F: drivers/net/ethernet/netronome/ NETWORK BLOCK DEVICE (NBD) @@ -17382,6 +17384,7 @@ F: include/linux/ethtool.h F: include/linux/framer/framer-provider.h F: include/linux/framer/framer.h F: include/linux/in.h +F: include/linux/in6.h F: include/linux/indirect_call_wrapper.h F: include/linux/inet.h F: include/linux/inet_diag.h @@ -19601,8 +19604,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git F: drivers/pinctrl/intel/ PIN CONTROLLER - KEEMBAY -M: Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com> -S: Supported +S: Orphan F: drivers/pinctrl/pinctrl-keembay* PIN CONTROLLER - MEDIATEK @@ -20155,21 +20157,15 @@ S: Supported F: Documentation/devicetree/bindings/soc/qcom/qcom,apr* F: Documentation/devicetree/bindings/sound/qcom,* F: drivers/soc/qcom/apr.c -F: include/dt-bindings/sound/qcom,wcd9335.h -F: include/dt-bindings/sound/qcom,wcd934x.h -F: sound/soc/codecs/lpass-rx-macro.* -F: sound/soc/codecs/lpass-tx-macro.* -F: sound/soc/codecs/lpass-va-macro.c -F: sound/soc/codecs/lpass-wsa-macro.* +F: drivers/soundwire/qcom.c +F: include/dt-bindings/sound/qcom,wcd93* +F: sound/soc/codecs/lpass-*.* F: sound/soc/codecs/msm8916-wcd-analog.c F: sound/soc/codecs/msm8916-wcd-digital.c F: sound/soc/codecs/wcd-clsh-v2.* F: sound/soc/codecs/wcd-mbhc-v2.* -F: sound/soc/codecs/wcd9335.* -F: sound/soc/codecs/wcd934x.c -F: sound/soc/codecs/wsa881x.c -F: sound/soc/codecs/wsa883x.c -F: sound/soc/codecs/wsa884x.c +F: sound/soc/codecs/wcd93*.* +F: sound/soc/codecs/wsa88*.* F: sound/soc/qcom/ QCOM EMBEDDED USB DEBUGGER (EUD) @@ -25907,6 +25903,8 @@ F: fs/hostfs/ USERSPACE COPYIN/COPYOUT (UIOVEC) M: Alexander Viro <viro@zeniv.linux.org.uk> +L: linux-block@vger.kernel.org +L: linux-fsdevel@vger.kernel.org S: Maintained F: include/linux/uio.h F: lib/iov_iter.c @@ -26944,7 +26942,7 @@ F: arch/x86/kernel/stacktrace.c F: arch/x86/kernel/unwind_*.c X86 TRUST DOMAIN EXTENSIONS (TDX) -M: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> +M: Kirill A. Shutemov <kas@kernel.org> R: Dave Hansen <dave.hansen@linux.intel.com> L: x86@kernel.org L: linux-coco@lists.linux.dev @@ -27313,13 +27311,6 @@ S: Supported W: http://www.marvell.com F: drivers/i2c/busses/i2c-xlp9xx.c -XRA1403 GPIO EXPANDER -M: Nandor Han <nandor.han@ge.com> -L: linux-gpio@vger.kernel.org -S: Maintained -F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt -F: drivers/gpio/gpio-xra1403.c - XTENSA XTFPGA PLATFORM SUPPORT M: Max Filippov <jcmvbkbc@gmail.com> S: Maintained @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = NAME = Baby Opossum Posse # *DOCUMENTATION* diff --git a/arch/alpha/include/asm/param.h b/arch/alpha/include/asm/param.h deleted file mode 100644 index cfe947ce9461..000000000000 --- a/arch/alpha/include/asm/param.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ALPHA_PARAM_H -#define _ASM_ALPHA_PARAM_H - -#include <uapi/asm/param.h> - -# undef HZ -# define HZ CONFIG_HZ -# define USER_HZ 1024 -# define CLOCKS_PER_SEC USER_HZ /* frequency at which times() counts */ - -#endif /* _ASM_ALPHA_PARAM_H */ diff --git a/arch/alpha/include/uapi/asm/param.h b/arch/alpha/include/uapi/asm/param.h index 49c7119934e2..e4e410f9bf85 100644 --- a/arch/alpha/include/uapi/asm/param.h +++ b/arch/alpha/include/uapi/asm/param.h @@ -2,14 +2,9 @@ #ifndef _UAPI_ASM_ALPHA_PARAM_H #define _UAPI_ASM_ALPHA_PARAM_H -#define HZ 1024 - +#define __USER_HZ 1024 #define EXEC_PAGESIZE 8192 -#ifndef NOGROUP -#define NOGROUP (-1) -#endif - -#define MAXHOSTNAMELEN 64 /* max length of hostname */ +#include <asm-generic/param.h> #endif /* _UAPI_ASM_ALPHA_PARAM_H */ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3072731fe09c..962451e54fdd 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -121,7 +121,7 @@ config ARM select HAVE_KERNEL_XZ select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M select HAVE_KRETPROBES if HAVE_KPROBES - select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if (LD_VERSION >= 23600 || LD_CAN_USE_KEEP_IN_OVERLAY) + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if (LD_VERSION >= 23600 || LD_IS_LLD) && LD_CAN_USE_KEEP_IN_OVERLAY select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OPTPROBES if !THUMB2_KERNEL diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 4808d3ed98e4..e31e95ffd33f 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -149,7 +149,7 @@ endif # Need -Uarm for gcc < 3.x KBUILD_CPPFLAGS +=$(cpp-y) KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm -KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,$(arch-y) $(tune-y) -include asm/unified.h -msoft-float +KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,$(arch-y) $(tune-y) -include $(srctree)/arch/arm/include/asm/unified.h -msoft-float KBUILD_RUSTFLAGS += --target=arm-unknown-linux-gnueabi CHECKFLAGS += -D__arm__ diff --git a/arch/arm/boot/dts/allwinner/sun8i-v3s.dtsi b/arch/arm/boot/dts/allwinner/sun8i-v3s.dtsi index f909b1d4dbca..e82cf312da25 100644 --- a/arch/arm/boot/dts/allwinner/sun8i-v3s.dtsi +++ b/arch/arm/boot/dts/allwinner/sun8i-v3s.dtsi @@ -652,7 +652,7 @@ reg = <0x01cb4000 0x3000>; interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_BUS_CSI>, - <&ccu CLK_CSI1_SCLK>, + <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI>; clock-names = "bus", "mod", "ram"; resets = <&ccu RST_BUS_CSI>; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 55fc331af337..393d71124f5d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -256,6 +256,7 @@ config ARM64 select HOTPLUG_SMT if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING + select JUMP_LABEL select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA select MODULES_USE_ELF_RELA diff --git a/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi b/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi index 8b7cbc2e78f5..51cd148f4227 100644 --- a/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun55i-a523.dtsi @@ -131,7 +131,7 @@ "PH5", "PH6", "PH7", "PH9", "PH10", "PH14", "PH15", "PH16", "PH17", "PH18"; allwinner,pinmux = <5>; - function = "emac0"; + function = "gmac0"; drive-strength = <40>; bias-disable; }; @@ -540,8 +540,8 @@ status = "disabled"; }; - emac0: ethernet@4500000 { - compatible = "allwinner,sun55i-a523-emac0", + gmac0: ethernet@4500000 { + compatible = "allwinner,sun55i-a523-gmac0", "allwinner,sun50i-a64-emac"; reg = <0x04500000 0x10000>; clocks = <&ccu CLK_BUS_EMAC0>; diff --git a/arch/arm64/boot/dts/allwinner/sun55i-a527-cubie-a5e.dts b/arch/arm64/boot/dts/allwinner/sun55i-a527-cubie-a5e.dts index 0f58d92a6adc..8bc0f2c72a24 100644 --- a/arch/arm64/boot/dts/allwinner/sun55i-a527-cubie-a5e.dts +++ b/arch/arm64/boot/dts/allwinner/sun55i-a527-cubie-a5e.dts @@ -12,7 +12,7 @@ compatible = "radxa,cubie-a5e", "allwinner,sun55i-a527"; aliases { - ethernet0 = &emac0; + ethernet0 = &gmac0; serial0 = &uart0; }; @@ -55,7 +55,7 @@ status = "okay"; }; -&emac0 { +&gmac0 { phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_cldo3>; diff --git a/arch/arm64/boot/dts/allwinner/sun55i-t527-avaota-a1.dts b/arch/arm64/boot/dts/allwinner/sun55i-t527-avaota-a1.dts index 08127f0cdd35..142177c1f737 100644 --- a/arch/arm64/boot/dts/allwinner/sun55i-t527-avaota-a1.dts +++ b/arch/arm64/boot/dts/allwinner/sun55i-t527-avaota-a1.dts @@ -12,7 +12,7 @@ compatible = "yuzukihd,avaota-a1", "allwinner,sun55i-t527"; aliases { - ethernet0 = &emac0; + ethernet0 = &gmac0; serial0 = &uart0; }; @@ -65,7 +65,7 @@ status = "okay"; }; -&emac0 { +&gmac0 { phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_dcdc4>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index 0baf256b4400..983b2f0e8797 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -687,11 +687,12 @@ }; wdog0: watchdog@2ad0000 { - compatible = "fsl,imx21-wdt"; + compatible = "fsl,ls1046a-wdt", "fsl,imx21-wdt"; reg = <0x0 0x2ad0000 0x0 0x10000>; interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(2)>; + big-endian; }; edma0: dma-controller@2c00000 { diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index d29710772569..1594ce9182a5 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -464,6 +464,7 @@ }; reg_nvcc_sd: LDO5 { + regulator-always-on; regulator-max-microvolt = <3300000>; regulator-min-microvolt = <1800000>; regulator-name = "On-module +V3.3_1.8_SD (LDO5)"; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi index 2f740d74707b..4bf818873fe3 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi @@ -70,7 +70,7 @@ tpm@1 { compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; - spi-max-frequency = <36000000>; + spi-max-frequency = <25000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi index 5ab3ffe9931d..cf747ec6fa16 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi @@ -110,7 +110,7 @@ tpm@1 { compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; - spi-max-frequency = <36000000>; + spi-max-frequency = <25000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi index e2b5e7ac3e46..5eb114d2360a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi @@ -122,7 +122,7 @@ tpm@1 { compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; - spi-max-frequency = <36000000>; + spi-max-frequency = <25000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts index 6daa2313f879..568d24265ddf 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts @@ -201,7 +201,7 @@ tpm@0 { compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x0>; - spi-max-frequency = <36000000>; + spi-max-frequency = <25000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts index 6c47f4b47356..9f4d0899a94d 100644 --- a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts @@ -574,17 +574,17 @@ &scmi_iomuxc { pinctrl_emdio: emdiogrp { fsl,pins = < - IMX95_PAD_ENET2_MDC__NETCMIX_TOP_NETC_MDC 0x57e - IMX95_PAD_ENET2_MDIO__NETCMIX_TOP_NETC_MDIO 0x97e + IMX95_PAD_ENET2_MDC__NETCMIX_TOP_NETC_MDC 0x50e + IMX95_PAD_ENET2_MDIO__NETCMIX_TOP_NETC_MDIO 0x90e >; }; pinctrl_enetc0: enetc0grp { fsl,pins = < - IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x57e - IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x57e - IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x57e - IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x57e + IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x50e + IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x50e + IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x50e + IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x50e IMX95_PAD_ENET1_TX_CTL__NETCMIX_TOP_ETH0_RGMII_TX_CTL 0x57e IMX95_PAD_ENET1_TXC__NETCMIX_TOP_ETH0_RGMII_TX_CLK 0x58e IMX95_PAD_ENET1_RX_CTL__NETCMIX_TOP_ETH0_RGMII_RX_CTL 0x57e @@ -598,10 +598,10 @@ pinctrl_enetc1: enetc1grp { fsl,pins = < - IMX95_PAD_ENET2_TD3__NETCMIX_TOP_ETH1_RGMII_TD3 0x57e - IMX95_PAD_ENET2_TD2__NETCMIX_TOP_ETH1_RGMII_TD2 0x57e - IMX95_PAD_ENET2_TD1__NETCMIX_TOP_ETH1_RGMII_TD1 0x57e - IMX95_PAD_ENET2_TD0__NETCMIX_TOP_ETH1_RGMII_TD0 0x57e + IMX95_PAD_ENET2_TD3__NETCMIX_TOP_ETH1_RGMII_TD3 0x50e + IMX95_PAD_ENET2_TD2__NETCMIX_TOP_ETH1_RGMII_TD2 0x50e + IMX95_PAD_ENET2_TD1__NETCMIX_TOP_ETH1_RGMII_TD1 0x50e + IMX95_PAD_ENET2_TD0__NETCMIX_TOP_ETH1_RGMII_TD0 0x50e IMX95_PAD_ENET2_TX_CTL__NETCMIX_TOP_ETH1_RGMII_TX_CTL 0x57e IMX95_PAD_ENET2_TXC__NETCMIX_TOP_ETH1_RGMII_TX_CLK 0x58e IMX95_PAD_ENET2_RX_CTL__NETCMIX_TOP_ETH1_RGMII_RX_CTL 0x57e diff --git a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts index 6886ea766655..d7d845231312 100644 --- a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts @@ -566,17 +566,17 @@ &scmi_iomuxc { pinctrl_emdio: emdiogrp{ fsl,pins = < - IMX95_PAD_ENET1_MDC__NETCMIX_TOP_NETC_MDC 0x57e - IMX95_PAD_ENET1_MDIO__NETCMIX_TOP_NETC_MDIO 0x97e + IMX95_PAD_ENET1_MDC__NETCMIX_TOP_NETC_MDC 0x50e + IMX95_PAD_ENET1_MDIO__NETCMIX_TOP_NETC_MDIO 0x90e >; }; pinctrl_enetc0: enetc0grp { fsl,pins = < - IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x57e - IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x57e - IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x57e - IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x57e + IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x50e + IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x50e + IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x50e + IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x50e IMX95_PAD_ENET1_TX_CTL__NETCMIX_TOP_ETH0_RGMII_TX_CTL 0x57e IMX95_PAD_ENET1_TXC__NETCMIX_TOP_ETH0_RGMII_TX_CLK 0x58e IMX95_PAD_ENET1_RX_CTL__NETCMIX_TOP_ETH0_RGMII_RX_CTL 0x57e diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 632631a29112..5aecdd9b62ff 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -1708,7 +1708,7 @@ <0x9 0 1 0>; reg-names = "dbi","atu", "dbi2", "app", "dma", "addr_space"; num-lanes = <1>; - interrupts = <GIC_SPI 317 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 311 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "dma"; clocks = <&scmi_clk IMX95_CLK_HSIO>, <&scmi_clk IMX95_CLK_HSIOPLL>, diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index ae7a275fd223..cefecb7a23cf 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -1090,6 +1090,8 @@ }; &pmk8280_rtc { + qcom,uefi-rtc-info; + status = "okay"; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi b/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi index c02fd4d15c96..e3888bc143a0 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi @@ -224,6 +224,7 @@ reg-names = "rtc", "alarm"; interrupts = <0x0 0x62 0x1 IRQ_TYPE_EDGE_RISING>; qcom,no-alarm; /* alarm owned by ADSP */ + qcom,uefi-rtc-info; }; pmk8550_sdam_2: nvram@7100 { diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi index ab232e5c7ad6..4203b335a263 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi @@ -379,6 +379,18 @@ <0 RK_PA7 RK_FUNC_GPIO &pcfg_pull_up>; }; }; + + spi1 { + spi1_csn0_gpio_pin: spi1-csn0-gpio-pin { + rockchip,pins = + <3 RK_PB1 RK_FUNC_GPIO &pcfg_pull_up_4ma>; + }; + + spi1_csn1_gpio_pin: spi1-csn1-gpio-pin { + rockchip,pins = + <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up_4ma>; + }; + }; }; &pmu_io_domains { @@ -396,6 +408,17 @@ vqmmc-supply = <&vccio_sd>; }; +&spi1 { + /* + * Hardware CS has a very slow rise time of about 6us, + * causing transmission errors. + * With cs-gpios we have a rise time of about 20ns. + */ + cs-gpios = <&gpio3 RK_PB1 GPIO_ACTIVE_LOW>, <&gpio3 RK_PB2 GPIO_ACTIVE_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&spi1_clk &spi1_csn0_gpio_pin &spi1_csn1_gpio_pin &spi1_miso &spi1_mosi>; +}; + &tsadc { status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index 3c127c5c2607..a9021c524afb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -30,6 +30,7 @@ fan: gpio_fan { compatible = "gpio-fan"; + fan-supply = <&vcc12v_dcin>; gpios = <&gpio0 RK_PD5 GPIO_ACTIVE_HIGH>; gpio-fan,speed-map = < 0 0>, diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts index 3b31f0dd8f3b..539edc3c535f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts @@ -29,7 +29,6 @@ function-enumerator = <1>; gpios = <&gpio3 RK_PD6 GPIO_ACTIVE_HIGH>; label = "LAN-1"; - linux,default-trigger = "netdev"; }; led-lan2 { @@ -39,7 +38,6 @@ function-enumerator = <2>; gpios = <&gpio3 RK_PD7 GPIO_ACTIVE_HIGH>; label = "LAN-2"; - linux,default-trigger = "netdev"; }; power_led: led-sys { @@ -56,7 +54,6 @@ function = LED_FUNCTION_WAN; gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; label = "WAN"; - linux,default-trigger = "netdev"; }; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts index b09e789c75c4..801b40fea4e8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts @@ -211,10 +211,38 @@ status = "okay"; }; +&cpu_b0 { + cpu-supply = <&vdd_cpu_big_s0>; +}; + +&cpu_b1 { + cpu-supply = <&vdd_cpu_big_s0>; +}; + +&cpu_b2 { + cpu-supply = <&vdd_cpu_big_s0>; +}; + +&cpu_b3 { + cpu-supply = <&vdd_cpu_big_s0>; +}; + &cpu_l0 { cpu-supply = <&vdd_cpu_lit_s0>; }; +&cpu_l1 { + cpu-supply = <&vdd_cpu_lit_s0>; +}; + +&cpu_l2 { + cpu-supply = <&vdd_cpu_lit_s0>; +}; + +&cpu_l3 { + cpu-supply = <&vdd_cpu_lit_s0>; +}; + &gmac0 { phy-mode = "rgmii-id"; clock_in_out = "output"; diff --git a/arch/arm64/boot/dts/rockchip/rk3576.dtsi b/arch/arm64/boot/dts/rockchip/rk3576.dtsi index 1086482f0479..64812e3bcb61 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3576.dtsi @@ -615,7 +615,7 @@ <0 0 0 2 &pcie1_intc 1>, <0 0 0 3 &pcie1_intc 2>, <0 0 0 4 &pcie1_intc 3>; - linux,pci-domain = <0>; + linux,pci-domain = <1>; max-link-speed = <2>; num-ib-windows = <8>; num-viewport = <8>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi index 7f874c77410c..6584d73660f6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi @@ -578,14 +578,14 @@ hdmim0_tx0_scl: hdmim0-tx0-scl { rockchip,pins = /* hdmim0_tx0_scl */ - <4 RK_PB7 5 &pcfg_pull_none>; + <4 RK_PB7 5 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim0_tx0_sda: hdmim0-tx0-sda { rockchip,pins = /* hdmim0_tx0_sda */ - <4 RK_PC0 5 &pcfg_pull_none>; + <4 RK_PC0 5 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ @@ -640,14 +640,14 @@ hdmim1_tx0_scl: hdmim1-tx0-scl { rockchip,pins = /* hdmim1_tx0_scl */ - <0 RK_PD5 11 &pcfg_pull_none>; + <0 RK_PD5 11 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim1_tx0_sda: hdmim1-tx0-sda { rockchip,pins = /* hdmim1_tx0_sda */ - <0 RK_PD4 11 &pcfg_pull_none>; + <0 RK_PD4 11 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ @@ -668,14 +668,14 @@ hdmim1_tx1_scl: hdmim1-tx1-scl { rockchip,pins = /* hdmim1_tx1_scl */ - <3 RK_PC6 5 &pcfg_pull_none>; + <3 RK_PC6 5 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim1_tx1_sda: hdmim1-tx1-sda { rockchip,pins = /* hdmim1_tx1_sda */ - <3 RK_PC5 5 &pcfg_pull_none>; + <3 RK_PC5 5 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ hdmim2_rx_cec: hdmim2-rx-cec { @@ -709,14 +709,14 @@ hdmim2_tx0_scl: hdmim2-tx0-scl { rockchip,pins = /* hdmim2_tx0_scl */ - <3 RK_PC7 5 &pcfg_pull_none>; + <3 RK_PC7 5 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim2_tx0_sda: hdmim2-tx0-sda { rockchip,pins = /* hdmim2_tx0_sda */ - <3 RK_PD0 5 &pcfg_pull_none>; + <3 RK_PD0 5 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ @@ -730,14 +730,14 @@ hdmim2_tx1_scl: hdmim2-tx1-scl { rockchip,pins = /* hdmim2_tx1_scl */ - <1 RK_PA4 5 &pcfg_pull_none>; + <1 RK_PA4 5 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim2_tx1_sda: hdmim2-tx1-sda { rockchip,pins = /* hdmim2_tx1_sda */ - <1 RK_PA3 5 &pcfg_pull_none>; + <1 RK_PA3 5 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi index cc37f082adea..b07543315f87 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi @@ -321,6 +321,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi index 244c66faa161..fb48ddc04bcb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi @@ -160,14 +160,15 @@ hdmim0_tx1_scl: hdmim0-tx1-scl { rockchip,pins = /* hdmim0_tx1_scl */ - <2 RK_PB5 4 &pcfg_pull_none>; + <2 RK_PB5 4 &pcfg_pull_none_drv_level_3_smt>; }; /omit-if-no-ref/ hdmim0_tx1_sda: hdmim0-tx1-sda { rockchip,pins = /* hdmim0_tx1_sda */ - <2 RK_PB4 4 &pcfg_pull_none>; + <2 RK_PB4 4 &pcfg_pull_none_drv_level_1_smt>; + }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts index 8b717c4017a4..b2947b36fada 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts @@ -474,6 +474,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; diff --git a/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi b/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi index 5c645437b507..b0475b7c655a 100644 --- a/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi +++ b/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi @@ -333,6 +333,41 @@ }; /omit-if-no-ref/ + pcfg_pull_none_drv_level_1_smt: pcfg-pull-none-drv-level-1-smt { + bias-disable; + drive-strength = <1>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ + pcfg_pull_none_drv_level_2_smt: pcfg-pull-none-drv-level-2-smt { + bias-disable; + drive-strength = <2>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ + pcfg_pull_none_drv_level_3_smt: pcfg-pull-none-drv-level-3-smt { + bias-disable; + drive-strength = <3>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ + pcfg_pull_none_drv_level_4_smt: pcfg-pull-none-drv-level-4-smt { + bias-disable; + drive-strength = <4>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ + pcfg_pull_none_drv_level_5_smt: pcfg-pull-none-drv-level-5-smt { + bias-disable; + drive-strength = <5>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ pcfg_output_high: pcfg-output-high { output-high; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 7e04a2905ce4..eb5c17d4c7ec 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1444,6 +1444,7 @@ CONFIG_PLATFORM_MHU=y CONFIG_BCM2835_MBOX=y CONFIG_QCOM_APCS_IPC=y CONFIG_MTK_ADSP_MBOX=m +CONFIG_QCOM_CPUCP_MBOX=m CONFIG_QCOM_IPCC=y CONFIG_ROCKCHIP_IOMMU=y CONFIG_TEGRA_IOMMU_SMMU=y diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ad63457a05c5..c56c21bb1eec 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -41,6 +41,11 @@ /* * Save/restore interrupts. */ + .macro save_and_disable_daif, flags + mrs \flags, daif + msr daifset, #0xf + .endm + .macro save_and_disable_irq, flags mrs \flags, daif msr daifset, #3 diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index ba5df0df02a4..9f38340d24c2 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -287,17 +287,6 @@ .Lskip_fgt2_\@: .endm -.macro __init_el2_gcs - mrs_s x1, SYS_ID_AA64PFR1_EL1 - ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 - cbz x1, .Lskip_gcs_\@ - - /* Ensure GCS is not enabled when we start trying to do BLs */ - msr_s SYS_GCSCR_EL1, xzr - msr_s SYS_GCSCRE0_EL1, xzr -.Lskip_gcs_\@: -.endm - /** * Initialize EL2 registers to sane values. This should be called early on all * cores that were booted in EL2. Note that everything gets initialised as @@ -319,7 +308,6 @@ __init_el2_cptr __init_el2_fgt __init_el2_fgt2 - __init_el2_gcs .endm #ifndef __KVM_NVHE_HYPERVISOR__ @@ -371,6 +359,13 @@ msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2 .Lskip_mpam_\@: + check_override id_aa64pfr1, ID_AA64PFR1_EL1_GCS_SHIFT, .Linit_gcs_\@, .Lskip_gcs_\@, x1, x2 + +.Linit_gcs_\@: + msr_s SYS_GCSCR_EL1, xzr + msr_s SYS_GCSCRE0_EL1, xzr + +.Lskip_gcs_\@: check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2 .Linit_sve_\@: /* SVE register access */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d27079968341..3e41a880b062 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1480,7 +1480,6 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range); /* Guest/host FPSIMD coordination helpers */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2920b0a51403..a2faf0049dab 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -34,7 +34,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ syscall.o proton-pack.o idle.o patching.o pi/ \ - rsi.o + rsi.o jump_label.o obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o @@ -47,7 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o -obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o obj-$(CONFIG_PCI) += pci.o diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b34044e20128..e151585c6cca 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3135,6 +3135,13 @@ static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope) } #endif +#ifdef CONFIG_ARM64_SME +static bool has_sme_feature(const struct arm64_cpu_capabilities *cap, int scope) +{ + return system_supports_sme() && has_user_cpuid_feature(cap, scope); +} +#endif + static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL), HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES), @@ -3223,31 +3230,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC), #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), - HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), - HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), - HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), - HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), - HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), - HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), - HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), - HWCAP_CAP(ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), #endif /* CONFIG_ARM64_SME */ HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT), HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA), diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 3857fd7ee8d4..62230d6dd919 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -15,6 +15,7 @@ #include <asm/efi.h> #include <asm/stacktrace.h> +#include <asm/vmap_stack.h> static bool region_is_misaligned(const efi_memory_desc_t *md) { @@ -214,9 +215,13 @@ static int __init arm64_efi_rt_init(void) if (!efi_enabled(EFI_RUNTIME_SERVICES)) return 0; - p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL, - NUMA_NO_NODE, &&l); -l: if (!p) { + if (!IS_ENABLED(CONFIG_VMAP_STACK)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return -ENOMEM; + } + + p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE); + if (!p) { pr_warn("Failed to allocate EFI runtime stack\n"); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return -ENOMEM; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5ae2a34b50bd..30dcb719685b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -825,6 +825,7 @@ SYM_CODE_END(__bp_harden_el1_vectors) * */ SYM_FUNC_START(cpu_switch_to) + save_and_disable_daif x11 mov x10, #THREAD_CPU_CONTEXT add x8, x0, x10 mov x9, sp @@ -848,6 +849,7 @@ SYM_FUNC_START(cpu_switch_to) ptrauth_keys_install_kernel x1, x8, x9, x10 scs_save x0 scs_load_current + restore_irq x11 ret SYM_FUNC_END(cpu_switch_to) NOKPROBE(cpu_switch_to) @@ -874,6 +876,7 @@ NOKPROBE(ret_from_fork) * Calls func(regs) using this CPU's irq stack and shadow irq stack. */ SYM_FUNC_START(call_on_irq_stack) + save_and_disable_daif x9 #ifdef CONFIG_SHADOW_CALL_STACK get_current_task x16 scs_save x16 @@ -888,8 +891,10 @@ SYM_FUNC_START(call_on_irq_stack) /* Move to the new stack and call the function there */ add sp, x16, #IRQ_STACK_SIZE + restore_irq x9 blr x1 + save_and_disable_daif x9 /* * Restore the SP from the FP, and restore the FP and LR from the frame * record. @@ -897,6 +902,7 @@ SYM_FUNC_START(call_on_irq_stack) mov sp, x29 ldp x29, x30, [sp], #16 scs_load_current + restore_irq x9 ret SYM_FUNC_END(call_on_irq_stack) NOKPROBE(call_on_irq_stack) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 5954cec19660..08b7042a2e2d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -673,6 +673,11 @@ static void permission_overlay_switch(struct task_struct *next) current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0); if (current->thread.por_el0 != next->thread.por_el0) { write_sysreg_s(next->thread.por_el0, SYS_POR_EL0); + /* + * No ISB required as we can tolerate spurious Overlay faults - + * the fault handler will check again based on the new value + * of POR_EL0. + */ } } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3b3f6b56e733..21a795303568 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1143,7 +1143,7 @@ static inline unsigned int num_other_online_cpus(void) void smp_send_stop(void) { static unsigned long stop_in_progress; - cpumask_t mask; + static cpumask_t mask; unsigned long timeout; /* diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 38a91bb5d4c7..23dd3f3fc3eb 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -825,10 +825,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (!kvm_arm_vcpu_is_finalized(vcpu)) return -EPERM; - ret = kvm_arch_vcpu_run_map_fp(vcpu); - if (ret) - return ret; - if (likely(vcpu_has_run_once(vcpu))) return 0; @@ -2129,7 +2125,7 @@ static void cpu_hyp_init(void *discard) static void cpu_hyp_uninit(void *discard) { - if (__this_cpu_read(kvm_hyp_initialized)) { + if (!is_protected_kvm_enabled() && __this_cpu_read(kvm_hyp_initialized)) { cpu_hyp_reset(); __this_cpu_write(kvm_hyp_initialized, 0); } @@ -2345,8 +2341,13 @@ static void __init teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) { + if (per_cpu(kvm_hyp_initialized, cpu)) + continue; + free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT); - free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + + if (!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]) + continue; if (free_sve) { struct cpu_sve_state *sve_state; @@ -2354,6 +2355,9 @@ static void __init teardown_hyp_mode(void) sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state; free_pages((unsigned long) sve_state, pkvm_host_sve_state_order()); } + + free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + } } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 8f6c8f57c6b9..15e17aca1dec 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -15,32 +15,6 @@ #include <asm/sysreg.h> /* - * Called on entry to KVM_RUN unless this vcpu previously ran at least - * once and the most recent prior KVM_RUN for this vcpu was called from - * the same task as current (highly likely). - * - * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu), - * such that on entering hyp the relevant parts of current are already - * mapped. - */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) -{ - struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; - int ret; - - /* pKVM has its own tracking of the host fpsimd state. */ - if (is_protected_kvm_enabled()) - return 0; - - /* Make sure the host task fpsimd state is visible to hyp: */ - ret = kvm_share_hyp(fpsimd, fpsimd + 1); - if (ret) - return ret; - - return 0; -} - -/* * Prepare vcpu for saving the host's FPSIMD state and loading the guest's. * The actual loading is done by the FPSIMD access trap taken to hyp. * diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 95d7534c9679..8957734d6183 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -479,6 +479,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) { struct kvm_mem_range cur; kvm_pte_t pte; + u64 granule; s8 level; int ret; @@ -496,18 +497,21 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return -EPERM; } - do { - u64 granule = kvm_granule_size(level); + for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) { + if (!kvm_level_supports_block_mapping(level)) + continue; + granule = kvm_granule_size(level); cur.start = ALIGN_DOWN(addr, granule); cur.end = cur.start + granule; - level++; - } while ((level <= KVM_PGTABLE_LAST_LEVEL) && - !(kvm_level_supports_block_mapping(level) && - range_included(&cur, range))); + if (!range_included(&cur, range)) + continue; + *range = cur; + return 0; + } - *range = cur; + WARN_ON(1); - return 0; + return -EINVAL; } int host_stage2_idmap_locked(phys_addr_t addr, u64 size, diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 5b191f4dc566..dc1d26559bfa 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1402,6 +1402,21 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) } } +#define has_tgran_2(__r, __sz) \ + ({ \ + u64 _s1, _s2, _mmfr0 = __r; \ + \ + _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz##_2, _mmfr0); \ + \ + _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz, _mmfr0); \ + \ + ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \ + _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \ + (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \ + _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \ + }) /* * Our emulated CPU doesn't support all the possible features. For the * sake of simplicity (and probably mental sanity), wipe out a number @@ -1411,6 +1426,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) */ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) { + u64 orig_val = val; + switch (reg) { case SYS_ID_AA64ISAR0_EL1: /* Support everything but TME */ @@ -1480,13 +1497,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) */ switch (PAGE_SIZE) { case SZ_4K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); + if (has_tgran_2(orig_val, 4)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); fallthrough; case SZ_16K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); + if (has_tgran_2(orig_val, 16)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); fallthrough; case SZ_64K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); + if (has_tgran_2(orig_val, 64)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); break; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 76c2f0da821f..c20bd6f21e60 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2624,7 +2624,7 @@ static bool access_mdcr(struct kvm_vcpu *vcpu, */ if (hpmn > vcpu->kvm->arch.nr_pmu_counters) { hpmn = vcpu->kvm->arch.nr_pmu_counters; - u64_replace_bits(val, hpmn, MDCR_EL2_HPMN); + u64p_replace_bits(&val, hpmn, MDCR_EL2_HPMN); } __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val); diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index a50fb7e6841f..679aafe77de2 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -401,9 +401,7 @@ void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu) { bool level; - level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En; - if (level) - level &= vgic_v3_get_misr(vcpu); + level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && vgic_v3_get_misr(vcpu); kvm_vgic_inject_irq(vcpu->kvm, vcpu, vcpu->kvm->arch.vgic.mi_intid, level, vcpu); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ec0a337891dd..11eb8d1adc84 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -487,17 +487,29 @@ static void do_bad_area(unsigned long far, unsigned long esr, } } -static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma, - unsigned int mm_flags) +static bool fault_from_pkey(struct vm_area_struct *vma, unsigned int mm_flags) { - unsigned long iss2 = ESR_ELx_ISS2(esr); - if (!system_supports_poe()) return false; - if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay)) - return true; - + /* + * We do not check whether an Overlay fault has occurred because we + * cannot make a decision based solely on its value: + * + * - If Overlay is set, a fault did occur due to POE, but it may be + * spurious in those cases where we update POR_EL0 without ISB (e.g. + * on context-switch). We would then need to manually check POR_EL0 + * against vma_pkey(vma), which is exactly what + * arch_vma_access_permitted() does. + * + * - If Overlay is not set, we may still need to report a pkey fault. + * This is the case if an access was made within a mapping but with no + * page mapped, and POR_EL0 forbids the access (according to + * vma_pkey()). Such access will result in a SIGSEGV regardless + * because core code checks arch_vma_access_permitted(), but in order + * to report the correct error code - SEGV_PKUERR - we must handle + * that case here. + */ return !arch_vma_access_permitted(vma, mm_flags & FAULT_FLAG_WRITE, mm_flags & FAULT_FLAG_INSTRUCTION, @@ -635,7 +647,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); vma_end_read(vma); fault = 0; @@ -679,7 +691,7 @@ retry: goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); mmap_read_unlock(mm); fault = 0; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 80d470aa469d..54dccfd6aa11 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -518,7 +518,6 @@ alternative_else_nop_endif msr REG_PIR_EL1, x0 orr tcr2, tcr2, TCR2_EL1_PIE - msr REG_TCR2_EL1, x0 .Lskip_indirection: diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 80ddb5edb845..b04d2cef935f 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -10,5 +10,4 @@ generic-y += user.h generic-y += ioctl.h generic-y += mmzone.h generic-y += statfs.h -generic-y += param.h generic-y += text-patching.h diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 9f9e4b871627..7ec60290abe6 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -143,42 +143,13 @@ spufs_evict_inode(struct inode *inode) put_spu_gang(ei->i_gang); } -static void spufs_prune_dir(struct dentry *dir) -{ - struct dentry *dentry; - struct hlist_node *n; - - inode_lock(d_inode(dir)); - hlist_for_each_entry_safe(dentry, n, &dir->d_children, d_sib) { - spin_lock(&dentry->d_lock); - if (simple_positive(dentry)) { - dget_dlock(dentry); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - simple_unlink(d_inode(dir), dentry); - /* XXX: what was dcache_lock protecting here? Other - * filesystems (IB, configfs) release dcache_lock - * before unlink */ - dput(dentry); - } else { - spin_unlock(&dentry->d_lock); - } - } - shrink_dcache_parent(dir); - inode_unlock(d_inode(dir)); -} - /* Caller must hold parent->i_mutex */ -static int spufs_rmdir(struct inode *parent, struct dentry *dir) +static void spufs_rmdir(struct inode *parent, struct dentry *dir) { - /* remove all entries */ - int res; - spufs_prune_dir(dir); - d_drop(dir); - res = simple_rmdir(parent, dir); - /* We have to give up the mm_struct */ - spu_forget(SPUFS_I(d_inode(dir))->i_ctx); - return res; + struct spu_context *ctx = SPUFS_I(d_inode(dir))->i_ctx; + + locked_recursive_removal(dir, NULL); + spu_forget(ctx); } static int spufs_fill_dir(struct dentry *dir, @@ -222,15 +193,13 @@ static int spufs_dir_close(struct inode *inode, struct file *file) { struct inode *parent; struct dentry *dir; - int ret; dir = file->f_path.dentry; parent = d_inode(dir->d_parent); inode_lock_nested(parent, I_MUTEX_PARENT); - ret = spufs_rmdir(parent, dir); + spufs_rmdir(parent, dir); inode_unlock(parent); - WARN_ON(ret); unuse_gang(dir->d_parent); return dcache_dir_close(inode, file); @@ -288,11 +257,11 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, ret = spufs_fill_dir(dentry, spufs_dir_debug_contents, mode, ctx); + inode_unlock(inode); + if (ret) spufs_rmdir(dir, dentry); - inode_unlock(inode); - return ret; } @@ -475,7 +444,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, ret = spufs_context_open(&path); if (ret < 0) - WARN_ON(spufs_rmdir(inode, dentry)); + spufs_rmdir(inode, dentry); out_aff_unlock: if (affinity) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d71ea0f4466f..1c5544401530 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -98,6 +98,7 @@ config RISCV select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND + select DYNAMIC_FTRACE if FUNCTION_TRACER select EDAC_SUPPORT select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE) select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE @@ -162,7 +163,7 @@ config RISCV select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if HAVE_DYNAMIC_FTRACE_WITH_ARGS select HAVE_FUNCTION_GRAPH_FREGS - select HAVE_FUNCTION_TRACER if !XIP_KERNEL + select HAVE_FUNCTION_TRACER if !XIP_KERNEL && HAVE_DYNAMIC_FTRACE select HAVE_EBPF_JIT if MMU select HAVE_GUP_FAST if MMU select HAVE_FUNCTION_ARG_ACCESS_API diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h index 3b643b9efc07..5acce285e56e 100644 --- a/arch/riscv/include/asm/kvm_aia.h +++ b/arch/riscv/include/asm/kvm_aia.h @@ -87,6 +87,9 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available); extern struct kvm_device_ops kvm_riscv_aia_device_ops; +bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu); +void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu); int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu); @@ -161,7 +164,6 @@ void kvm_riscv_aia_destroy_vm(struct kvm *kvm); int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner, void __iomem **hgei_va, phys_addr_t *hgei_pa); void kvm_riscv_aia_free_hgei(int cpu, int hgei); -void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable); void kvm_riscv_aia_enable(void); void kvm_riscv_aia_disable(void); diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 85cfebc32e4c..bcbf8b1ec115 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -306,6 +306,9 @@ static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu) return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu; } +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12 void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 525e50db24f7..b88a6218b7f2 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -311,8 +311,8 @@ do { \ do { \ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ !IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) { \ - __inttype(x) val = (__inttype(x))x; \ - if (__asm_copy_to_user_sum_enabled(__gu_ptr, &(val), sizeof(*__gu_ptr))) \ + __inttype(x) ___val = (__inttype(x))x; \ + if (__asm_copy_to_user_sum_enabled(__gu_ptr, &(___val), sizeof(*__gu_ptr))) \ goto label; \ break; \ } \ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 4c6c24380cfd..8d18d6727f0f 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -14,6 +14,18 @@ #include <asm/text-patching.h> #ifdef CONFIG_DYNAMIC_FTRACE +void ftrace_arch_code_modify_prepare(void) + __acquires(&text_mutex) +{ + mutex_lock(&text_mutex); +} + +void ftrace_arch_code_modify_post_process(void) + __releases(&text_mutex) +{ + mutex_unlock(&text_mutex); +} + unsigned long ftrace_call_adjust(unsigned long addr) { if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) @@ -29,10 +41,8 @@ unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip) void arch_ftrace_update_code(int command) { - mutex_lock(&text_mutex); command |= FTRACE_MAY_SLEEP; ftrace_modify_all_code(command); - mutex_unlock(&text_mutex); flush_icache_all(); } @@ -149,6 +159,8 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) unsigned int nops[2], offset; int ret; + guard(mutex)(&text_mutex); + ret = ftrace_rec_set_nop_ops(rec); if (ret) return ret; @@ -157,9 +169,7 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) nops[0] = to_auipc_t0(offset); nops[1] = RISCV_INSN_NOP4; - mutex_lock(&text_mutex); ret = patch_insn_write((void *)pc, nops, 2 * MCOUNT_INSN_SIZE); - mutex_unlock(&text_mutex); return ret; } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 9c83848797a7..80230de167de 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -6,6 +6,7 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/init.h> +#include <linux/irqflags.h> #include <linux/randomize_kstack.h> #include <linux/sched.h> #include <linux/sched/debug.h> @@ -151,7 +152,9 @@ asmlinkage __visible __trap_section void name(struct pt_regs *regs) \ { \ if (user_mode(regs)) { \ irqentry_enter_from_user_mode(regs); \ + local_irq_enable(); \ do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ + local_irq_disable(); \ irqentry_exit_to_user_mode(regs); \ } else { \ irqentry_state_t state = irqentry_nmi_enter(regs); \ @@ -173,17 +176,14 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); - local_irq_enable(); handled = riscv_v_first_use_handler(regs); - - local_irq_disable(); - if (!handled) do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc, "Oops - illegal instruction"); + local_irq_disable(); irqentry_exit_to_user_mode(regs); } else { irqentry_state_t state = irqentry_nmi_enter(regs); @@ -308,9 +308,11 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) { if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); + local_irq_enable(); handle_break(regs); + local_irq_disable(); irqentry_exit_to_user_mode(regs); } else { irqentry_state_t state = irqentry_nmi_enter(regs); diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 93043924fe6c..f760e4fcc052 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -461,7 +461,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) } if (!fp) - SET_RD(insn, regs, val.data_ulong << shift >> shift); + SET_RD(insn, regs, (long)(val.data_ulong << shift) >> shift); else if (len == 8) set_f64_rd(insn, regs, val.data_u64); else diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 19afd1f23537..dad318185660 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -30,28 +30,6 @@ unsigned int kvm_riscv_aia_nr_hgei; unsigned int kvm_riscv_aia_max_ids; DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available); -static int aia_find_hgei(struct kvm_vcpu *owner) -{ - int i, hgei; - unsigned long flags; - struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei); - - raw_spin_lock_irqsave(&hgctrl->lock, flags); - - hgei = -1; - for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) { - if (hgctrl->owners[i] == owner) { - hgei = i; - break; - } - } - - raw_spin_unlock_irqrestore(&hgctrl->lock, flags); - - put_cpu_ptr(&aia_hgei); - return hgei; -} - static inline unsigned long aia_hvictl_value(bool ext_irq_pending) { unsigned long hvictl; @@ -95,7 +73,6 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu) bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) { - int hgei; unsigned long seip; if (!kvm_riscv_aia_available()) @@ -114,11 +91,7 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip) return false; - hgei = aia_find_hgei(vcpu); - if (hgei > 0) - return !!(ncsr_read(CSR_HGEIP) & BIT(hgei)); - - return false; + return kvm_riscv_vcpu_aia_imsic_has_interrupt(vcpu); } void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu) @@ -164,6 +137,9 @@ void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu) csr_write(CSR_HVIPRIO2H, csr->hviprio2h); #endif } + + if (kvm_riscv_aia_initialized(vcpu->kvm)) + kvm_riscv_vcpu_aia_imsic_load(vcpu, cpu); } void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) @@ -174,6 +150,9 @@ void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) if (!kvm_riscv_aia_available()) return; + if (kvm_riscv_aia_initialized(vcpu->kvm)) + kvm_riscv_vcpu_aia_imsic_put(vcpu); + if (kvm_riscv_nacl_available()) { nsh = nacl_shmem(); csr->vsiselect = nacl_csr_read(nsh, CSR_VSISELECT); @@ -472,22 +451,6 @@ void kvm_riscv_aia_free_hgei(int cpu, int hgei) raw_spin_unlock_irqrestore(&hgctrl->lock, flags); } -void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable) -{ - int hgei; - - if (!kvm_riscv_aia_available()) - return; - - hgei = aia_find_hgei(owner); - if (hgei > 0) { - if (enable) - csr_set(CSR_HGEIE, BIT(hgei)); - else - csr_clear(CSR_HGEIE, BIT(hgei)); - } -} - static irqreturn_t hgei_interrupt(int irq, void *dev_id) { int i; diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index 29ef9c2133a9..2ff865943ebb 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -676,6 +676,48 @@ static void imsic_swfile_update(struct kvm_vcpu *vcpu, imsic_swfile_extirq_update(vcpu); } +bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct imsic *imsic = vcpu->arch.aia_context.imsic_state; + unsigned long flags; + bool ret = false; + + /* + * The IMSIC SW-file directly injects interrupt via hvip so + * only check for interrupt when IMSIC VS-file is being used. + */ + + read_lock_irqsave(&imsic->vsfile_lock, flags); + if (imsic->vsfile_cpu > -1) + ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + read_unlock_irqrestore(&imsic->vsfile_lock, flags); + + return ret; +} + +void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu) +{ + /* + * No need to explicitly clear HGEIE CSR bits because the + * hgei interrupt handler (aka hgei_interrupt()) will always + * clear it for us. + */ +} + +void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu) +{ + struct imsic *imsic = vcpu->arch.aia_context.imsic_state; + unsigned long flags; + + if (!kvm_vcpu_is_blocking(vcpu)) + return; + + read_lock_irqsave(&imsic->vsfile_lock, flags); + if (imsic->vsfile_cpu > -1) + csr_set(CSR_HGEIE, BIT(imsic->vsfile_hgei)); + read_unlock_irqrestore(&imsic->vsfile_lock, flags); +} + void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu) { unsigned long flags; @@ -781,6 +823,9 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu) * producers to the new IMSIC VS-file. */ + /* Ensure HGEIE CSR bit is zero before using the new IMSIC VS-file */ + csr_clear(CSR_HGEIE, BIT(new_vsfile_hgei)); + /* Zero-out new IMSIC VS-file */ imsic_vsfile_local_clear(new_vsfile_hgei, imsic->nr_hw_eix); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index e0a01af426ff..0462863206ca 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -207,16 +207,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvm_riscv_vcpu_timer_pending(vcpu); } -void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) -{ - kvm_riscv_aia_wakeon_hgei(vcpu, true); -} - -void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) -{ - kvm_riscv_aia_wakeon_hgei(vcpu, false); -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index ff672fa71fcc..85a7262115e1 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -345,8 +345,24 @@ void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu) /* * The vstimecmp CSRs are saved by kvm_riscv_vcpu_timer_sync() * upon every VM exit so no need to save here. + * + * If VS-timer expires when no VCPU running on a host CPU then + * WFI executed by such host CPU will be effective NOP resulting + * in no power savings. This is because as-per RISC-V Privileged + * specificaiton: "WFI is also required to resume execution for + * locally enabled interrupts pending at any privilege level, + * regardless of the global interrupt enable at each privilege + * level." + * + * To address the above issue, vstimecmp CSR must be set to -1UL + * over here when VCPU is scheduled-out or exits to user space. */ + csr_write(CSR_VSTIMECMP, -1UL); +#if defined(CONFIG_32BIT) + csr_write(CSR_VSTIMECMPH, -1UL); +#endif + /* timer should be enabled for the remaining operations */ if (unlikely(!t->init_done)) return; diff --git a/arch/riscv/tools/relocs_check.sh b/arch/riscv/tools/relocs_check.sh index baeb2e7b2290..742993e6a8cb 100755 --- a/arch/riscv/tools/relocs_check.sh +++ b/arch/riscv/tools/relocs_check.sh @@ -14,7 +14,9 @@ bad_relocs=$( ${srctree}/scripts/relocs_check.sh "$@" | # These relocations are okay # R_RISCV_RELATIVE - grep -F -w -v 'R_RISCV_RELATIVE' + # R_RISCV_NONE + grep -F -w -v 'R_RISCV_RELATIVE +R_RISCV_NONE' ) if [ -z "$bad_relocs" ]; then diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index d229cbd2ba22..9b0d55be1239 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -38,6 +38,7 @@ static int s390_sha1_init(struct shash_desc *desc) sctx->state[4] = SHA1_H4; sctx->count = 0; sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } @@ -60,6 +61,7 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in) sctx->count = ictx->count; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 33711a29618c..6cbbf5e8555f 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -32,6 +32,7 @@ static int sha512_init(struct shash_desc *desc) ctx->count = 0; ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } @@ -57,6 +58,7 @@ static int sha512_import(struct shash_desc *desc, const void *in) memcpy(sctx->state, ictx->state, sizeof(ictx->state)); sctx->func = CPACF_KIMD_SHA_512; + sctx->first_message_part = 0; return 0; } @@ -97,6 +99,7 @@ static int sha384_init(struct shash_desc *desc) ctx->count = 0; ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index c7f8313ba449..0c9a35782c83 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -566,7 +566,15 @@ static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target) { memcpy(plt, &bpf_plt, sizeof(*plt)); plt->ret = ret; - plt->target = target; + /* + * (target == NULL) implies that the branch to this PLT entry was + * patched and became a no-op. However, some CPU could have jumped + * to this PLT entry before patching and may be still executing it. + * + * Since the intention in this case is to make the PLT entry a no-op, + * make the target point to the return label instead of NULL. + */ + plt->target = target ?: ret; } /* diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 04ab3b653a48..b6810db24ca4 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -15,7 +15,6 @@ generic-y += mcs_spinlock.h generic-y += mmiowb.h generic-y += module.h generic-y += module.lds.h -generic-y += param.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 71019b3b54ea..8bed9030ad47 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -147,7 +147,7 @@ config X86 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANTS_NO_INSTR select ARCH_WANT_GENERAL_HUGETLB - select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_HUGE_PMD_SHARE if X86_64 select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64 select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64 @@ -2695,6 +2695,15 @@ config MITIGATION_ITS disabled, mitigation cannot be enabled via cmdline. See <file:Documentation/admin-guide/hw-vuln/indirect-target-selection.rst> +config MITIGATION_TSA + bool "Mitigate Transient Scheduler Attacks" + depends on CPU_SUP_AMD + default y + help + Enable mitigation for Transient Scheduler Attacks. TSA is a hardware + security vulnerability on AMD CPUs which can lead to forwarding of + invalid info to subsequent instructions and thus can affect their + timing and thereby cause a leakage. endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile index db3255b979bd..342d79f0ab6a 100644 --- a/arch/x86/coco/sev/Makefile +++ b/arch/x86/coco/sev/Makefile @@ -5,5 +5,6 @@ obj-y += core.o sev-nmi.o vc-handle.o # Clang 14 and older may fail to respect __no_sanitize_undefined when inlining UBSAN_SANITIZE_sev-nmi.o := n -# GCC may fail to respect __no_sanitize_address when inlining +# GCC may fail to respect __no_sanitize_address or __no_kcsan when inlining KASAN_SANITIZE_sev-nmi.o := n +KCSAN_SANITIZE_sev-nmi.o := n diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 175958b02f2b..8e9a0cc20a4a 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -36,20 +36,20 @@ EXPORT_SYMBOL_GPL(write_ibpb); /* * Define the VERW operand that is disguised as entry code so that - * it can be referenced with KPTI enabled. This ensure VERW can be + * it can be referenced with KPTI enabled. This ensures VERW can be * used late in exit-to-user path after page tables are switched. */ .pushsection .entry.text, "ax" .align L1_CACHE_BYTES, 0xcc -SYM_CODE_START_NOALIGN(mds_verw_sel) +SYM_CODE_START_NOALIGN(x86_verw_sel) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR .word __KERNEL_DS .align L1_CACHE_BYTES, 0xcc -SYM_CODE_END(mds_verw_sel); +SYM_CODE_END(x86_verw_sel); /* For KVM */ -EXPORT_SYMBOL_GPL(mds_verw_sel); +EXPORT_SYMBOL_GPL(x86_verw_sel); .popsection diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 3d1d3547095a..afdbda2dd7b7 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -34,6 +34,7 @@ #include <linux/syscore_ops.h> #include <clocksource/hyperv_timer.h> #include <linux/highmem.h> +#include <linux/export.h> void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c index 31f0d29cbc5e..090f5ac9f492 100644 --- a/arch/x86/hyperv/irqdomain.c +++ b/arch/x86/hyperv/irqdomain.c @@ -10,6 +10,7 @@ #include <linux/pci.h> #include <linux/irq.h> +#include <linux/export.h> #include <asm/mshyperv.h> static int hv_map_interrupt(union hv_device_id device_id, bool level, @@ -46,7 +47,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, if (nr_bank < 0) { local_irq_restore(flags); pr_err("%s: unable to generate VP set\n", __func__); - return EINVAL; + return -EINVAL; } intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; @@ -66,7 +67,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, if (!hv_result_success(status)) hv_status_err(status, "\n"); - return hv_result(status); + return hv_result_to_errno(status); } static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) @@ -88,7 +89,10 @@ static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL); local_irq_restore(flags); - return hv_result(status); + if (!hv_result_success(status)) + hv_status_err(status, "\n"); + + return hv_result_to_errno(status); } #ifdef CONFIG_PCI_MSI @@ -169,13 +173,34 @@ static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev) return dev_id; } -static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector, - struct hv_interrupt_entry *entry) +/** + * hv_map_msi_interrupt() - "Map" the MSI IRQ in the hypervisor. + * @data: Describes the IRQ + * @out_entry: Hypervisor (MSI) interrupt entry (can be NULL) + * + * Map the IRQ in the hypervisor by issuing a MAP_DEVICE_INTERRUPT hypercall. + * + * Return: 0 on success, -errno on failure + */ +int hv_map_msi_interrupt(struct irq_data *data, + struct hv_interrupt_entry *out_entry) { - union hv_device_id device_id = hv_build_pci_dev_id(dev); + struct irq_cfg *cfg = irqd_cfg(data); + struct hv_interrupt_entry dummy; + union hv_device_id device_id; + struct msi_desc *msidesc; + struct pci_dev *dev; + int cpu; - return hv_map_interrupt(device_id, false, cpu, vector, entry); + msidesc = irq_data_get_msi_desc(data); + dev = msi_desc_to_pci_dev(msidesc); + device_id = hv_build_pci_dev_id(dev); + cpu = cpumask_first(irq_data_get_effective_affinity_mask(data)); + + return hv_map_interrupt(device_id, false, cpu, cfg->vector, + out_entry ? out_entry : &dummy); } +EXPORT_SYMBOL_GPL(hv_map_msi_interrupt); static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg) { @@ -188,13 +213,11 @@ static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry); static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { + struct hv_interrupt_entry *stored_entry; + struct irq_cfg *cfg = irqd_cfg(data); struct msi_desc *msidesc; struct pci_dev *dev; - struct hv_interrupt_entry out_entry, *stored_entry; - struct irq_cfg *cfg = irqd_cfg(data); - const cpumask_t *affinity; - int cpu; - u64 status; + int ret; msidesc = irq_data_get_msi_desc(data); dev = msi_desc_to_pci_dev(msidesc); @@ -204,9 +227,6 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return; } - affinity = irq_data_get_effective_affinity_mask(data); - cpu = cpumask_first_and(affinity, cpu_online_mask); - if (data->chip_data) { /* * This interrupt is already mapped. Let's unmap first. @@ -219,14 +239,12 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) stored_entry = data->chip_data; data->chip_data = NULL; - status = hv_unmap_msi_interrupt(dev, stored_entry); + ret = hv_unmap_msi_interrupt(dev, stored_entry); kfree(stored_entry); - if (status != HV_STATUS_SUCCESS) { - hv_status_debug(status, "failed to unmap\n"); + if (ret) return; - } } stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC); @@ -235,15 +253,14 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return; } - status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry); - if (status != HV_STATUS_SUCCESS) { + ret = hv_map_msi_interrupt(data, stored_entry); + if (ret) { kfree(stored_entry); return; } - *stored_entry = out_entry; data->chip_data = stored_entry; - entry_to_msi_msg(&out_entry, msg); + entry_to_msi_msg(data->chip_data, msg); return; } @@ -257,7 +274,6 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) { struct hv_interrupt_entry old_entry; struct msi_msg msg; - u64 status; if (!irqd->chip_data) { pr_debug("%s: no chip data\n!", __func__); @@ -270,10 +286,7 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) kfree(irqd->chip_data); irqd->chip_data = NULL; - status = hv_unmap_msi_interrupt(dev, &old_entry); - - if (status != HV_STATUS_SUCCESS) - hv_status_err(status, "\n"); + (void)hv_unmap_msi_interrupt(dev, &old_entry); } static void hv_msi_free_irq(struct irq_domain *domain, diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index e93a2f488ff7..ade6c665c97e 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -10,6 +10,7 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/cpu.h> +#include <linux/export.h> #include <asm/svm.h> #include <asm/sev.h> #include <asm/io.h> diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c index 1083dc8646f9..8ccbb7c4fc27 100644 --- a/arch/x86/hyperv/nested.c +++ b/arch/x86/hyperv/nested.c @@ -11,6 +11,7 @@ #include <linux/types.h> +#include <linux/export.h> #include <hyperv/hvhdk.h> #include <asm/mshyperv.h> #include <asm/tlbflush.h> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ee176236c2be..286d509f9363 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -456,6 +456,7 @@ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ #define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */ #define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ @@ -487,6 +488,9 @@ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ #define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ #define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ /* * BUG word(s) @@ -542,5 +546,5 @@ #define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ - +#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 9a9b21b78905..b30e5474c18e 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -44,13 +44,13 @@ static __always_inline void native_irq_enable(void) static __always_inline void native_safe_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static __always_inline void native_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 639d9bcee842..f7af967aa16f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -700,8 +700,13 @@ struct kvm_vcpu_hv { struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; - /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ + /* + * Preallocated buffers for handling hypercalls that pass sparse vCPU + * sets (for high vCPU counts, they're too large to comfortably fit on + * the stack). + */ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct hv_vp_assist_page vp_assist_page; @@ -764,6 +769,7 @@ enum kvm_only_cpuid_leafs { CPUID_8000_0022_EAX, CPUID_7_2_EDX, CPUID_24_0_EBX, + CPUID_8000_0021_ECX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index e1752ba47e67..abc4659f5809 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -112,12 +112,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) return hv_status; } -/* Hypercall to the L0 hypervisor */ -static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output) -{ - return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output); -} - /* Fast hypercall with 8 bytes of input and no output */ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) { @@ -165,13 +159,6 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) return _hv_do_fast_hypercall8(control, input1); } -static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1) -{ - u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED; - - return _hv_do_fast_hypercall8(control, input1); -} - /* Fast hypercall with 16 bytes of input */ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) { @@ -223,13 +210,6 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) return _hv_do_fast_hypercall16(control, input1, input2); } -static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2) -{ - u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED; - - return _hv_do_fast_hypercall16(control, input1, input2); -} - extern struct hv_vp_assist_page **hv_vp_assist_page; static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) @@ -262,6 +242,8 @@ static inline void hv_apic_init(void) {} struct irq_domain *hv_create_pci_msi_domain(void); +int hv_map_msi_interrupt(struct irq_data *data, + struct hv_interrupt_entry *out_entry); int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, struct hv_interrupt_entry *entry); int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b7dded3c8113..5cfb5d74dd5f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -628,6 +628,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index dd2b129b0418..6ca6516c7492 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -43,8 +43,6 @@ static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx) static __always_inline void __mwait(u32 eax, u32 ecx) { - mds_idle_clear_cpu_buffers(); - /* * Use the instruction mnemonic with implicit operands, as the LLVM * assembler fails to assemble the mnemonic with explicit operands: @@ -80,7 +78,7 @@ static __always_inline void __mwait(u32 eax, u32 ecx) */ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { - /* No MDS buffer clear as this is AMD/HYGON only */ + /* No need for TSA buffer clearing on AMD */ /* "mwaitx %eax, %ebx, %ecx" */ asm volatile(".byte 0x0f, 0x01, 0xfb" @@ -98,7 +96,6 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) */ static __always_inline void __sti_mwait(u32 eax, u32 ecx) { - mds_idle_clear_cpu_buffers(); asm volatile("sti; mwait" :: "a" (eax), "c" (ecx)); } @@ -115,21 +112,29 @@ static __always_inline void __sti_mwait(u32 eax, u32 ecx) */ static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { const void *addr = ¤t_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - if (ecx & 1) { - __mwait(eax, ecx); - } else { - __sti_mwait(eax, ecx); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + if (ecx & 1) { + __mwait(eax, ecx); + } else { + __sti_mwait(eax, ecx); + raw_local_irq_disable(); } } + +out: current_clr_polling(); } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 20d754b98f3f..10f261678749 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -302,25 +302,31 @@ .endm /* - * Macro to execute VERW instruction that mitigate transient data sampling - * attacks such as MDS. On affected systems a microcode update overloaded VERW - * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. - * + * Macro to execute VERW insns that mitigate transient data sampling + * attacks such as MDS or TSA. On affected systems a microcode update + * overloaded VERW insns to also clear the CPU buffers. VERW clobbers + * CFLAGS.ZF. * Note: Only the memory operand variant of VERW clears the CPU buffers. */ -.macro CLEAR_CPU_BUFFERS +.macro __CLEAR_CPU_BUFFERS feature #ifdef CONFIG_X86_64 - ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature #else /* * In 32bit mode, the memory operand must be a %cs reference. The data * segments may not be usable (vm86 mode), and the stack segment may not * be flat (ESPFIX32). */ - ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature #endif .endm +#define CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF + +#define VM_CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM + #ifdef CONFIG_X86_64 .macro CLEAR_BRANCH_HISTORY ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP @@ -567,24 +573,24 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear); -extern u16 mds_verw_sel; +extern u16 x86_verw_sel; #include <asm/segment.h> /** - * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. */ -static __always_inline void mds_clear_cpu_buffers(void) +static __always_inline void x86_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; @@ -601,14 +607,15 @@ static __always_inline void mds_clear_cpu_buffers(void) } /** - * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS + * and TSA vulnerabilities. * * Clear CPU buffers if the corresponding static key is enabled */ -static __always_inline void mds_idle_clear_cpu_buffers(void) +static __always_inline void x86_idle_clear_cpu_buffers(void) { - if (static_branch_likely(&mds_idle_clear)) - mds_clear_cpu_buffers(); + if (static_branch_likely(&cpu_buf_idle_clear)) + x86_clear_cpu_buffers(); } #endif /* __ASSEMBLER__ */ diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index d8525e6ef50a..8bc074c8d7c6 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -72,6 +72,7 @@ #define TDVMCALL_MAP_GPA 0x10001 #define TDVMCALL_GET_QUOTE 0x10002 #define TDVMCALL_REPORT_FATAL_ERROR 0x10003 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL /* * TDG.VP.VMCALL Status Codes (returned in R10) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 6f3499507c5e..0f15d683817d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -965,7 +965,13 @@ struct kvm_tdx_cmd { struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 655f44f89ded..329ee185d8cc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -377,6 +377,47 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c) #endif } +#define ZEN_MODEL_STEP_UCODE(fam, model, step, ucode) \ + X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, fam, model), \ + step, step, ucode) + +static const struct x86_cpu_id amd_tsa_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x1, 0x0a0011d7), + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x2, 0x0a00123b), + ZEN_MODEL_STEP_UCODE(0x19, 0x08, 0x2, 0x0a00820d), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x1, 0x0a10114c), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x2, 0x0a10124c), + ZEN_MODEL_STEP_UCODE(0x19, 0x18, 0x1, 0x0a108109), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x0, 0x0a20102e), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x2, 0x0a201211), + ZEN_MODEL_STEP_UCODE(0x19, 0x44, 0x1, 0x0a404108), + ZEN_MODEL_STEP_UCODE(0x19, 0x50, 0x0, 0x0a500012), + ZEN_MODEL_STEP_UCODE(0x19, 0x61, 0x2, 0x0a60120a), + ZEN_MODEL_STEP_UCODE(0x19, 0x74, 0x1, 0x0a704108), + ZEN_MODEL_STEP_UCODE(0x19, 0x75, 0x2, 0x0a705208), + ZEN_MODEL_STEP_UCODE(0x19, 0x78, 0x0, 0x0a708008), + ZEN_MODEL_STEP_UCODE(0x19, 0x7c, 0x0, 0x0a70c008), + ZEN_MODEL_STEP_UCODE(0x19, 0xa0, 0x2, 0x0aa00216), + {}, +}; + +static void tsa_init(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (cpu_has(c, X86_FEATURE_ZEN3) || + cpu_has(c, X86_FEATURE_ZEN4)) { + if (x86_match_min_microcode_rev(amd_tsa_microcode)) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); + else + pr_debug("%s: current revision: 0x%x\n", __func__, c->microcode); + } else { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); + } +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -489,6 +530,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } bsp_determine_snp(c); + + tsa_init(c); + return; warn: @@ -930,6 +974,16 @@ static void init_amd_zen2(struct cpuinfo_x86 *c) init_spectral_chicken(c); fix_erratum_1386(c); zen2_zenbleed_check(c); + + /* Disable RDSEED on AMD Cyan Skillfish because of an error. */ + if (c->x86_model == 0x47 && c->x86_stepping == 0x0) { + clear_cpu_cap(c, X86_FEATURE_RDSEED); + msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18); + pr_emerg("RDSEED is not reliable on this platform; disabling.\n"); + } + + /* Correct misconfigured CPUID on some clients. */ + clear_cpu_cap(c, X86_FEATURE_INVLPGB); } static void init_amd_zen3(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7f94e6a5497d..f4d3abb12317 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -94,6 +94,8 @@ static void __init bhi_apply_mitigation(void); static void __init its_select_mitigation(void); static void __init its_update_mitigation(void); static void __init its_apply_mitigation(void); +static void __init tsa_select_mitigation(void); +static void __init tsa_apply_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -169,9 +171,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb); EXPORT_SYMBOL_GPL(switch_vcpu_ibpb); -/* Control MDS CPU buffer clear before idling (halt, mwait) */ -DEFINE_STATIC_KEY_FALSE(mds_idle_clear); -EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Control CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear); +EXPORT_SYMBOL_GPL(cpu_buf_idle_clear); /* * Controls whether l1d flush based mitigations are enabled, @@ -225,6 +227,7 @@ void __init cpu_select_mitigations(void) gds_select_mitigation(); its_select_mitigation(); bhi_select_mitigation(); + tsa_select_mitigation(); /* * After mitigations are selected, some may need to update their @@ -272,6 +275,7 @@ void __init cpu_select_mitigations(void) gds_apply_mitigation(); its_apply_mitigation(); bhi_apply_mitigation(); + tsa_apply_mitigation(); } /* @@ -637,7 +641,7 @@ static void __init mmio_apply_mitigation(void) * is required irrespective of SMT state. */ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); if (mmio_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -1488,6 +1492,94 @@ static void __init its_apply_mitigation(void) } #undef pr_fmt +#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt + +enum tsa_mitigations { + TSA_MITIGATION_NONE, + TSA_MITIGATION_AUTO, + TSA_MITIGATION_UCODE_NEEDED, + TSA_MITIGATION_USER_KERNEL, + TSA_MITIGATION_VM, + TSA_MITIGATION_FULL, +}; + +static const char * const tsa_strings[] = { + [TSA_MITIGATION_NONE] = "Vulnerable", + [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary", + [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM", + [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", +}; + +static enum tsa_mitigations tsa_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_AUTO : TSA_MITIGATION_NONE; + +static int __init tsa_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + tsa_mitigation = TSA_MITIGATION_NONE; + else if (!strcmp(str, "on")) + tsa_mitigation = TSA_MITIGATION_FULL; + else if (!strcmp(str, "user")) + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + else if (!strcmp(str, "vm")) + tsa_mitigation = TSA_MITIGATION_VM; + else + pr_err("Ignoring unknown tsa=%s option.\n", str); + + return 0; +} +early_param("tsa", tsa_parse_cmdline); + +static void __init tsa_select_mitigation(void) +{ + if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) { + tsa_mitigation = TSA_MITIGATION_NONE; + return; + } + + if (tsa_mitigation == TSA_MITIGATION_NONE) + return; + + if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) { + tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED; + goto out; + } + + if (tsa_mitigation == TSA_MITIGATION_AUTO) + tsa_mitigation = TSA_MITIGATION_FULL; + + /* + * No need to set verw_clear_cpu_buf_mitigation_selected - it + * doesn't fit all cases here and it is not needed because this + * is the only VERW-based mitigation on AMD. + */ +out: + pr_info("%s\n", tsa_strings[tsa_mitigation]); +} + +static void __init tsa_apply_mitigation(void) +{ + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + break; + case TSA_MITIGATION_VM: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + case TSA_MITIGATION_FULL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + default: + break; + } +} + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = @@ -2249,10 +2341,10 @@ static void update_mds_branch_idle(void) return; if (sched_smt_active()) { - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { - static_branch_disable(&mds_idle_clear); + static_branch_disable(&cpu_buf_idle_clear); } } @@ -2316,6 +2408,25 @@ void cpu_bugs_smt_update(void) break; } + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_AUTO: + case TSA_MITIGATION_FULL: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. + */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + case TSA_MITIGATION_UCODE_NEEDED: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -3265,6 +3376,11 @@ static ssize_t gds_show_state(char *buf) return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); } +static ssize_t tsa_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -3328,6 +3444,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITS: return its_show_state(buf); + case X86_BUG_TSA: + return tsa_show_state(buf); + default: break; } @@ -3414,6 +3533,11 @@ ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_att { return cpu_show_common(dev, attr, buf, X86_BUG_ITS); } + +ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TSA); +} #endif void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 27125e009847..fb50c1dd53ef 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1233,6 +1233,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define ITS BIT(8) /* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ #define ITS_NATIVE_ONLY BIT(9) +/* CPU is affected by Transient Scheduler Attacks */ +#define TSA BIT(10) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1280,7 +1282,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), - VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x19, SRSO | TSA), VULNBL_AMD(0x1a, SRSO), {} }; @@ -1530,6 +1532,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); } + if (c->x86_vendor == X86_VENDOR_AMD) { + if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) || + !cpu_has(c, X86_FEATURE_TSA_L1_NO)) { + if (cpu_matches(cpu_vuln_blacklist, TSA) || + /* Enable bug on Zen guests to allow for live migration. */ + (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN))) + setup_force_cpu_bug(X86_BUG_TSA); + } + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c index 2a1655b1fdd8..1fd349cfc802 100644 --- a/arch/x86/kernel/cpu/microcode/amd_shas.c +++ b/arch/x86/kernel/cpu/microcode/amd_shas.c @@ -231,6 +231,13 @@ static const struct patch_digest phashes[] = { 0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21, } }, + { 0xa0011d7, { + 0x35,0x07,0xcd,0x40,0x94,0xbc,0x81,0x6b, + 0xfc,0x61,0x56,0x1a,0xe2,0xdb,0x96,0x12, + 0x1c,0x1c,0x31,0xb1,0x02,0x6f,0xe5,0xd2, + 0xfe,0x1b,0x04,0x03,0x2c,0x8f,0x4c,0x36, + } + }, { 0xa001223, { 0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8, 0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4, @@ -294,6 +301,13 @@ static const struct patch_digest phashes[] = { 0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59, } }, + { 0xa00123b, { + 0xef,0xa1,0x1e,0x71,0xf1,0xc3,0x2c,0xe2, + 0xc3,0xef,0x69,0x41,0x7a,0x54,0xca,0xc3, + 0x8f,0x62,0x84,0xee,0xc2,0x39,0xd9,0x28, + 0x95,0xa7,0x12,0x49,0x1e,0x30,0x71,0x72, + } + }, { 0xa00820c, { 0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3, 0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63, @@ -301,6 +315,13 @@ static const struct patch_digest phashes[] = { 0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2, } }, + { 0xa00820d, { + 0xf9,0x2a,0xc0,0xf4,0x9e,0xa4,0x87,0xa4, + 0x7d,0x87,0x00,0xfd,0xab,0xda,0x19,0xca, + 0x26,0x51,0x32,0xc1,0x57,0x91,0xdf,0xc1, + 0x05,0xeb,0x01,0x7c,0x5a,0x95,0x21,0xb7, + } + }, { 0xa10113e, { 0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10, 0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0, @@ -322,6 +343,13 @@ static const struct patch_digest phashes[] = { 0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4, } }, + { 0xa10114c, { + 0x9e,0xb6,0xa2,0xd9,0x87,0x38,0xc5,0x64, + 0xd8,0x88,0xfa,0x78,0x98,0xf9,0x6f,0x74, + 0x39,0x90,0x1b,0xa5,0xcf,0x5e,0xb4,0x2a, + 0x02,0xff,0xd4,0x8c,0x71,0x8b,0xe2,0xc0, + } + }, { 0xa10123e, { 0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18, 0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d, @@ -343,6 +371,13 @@ static const struct patch_digest phashes[] = { 0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75, } }, + { 0xa10124c, { + 0x29,0xea,0xf1,0x2c,0xb2,0xe4,0xef,0x90, + 0xa4,0xcd,0x1d,0x86,0x97,0x17,0x61,0x46, + 0xfc,0x22,0xcb,0x57,0x75,0x19,0xc8,0xcc, + 0x0c,0xf5,0xbc,0xac,0x81,0x9d,0x9a,0xd2, + } + }, { 0xa108108, { 0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9, 0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6, @@ -350,6 +385,13 @@ static const struct patch_digest phashes[] = { 0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16, } }, + { 0xa108109, { + 0x85,0xb4,0xbd,0x7c,0x49,0xa7,0xbd,0xfa, + 0x49,0x36,0x80,0x81,0xc5,0xb7,0x39,0x1b, + 0x9a,0xaa,0x50,0xde,0x9b,0xe9,0x32,0x35, + 0x42,0x7e,0x51,0x4f,0x52,0x2c,0x28,0x59, + } + }, { 0xa20102d, { 0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11, 0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89, @@ -357,6 +399,13 @@ static const struct patch_digest phashes[] = { 0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4, } }, + { 0xa20102e, { + 0xbe,0x1f,0x32,0x04,0x0d,0x3c,0x9c,0xdd, + 0xe1,0xa4,0xbf,0x76,0x3a,0xec,0xc2,0xf6, + 0x11,0x00,0xa7,0xaf,0x0f,0xe5,0x02,0xc5, + 0x54,0x3a,0x1f,0x8c,0x16,0xb5,0xff,0xbe, + } + }, { 0xa201210, { 0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe, 0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9, @@ -364,6 +413,13 @@ static const struct patch_digest phashes[] = { 0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41, } }, + { 0xa201211, { + 0x69,0xa1,0x17,0xec,0xd0,0xf6,0x6c,0x95, + 0xe2,0x1e,0xc5,0x59,0x1a,0x52,0x0a,0x27, + 0xc4,0xed,0xd5,0x59,0x1f,0xbf,0x00,0xff, + 0x08,0x88,0xb5,0xe1,0x12,0xb6,0xcc,0x27, + } + }, { 0xa404107, { 0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45, 0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0, @@ -371,6 +427,13 @@ static const struct patch_digest phashes[] = { 0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99, } }, + { 0xa404108, { + 0x69,0x67,0x43,0x06,0xf8,0x0c,0x62,0xdc, + 0xa4,0x21,0x30,0x4f,0x0f,0x21,0x2c,0xcb, + 0xcc,0x37,0xf1,0x1c,0xc3,0xf8,0x2f,0x19, + 0xdf,0x53,0x53,0x46,0xb1,0x15,0xea,0x00, + } + }, { 0xa500011, { 0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4, 0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1, @@ -378,6 +441,13 @@ static const struct patch_digest phashes[] = { 0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74, } }, + { 0xa500012, { + 0xeb,0x74,0x0d,0x47,0xa1,0x8e,0x09,0xe4, + 0x93,0x4c,0xad,0x03,0x32,0x4c,0x38,0x16, + 0x10,0x39,0xdd,0x06,0xaa,0xce,0xd6,0x0f, + 0x62,0x83,0x9d,0x8e,0x64,0x55,0xbe,0x63, + } + }, { 0xa601209, { 0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32, 0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30, @@ -385,6 +455,13 @@ static const struct patch_digest phashes[] = { 0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d, } }, + { 0xa60120a, { + 0x0c,0x8b,0x3d,0xfd,0x52,0x52,0x85,0x7d, + 0x20,0x3a,0xe1,0x7e,0xa4,0x21,0x3b,0x7b, + 0x17,0x86,0xae,0xac,0x13,0xb8,0x63,0x9d, + 0x06,0x01,0xd0,0xa0,0x51,0x9a,0x91,0x2c, + } + }, { 0xa704107, { 0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6, 0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93, @@ -392,6 +469,13 @@ static const struct patch_digest phashes[] = { 0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39, } }, + { 0xa704108, { + 0xd7,0x55,0x15,0x2b,0xfe,0xc4,0xbc,0x93, + 0xec,0x91,0xa0,0xae,0x45,0xb7,0xc3,0x98, + 0x4e,0xff,0x61,0x77,0x88,0xc2,0x70,0x49, + 0xe0,0x3a,0x1d,0x84,0x38,0x52,0xbf,0x5a, + } + }, { 0xa705206, { 0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4, 0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7, @@ -399,6 +483,13 @@ static const struct patch_digest phashes[] = { 0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc, } }, + { 0xa705208, { + 0x30,0x1d,0x55,0x24,0xbc,0x6b,0x5a,0x19, + 0x0c,0x7d,0x1d,0x74,0xaa,0xd1,0xeb,0xd2, + 0x16,0x62,0xf7,0x5b,0xe1,0x1f,0x18,0x11, + 0x5c,0xf0,0x94,0x90,0x26,0xec,0x69,0xff, + } + }, { 0xa708007, { 0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3, 0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2, @@ -406,6 +497,13 @@ static const struct patch_digest phashes[] = { 0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93, } }, + { 0xa708008, { + 0x08,0x6e,0xf0,0x22,0x4b,0x8e,0xc4,0x46, + 0x58,0x34,0xe6,0x47,0xa2,0x28,0xfd,0xab, + 0x22,0x3d,0xdd,0xd8,0x52,0x9e,0x1d,0x16, + 0xfa,0x01,0x68,0x14,0x79,0x3e,0xe8,0x6b, + } + }, { 0xa70c005, { 0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b, 0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f, @@ -413,6 +511,13 @@ static const struct patch_digest phashes[] = { 0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13, } }, + { 0xa70c008, { + 0x0f,0xdb,0x37,0xa1,0x10,0xaf,0xd4,0x21, + 0x94,0x0d,0xa4,0xa2,0xe9,0x86,0x6c,0x0e, + 0x85,0x7c,0x36,0x30,0xa3,0x3a,0x78,0x66, + 0x18,0x10,0x60,0x0d,0x78,0x3d,0x44,0xd0, + } + }, { 0xaa00116, { 0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63, 0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5, @@ -441,4 +546,11 @@ static const struct patch_digest phashes[] = { 0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef, } }, + { 0xaa00216, { + 0x79,0xfb,0x5b,0x9f,0xb6,0xe6,0xa8,0xf5, + 0x4e,0x7c,0x4f,0x8e,0x1d,0xad,0xd0,0x08, + 0xc2,0x43,0x7c,0x8b,0xe6,0xdb,0xd0,0xd2, + 0xe8,0x39,0x26,0xc1,0xe5,0x5a,0x48,0xf1, + } + }, }; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbf6d71bdf18..b4a1f6732a3a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -50,6 +50,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, + { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, + { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 704883c21f3a..a838be04f874 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -907,16 +907,24 @@ static __init bool prefer_mwait_c1_over_halt(void) */ static __cpuidle void mwait_idle(void) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (!current_set_polling_and_test()) { const void *addr = ¤t_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - __sti_mwait(0, 0); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + __sti_mwait(0, 0); + raw_local_irq_disable(); } + +out: __current_clr_polling(); } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b2d006756e02..f84bc0569c9c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1165,6 +1165,8 @@ void kvm_set_cpu_caps(void) */ SYNTHESIZED_F(LFENCE_RDTSC), /* SmmPgCfgLock */ + /* 4: Resv */ + SYNTHESIZED_F(VERW_CLEAR), F(NULL_SEL_CLR_BASE), /* UpperAddressIgnore */ F(AUTOIBRS), @@ -1179,6 +1181,11 @@ void kvm_set_cpu_caps(void) F(SRSO_USER_KERNEL_NO), ); + kvm_cpu_cap_init(CPUID_8000_0021_ECX, + SYNTHESIZED_F(TSA_SQ_NO), + SYNTHESIZED_F(TSA_L1_NO), + ); + kvm_cpu_cap_init(CPUID_8000_0022_EAX, F(PERFMON_V2), ); @@ -1748,8 +1755,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; case 0x80000021: - entry->ebx = entry->ecx = entry->edx = 0; + entry->ebx = entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0021_EAX); + cpuid_entry_override(entry, CPUID_8000_0021_ECX); break; /* AMD Extended Performance Monitoring and Debug */ case 0x80000022: { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 24f0318c50d7..ee27064dd72f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1979,6 +1979,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) goto out_flush_all; + if (is_noncanonical_invlpg_address(entries[i], vcpu)) + continue; + /* * Lower 12 bits of 'address' encode the number of additional * pages to flush. @@ -2001,11 +2004,11 @@ out_flush_all: static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + unsigned long *vcpu_mask = hv_vcpu->vcpu_mask; u64 *sparse_banks = hv_vcpu->sparse_banks; struct kvm *kvm = vcpu->kvm; struct hv_tlb_flush_ex flush_ex; struct hv_tlb_flush flush; - DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; /* * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE' diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index fde0ae986003..c53b92379e6e 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -52,6 +52,10 @@ /* CPUID level 0x80000022 (EAX) */ #define KVM_X86_FEATURE_PERFMON_V2 KVM_X86_FEATURE(CPUID_8000_0022_EAX, 0) +/* CPUID level 0x80000021 (ECX) */ +#define KVM_X86_FEATURE_TSA_SQ_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 1) +#define KVM_X86_FEATURE_TSA_L1_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 2) + struct cpuid_reg { u32 function; u32 index; @@ -82,6 +86,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, + [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, }; /* @@ -121,6 +126,8 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); + KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO); + KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO); default: return x86_feature; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 459c3b791fd4..b201f77fcd49 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1971,6 +1971,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) struct kvm_vcpu *src_vcpu; unsigned long i; + if (src->created_vcpus != atomic_read(&src->online_vcpus) || + dst->created_vcpus != atomic_read(&dst->online_vcpus)) + return -EBUSY; + if (!sev_es_guest(src)) return 0; @@ -4445,8 +4449,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) * the VMSA will be NULL if this vCPU is the destination for intrahost * migration, and will be copied later. */ - if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa) - svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + if (!svm->sev_es.snp_has_guest_vmsa) { + if (svm->sev_es.vmsa) + svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + else + svm->vmcb->control.vmsa_pa = INVALID_PAGE; + } if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES)) svm->vmcb->control.allowed_sev_features = sev->vmsa_features | diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 0c61153b275f..235c4af6b692 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -169,6 +169,9 @@ SYM_FUNC_START(__svm_vcpu_run) #endif mov VCPU_RDI(%_ASM_DI), %_ASM_DI + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 3: vmrun %_ASM_AX 4: @@ -335,6 +338,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov SVM_current_vmcb(%rdi), %rax mov KVM_VMCB_pa(%rax), %rax + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 1: vmrun %rax 2: diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 1ad20c273f3b..ec79aacc446f 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -173,6 +173,8 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i tdx_clear_unsupported_cpuid(entry); } +#define TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT BIT(1) + static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, struct kvm_tdx_capabilities *caps) { @@ -188,6 +190,9 @@ static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, caps->cpuid.nent = td_conf->num_cpuid_config; + caps->user_tdvmcallinfo_1_r11 = + TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT; + for (i = 0; i < td_conf->num_cpuid_config; i++) td_init_cpuid_entry2(&caps->cpuid.entries[i], i); @@ -1530,6 +1535,27 @@ static int tdx_get_quote(struct kvm_vcpu *vcpu) return 0; } +static int tdx_setup_event_notify_interrupt(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + u64 vector = tdx->vp_enter_args.r12; + + if (vector < 32 || vector > 255) { + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT; + vcpu->run->tdx.setup_event_notify.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; + vcpu->run->tdx.setup_event_notify.vector = vector; + + vcpu->arch.complete_userspace_io = tdx_complete_simple; + + return 0; +} + static int handle_tdvmcall(struct kvm_vcpu *vcpu) { switch (tdvmcall_leaf(vcpu)) { @@ -1541,6 +1567,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) return tdx_get_td_vm_call_info(vcpu); case TDVMCALL_GET_QUOTE: return tdx_get_quote(vcpu); + case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: + return tdx_setup_event_notify_interrupt(vcpu); default: break; } @@ -2241,25 +2269,26 @@ static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd) const struct tdx_sys_info_td_conf *td_conf = &tdx_sysinfo->td_conf; struct kvm_tdx_capabilities __user *user_caps; struct kvm_tdx_capabilities *caps = NULL; + u32 nr_user_entries; int ret = 0; /* flags is reserved for future use */ if (cmd->flags) return -EINVAL; - caps = kmalloc(sizeof(*caps) + + caps = kzalloc(sizeof(*caps) + sizeof(struct kvm_cpuid_entry2) * td_conf->num_cpuid_config, GFP_KERNEL); if (!caps) return -ENOMEM; user_caps = u64_to_user_ptr(cmd->data); - if (copy_from_user(caps, user_caps, sizeof(*caps))) { + if (get_user(nr_user_entries, &user_caps->cpuid.nent)) { ret = -EFAULT; goto out; } - if (caps->cpuid.nent < td_conf->num_cpuid_config) { + if (nr_user_entries < td_conf->num_cpuid_config) { ret = -E2BIG; goto out; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4953846cb30d..191a9ed0da22 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7291,7 +7291,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&cpu_buf_vm_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) - mds_clear_cpu_buffers(); + x86_clear_cpu_buffers(); vmx_disable_fb_clear(vmx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a9d992d5652f..93636f77c42d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3258,9 +3258,11 @@ int kvm_guest_time_update(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ - if (kvm_caps.has_tsc_control) + if (kvm_caps.has_tsc_control) { tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz, v->arch.l1_tsc_scaling_ratio); + tgt_tsc_khz = tgt_tsc_khz ? : 1; + } if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL, @@ -6186,6 +6188,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, u32 user_tsc_khz; r = -EINVAL; + + if (vcpu->arch.guest_tsc_protected) + goto out; + user_tsc_khz = (u32)arg; if (kvm_caps.has_tsc_control && diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 9b029bb29a16..d6b2a665b499 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1526,7 +1526,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports, sched_poll.nr_ports * sizeof(*ports), &e)) { *r = -EFAULT; - return true; + goto out; } for (i = 0; i < sched_poll.nr_ports; i++) { @@ -1971,8 +1971,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm, { struct kvm_vcpu *vcpu; - if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm)) - return -EINVAL; + /* + * Don't check for the port being within range of max_evtchn_port(). + * Userspace can configure what ever targets it likes; events just won't + * be delivered if/while the target is invalid, just like userspace can + * configure MSIs which target non-existent APICs. + * + * This allow on Live Migration and Live Update, the IRQ routing table + * can be restored *independently* of other things like creating vCPUs, + * without imposing an ordering dependency on userspace. In this + * particular case, the problematic ordering would be with setting the + * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096 + * instead of 1024 event channels. + */ /* We only support 2 level event channels for now */ if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index cc5dba738389..13fe45dea296 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -3,7 +3,6 @@ generated-y += syscall_table.h generic-y += extable.h generic-y += kvm_para.h generic-y += mcs_spinlock.h -generic-y += param.h generic-y += parport.h generic-y += qrwlock.h generic-y += qspinlock.h diff --git a/arch/xtensa/include/uapi/asm/param.h b/arch/xtensa/include/uapi/asm/param.h deleted file mode 100644 index e6feb4ee0590..000000000000 --- a/arch/xtensa/include/uapi/asm/param.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * include/asm-xtensa/param.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - */ - -#ifndef _UAPI_XTENSA_PARAM_H -#define _UAPI_XTENSA_PARAM_H - -#ifndef __KERNEL__ -# define HZ 100 -#endif - -#define EXEC_PAGESIZE 4096 - -#ifndef NGROUPS -#define NGROUPS 32 -#endif - -#ifndef NOGROUP -#define NOGROUP (-1) -#endif - -#define MAXHOSTNAMELEN 64 /* max length of hostname */ - -#endif /* _UAPI_XTENSA_PARAM_H */ diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index b2b9b89d6967..c611444480b3 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -960,4 +960,5 @@ void blk_unregister_queue(struct gendisk *disk) elevator_set_none(q); blk_debugfs_remove(disk); + kobject_put(&disk->queue_kobj); } diff --git a/block/elevator.c b/block/elevator.c index ab22542e6cf0..a960bdc869bc 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -719,7 +719,8 @@ void elevator_set_default(struct request_queue *q) .name = "mq-deadline", .no_uevent = true, }; - int err = 0; + int err; + struct elevator_type *e; /* now we allow to switch elevator */ blk_queue_flag_clear(QUEUE_FLAG_NO_ELV_SWITCH, q); @@ -732,12 +733,18 @@ void elevator_set_default(struct request_queue *q) * have multiple queues or mq-deadline is not available, default * to "none". */ - if (elevator_find_get(ctx.name) && (q->nr_hw_queues == 1 || - blk_mq_is_shared_tags(q->tag_set->flags))) + e = elevator_find_get(ctx.name); + if (!e) + return; + + if ((q->nr_hw_queues == 1 || + blk_mq_is_shared_tags(q->tag_set->flags))) { err = elevator_change(q, &ctx); - if (err < 0) - pr_warn("\"%s\" elevator initialization, failed %d, " - "falling back to \"none\"\n", ctx.name, err); + if (err < 0) + pr_warn("\"%s\" elevator initialization, failed %d, falling back to \"none\"\n", + ctx.name, err); + } + elevator_put(e); } void elevator_set_none(struct request_queue *q) diff --git a/drivers/acpi/riscv/cppc.c b/drivers/acpi/riscv/cppc.c index 4cdff387deff..440cf9fb91aa 100644 --- a/drivers/acpi/riscv/cppc.c +++ b/drivers/acpi/riscv/cppc.c @@ -37,10 +37,8 @@ static int __init sbi_cppc_init(void) { if (sbi_spec_version >= sbi_mk_version(2, 0) && sbi_probe_extension(SBI_EXT_CPPC) > 0) { - pr_info("SBI CPPC extension detected\n"); cppc_ext_present = true; } else { - pr_info("SBI CPPC extension NOT detected!!\n"); cppc_ext_present = false; } diff --git a/drivers/android/binder.c b/drivers/android/binder.c index c463ca4a8fff..1ba2192ae667 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5384,10 +5384,9 @@ static int binder_ioctl_write_read(struct file *filp, unsigned long arg, void __user *ubuf = (void __user *)arg; struct binder_write_read bwr; - if (copy_from_user(&bwr, ubuf, sizeof(bwr))) { - ret = -EFAULT; - goto out; - } + if (copy_from_user(&bwr, ubuf, sizeof(bwr))) + return -EFAULT; + binder_debug(BINDER_DEBUG_READ_WRITE, "%d:%d write %lld at %016llx, read %lld at %016llx\n", proc->pid, thread->pid, @@ -5402,8 +5401,6 @@ static int binder_ioctl_write_read(struct file *filp, unsigned long arg, trace_binder_write_done(ret); if (ret < 0) { bwr.read_consumed = 0; - if (copy_to_user(ubuf, &bwr, sizeof(bwr))) - ret = -EFAULT; goto out; } } @@ -5417,22 +5414,17 @@ static int binder_ioctl_write_read(struct file *filp, unsigned long arg, if (!binder_worklist_empty_ilocked(&proc->todo)) binder_wakeup_proc_ilocked(proc); binder_inner_proc_unlock(proc); - if (ret < 0) { - if (copy_to_user(ubuf, &bwr, sizeof(bwr))) - ret = -EFAULT; + if (ret < 0) goto out; - } } binder_debug(BINDER_DEBUG_READ_WRITE, "%d:%d wrote %lld of %lld, read return %lld of %lld\n", proc->pid, thread->pid, (u64)bwr.write_consumed, (u64)bwr.write_size, (u64)bwr.read_consumed, (u64)bwr.read_size); - if (copy_to_user(ubuf, &bwr, sizeof(bwr))) { - ret = -EFAULT; - goto out; - } out: + if (copy_to_user(ubuf, &bwr, sizeof(bwr))) + ret = -EFAULT; return ret; } @@ -6128,7 +6120,7 @@ static int binder_release(struct inode *nodp, struct file *filp) debugfs_remove(proc->debugfs_entry); if (proc->binderfs_entry) { - binderfs_remove_file(proc->binderfs_entry); + simple_recursive_removal(proc->binderfs_entry, NULL); proc->binderfs_entry = NULL; } diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 1ba5caf1d88d..a5fd23dcafcd 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -81,7 +81,6 @@ extern bool is_binderfs_device(const struct inode *inode); extern struct dentry *binderfs_create_file(struct dentry *dir, const char *name, const struct file_operations *fops, void *data); -extern void binderfs_remove_file(struct dentry *dentry); #else static inline bool is_binderfs_device(const struct inode *inode) { @@ -94,7 +93,6 @@ static inline struct dentry *binderfs_create_file(struct dentry *dir, { return NULL; } -static inline void binderfs_remove_file(struct dentry *dentry) {} #endif #ifdef CONFIG_ANDROID_BINDERFS diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 024275dbfdd8..acc946bb1457 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -500,21 +500,6 @@ static struct dentry *binderfs_create_dentry(struct dentry *parent, return dentry; } -void binderfs_remove_file(struct dentry *dentry) -{ - struct inode *parent_inode; - - parent_inode = d_inode(dentry->d_parent); - inode_lock(parent_inode); - if (simple_positive(dentry)) { - dget(dentry); - simple_unlink(parent_inode, dentry); - d_delete(dentry); - dput(dentry); - } - inode_unlock(parent_inode); -} - struct dentry *binderfs_create_file(struct dentry *parent, const char *name, const struct file_operations *fops, void *data) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 7779ab0ca7ce..efc575a00edd 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -602,6 +602,7 @@ CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); CPU_SHOW_VULN_FALLBACK(ghostwrite); CPU_SHOW_VULN_FALLBACK(old_microcode); CPU_SHOW_VULN_FALLBACK(indirect_target_selection); +CPU_SHOW_VULN_FALLBACK(tsa); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -620,6 +621,7 @@ static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL); static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); +static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -639,6 +641,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_ghostwrite.attr, &dev_attr_old_microcode.attr, &dev_attr_indirect_target_selection.attr, + &dev_attr_tsa.attr, NULL }; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index bf77d28e959f..7a50af416cac 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1236,8 +1236,8 @@ void dpm_complete(pm_message_t state) */ void dpm_resume_end(pm_message_t state) { - pm_restore_gfp_mask(); dpm_resume(state); + pm_restore_gfp_mask(); dpm_complete(state); } EXPORT_SYMBOL_GPL(dpm_resume_end); @@ -1280,6 +1280,22 @@ static void dpm_async_suspend_parent(struct device *dev, async_func_t func) dpm_async_with_cleanup(dev->parent, func); } +static void dpm_async_suspend_complete_all(struct list_head *device_list) +{ + struct device *dev; + + guard(mutex)(&async_wip_mtx); + + list_for_each_entry_reverse(dev, device_list, power.entry) { + /* + * In case the device is being waited for and async processing + * has not started for it yet, let the waiters make progress. + */ + if (!dev->power.work_in_progress) + complete_all(&dev->power.completion); + } +} + /** * resume_event - Return a "resume" message for given "suspend" sleep state. * @sleep_state: PM message representing a sleep state. @@ -1456,6 +1472,7 @@ static int dpm_noirq_suspend_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); if (error || async_error) { + dpm_async_suspend_complete_all(&dpm_late_early_list); /* * Move all devices to the target list to resume them * properly. @@ -1658,6 +1675,7 @@ int dpm_suspend_late(pm_message_t state) mutex_lock(&dpm_list_mtx); if (error || async_error) { + dpm_async_suspend_complete_all(&dpm_suspended_list); /* * Move all devices to the target list to resume them * properly. @@ -1951,6 +1969,7 @@ int dpm_suspend(pm_message_t state) mutex_lock(&dpm_list_mtx); if (error || async_error) { + dpm_async_suspend_complete_all(&dpm_prepared_list); /* * Move all devices to the target list to resume them * properly. diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index f2843f814675..1f3f782a04ba 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1173,6 +1173,8 @@ err_name: err_map: kfree(map); err: + if (bus && bus->free_on_exit) + kfree(bus); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(__regmap_init); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 500840e4a74e..8d994cae3b83 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -308,14 +308,13 @@ end_io: static void lo_rw_aio_do_completion(struct loop_cmd *cmd) { struct request *rq = blk_mq_rq_from_pdu(cmd); - struct loop_device *lo = rq->q->queuedata; if (!atomic_dec_and_test(&cmd->ref)) return; kfree(cmd->bvec); cmd->bvec = NULL; if (req_op(rq) == REQ_OP_WRITE) - file_end_write(lo->lo_backing_file); + kiocb_end_write(&cmd->iocb); if (likely(!blk_should_fake_timeout(rq->q))) blk_mq_complete_request(rq); } @@ -391,7 +390,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, } if (rw == ITER_SOURCE) { - file_start_write(lo->lo_backing_file); + kiocb_start_write(&cmd->iocb); ret = file->f_op->write_iter(&cmd->iocb, &iter); } else ret = file->f_op->read_iter(&cmd->iocb, &iter); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7bdc7eb808ea..2592bd19ebc1 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2198,9 +2198,7 @@ again: goto out; } } - ret = nbd_start_device(nbd); - if (ret) - goto out; + if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) { nbd->backend = nla_strdup(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER], GFP_KERNEL); @@ -2216,6 +2214,8 @@ again: goto out; } set_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags); + + ret = nbd_start_device(nbd); out: mutex_unlock(&nbd->config_lock); if (!ret) { diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index 0d6ad50da046..8df310983bf6 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -670,7 +670,7 @@ static int bfusb_probe(struct usb_interface *intf, const struct usb_device_id *i hdev->flush = bfusb_flush; hdev->send = bfusb_send_frame; - set_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_COMMANDS); if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index 1fa58c059cbf..8b43dfc755de 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -398,7 +398,7 @@ static int bpa10x_probe(struct usb_interface *intf, hdev->send = bpa10x_send_frame; hdev->set_diag = bpa10x_set_diag; - set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); err = hci_register_dev(hdev); if (err < 0) { diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 0a60660fc8ce..3a3a56ddbb06 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -135,7 +135,7 @@ int btbcm_check_bdaddr(struct hci_dev *hdev) if (btbcm_set_bdaddr_from_efi(hdev) != 0) { bt_dev_info(hdev, "BCM: Using default device address (%pMR)", &bda->bdaddr); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR); } } @@ -467,7 +467,7 @@ static int btbcm_print_controller_features(struct hci_dev *hdev) /* Read DMI and disable broken Read LE Min/Max Tx Power */ if (dmi_first_match(disable_broken_read_transmit_power)) - set_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER); return 0; } @@ -706,7 +706,7 @@ int btbcm_finalize(struct hci_dev *hdev, bool *fw_load_done, bool use_autobaud_m btbcm_check_bdaddr(hdev); - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER); return 0; } @@ -769,7 +769,7 @@ int btbcm_setup_apple(struct hci_dev *hdev) kfree_skb(skb); } - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER); return 0; } diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 55cc1652bfe4..06016ac3965c 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -88,7 +88,7 @@ int btintel_check_bdaddr(struct hci_dev *hdev) if (!bacmp(&bda->bdaddr, BDADDR_INTEL)) { bt_dev_err(hdev, "Found Intel default device address (%pMR)", &bda->bdaddr); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR); } kfree_skb(skb); @@ -2027,7 +2027,7 @@ static int btintel_download_fw(struct hci_dev *hdev, */ if (!bacmp(¶ms->otp_bdaddr, BDADDR_ANY)) { bt_dev_info(hdev, "No device address configured"); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR); } download: @@ -2295,7 +2295,7 @@ static int btintel_prepare_fw_download_tlv(struct hci_dev *hdev, */ if (!bacmp(&ver->otp_bd_addr, BDADDR_ANY)) { bt_dev_info(hdev, "No device address configured"); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR); } } @@ -2670,7 +2670,7 @@ static u8 btintel_classify_pkt_type(struct hci_dev *hdev, struct sk_buff *skb) * Distinguish ISO data packets form ACL data packets * based on their connection handle value range. */ - if (hci_skb_pkt_type(skb) == HCI_ACLDATA_PKT) { + if (iso_capable(hdev) && hci_skb_pkt_type(skb) == HCI_ACLDATA_PKT) { __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle); if (hci_handle(handle) >= BTINTEL_ISODATA_HANDLE_BASE) @@ -3435,9 +3435,9 @@ static int btintel_setup_combined(struct hci_dev *hdev) } /* Apply the common HCI quirks for Intel device */ - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG); /* Set up the quality report callback for Intel devices */ hdev->set_quality_report = btintel_set_quality_report; @@ -3475,8 +3475,8 @@ static int btintel_setup_combined(struct hci_dev *hdev) */ if (!btintel_test_flag(hdev, INTEL_ROM_LEGACY_NO_WBS_SUPPORT)) - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - &hdev->quirks); + hci_set_quirk(hdev, + HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); err = btintel_legacy_rom_setup(hdev, &ver); break; @@ -3491,11 +3491,11 @@ static int btintel_setup_combined(struct hci_dev *hdev) * * All Legacy bootloader devices support WBS */ - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - &hdev->quirks); + hci_set_quirk(hdev, + HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); /* These variants don't seem to support LE Coded PHY */ - set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_CODED); /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, ver.hw_variant); @@ -3571,10 +3571,10 @@ static int btintel_setup_combined(struct hci_dev *hdev) * * All Legacy bootloader devices support WBS */ - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); /* These variants don't seem to support LE Coded PHY */ - set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_CODED); /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, ver.hw_variant); @@ -3600,7 +3600,7 @@ static int btintel_setup_combined(struct hci_dev *hdev) * * All TLV based devices support WBS */ - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index e1c688dd2d45..f4e3fb54fe76 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -2081,9 +2081,9 @@ static int btintel_pcie_setup_internal(struct hci_dev *hdev) } /* Apply the common HCI quirks for Intel device */ - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG); /* Set up the quality report callback for Intel devices */ hdev->set_quality_report = btintel_set_quality_report; @@ -2123,7 +2123,7 @@ static int btintel_pcie_setup_internal(struct hci_dev *hdev) * * All TLV based devices support WBS */ - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index c16a3518b8ff..4fc673640bfc 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1141,7 +1141,7 @@ static int btmtksdio_setup(struct hci_dev *hdev) } /* Enable WBS with mSBC codec */ - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); /* Enable GPIO reset mechanism */ if (bdev->reset) { @@ -1384,7 +1384,7 @@ static int btmtksdio_probe(struct sdio_func *func, SET_HCIDEV_DEV(hdev, &func->dev); hdev->manufacturer = 70; - set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP); sdio_set_drvdata(func, bdev); diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index c97e260fcb0c..51400a891f6e 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -872,7 +872,7 @@ static int btmtkuart_probe(struct serdev_device *serdev) SET_HCIDEV_DEV(hdev, &serdev->dev); hdev->manufacturer = 70; - set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP); if (btmtkuart_is_standalone(bdev)) { err = clk_prepare_enable(bdev->osc); diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 1088db6056a4..24f9b52605a1 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -1807,7 +1807,7 @@ static int nxp_serdev_probe(struct serdev_device *serdev) "local-bd-address", (u8 *)&ba, sizeof(ba)); if (bacmp(&ba, BDADDR_ANY)) - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY); if (hci_register_dev(hdev) < 0) { dev_err(&serdev->dev, "Can't register HCI device\n"); diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index edefb9dc76aa..7c958d6065be 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -739,7 +739,7 @@ static int qca_check_bdaddr(struct hci_dev *hdev, const struct qca_fw_config *co bda = (struct hci_rp_read_bd_addr *)skb->data; if (!bacmp(&bda->bdaddr, &config->bdaddr)) - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY); kfree_skb(skb); diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c index c0eb71d6ffd3..d2e13fcb6bab 100644 --- a/drivers/bluetooth/btqcomsmd.c +++ b/drivers/bluetooth/btqcomsmd.c @@ -117,7 +117,7 @@ static int btqcomsmd_setup(struct hci_dev *hdev) /* Devices do not have persistent storage for BD address. Retrieve * it from the firmware node property. */ - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY); return 0; } diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 7838c89e529e..4d182cf6e037 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -1287,7 +1287,7 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) /* Enable controller to do both LE scan and BR/EDR inquiry * simultaneously. */ - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); /* Enable central-peripheral role (able to create new connections with * an existing connection in slave role). @@ -1301,7 +1301,7 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) case CHIP_ID_8851B: case CHIP_ID_8922A: case CHIP_ID_8852BT: - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); /* RTL8852C needs to transmit mSBC data continuously without * the zero length of USB packets for the ALT 6 supported chips @@ -1312,7 +1312,8 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) if (btrtl_dev->project_id == CHIP_ID_8852A || btrtl_dev->project_id == CHIP_ID_8852B || btrtl_dev->project_id == CHIP_ID_8852C) - set_bit(HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER, &hdev->quirks); + hci_set_quirk(hdev, + HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER); hci_set_aosp_capable(hdev); break; @@ -1331,8 +1332,7 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) * but it doesn't support any features from page 2 - * it either responds with garbage or with error status */ - set_bit(HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2, - &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2); break; default: break; diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index a69feb08486a..8325655ce6aa 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -327,7 +327,7 @@ static int btsdio_probe(struct sdio_func *func, hdev->send = btsdio_send_frame; if (func->vendor == 0x0104 && func->device == 0x00c5) - set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); err = hci_register_dev(hdev); if (err < 0) { diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 9ab661d2d1e6..f9eeec0aed57 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2472,18 +2472,18 @@ static int btusb_setup_csr(struct hci_dev *hdev) * Probably will need to be expanded in the future; * without these the controller will lock up. */ - set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks); - set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_READ_VOICE_SETTING, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL); + hci_set_quirk(hdev, HCI_QUIRK_NO_SUSPEND_NOTIFIER); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_VOICE_SETTING); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE); /* Clear the reset quirk since this is not an actual * early Bluetooth 1.1 device from CSR. */ - clear_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); - clear_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_clear_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); + hci_clear_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); /* * Special workaround for these BT 4.0 chip clones, and potentially more: @@ -3192,6 +3192,32 @@ static const struct qca_device_info qca_devices_table[] = { { 0x00190200, 40, 4, 16 }, /* WCN785x 2.0 */ }; +static u16 qca_extract_board_id(const struct qca_version *ver) +{ + u16 flag = le16_to_cpu(ver->flag); + u16 board_id = 0; + + if (((flag >> 8) & 0xff) == QCA_FLAG_MULTI_NVM) { + /* The board_id should be split into two bytes + * The 1st byte is chip ID, and the 2nd byte is platform ID + * For example, board ID 0x010A, 0x01 is platform ID. 0x0A is chip ID + * we have several platforms, and platform IDs are continuously added + * Platform ID: + * 0x00 is for Mobile + * 0x01 is for X86 + * 0x02 is for Automotive + * 0x03 is for Consumer electronic + */ + board_id = (ver->chip_id << 8) + ver->platform_id; + } + + /* Take 0xffff as invalid board ID */ + if (board_id == 0xffff) + board_id = 0; + + return board_id; +} + static int btusb_qca_send_vendor_req(struct usb_device *udev, u8 request, void *data, u16 size) { @@ -3348,44 +3374,28 @@ static void btusb_generate_qca_nvm_name(char *fwname, size_t max_size, const struct qca_version *ver) { u32 rom_version = le32_to_cpu(ver->rom_version); - u16 flag = le16_to_cpu(ver->flag); + const char *variant; + int len; + u16 board_id; - if (((flag >> 8) & 0xff) == QCA_FLAG_MULTI_NVM) { - /* The board_id should be split into two bytes - * The 1st byte is chip ID, and the 2nd byte is platform ID - * For example, board ID 0x010A, 0x01 is platform ID. 0x0A is chip ID - * we have several platforms, and platform IDs are continuously added - * Platform ID: - * 0x00 is for Mobile - * 0x01 is for X86 - * 0x02 is for Automotive - * 0x03 is for Consumer electronic - */ - u16 board_id = (ver->chip_id << 8) + ver->platform_id; - const char *variant; + board_id = qca_extract_board_id(ver); - switch (le32_to_cpu(ver->ram_version)) { - case WCN6855_2_0_RAM_VERSION_GF: - case WCN6855_2_1_RAM_VERSION_GF: - variant = "_gf"; - break; - default: - variant = ""; - break; - } - - if (board_id == 0) { - snprintf(fwname, max_size, "qca/nvm_usb_%08x%s.bin", - rom_version, variant); - } else { - snprintf(fwname, max_size, "qca/nvm_usb_%08x%s_%04x.bin", - rom_version, variant, board_id); - } - } else { - snprintf(fwname, max_size, "qca/nvm_usb_%08x.bin", - rom_version); + switch (le32_to_cpu(ver->ram_version)) { + case WCN6855_2_0_RAM_VERSION_GF: + case WCN6855_2_1_RAM_VERSION_GF: + variant = "_gf"; + break; + default: + variant = NULL; + break; } + len = snprintf(fwname, max_size, "qca/nvm_usb_%08x", rom_version); + if (variant) + len += snprintf(fwname + len, max_size - len, "%s", variant); + if (board_id) + len += snprintf(fwname + len, max_size - len, "_%04x", board_id); + len += snprintf(fwname + len, max_size - len, ".bin"); } static int btusb_setup_qca_load_nvm(struct hci_dev *hdev, @@ -3494,7 +3504,7 @@ static int btusb_setup_qca(struct hci_dev *hdev) /* Mark HCI_OP_ENHANCED_SETUP_SYNC_CONN as broken as it doesn't seem to * work with the likes of HSP/HFP mSBC. */ - set_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN); return 0; } @@ -4008,10 +4018,10 @@ static int btusb_probe(struct usb_interface *intf, } #endif if (id->driver_info & BTUSB_CW6622) - set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY); if (id->driver_info & BTUSB_BCM2045) - set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY); if (id->driver_info & BTUSB_BCM92035) hdev->setup = btusb_setup_bcm92035; @@ -4068,8 +4078,8 @@ static int btusb_probe(struct usb_interface *intf, hdev->reset = btmtk_reset_sync; hdev->set_bdaddr = btmtk_set_bdaddr; hdev->send = btusb_send_frame_mtk; - set_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &hdev->quirks); - set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP); data->recv_acl = btmtk_usb_recv_acl; data->suspend = btmtk_usb_suspend; data->resume = btmtk_usb_resume; @@ -4077,20 +4087,20 @@ static int btusb_probe(struct usb_interface *intf, } if (id->driver_info & BTUSB_SWAVE) { - set_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_FIXUP_INQUIRY_MODE); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_COMMANDS); } if (id->driver_info & BTUSB_INTEL_BOOT) { hdev->manufacturer = 2; - set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE); } if (id->driver_info & BTUSB_ATH3012) { data->setup_on_usb = btusb_setup_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); + hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER); } if (id->driver_info & BTUSB_QCA_ROME) { @@ -4098,7 +4108,7 @@ static int btusb_probe(struct usb_interface *intf, hdev->shutdown = btusb_shutdown_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; hdev->reset = btusb_qca_reset; - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); btusb_check_needs_reset_resume(intf); } @@ -4112,7 +4122,7 @@ static int btusb_probe(struct usb_interface *intf, hdev->shutdown = btusb_shutdown_qca; hdev->set_bdaddr = btusb_set_bdaddr_wcn6855; hdev->reset = btusb_qca_reset; - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); hci_set_msft_opcode(hdev, 0xFD70); } @@ -4140,35 +4150,35 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_ACTIONS_SEMI) { /* Support is advertised, but not implemented */ - set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_EXT_SCAN); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_EXT_CREATE_CONN); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT); } if (!reset) - set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); if (force_scofix || id->driver_info & BTUSB_WRONG_SCO_MTU) { if (!disable_scofix) - set_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_FIXUP_BUFFER_SIZE); } if (id->driver_info & BTUSB_BROKEN_ISOC) data->isoc = NULL; if (id->driver_info & BTUSB_WIDEBAND_SPEECH) - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); if (id->driver_info & BTUSB_INVALID_LE_STATES) - set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_STATES); if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; - set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); } if (id->driver_info & BTUSB_CSR) { @@ -4177,10 +4187,10 @@ static int btusb_probe(struct usb_interface *intf, /* Old firmware would otherwise execute USB reset */ if (bcdDevice < 0x117) - set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); /* This must be set first in case we disable it for fakes */ - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); /* Fake CSR devices with broken commands */ if (le16_to_cpu(udev->descriptor.idVendor) == 0x0a12 && @@ -4193,7 +4203,7 @@ static int btusb_probe(struct usb_interface *intf, /* New sniffer firmware has crippled HCI interface */ if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x997) - set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE); } if (id->driver_info & BTUSB_INTEL_BOOT) { diff --git a/drivers/bluetooth/hci_aml.c b/drivers/bluetooth/hci_aml.c index 1394c575aa6d..707e90f80130 100644 --- a/drivers/bluetooth/hci_aml.c +++ b/drivers/bluetooth/hci_aml.c @@ -424,7 +424,7 @@ static int aml_check_bdaddr(struct hci_dev *hdev) if (!bacmp(&paddr->bdaddr, AML_BDADDR_DEFAULT)) { bt_dev_info(hdev, "amlbt using default bdaddr (%pM)", &paddr->bdaddr); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR); } exit: diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 9684eb16059b..f96617b85d87 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -643,8 +643,8 @@ static int bcm_setup(struct hci_uart *hu) * Allow the bootloader to set a valid address through the * device tree. */ - if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks)) - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hu->hdev->quirks); + if (hci_test_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR)) + hci_set_quirk(hu->hdev, HCI_QUIRK_USE_BDADDR_PROPERTY); if (!bcm_request_irq(bcm)) err = bcm_setup_sleep(hu); diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c index 9bce53e49cfa..8a9aa33776b0 100644 --- a/drivers/bluetooth/hci_bcm4377.c +++ b/drivers/bluetooth/hci_bcm4377.c @@ -1435,7 +1435,7 @@ static int bcm4377_check_bdaddr(struct bcm4377_data *bcm4377) bda = (struct hci_rp_read_bd_addr *)skb->data; if (!bcm4377_is_valid_bdaddr(bcm4377, &bda->bdaddr)) - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &bcm4377->hdev->quirks); + hci_set_quirk(bcm4377->hdev, HCI_QUIRK_USE_BDADDR_PROPERTY); kfree_skb(skb); return 0; @@ -2389,13 +2389,13 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id) hdev->setup = bcm4377_hci_setup; if (bcm4377->hw->broken_mws_transport_config) - set_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG); if (bcm4377->hw->broken_ext_scan) - set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_EXT_SCAN); if (bcm4377->hw->broken_le_coded) - set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_CODED); if (bcm4377->hw->broken_le_ext_adv_report_phy) - set_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY); pci_set_drvdata(pdev, bcm4377); hci_set_drvdata(hdev, bcm4377); diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 811f33701f84..d22fbb7f9fc5 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -660,7 +660,7 @@ static int intel_setup(struct hci_uart *hu) */ if (!bacmp(¶ms.otp_bdaddr, BDADDR_ANY)) { bt_dev_info(hdev, "No device address configured"); - set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_INVALID_BDADDR); } /* With this Intel bootloader only the hardware variant and device diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index acba83156de9..d0adae3267b4 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -667,13 +667,13 @@ static int hci_uart_register_dev(struct hci_uart *hu) SET_HCIDEV_DEV(hdev, hu->tty->dev); if (test_bit(HCI_UART_RAW_DEVICE, &hu->hdev_flags)) - set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE); if (test_bit(HCI_UART_EXT_CONFIG, &hu->hdev_flags)) - set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG); if (!test_bit(HCI_UART_RESET_ON_INIT, &hu->hdev_flags)) - set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE); /* Only call open() for the protocol after hdev is fully initialized as * open() (or a timer/workqueue it starts) may attempt to reference it. diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index e19e9bd49555..7044c86325ce 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -649,11 +649,11 @@ static int ll_setup(struct hci_uart *hu) /* This means that there was an error getting the BD address * during probe, so mark the device as having a bad address. */ - set_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks); + hci_set_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR); } else if (bacmp(&lldev->bdaddr, BDADDR_ANY)) { err = ll_set_bdaddr(hu->hdev, &lldev->bdaddr); if (err) - set_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks); + hci_set_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR); } /* Operational speed if any */ diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c index 9fc10a16fd96..cd7575c20f65 100644 --- a/drivers/bluetooth/hci_nokia.c +++ b/drivers/bluetooth/hci_nokia.c @@ -439,7 +439,7 @@ static int nokia_setup(struct hci_uart *hu) if (btdev->man_id == NOKIA_ID_BCM2048) { hu->hdev->set_bdaddr = btbcm_set_bdaddr; - set_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks); + hci_set_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR); dev_dbg(dev, "bcm2048 has invalid bluetooth address!"); } diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 3ec0be496820..33c43503714b 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1892,7 +1892,7 @@ static int qca_setup(struct hci_uart *hu) /* Enable controller to do both LE scan and BR/EDR inquiry * simultaneously. */ - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); switch (soc_type) { case QCA_QCA2066: @@ -1944,7 +1944,7 @@ retry: case QCA_WCN7850: qcadev = serdev_device_get_drvdata(hu->serdev); if (qcadev->bdaddr_property_broken) - set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BDADDR_PROPERTY_BROKEN); hci_set_aosp_capable(hdev); @@ -2487,7 +2487,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) hdev = qcadev->serdev_hu.hdev; if (power_ctrl_enabled) { - set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP); hdev->shutdown = qca_power_off; } @@ -2496,11 +2496,11 @@ static int qca_serdev_probe(struct serdev_device *serdev) * be queried via hci. Same with the valid le states quirk. */ if (data->capabilities & QCA_CAP_WIDEBAND_SPEECH) - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - &hdev->quirks); + hci_set_quirk(hdev, + HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); if (!(data->capabilities & QCA_CAP_VALID_LE_STATES)) - set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_BROKEN_LE_STATES); } return 0; @@ -2550,7 +2550,7 @@ static void qca_serdev_shutdown(struct device *dev) * invoked and the SOC is already in the initial state, so * don't also need to send the VSC. */ - if (test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks) || + if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP) || hci_dev_test_flag(hdev, HCI_SETUP)) return; diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 89a22e9b3253..593d9cefbbf9 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -152,7 +152,7 @@ static int hci_uart_close(struct hci_dev *hdev) * BT SOC is completely powered OFF during BT OFF, holding port * open may drain the battery. */ - if (test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) { + if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP)) { clear_bit(HCI_UART_PROTO_READY, &hu->flags); serdev_device_close(hu->serdev); } @@ -358,13 +358,13 @@ int hci_uart_register_device_priv(struct hci_uart *hu, SET_HCIDEV_DEV(hdev, &hu->serdev->dev); if (test_bit(HCI_UART_NO_SUSPEND_NOTIFIER, &hu->flags)) - set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_NO_SUSPEND_NOTIFIER); if (test_bit(HCI_UART_RAW_DEVICE, &hu->hdev_flags)) - set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE); if (test_bit(HCI_UART_EXT_CONFIG, &hu->hdev_flags)) - set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG); if (test_bit(HCI_UART_INIT_PENDING, &hu->hdev_flags)) return 0; diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 59f4d7bdffdc..f7d8c3c00655 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -415,16 +415,16 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) hdev->get_codec_config_data = vhci_get_codec_config_data; hdev->wakeup = vhci_wakeup; hdev->setup = vhci_setup; - set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); - set_bit(HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP); + hci_set_quirk(hdev, HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED); /* bit 6 is for external configuration */ if (opcode & 0x40) - set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG); /* bit 7 is for raw device */ if (opcode & 0x80) - set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_RAW_DEVICE); if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 756f292df9e8..6f1a37e85c6a 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -327,17 +327,17 @@ static int virtbt_probe(struct virtio_device *vdev) hdev->setup = virtbt_setup_intel; hdev->shutdown = virtbt_shutdown_generic; hdev->set_bdaddr = virtbt_set_bdaddr_intel; - set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); break; case VIRTIO_BT_CONFIG_VENDOR_REALTEK: hdev->manufacturer = 93; hdev->setup = virtbt_setup_realtek; hdev->shutdown = virtbt_shutdown_generic; - set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + hci_set_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY); + hci_set_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED); break; } } diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 7671bd158545..c1c0a4759c7e 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -943,6 +943,7 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, struct fsl_mc_obj_desc endpoint_desc = {{ 0 }}; struct dprc_endpoint endpoint1 = {{ 0 }}; struct dprc_endpoint endpoint2 = {{ 0 }}; + struct fsl_mc_bus *mc_bus; int state, err; mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent); @@ -966,6 +967,8 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, strcpy(endpoint_desc.type, endpoint2.type); endpoint_desc.id = endpoint2.id; endpoint = fsl_mc_device_lookup(&endpoint_desc, mc_bus_dev); + if (endpoint) + return endpoint; /* * We know that the device has an endpoint because we verified by @@ -973,17 +976,13 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, * yet discovered by the fsl-mc bus, thus the lookup returned NULL. * Force a rescan of the devices in this container and retry the lookup. */ - if (!endpoint) { - struct fsl_mc_bus *mc_bus = to_fsl_mc_bus(mc_bus_dev); - - if (mutex_trylock(&mc_bus->scan_mutex)) { - err = dprc_scan_objects(mc_bus_dev, true); - mutex_unlock(&mc_bus->scan_mutex); - } - - if (err < 0) - return ERR_PTR(err); + mc_bus = to_fsl_mc_bus(mc_bus_dev); + if (mutex_trylock(&mc_bus->scan_mutex)) { + err = dprc_scan_objects(mc_bus_dev, true); + mutex_unlock(&mc_bus->scan_mutex); } + if (err < 0) + return ERR_PTR(err); endpoint = fsl_mc_device_lookup(&endpoint_desc, mc_bus_dev); /* diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index bf490967241a..2505df1f4e69 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -720,11 +720,6 @@ static const struct pci_device_id agp_amd64_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_amd64_pci_table); -static const struct pci_device_id agp_amd64_pci_promisc_table[] = { - { PCI_DEVICE_CLASS(0, 0) }, - { } -}; - static DEFINE_SIMPLE_DEV_PM_OPS(agp_amd64_pm_ops, NULL, agp_amd64_resume); static struct pci_driver agp_amd64_pci_driver = { @@ -739,6 +734,7 @@ static struct pci_driver agp_amd64_pci_driver = { /* Not static due to IOMMU code calling it early. */ int __init agp_amd64_init(void) { + struct pci_dev *pdev = NULL; int err = 0; if (agp_off) @@ -767,9 +763,13 @@ int __init agp_amd64_init(void) } /* Look for any AGP bridge */ - agp_amd64_pci_driver.id_table = agp_amd64_pci_promisc_table; - err = driver_attach(&agp_amd64_pci_driver.driver); - if (err == 0 && agp_bridges_found == 0) { + for_each_pci_dev(pdev) + if (pci_find_capability(pdev, PCI_CAP_ID_AGP)) + pci_add_dynid(&agp_amd64_pci_driver, + pdev->vendor, pdev->device, + pdev->subsystem_vendor, + pdev->subsystem_device, 0, 0, 0); + if (agp_bridges_found == 0) { pci_unregister_driver(&agp_amd64_pci_driver); err = -ENODEV; } diff --git a/drivers/char/tpm/eventlog/common.c b/drivers/char/tpm/eventlog/common.c index 4c0bbba64ee5..691813d2a5a2 100644 --- a/drivers/char/tpm/eventlog/common.c +++ b/drivers/char/tpm/eventlog/common.c @@ -32,7 +32,7 @@ static int tpm_bios_measurements_open(struct inode *inode, struct tpm_chip *chip; inode_lock(inode); - if (!inode->i_private) { + if (!inode->i_nlink) { inode_unlock(inode); return -ENODEV; } @@ -105,7 +105,7 @@ static int tpm_read_log(struct tpm_chip *chip) void tpm_bios_log_setup(struct tpm_chip *chip) { const char *name = dev_name(&chip->dev); - unsigned int cnt; + struct dentry *dentry; int log_version; int rc = 0; @@ -117,14 +117,12 @@ void tpm_bios_log_setup(struct tpm_chip *chip) return; log_version = rc; - cnt = 0; - chip->bios_dir[cnt] = securityfs_create_dir(name, NULL); + chip->bios_dir = securityfs_create_dir(name, NULL); /* NOTE: securityfs_create_dir can return ENODEV if securityfs is * compiled out. The caller should ignore the ENODEV return code. */ - if (IS_ERR(chip->bios_dir[cnt])) - goto err; - cnt++; + if (IS_ERR(chip->bios_dir)) + return; chip->bin_log_seqops.chip = chip; if (log_version == EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) @@ -135,14 +133,13 @@ void tpm_bios_log_setup(struct tpm_chip *chip) &tpm1_binary_b_measurements_seqops; - chip->bios_dir[cnt] = + dentry = securityfs_create_file("binary_bios_measurements", - 0440, chip->bios_dir[0], + 0440, chip->bios_dir, (void *)&chip->bin_log_seqops, &tpm_bios_measurements_ops); - if (IS_ERR(chip->bios_dir[cnt])) + if (IS_ERR(dentry)) goto err; - cnt++; if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) { @@ -150,42 +147,23 @@ void tpm_bios_log_setup(struct tpm_chip *chip) chip->ascii_log_seqops.seqops = &tpm1_ascii_b_measurements_seqops; - chip->bios_dir[cnt] = + dentry = securityfs_create_file("ascii_bios_measurements", - 0440, chip->bios_dir[0], + 0440, chip->bios_dir, (void *)&chip->ascii_log_seqops, &tpm_bios_measurements_ops); - if (IS_ERR(chip->bios_dir[cnt])) + if (IS_ERR(dentry)) goto err; - cnt++; } return; err: - chip->bios_dir[cnt] = NULL; tpm_bios_log_teardown(chip); return; } void tpm_bios_log_teardown(struct tpm_chip *chip) { - int i; - struct inode *inode; - - /* securityfs_remove currently doesn't take care of handling sync - * between removal and opening of pseudo files. To handle this, a - * workaround is added by making i_private = NULL here during removal - * and to check it during open(), both within inode_lock()/unlock(). - * This design ensures that open() either safely gets kref or fails. - */ - for (i = (TPM_NUM_EVENT_LOG_FILES - 1); i >= 0; i--) { - if (chip->bios_dir[i]) { - inode = d_inode(chip->bios_dir[i]); - inode_lock(inode); - inode->i_private = NULL; - inode_unlock(inode); - securityfs_remove(chip->bios_dir[i]); - } - } + securityfs_remove(chip->bios_dir); } diff --git a/drivers/clk/clk-scmi.c b/drivers/clk/clk-scmi.c index 15510c2ff21c..1b1561c84127 100644 --- a/drivers/clk/clk-scmi.c +++ b/drivers/clk/clk-scmi.c @@ -404,6 +404,7 @@ static int scmi_clocks_probe(struct scmi_device *sdev) const struct scmi_handle *handle = sdev->handle; struct scmi_protocol_handle *ph; const struct clk_ops *scmi_clk_ops_db[SCMI_MAX_CLK_OPS] = {}; + struct scmi_clk *sclks; if (!handle) return -ENODEV; @@ -430,18 +431,21 @@ static int scmi_clocks_probe(struct scmi_device *sdev) transport_is_atomic = handle->is_transport_atomic(handle, &atomic_threshold_us); + sclks = devm_kcalloc(dev, count, sizeof(*sclks), GFP_KERNEL); + if (!sclks) + return -ENOMEM; + + for (idx = 0; idx < count; idx++) + hws[idx] = &sclks[idx].hw; + for (idx = 0; idx < count; idx++) { - struct scmi_clk *sclk; + struct scmi_clk *sclk = &sclks[idx]; const struct clk_ops *scmi_ops; - sclk = devm_kzalloc(dev, sizeof(*sclk), GFP_KERNEL); - if (!sclk) - return -ENOMEM; - sclk->info = scmi_proto_clk_ops->info_get(ph, idx); if (!sclk->info) { dev_dbg(dev, "invalid clock info for idx %d\n", idx); - devm_kfree(dev, sclk); + hws[idx] = NULL; continue; } @@ -479,13 +483,11 @@ static int scmi_clocks_probe(struct scmi_device *sdev) if (err) { dev_err(dev, "failed to register clock %d\n", idx); devm_kfree(dev, sclk->parent_data); - devm_kfree(dev, sclk); hws[idx] = NULL; } else { dev_dbg(dev, "Registered clock:%s%s\n", sclk->info->name, scmi_ops->enable ? " (atomic ops)" : ""); - hws[idx] = &sclk->hw; } } diff --git a/drivers/clk/imx/clk-imx95-blk-ctl.c b/drivers/clk/imx/clk-imx95-blk-ctl.c index 25974947ad0c..cc2ee2be1819 100644 --- a/drivers/clk/imx/clk-imx95-blk-ctl.c +++ b/drivers/clk/imx/clk-imx95-blk-ctl.c @@ -219,11 +219,15 @@ static const struct imx95_blk_ctl_dev_data lvds_csr_dev_data = { .clk_reg_offset = 0, }; +static const char * const disp_engine_parents[] = { + "videopll1", "dsi_pll", "ldb_pll_div7" +}; + static const struct imx95_blk_ctl_clk_dev_data dispmix_csr_clk_dev_data[] = { [IMX95_CLK_DISPMIX_ENG0_SEL] = { .name = "disp_engine0_sel", - .parent_names = (const char *[]){"videopll1", "dsi_pll", "ldb_pll_div7", }, - .num_parents = 4, + .parent_names = disp_engine_parents, + .num_parents = ARRAY_SIZE(disp_engine_parents), .reg = 0, .bit_idx = 0, .bit_width = 2, @@ -232,8 +236,8 @@ static const struct imx95_blk_ctl_clk_dev_data dispmix_csr_clk_dev_data[] = { }, [IMX95_CLK_DISPMIX_ENG1_SEL] = { .name = "disp_engine1_sel", - .parent_names = (const char *[]){"videopll1", "dsi_pll", "ldb_pll_div7", }, - .num_parents = 4, + .parent_names = disp_engine_parents, + .num_parents = ARRAY_SIZE(disp_engine_parents), .reg = 0, .bit_idx = 2, .bit_width = 2, diff --git a/drivers/clk/sunxi-ng/ccu-sun55i-a523.c b/drivers/clk/sunxi-ng/ccu-sun55i-a523.c index 9efb9fd24b42..1a9a1cb869e2 100644 --- a/drivers/clk/sunxi-ng/ccu-sun55i-a523.c +++ b/drivers/clk/sunxi-ng/ccu-sun55i-a523.c @@ -385,7 +385,8 @@ static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_FEAT(mbus_clk, "mbus", mbus_parents, 0, 0, /* no P */ 24, 3, /* mux */ BIT(31), /* gate */ - 0, CCU_FEATURE_UPDATE_BIT); + CLK_IS_CRITICAL, + CCU_FEATURE_UPDATE_BIT); static const struct clk_hw *mbus_hws[] = { &mbus_clk.common.hw }; diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c index 579a81bb46df..52e4369664c5 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c @@ -350,7 +350,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(de_clk, "de", de_parents, 0x104, 0, 4, 24, 2, BIT(31), CLK_SET_RATE_PARENT); -static const char * const tcon_parents[] = { "pll-video" }; +static const char * const tcon_parents[] = { "pll-video", "pll-periph0" }; static SUNXI_CCU_M_WITH_MUX_GATE(tcon_clk, "tcon", tcon_parents, 0x118, 0, 4, 24, 3, BIT(31), 0); @@ -362,11 +362,11 @@ static const char * const csi_mclk_parents[] = { "osc24M", "pll-video", static SUNXI_CCU_M_WITH_MUX_GATE(csi0_mclk_clk, "csi0-mclk", csi_mclk_parents, 0x130, 0, 5, 8, 3, BIT(15), 0); -static const char * const csi1_sclk_parents[] = { "pll-video", "pll-isp" }; -static SUNXI_CCU_M_WITH_MUX_GATE(csi1_sclk_clk, "csi-sclk", csi1_sclk_parents, +static const char * const csi_sclk_parents[] = { "pll-video", "pll-isp" }; +static SUNXI_CCU_M_WITH_MUX_GATE(csi_sclk_clk, "csi-sclk", csi_sclk_parents, 0x134, 16, 4, 24, 3, BIT(31), 0); -static SUNXI_CCU_M_WITH_MUX_GATE(csi1_mclk_clk, "csi-mclk", csi_mclk_parents, +static SUNXI_CCU_M_WITH_MUX_GATE(csi1_mclk_clk, "csi1-mclk", csi_mclk_parents, 0x134, 0, 5, 8, 3, BIT(15), 0); static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve", @@ -452,7 +452,7 @@ static struct ccu_common *sun8i_v3s_ccu_clks[] = { &tcon_clk.common, &csi_misc_clk.common, &csi0_mclk_clk.common, - &csi1_sclk_clk.common, + &csi_sclk_clk.common, &csi1_mclk_clk.common, &ve_clk.common, &ac_dig_clk.common, @@ -551,7 +551,7 @@ static struct clk_hw_onecell_data sun8i_v3s_hw_clks = { [CLK_TCON0] = &tcon_clk.common.hw, [CLK_CSI_MISC] = &csi_misc_clk.common.hw, [CLK_CSI0_MCLK] = &csi0_mclk_clk.common.hw, - [CLK_CSI1_SCLK] = &csi1_sclk_clk.common.hw, + [CLK_CSI_SCLK] = &csi_sclk_clk.common.hw, [CLK_CSI1_MCLK] = &csi1_mclk_clk.common.hw, [CLK_VE] = &ve_clk.common.hw, [CLK_AC_DIG] = &ac_dig_clk.common.hw, @@ -633,7 +633,7 @@ static struct clk_hw_onecell_data sun8i_v3_hw_clks = { [CLK_TCON0] = &tcon_clk.common.hw, [CLK_CSI_MISC] = &csi_misc_clk.common.hw, [CLK_CSI0_MCLK] = &csi0_mclk_clk.common.hw, - [CLK_CSI1_SCLK] = &csi1_sclk_clk.common.hw, + [CLK_CSI_SCLK] = &csi_sclk_clk.common.hw, [CLK_CSI1_MCLK] = &csi1_mclk_clk.common.hw, [CLK_VE] = &ve_clk.common.hw, [CLK_AC_DIG] = &ac_dig_clk.common.hw, diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 09549451dd51..2edc13ca184e 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -22,6 +22,7 @@ #include <linux/irq.h> #include <linux/acpi.h> #include <linux/hyperv.h> +#include <linux/export.h> #include <clocksource/hyperv_timer.h> #include <hyperv/hvhdk.h> #include <asm/mshyperv.h> diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c index 3383a7ce27ff..c83fd14dd7ad 100644 --- a/drivers/comedi/comedi_fops.c +++ b/drivers/comedi/comedi_fops.c @@ -1556,21 +1556,27 @@ static int do_insnlist_ioctl(struct comedi_device *dev, } for (i = 0; i < n_insns; ++i) { + unsigned int n = insns[i].n; + if (insns[i].insn & INSN_MASK_WRITE) { if (copy_from_user(data, insns[i].data, - insns[i].n * sizeof(unsigned int))) { + n * sizeof(unsigned int))) { dev_dbg(dev->class_dev, "copy_from_user failed\n"); ret = -EFAULT; goto error; } + if (n < MIN_SAMPLES) { + memset(&data[n], 0, (MIN_SAMPLES - n) * + sizeof(unsigned int)); + } } ret = parse_insn(dev, insns + i, data, file); if (ret < 0) goto error; if (insns[i].insn & INSN_MASK_READ) { if (copy_to_user(insns[i].data, data, - insns[i].n * sizeof(unsigned int))) { + n * sizeof(unsigned int))) { dev_dbg(dev->class_dev, "copy_to_user failed\n"); ret = -EFAULT; @@ -1589,6 +1595,16 @@ error: return i; } +#define MAX_INSNS MAX_SAMPLES +static int check_insnlist_len(struct comedi_device *dev, unsigned int n_insns) +{ + if (n_insns > MAX_INSNS) { + dev_dbg(dev->class_dev, "insnlist length too large\n"); + return -EINVAL; + } + return 0; +} + /* * COMEDI_INSN ioctl * synchronous instruction @@ -1633,6 +1649,10 @@ static int do_insn_ioctl(struct comedi_device *dev, ret = -EFAULT; goto error; } + if (insn->n < MIN_SAMPLES) { + memset(&data[insn->n], 0, + (MIN_SAMPLES - insn->n) * sizeof(unsigned int)); + } } ret = parse_insn(dev, insn, data, file); if (ret < 0) @@ -2239,6 +2259,9 @@ static long comedi_unlocked_ioctl(struct file *file, unsigned int cmd, rc = -EFAULT; break; } + rc = check_insnlist_len(dev, insnlist.n_insns); + if (rc) + break; insns = kcalloc(insnlist.n_insns, sizeof(*insns), GFP_KERNEL); if (!insns) { rc = -ENOMEM; @@ -3142,6 +3165,9 @@ static int compat_insnlist(struct file *file, unsigned long arg) if (copy_from_user(&insnlist32, compat_ptr(arg), sizeof(insnlist32))) return -EFAULT; + rc = check_insnlist_len(dev, insnlist32.n_insns); + if (rc) + return rc; insns = kcalloc(insnlist32.n_insns, sizeof(*insns), GFP_KERNEL); if (!insns) return -ENOMEM; diff --git a/drivers/comedi/drivers.c b/drivers/comedi/drivers.c index 376130bfba8a..9e4b7c840a8f 100644 --- a/drivers/comedi/drivers.c +++ b/drivers/comedi/drivers.c @@ -339,10 +339,10 @@ int comedi_dio_insn_config(struct comedi_device *dev, unsigned int *data, unsigned int mask) { - unsigned int chan_mask = 1 << CR_CHAN(insn->chanspec); + unsigned int chan = CR_CHAN(insn->chanspec); - if (!mask) - mask = chan_mask; + if (!mask && chan < 32) + mask = 1U << chan; switch (data[0]) { case INSN_CONFIG_DIO_INPUT: @@ -382,7 +382,7 @@ EXPORT_SYMBOL_GPL(comedi_dio_insn_config); unsigned int comedi_dio_update_state(struct comedi_subdevice *s, unsigned int *data) { - unsigned int chanmask = (s->n_chan < 32) ? ((1 << s->n_chan) - 1) + unsigned int chanmask = (s->n_chan < 32) ? ((1U << s->n_chan) - 1) : 0xffffffff; unsigned int mask = data[0] & chanmask; unsigned int bits = data[1]; @@ -615,6 +615,9 @@ static int insn_rw_emulate_bits(struct comedi_device *dev, unsigned int _data[2]; int ret; + if (insn->n == 0) + return 0; + memset(_data, 0, sizeof(_data)); memset(&_insn, 0, sizeof(_insn)); _insn.insn = INSN_BITS; @@ -625,8 +628,8 @@ static int insn_rw_emulate_bits(struct comedi_device *dev, if (insn->insn == INSN_WRITE) { if (!(s->subdev_flags & SDF_WRITABLE)) return -EINVAL; - _data[0] = 1 << (chan - base_chan); /* mask */ - _data[1] = data[0] ? (1 << (chan - base_chan)) : 0; /* bits */ + _data[0] = 1U << (chan - base_chan); /* mask */ + _data[1] = data[0] ? (1U << (chan - base_chan)) : 0; /* bits */ } ret = s->insn_bits(dev, s, &_insn, _data); @@ -709,7 +712,7 @@ static int __comedi_device_postconfig(struct comedi_device *dev) if (s->type == COMEDI_SUBD_DO) { if (s->n_chan < 32) - s->io_bits = (1 << s->n_chan) - 1; + s->io_bits = (1U << s->n_chan) - 1; else s->io_bits = 0xffffffff; } diff --git a/drivers/comedi/drivers/aio_iiro_16.c b/drivers/comedi/drivers/aio_iiro_16.c index b00fab0b89d4..739cc4db52ac 100644 --- a/drivers/comedi/drivers/aio_iiro_16.c +++ b/drivers/comedi/drivers/aio_iiro_16.c @@ -177,7 +177,8 @@ static int aio_iiro_16_attach(struct comedi_device *dev, * Digital input change of state interrupts are optionally supported * using IRQ 2-7, 10-12, 14, or 15. */ - if ((1 << it->options[1]) & 0xdcfc) { + if (it->options[1] > 0 && it->options[1] < 16 && + (1 << it->options[1]) & 0xdcfc) { ret = request_irq(it->options[1], aio_iiro_16_cos, 0, dev->board_name, dev); if (ret == 0) diff --git a/drivers/comedi/drivers/comedi_test.c b/drivers/comedi/drivers/comedi_test.c index 9747e6d1f6eb..7984950f0f99 100644 --- a/drivers/comedi/drivers/comedi_test.c +++ b/drivers/comedi/drivers/comedi_test.c @@ -792,7 +792,7 @@ static void waveform_detach(struct comedi_device *dev) { struct waveform_private *devpriv = dev->private; - if (devpriv) { + if (devpriv && dev->n_subdevices) { timer_delete_sync(&devpriv->ai_timer); timer_delete_sync(&devpriv->ao_timer); } diff --git a/drivers/comedi/drivers/das16m1.c b/drivers/comedi/drivers/das16m1.c index b8ea737ad3d1..1b638f5b5a4f 100644 --- a/drivers/comedi/drivers/das16m1.c +++ b/drivers/comedi/drivers/das16m1.c @@ -522,7 +522,8 @@ static int das16m1_attach(struct comedi_device *dev, devpriv->extra_iobase = dev->iobase + DAS16M1_8255_IOBASE; /* only irqs 2, 3, 4, 5, 6, 7, 10, 11, 12, 14, and 15 are valid */ - if ((1 << it->options[1]) & 0xdcfc) { + if (it->options[1] >= 2 && it->options[1] <= 15 && + (1 << it->options[1]) & 0xdcfc) { ret = request_irq(it->options[1], das16m1_interrupt, 0, dev->board_name, dev); if (ret == 0) diff --git a/drivers/comedi/drivers/das6402.c b/drivers/comedi/drivers/das6402.c index 68f95330de45..7660487e563c 100644 --- a/drivers/comedi/drivers/das6402.c +++ b/drivers/comedi/drivers/das6402.c @@ -567,7 +567,8 @@ static int das6402_attach(struct comedi_device *dev, das6402_reset(dev); /* IRQs 2,3,5,6,7, 10,11,15 are valid for "enhanced" mode */ - if ((1 << it->options[1]) & 0x8cec) { + if (it->options[1] > 0 && it->options[1] < 16 && + (1 << it->options[1]) & 0x8cec) { ret = request_irq(it->options[1], das6402_interrupt, 0, dev->board_name, dev); if (ret == 0) { diff --git a/drivers/comedi/drivers/pcl812.c b/drivers/comedi/drivers/pcl812.c index 0df639c6a595..abca61a72cf7 100644 --- a/drivers/comedi/drivers/pcl812.c +++ b/drivers/comedi/drivers/pcl812.c @@ -1149,7 +1149,8 @@ static int pcl812_attach(struct comedi_device *dev, struct comedi_devconfig *it) if (IS_ERR(dev->pacer)) return PTR_ERR(dev->pacer); - if ((1 << it->options[1]) & board->irq_bits) { + if (it->options[1] > 0 && it->options[1] < 16 && + (1 << it->options[1]) & board->irq_bits) { ret = request_irq(it->options[1], pcl812_interrupt, 0, dev->board_name, dev); if (ret == 0) diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index 4e1ba35deda9..b19bc60cc627 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -45,7 +45,6 @@ struct psci_cpuidle_domain_state { static DEFINE_PER_CPU_READ_MOSTLY(struct psci_cpuidle_data, psci_cpuidle_data); static DEFINE_PER_CPU(struct psci_cpuidle_domain_state, psci_domain_state); static bool psci_cpuidle_use_syscore; -static bool psci_cpuidle_use_cpuhp; void psci_set_domain_state(struct generic_pm_domain *pd, unsigned int state_idx, u32 state) @@ -124,8 +123,12 @@ static int psci_idle_cpuhp_up(unsigned int cpu) { struct device *pd_dev = __this_cpu_read(psci_cpuidle_data.dev); - if (pd_dev) - pm_runtime_get_sync(pd_dev); + if (pd_dev) { + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + pm_runtime_get_sync(pd_dev); + else + dev_pm_genpd_resume(pd_dev); + } return 0; } @@ -135,7 +138,11 @@ static int psci_idle_cpuhp_down(unsigned int cpu) struct device *pd_dev = __this_cpu_read(psci_cpuidle_data.dev); if (pd_dev) { - pm_runtime_put_sync(pd_dev); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + pm_runtime_put_sync(pd_dev); + else + dev_pm_genpd_suspend(pd_dev); + /* Clear domain state to start fresh at next online. */ psci_clear_domain_state(); } @@ -196,9 +203,6 @@ static void psci_idle_init_cpuhp(void) { int err; - if (!psci_cpuidle_use_cpuhp) - return; - err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING, "cpuidle/psci:online", psci_idle_cpuhp_up, @@ -259,10 +263,8 @@ static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv, * s2ram and s2idle. */ drv->states[state_count - 1].enter_s2idle = psci_enter_s2idle_domain_idle_state; - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) drv->states[state_count - 1].enter = psci_enter_domain_idle_state; - psci_cpuidle_use_cpuhp = true; - } return 0; } @@ -339,7 +341,6 @@ static void psci_cpu_deinit_idle(int cpu) dt_idle_detach_cpu(data->dev); psci_cpuidle_use_syscore = false; - psci_cpuidle_use_cpuhp = false; } static int psci_idle_init_cpu(struct device *dev, int cpu) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index af37477ffd8d..be21e4e2016c 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -314,30 +314,30 @@ static int chcr_compute_partial_hash(struct shash_desc *desc, if (digest_size == SHA1_DIGEST_SIZE) { error = crypto_shash_init(desc) ?: crypto_shash_update(desc, iopad, SHA1_BLOCK_SIZE) ?: - crypto_shash_export(desc, (void *)&sha1_st); + crypto_shash_export_core(desc, &sha1_st); memcpy(result_hash, sha1_st.state, SHA1_DIGEST_SIZE); } else if (digest_size == SHA224_DIGEST_SIZE) { error = crypto_shash_init(desc) ?: crypto_shash_update(desc, iopad, SHA256_BLOCK_SIZE) ?: - crypto_shash_export(desc, (void *)&sha256_st); + crypto_shash_export_core(desc, &sha256_st); memcpy(result_hash, sha256_st.state, SHA256_DIGEST_SIZE); } else if (digest_size == SHA256_DIGEST_SIZE) { error = crypto_shash_init(desc) ?: crypto_shash_update(desc, iopad, SHA256_BLOCK_SIZE) ?: - crypto_shash_export(desc, (void *)&sha256_st); + crypto_shash_export_core(desc, &sha256_st); memcpy(result_hash, sha256_st.state, SHA256_DIGEST_SIZE); } else if (digest_size == SHA384_DIGEST_SIZE) { error = crypto_shash_init(desc) ?: crypto_shash_update(desc, iopad, SHA512_BLOCK_SIZE) ?: - crypto_shash_export(desc, (void *)&sha512_st); + crypto_shash_export_core(desc, &sha512_st); memcpy(result_hash, sha512_st.state, SHA512_DIGEST_SIZE); } else if (digest_size == SHA512_DIGEST_SIZE) { error = crypto_shash_init(desc) ?: crypto_shash_update(desc, iopad, SHA512_BLOCK_SIZE) ?: - crypto_shash_export(desc, (void *)&sha512_st); + crypto_shash_export_core(desc, &sha512_st); memcpy(result_hash, sha512_st.state, SHA512_DIGEST_SIZE); } else { error = -EINVAL; diff --git a/drivers/crypto/intel/qat/qat_common/qat_algs.c b/drivers/crypto/intel/qat/qat_common/qat_algs.c index 3c4bba4a8779..c03a69851114 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_algs.c +++ b/drivers/crypto/intel/qat/qat_common/qat_algs.c @@ -5,11 +5,11 @@ #include <linux/crypto.h> #include <crypto/internal/aead.h> #include <crypto/internal/cipher.h> +#include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> #include <crypto/aes.h> #include <crypto/sha1.h> #include <crypto/sha2.h> -#include <crypto/hash.h> #include <crypto/hmac.h> #include <crypto/algapi.h> #include <crypto/authenc.h> @@ -154,19 +154,19 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, switch (ctx->qat_hash_alg) { case ICP_QAT_HW_AUTH_ALGO_SHA1: - if (crypto_shash_export(shash, &ctx->sha1)) + if (crypto_shash_export_core(shash, &ctx->sha1)) return -EFAULT; for (i = 0; i < digest_size >> 2; i++, hash_state_out++) *hash_state_out = cpu_to_be32(ctx->sha1.state[i]); break; case ICP_QAT_HW_AUTH_ALGO_SHA256: - if (crypto_shash_export(shash, &ctx->sha256)) + if (crypto_shash_export_core(shash, &ctx->sha256)) return -EFAULT; for (i = 0; i < digest_size >> 2; i++, hash_state_out++) *hash_state_out = cpu_to_be32(ctx->sha256.state[i]); break; case ICP_QAT_HW_AUTH_ALGO_SHA512: - if (crypto_shash_export(shash, &ctx->sha512)) + if (crypto_shash_export_core(shash, &ctx->sha512)) return -EFAULT; for (i = 0; i < digest_size >> 3; i++, hash512_state_out++) *hash512_state_out = cpu_to_be64(ctx->sha512.state[i]); @@ -190,19 +190,19 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, switch (ctx->qat_hash_alg) { case ICP_QAT_HW_AUTH_ALGO_SHA1: - if (crypto_shash_export(shash, &ctx->sha1)) + if (crypto_shash_export_core(shash, &ctx->sha1)) return -EFAULT; for (i = 0; i < digest_size >> 2; i++, hash_state_out++) *hash_state_out = cpu_to_be32(ctx->sha1.state[i]); break; case ICP_QAT_HW_AUTH_ALGO_SHA256: - if (crypto_shash_export(shash, &ctx->sha256)) + if (crypto_shash_export_core(shash, &ctx->sha256)) return -EFAULT; for (i = 0; i < digest_size >> 2; i++, hash_state_out++) *hash_state_out = cpu_to_be32(ctx->sha256.state[i]); break; case ICP_QAT_HW_AUTH_ALGO_SHA512: - if (crypto_shash_export(shash, &ctx->sha512)) + if (crypto_shash_export_core(shash, &ctx->sha512)) return -EFAULT; for (i = 0; i < digest_size >> 3; i++, hash512_state_out++) *hash512_state_out = cpu_to_be64(ctx->sha512.state[i]); diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c index 49f09998e5c0..3371e0a76d3c 100644 --- a/drivers/dma/dw-edma/dw-edma-pcie.c +++ b/drivers/dma/dw-edma/dw-edma-pcie.c @@ -161,12 +161,16 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *pid) { struct dw_edma_pcie_data *pdata = (void *)pid->driver_data; - struct dw_edma_pcie_data vsec_data; + struct dw_edma_pcie_data *vsec_data __free(kfree) = NULL; struct device *dev = &pdev->dev; struct dw_edma_chip *chip; int err, nr_irqs; int i, mask; + vsec_data = kmalloc(sizeof(*vsec_data), GFP_KERNEL); + if (!vsec_data) + return -ENOMEM; + /* Enable PCI device */ err = pcim_enable_device(pdev); if (err) { @@ -174,23 +178,23 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev, return err; } - memcpy(&vsec_data, pdata, sizeof(struct dw_edma_pcie_data)); + memcpy(vsec_data, pdata, sizeof(struct dw_edma_pcie_data)); /* * Tries to find if exists a PCIe Vendor-Specific Extended Capability * for the DMA, if one exists, then reconfigures it. */ - dw_edma_pcie_get_vsec_dma_data(pdev, &vsec_data); + dw_edma_pcie_get_vsec_dma_data(pdev, vsec_data); /* Mapping PCI BAR regions */ - mask = BIT(vsec_data.rg.bar); - for (i = 0; i < vsec_data.wr_ch_cnt; i++) { - mask |= BIT(vsec_data.ll_wr[i].bar); - mask |= BIT(vsec_data.dt_wr[i].bar); + mask = BIT(vsec_data->rg.bar); + for (i = 0; i < vsec_data->wr_ch_cnt; i++) { + mask |= BIT(vsec_data->ll_wr[i].bar); + mask |= BIT(vsec_data->dt_wr[i].bar); } - for (i = 0; i < vsec_data.rd_ch_cnt; i++) { - mask |= BIT(vsec_data.ll_rd[i].bar); - mask |= BIT(vsec_data.dt_rd[i].bar); + for (i = 0; i < vsec_data->rd_ch_cnt; i++) { + mask |= BIT(vsec_data->ll_rd[i].bar); + mask |= BIT(vsec_data->dt_rd[i].bar); } err = pcim_iomap_regions(pdev, mask, pci_name(pdev)); if (err) { @@ -213,7 +217,7 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev, return -ENOMEM; /* IRQs allocation */ - nr_irqs = pci_alloc_irq_vectors(pdev, 1, vsec_data.irqs, + nr_irqs = pci_alloc_irq_vectors(pdev, 1, vsec_data->irqs, PCI_IRQ_MSI | PCI_IRQ_MSIX); if (nr_irqs < 1) { pci_err(pdev, "fail to alloc IRQ vector (number of IRQs=%u)\n", @@ -224,22 +228,22 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev, /* Data structure initialization */ chip->dev = dev; - chip->mf = vsec_data.mf; + chip->mf = vsec_data->mf; chip->nr_irqs = nr_irqs; chip->ops = &dw_edma_pcie_plat_ops; - chip->ll_wr_cnt = vsec_data.wr_ch_cnt; - chip->ll_rd_cnt = vsec_data.rd_ch_cnt; + chip->ll_wr_cnt = vsec_data->wr_ch_cnt; + chip->ll_rd_cnt = vsec_data->rd_ch_cnt; - chip->reg_base = pcim_iomap_table(pdev)[vsec_data.rg.bar]; + chip->reg_base = pcim_iomap_table(pdev)[vsec_data->rg.bar]; if (!chip->reg_base) return -ENOMEM; for (i = 0; i < chip->ll_wr_cnt; i++) { struct dw_edma_region *ll_region = &chip->ll_region_wr[i]; struct dw_edma_region *dt_region = &chip->dt_region_wr[i]; - struct dw_edma_block *ll_block = &vsec_data.ll_wr[i]; - struct dw_edma_block *dt_block = &vsec_data.dt_wr[i]; + struct dw_edma_block *ll_block = &vsec_data->ll_wr[i]; + struct dw_edma_block *dt_block = &vsec_data->dt_wr[i]; ll_region->vaddr.io = pcim_iomap_table(pdev)[ll_block->bar]; if (!ll_region->vaddr.io) @@ -263,8 +267,8 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev, for (i = 0; i < chip->ll_rd_cnt; i++) { struct dw_edma_region *ll_region = &chip->ll_region_rd[i]; struct dw_edma_region *dt_region = &chip->dt_region_rd[i]; - struct dw_edma_block *ll_block = &vsec_data.ll_rd[i]; - struct dw_edma_block *dt_block = &vsec_data.dt_rd[i]; + struct dw_edma_block *ll_block = &vsec_data->ll_rd[i]; + struct dw_edma_block *dt_block = &vsec_data->dt_rd[i]; ll_region->vaddr.io = pcim_iomap_table(pdev)[ll_block->bar]; if (!ll_region->vaddr.io) @@ -298,31 +302,31 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev, pci_dbg(pdev, "Version:\tUnknown (0x%x)\n", chip->mf); pci_dbg(pdev, "Registers:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p)\n", - vsec_data.rg.bar, vsec_data.rg.off, vsec_data.rg.sz, + vsec_data->rg.bar, vsec_data->rg.off, vsec_data->rg.sz, chip->reg_base); for (i = 0; i < chip->ll_wr_cnt; i++) { pci_dbg(pdev, "L. List:\tWRITE CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n", - i, vsec_data.ll_wr[i].bar, - vsec_data.ll_wr[i].off, chip->ll_region_wr[i].sz, + i, vsec_data->ll_wr[i].bar, + vsec_data->ll_wr[i].off, chip->ll_region_wr[i].sz, chip->ll_region_wr[i].vaddr.io, &chip->ll_region_wr[i].paddr); pci_dbg(pdev, "Data:\tWRITE CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n", - i, vsec_data.dt_wr[i].bar, - vsec_data.dt_wr[i].off, chip->dt_region_wr[i].sz, + i, vsec_data->dt_wr[i].bar, + vsec_data->dt_wr[i].off, chip->dt_region_wr[i].sz, chip->dt_region_wr[i].vaddr.io, &chip->dt_region_wr[i].paddr); } for (i = 0; i < chip->ll_rd_cnt; i++) { pci_dbg(pdev, "L. List:\tREAD CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n", - i, vsec_data.ll_rd[i].bar, - vsec_data.ll_rd[i].off, chip->ll_region_rd[i].sz, + i, vsec_data->ll_rd[i].bar, + vsec_data->ll_rd[i].off, chip->ll_region_rd[i].sz, chip->ll_region_rd[i].vaddr.io, &chip->ll_region_rd[i].paddr); pci_dbg(pdev, "Data:\tREAD CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n", - i, vsec_data.dt_rd[i].bar, - vsec_data.dt_rd[i].off, chip->dt_region_rd[i].sz, + i, vsec_data->dt_rd[i].bar, + vsec_data->dt_rd[i].off, chip->dt_region_rd[i].sz, chip->dt_region_rd[i].vaddr.io, &chip->dt_region_rd[i].paddr); } diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c index 47c8adfdc155..9f0c41ca7770 100644 --- a/drivers/dma/mediatek/mtk-cqdma.c +++ b/drivers/dma/mediatek/mtk-cqdma.c @@ -449,9 +449,9 @@ static enum dma_status mtk_cqdma_tx_status(struct dma_chan *c, return ret; spin_lock_irqsave(&cvc->pc->lock, flags); - spin_lock_irqsave(&cvc->vc.lock, flags); + spin_lock(&cvc->vc.lock); vd = mtk_cqdma_find_active_desc(c, cookie); - spin_unlock_irqrestore(&cvc->vc.lock, flags); + spin_unlock(&cvc->vc.lock); spin_unlock_irqrestore(&cvc->pc->lock, flags); if (vd) { diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c index 0d6324c4e2be..7a2488a0d6a3 100644 --- a/drivers/dma/nbpfaxi.c +++ b/drivers/dma/nbpfaxi.c @@ -1351,7 +1351,7 @@ static int nbpf_probe(struct platform_device *pdev) if (irqs == 1) { eirq = irqbuf[0]; - for (i = 0; i <= num_channels; i++) + for (i = 0; i < num_channels; i++) nbpf->chan[i].irq = irqbuf[0]; } else { eirq = platform_get_irq_byname(pdev, "error"); @@ -1361,16 +1361,15 @@ static int nbpf_probe(struct platform_device *pdev) if (irqs == num_channels + 1) { struct nbpf_channel *chan; - for (i = 0, chan = nbpf->chan; i <= num_channels; + for (i = 0, chan = nbpf->chan; i < num_channels; i++, chan++) { /* Skip the error IRQ */ if (irqbuf[i] == eirq) i++; + if (i >= ARRAY_SIZE(irqbuf)) + return -EINVAL; chan->irq = irqbuf[i]; } - - if (chan != nbpf->chan + num_channels) - return -EINVAL; } else { /* 2 IRQs and more than one channel */ if (irqbuf[0] == eirq) @@ -1378,7 +1377,7 @@ static int nbpf_probe(struct platform_device *pdev) else irq = irqbuf[0]; - for (i = 0; i <= num_channels; i++) + for (i = 0; i < num_channels; i++) nbpf->chan[i].irq = irq; } } diff --git a/drivers/firmware/efi/libstub/zboot.lds b/drivers/firmware/efi/libstub/zboot.lds index c3a166675450..367907eb7d86 100644 --- a/drivers/firmware/efi/libstub/zboot.lds +++ b/drivers/firmware/efi/libstub/zboot.lds @@ -29,14 +29,12 @@ SECTIONS . = _etext; } -#ifdef CONFIG_EFI_SBAT .sbat : ALIGN(4096) { _sbat = .; *(.sbat) _esbat = ALIGN(4096); . = _esbat; } -#endif .data : ALIGN(4096) { _data = .; @@ -60,6 +58,6 @@ SECTIONS PROVIDE(__efistub__gzdata_size = ABSOLUTE(__efistub__gzdata_end - __efistub__gzdata_start)); -PROVIDE(__data_rawsize = ABSOLUTE(_edata - _etext)); -PROVIDE(__data_size = ABSOLUTE(_end - _etext)); +PROVIDE(__data_rawsize = ABSOLUTE(_edata - _data)); +PROVIDE(__data_size = ABSOLUTE(_end - _data)); PROVIDE(__sbat_size = ABSOLUTE(_esbat - _sbat)); diff --git a/drivers/gpio/gpiolib-acpi-quirks.c b/drivers/gpio/gpiolib-acpi-quirks.c index 219667315b2c..c13545dce349 100644 --- a/drivers/gpio/gpiolib-acpi-quirks.c +++ b/drivers/gpio/gpiolib-acpi-quirks.c @@ -331,6 +331,19 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_interrupt = "AMDI0030:00@11", }, }, + { + /* + * Wakeup only works when keyboard backlight is turned off + * https://gitlab.freedesktop.org/drm/amd/-/issues/4169 + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_FAMILY, "Acer Nitro V 15"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_interrupt = "AMDI0030:00@8", + }, + }, {} /* Terminating entry */ }; diff --git a/drivers/gpio/gpiolib-devres.c b/drivers/gpio/gpiolib-devres.c index 4d5f83b17624..72422c5db364 100644 --- a/drivers/gpio/gpiolib-devres.c +++ b/drivers/gpio/gpiolib-devres.c @@ -319,7 +319,7 @@ EXPORT_SYMBOL_GPL(devm_gpiod_unhinge); */ void devm_gpiod_put_array(struct device *dev, struct gpio_descs *descs) { - devm_remove_action(dev, devm_gpiod_release_array, descs); + devm_release_action(dev, devm_gpiod_release_array, descs); } EXPORT_SYMBOL_GPL(devm_gpiod_put_array); diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 73ba73b31cb1..37ab78243fab 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -708,7 +708,7 @@ struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id, unsigned int idx, unsigned long *flags) { char propname[32]; /* 32 is max size of property name */ - enum of_gpio_flags of_flags; + enum of_gpio_flags of_flags = 0; const of_find_gpio_quirk *q; struct gpio_desc *desc; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index fdafa0df1b43..3a3eca5b4c40 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3297,14 +3297,15 @@ static int gpiod_get_raw_value_commit(const struct gpio_desc *desc) static int gpio_chip_get_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { - int ret; - lockdep_assert_held(&gc->gpiodev->srcu); if (gc->get_multiple) { + int ret; + ret = gc->get_multiple(gc, mask, bits); if (ret > 0) return -EBADE; + return ret; } if (gc->get) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 78f8755996f0..aa32df7e2fb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5193,6 +5193,8 @@ exit: dev->dev->power.disable_depth--; #endif } + + amdgpu_vram_mgr_clear_reset_blocks(adev); adev->in_suspend = false; if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 426834806fbf..6ac0ce361a2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -427,6 +427,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { unsigned long flags; ktime_t deadline; + bool ret; if (unlikely(ring->adev->debug_disable_soft_recovery)) return false; @@ -441,12 +442,16 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, dma_fence_set_error(fence, -ENODATA); spin_unlock_irqrestore(fence->lock, flags); - atomic_inc(&ring->adev->gpu_reset_counter); while (!dma_fence_is_signaled(fence) && ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) ring->funcs->soft_recovery(ring, vmid); - return dma_fence_is_signaled(fence); + ret = dma_fence_is_signaled(fence); + /* increment the counter only if soft reset worked */ + if (ret) + atomic_inc(&ring->adev->gpu_reset_counter); + + return ret; } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 208b7d1d8a27..450e4bf093b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -154,6 +154,7 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, uint64_t start, uint64_t size); int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, uint64_t start); +void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev); bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, struct ttm_resource *res); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index abdc52b0895a..07c936e90d8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -783,6 +783,23 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr) } /** + * amdgpu_vram_mgr_clear_reset_blocks - reset clear blocks + * + * @adev: amdgpu device pointer + * + * Reset the cleared drm buddy blocks. + */ +void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev) +{ + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct drm_buddy *mm = &mgr->mm; + + mutex_lock(&mgr->lock); + drm_buddy_reset_clear(mm, false); + mutex_unlock(&mgr->lock); +} + +/** * amdgpu_vram_mgr_intersects - test each drm buddy block for intersection * * @man: TTM memory type manager diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 5ee2237d8ee8..bc983ecf3d99 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4640,6 +4640,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); } return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 87058271b00c..2551823382f8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -728,7 +728,16 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, * support programmable degamma anywhere. */ is_dcn = dm->adev->dm.dc->caps.color.dpp.dcn_arch; - drm_crtc_enable_color_mgmt(&acrtc->base, is_dcn ? MAX_COLOR_LUT_ENTRIES : 0, + /* Dont't enable DRM CRTC degamma property for DCN401 since the + * pre-blending degamma LUT doesn't apply to cursor, and therefore + * can't work similar to a post-blending degamma LUT as in other hw + * versions. + * TODO: revisit it once KMS plane color API is merged. + */ + drm_crtc_enable_color_mgmt(&acrtc->base, + (is_dcn && + dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01) ? + MAX_COLOR_LUT_ENTRIES : 0, true, MAX_COLOR_LUT_ENTRIES); drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index a3b8e3d4a429..4b17d2fcd565 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -1565,7 +1565,7 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct( clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL); if (!clk_mgr->base.bw_params) { BREAK_TO_DEBUGGER(); - kfree(clk_mgr); + kfree(clk_mgr401); return NULL; } @@ -1576,6 +1576,7 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct( if (!clk_mgr->wm_range_table) { BREAK_TO_DEBUGGER(); kfree(clk_mgr->base.bw_params); + kfree(clk_mgr401); return NULL; } diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index de9c23537465..834b42a4d31f 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -1373,7 +1373,7 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE, 0); mutex_unlock(&pdata->comms_mutex); - }; + } drm_bridge_add(&pdata->bridge); diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index dc622c78db9d..ea78c6c8ca7a 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -725,7 +725,7 @@ ssize_t drm_dp_dpcd_read(struct drm_dp_aux *aux, unsigned int offset, * monitor doesn't power down exactly after the throw away read. */ if (!aux->is_remote) { - ret = drm_dp_dpcd_probe(aux, DP_LANE0_1_STATUS); + ret = drm_dp_dpcd_probe(aux, DP_TRAINING_PATTERN_SET); if (ret < 0) return ret; } diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 241c855f891f..66aff35f8647 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -405,6 +405,49 @@ drm_get_buddy(struct drm_buddy_block *block) EXPORT_SYMBOL(drm_get_buddy); /** + * drm_buddy_reset_clear - reset blocks clear state + * + * @mm: DRM buddy manager + * @is_clear: blocks clear state + * + * Reset the clear state based on @is_clear value for each block + * in the freelist. + */ +void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear) +{ + u64 root_size, size, start; + unsigned int order; + int i; + + size = mm->size; + for (i = 0; i < mm->n_roots; ++i) { + order = ilog2(size) - ilog2(mm->chunk_size); + start = drm_buddy_block_offset(mm->roots[i]); + __force_merge(mm, start, start + size, order); + + root_size = mm->chunk_size << order; + size -= root_size; + } + + for (i = 0; i <= mm->max_order; ++i) { + struct drm_buddy_block *block; + + list_for_each_entry_reverse(block, &mm->free_list[i], link) { + if (is_clear != drm_buddy_block_is_clear(block)) { + if (is_clear) { + mark_cleared(block); + mm->clear_avail += drm_buddy_block_size(mm, block); + } else { + clear_reset(block); + mm->clear_avail -= drm_buddy_block_size(mm, block); + } + } + } + } +} +EXPORT_SYMBOL(drm_buddy_reset_clear); + +/** * drm_buddy_free_block - free a block * * @mm: DRM buddy manager diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index b781601946db..63a70f285cce 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -862,11 +862,23 @@ EXPORT_SYMBOL_FOR_TESTS_ONLY(drm_framebuffer_free); int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, const struct drm_framebuffer_funcs *funcs) { + unsigned int i; int ret; + bool exists; if (WARN_ON_ONCE(fb->dev != dev || !fb->format)) return -EINVAL; + for (i = 0; i < fb->format->num_planes; i++) { + if (drm_WARN_ON_ONCE(dev, fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i))) + fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + if (fb->obj[i]) { + exists = drm_gem_object_handle_get_if_exists_unlocked(fb->obj[i]); + if (exists) + fb->internal_flags |= DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + } + } + INIT_LIST_HEAD(&fb->filp_head); fb->funcs = funcs; @@ -875,7 +887,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB, false, drm_framebuffer_free); if (ret) - goto out; + goto err; mutex_lock(&dev->mode_config.fb_lock); dev->mode_config.num_fb++; @@ -883,7 +895,16 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, mutex_unlock(&dev->mode_config.fb_lock); drm_mode_object_register(dev, &fb->base); -out: + + return 0; + +err: + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) { + drm_gem_object_handle_put_unlocked(fb->obj[i]); + fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + } + } return ret; } EXPORT_SYMBOL(drm_framebuffer_init); @@ -960,6 +981,12 @@ EXPORT_SYMBOL(drm_framebuffer_unregister_private); void drm_framebuffer_cleanup(struct drm_framebuffer *fb) { struct drm_device *dev = fb->dev; + unsigned int i; + + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) + drm_gem_object_handle_put_unlocked(fb->obj[i]); + } mutex_lock(&dev->mode_config.fb_lock); list_del(&fb->head); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 4bf0a76bb35e..ac0524595bd6 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -223,23 +223,34 @@ static void drm_gem_object_handle_get(struct drm_gem_object *obj) } /** - * drm_gem_object_handle_get_unlocked - acquire reference on user-space handles + * drm_gem_object_handle_get_if_exists_unlocked - acquire reference on user-space handle, if any * @obj: GEM object * - * Acquires a reference on the GEM buffer object's handle. Required - * to keep the GEM object alive. Call drm_gem_object_handle_put_unlocked() - * to release the reference. + * Acquires a reference on the GEM buffer object's handle. Required to keep + * the GEM object alive. Call drm_gem_object_handle_put_if_exists_unlocked() + * to release the reference. Does nothing if the buffer object has no handle. + * + * Returns: + * True if a handle exists, or false otherwise */ -void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj) +bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; guard(mutex)(&dev->object_name_lock); - drm_WARN_ON(dev, !obj->handle_count); /* first ref taken in create-tail helper */ + /* + * First ref taken during GEM object creation, if any. Some + * drivers set up internal framebuffers with GEM objects that + * do not have a GEM handle. Hence, this counter can be zero. + */ + if (!obj->handle_count) + return false; + drm_gem_object_handle_get(obj); + + return true; } -EXPORT_SYMBOL(drm_gem_object_handle_get_unlocked); /** * drm_gem_object_handle_free - release resources bound to userspace handles @@ -272,7 +283,7 @@ static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj) } /** - * drm_gem_object_handle_put_unlocked - releases reference on user-space handles + * drm_gem_object_handle_put_unlocked - releases reference on user-space handle * @obj: GEM object * * Releases a reference on the GEM buffer object's handle. Possibly releases @@ -283,14 +294,14 @@ void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) struct drm_device *dev = obj->dev; bool final = false; - if (WARN_ON(READ_ONCE(obj->handle_count) == 0)) + if (drm_WARN_ON(dev, READ_ONCE(obj->handle_count) == 0)) return; /* - * Must bump handle count first as this may be the last - * ref, in which case the object would disappear before we - * checked for a name - */ + * Must bump handle count first as this may be the last + * ref, in which case the object would disappear before + * we checked for a name. + */ mutex_lock(&dev->object_name_lock); if (--obj->handle_count == 0) { @@ -303,7 +314,6 @@ void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) if (final) drm_gem_object_put(obj); } -EXPORT_SYMBOL(drm_gem_object_handle_put_unlocked); /* * Called at device or object close to release the file's @@ -315,6 +325,9 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) struct drm_file *file_priv = data; struct drm_gem_object *obj = ptr; + if (drm_WARN_ON(obj->dev, !data)) + return 0; + if (obj->funcs->close) obj->funcs->close(obj, file_priv); @@ -435,7 +448,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, idr_preload(GFP_KERNEL); spin_lock(&file_priv->table_lock); - ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); + ret = idr_alloc(&file_priv->object_idr, NULL, 1, 0, GFP_NOWAIT); spin_unlock(&file_priv->table_lock); idr_preload_end(); @@ -456,6 +469,11 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, goto err_revoke; } + /* mirrors drm_gem_handle_delete to avoid races */ + spin_lock(&file_priv->table_lock); + obj = idr_replace(&file_priv->object_idr, obj, handle); + WARN_ON(obj != NULL); + spin_unlock(&file_priv->table_lock); *handlep = handle; return 0; diff --git a/drivers/gpu/drm/drm_gem_dma_helper.c b/drivers/gpu/drm/drm_gem_dma_helper.c index b7f033d4352a..4f0320df858f 100644 --- a/drivers/gpu/drm/drm_gem_dma_helper.c +++ b/drivers/gpu/drm/drm_gem_dma_helper.c @@ -230,7 +230,7 @@ void drm_gem_dma_free(struct drm_gem_dma_object *dma_obj) if (drm_gem_is_imported(gem_obj)) { if (dma_obj->vaddr) - dma_buf_vunmap_unlocked(gem_obj->dma_buf, &map); + dma_buf_vunmap_unlocked(gem_obj->import_attach->dmabuf, &map); drm_prime_gem_destroy(gem_obj, dma_obj->sgt); } else if (dma_obj->vaddr) { if (dma_obj->map_noncoherent) diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 14a87788695d..6ff22e04029e 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -99,7 +99,7 @@ void drm_gem_fb_destroy(struct drm_framebuffer *fb) unsigned int i; for (i = 0; i < fb->format->num_planes; i++) - drm_gem_object_handle_put_unlocked(fb->obj[i]); + drm_gem_object_put(fb->obj[i]); drm_framebuffer_cleanup(fb); kfree(fb); @@ -182,10 +182,8 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, if (!objs[i]) { drm_dbg_kms(dev, "Failed to lookup GEM object\n"); ret = -ENOENT; - goto err_gem_object_handle_put_unlocked; + goto err_gem_object_put; } - drm_gem_object_handle_get_unlocked(objs[i]); - drm_gem_object_put(objs[i]); min_size = (height - 1) * mode_cmd->pitches[i] + drm_format_info_min_pitch(info, i, width) @@ -195,22 +193,22 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, drm_dbg_kms(dev, "GEM object size (%zu) smaller than minimum size (%u) for plane %d\n", objs[i]->size, min_size, i); - drm_gem_object_handle_put_unlocked(objs[i]); + drm_gem_object_put(objs[i]); ret = -EINVAL; - goto err_gem_object_handle_put_unlocked; + goto err_gem_object_put; } } ret = drm_gem_fb_init(dev, fb, mode_cmd, objs, i, funcs); if (ret) - goto err_gem_object_handle_put_unlocked; + goto err_gem_object_put; return 0; -err_gem_object_handle_put_unlocked: +err_gem_object_put: while (i > 0) { --i; - drm_gem_object_handle_put_unlocked(objs[i]); + drm_gem_object_put(objs[i]); } return ret; } @@ -421,6 +419,7 @@ EXPORT_SYMBOL(drm_gem_fb_vunmap); static void __drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir, unsigned int num_planes) { + struct dma_buf_attachment *import_attach; struct drm_gem_object *obj; int ret; @@ -429,9 +428,10 @@ static void __drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_dat obj = drm_gem_fb_get_obj(fb, num_planes); if (!obj) continue; + import_attach = obj->import_attach; if (!drm_gem_is_imported(obj)) continue; - ret = dma_buf_end_cpu_access(obj->dma_buf, dir); + ret = dma_buf_end_cpu_access(import_attach->dmabuf, dir); if (ret) drm_err(fb->dev, "dma_buf_end_cpu_access(%u, %d) failed: %d\n", ret, num_planes, dir); @@ -454,6 +454,7 @@ static void __drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_dat */ int drm_gem_fb_begin_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir) { + struct dma_buf_attachment *import_attach; struct drm_gem_object *obj; unsigned int i; int ret; @@ -464,9 +465,10 @@ int drm_gem_fb_begin_cpu_access(struct drm_framebuffer *fb, enum dma_data_direct ret = -EINVAL; goto err___drm_gem_fb_end_cpu_access; } + import_attach = obj->import_attach; if (!drm_gem_is_imported(obj)) continue; - ret = dma_buf_begin_cpu_access(obj->dma_buf, dir); + ret = dma_buf_begin_cpu_access(import_attach->dmabuf, dir); if (ret) goto err___drm_gem_fb_end_cpu_access; } diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index aa43265f4f4f..a5dbee6974ab 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -349,7 +349,7 @@ int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem, int ret = 0; if (drm_gem_is_imported(obj)) { - ret = dma_buf_vmap(obj->dma_buf, map); + ret = dma_buf_vmap(obj->import_attach->dmabuf, map); } else { pgprot_t prot = PAGE_KERNEL; @@ -409,7 +409,7 @@ void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem, struct drm_gem_object *obj = &shmem->base; if (drm_gem_is_imported(obj)) { - dma_buf_vunmap(obj->dma_buf, map); + dma_buf_vunmap(obj->import_attach->dmabuf, map); } else { dma_resv_assert_held(shmem->base.resv); diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index be77d61a16ce..60c282881958 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -161,7 +161,7 @@ void drm_sysfs_lease_event(struct drm_device *dev); /* drm_gem.c */ int drm_gem_init(struct drm_device *dev); -void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj); +bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj); void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj); int drm_gem_handle_create_tail(struct drm_file *file_priv, struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index dd55b1cb764d..18492daae4b3 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -27,7 +27,7 @@ //! * <https://github.com/erwanvivien/fast_qr> //! * <https://github.com/bjguillot/qr> -use kernel::{prelude::*, str::CStr}; +use kernel::prelude::*; #[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd)] struct Version(usize); diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index d828502268b8..a0a5d725eab0 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -453,7 +453,13 @@ struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev, } mutex_lock(&dev->object_name_lock); - /* re-export the original imported/exported object */ + /* re-export the original imported object */ + if (obj->import_attach) { + dmabuf = obj->import_attach->dmabuf; + get_dma_buf(dmabuf); + goto out_have_obj; + } + if (obj->dma_buf) { get_dma_buf(obj->dma_buf); dmabuf = obj->dma_buf; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index 917ad527c961..40a50c60dfff 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -65,7 +65,7 @@ static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj) struct iosys_map map = IOSYS_MAP_INIT_VADDR(etnaviv_obj->vaddr); if (etnaviv_obj->vaddr) - dma_buf_vunmap_unlocked(etnaviv_obj->base.dma_buf, &map); + dma_buf_vunmap_unlocked(etnaviv_obj->base.import_attach->dmabuf, &map); /* Don't drop the pages for imported dmabuf, as they are not * ours, just free the array we allocated: @@ -82,7 +82,7 @@ static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj) lockdep_assert_held(&etnaviv_obj->lock); - ret = dma_buf_vmap(etnaviv_obj->base.dma_buf, &map); + ret = dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf, &map); if (ret) return NULL; return map.vaddr; diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index ba7b8938b17c..166ee11831ab 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1938,7 +1938,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display, int index, len; if (drm_WARN_ON(display->drm, - !data || panel->vbt.dsi.seq_version != 1)) + !data || panel->vbt.dsi.seq_version >= 3)) return 0; /* index = 1 to skip sequence byte */ @@ -1961,7 +1961,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display, } /* - * Some v1 VBT MIPI sequences do the deassert in the init OTP sequence. + * Some v1/v2 VBT MIPI sequences do the deassert in the init OTP sequence. * The deassert must be done before calling intel_dsi_device_ready, so for * these devices we split the init OTP sequence into a deassert sequence and * the actual init OTP part. @@ -1972,9 +1972,9 @@ static void vlv_fixup_mipi_sequences(struct intel_display *display, u8 *init_otp; int len; - /* Limit this to v1 vid-mode sequences */ + /* Limit this to v1/v2 vid-mode sequences */ if (panel->vbt.dsi.config->is_cmd_mode || - panel->vbt.dsi.seq_version != 1) + panel->vbt.dsi.seq_version >= 3) return; /* Only do this if there are otp and assert seqs and no deassert seq */ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 6f0a0bc71b06..43aa1f97378b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7061,7 +7061,8 @@ static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat struct drm_i915_private *i915 = to_i915(intel_state->base.dev); struct drm_plane *plane; struct drm_plane_state *new_plane_state; - int ret, i; + long ret; + int i; for_each_new_plane_in_state(&intel_state->base, plane, new_plane_state, i) { if (new_plane_state->fence) { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 640c43bf62d4..724de7ed3c04 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1604,6 +1604,12 @@ int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, u8 *link_bw, u8 *rate_select) { + struct intel_display *display = to_intel_display(intel_dp); + + /* FIXME g4x can't generate an exact 2.7GHz with the 96MHz non-SSC refclk */ + if (display->platform.g4x && port_clock == 268800) + port_clock = 270000; + /* eDP 1.4 rate select method. */ if (intel_dp->use_rate_select) { *link_bw = 0; diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c index 41f5d89e78b8..3e349d039fc0 100644 --- a/drivers/gpu/drm/imagination/pvr_power.c +++ b/drivers/gpu/drm/imagination/pvr_power.c @@ -386,13 +386,13 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) if (!err) { if (hard_reset) { pvr_dev->fw_dev.booted = false; - WARN_ON(pm_runtime_force_suspend(from_pvr_device(pvr_dev)->dev)); + WARN_ON(pvr_power_device_suspend(from_pvr_device(pvr_dev)->dev)); err = pvr_fw_hard_reset(pvr_dev); if (err) goto err_device_lost; - err = pm_runtime_force_resume(from_pvr_device(pvr_dev)->dev); + err = pvr_power_device_resume(from_pvr_device(pvr_dev)->dev); pvr_dev->fw_dev.booted = true; if (err) goto err_device_lost; diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.c b/drivers/gpu/drm/mediatek/mtk_crtc.c index 8f6fba4217ec..bc7527542fdc 100644 --- a/drivers/gpu/drm/mediatek/mtk_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_crtc.c @@ -719,6 +719,39 @@ int mtk_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, return 0; } +void mtk_crtc_plane_disable(struct drm_crtc *crtc, struct drm_plane *plane) +{ +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + struct mtk_crtc *mtk_crtc = to_mtk_crtc(crtc); + struct mtk_plane_state *plane_state = to_mtk_plane_state(plane->state); + int i; + + /* no need to wait for disabling the plane by CPU */ + if (!mtk_crtc->cmdq_client.chan) + return; + + if (!mtk_crtc->enabled) + return; + + /* set pending plane state to disabled */ + for (i = 0; i < mtk_crtc->layer_nr; i++) { + struct drm_plane *mtk_plane = &mtk_crtc->planes[i]; + struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(mtk_plane->state); + + if (mtk_plane->index == plane->index) { + memcpy(mtk_plane_state, plane_state, sizeof(*plane_state)); + break; + } + } + mtk_crtc_update_config(mtk_crtc, false); + + /* wait for planes to be disabled by CMDQ */ + wait_event_timeout(mtk_crtc->cb_blocking_queue, + mtk_crtc->cmdq_vblank_cnt == 0, + msecs_to_jiffies(500)); +#endif +} + void mtk_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane, struct drm_atomic_state *state) { @@ -930,7 +963,8 @@ static int mtk_crtc_init_comp_planes(struct drm_device *drm_dev, mtk_ddp_comp_supported_rotations(comp), mtk_ddp_comp_get_blend_modes(comp), mtk_ddp_comp_get_formats(comp), - mtk_ddp_comp_get_num_formats(comp), i); + mtk_ddp_comp_get_num_formats(comp), + mtk_ddp_comp_is_afbc_supported(comp), i); if (ret) return ret; diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.h b/drivers/gpu/drm/mediatek/mtk_crtc.h index 388e900b6f4d..828f109b83e7 100644 --- a/drivers/gpu/drm/mediatek/mtk_crtc.h +++ b/drivers/gpu/drm/mediatek/mtk_crtc.h @@ -21,6 +21,7 @@ int mtk_crtc_create(struct drm_device *drm_dev, const unsigned int *path, unsigned int num_conn_routes); int mtk_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, struct mtk_plane_state *state); +void mtk_crtc_plane_disable(struct drm_crtc *crtc, struct drm_plane *plane); void mtk_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane, struct drm_atomic_state *plane_state); struct device *mtk_crtc_dma_dev_get(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/mediatek/mtk_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_ddp_comp.c index edc6417639e6..ac6620e10262 100644 --- a/drivers/gpu/drm/mediatek/mtk_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_ddp_comp.c @@ -366,6 +366,7 @@ static const struct mtk_ddp_comp_funcs ddp_ovl = { .get_blend_modes = mtk_ovl_get_blend_modes, .get_formats = mtk_ovl_get_formats, .get_num_formats = mtk_ovl_get_num_formats, + .is_afbc_supported = mtk_ovl_is_afbc_supported, }; static const struct mtk_ddp_comp_funcs ddp_postmask = { diff --git a/drivers/gpu/drm/mediatek/mtk_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_ddp_comp.h index 39720b27f4e9..7289b3dcf22f 100644 --- a/drivers/gpu/drm/mediatek/mtk_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_ddp_comp.h @@ -83,6 +83,7 @@ struct mtk_ddp_comp_funcs { u32 (*get_blend_modes)(struct device *dev); const u32 *(*get_formats)(struct device *dev); size_t (*get_num_formats)(struct device *dev); + bool (*is_afbc_supported)(struct device *dev); void (*connect)(struct device *dev, struct device *mmsys_dev, unsigned int next); void (*disconnect)(struct device *dev, struct device *mmsys_dev, unsigned int next); void (*add)(struct device *dev, struct mtk_mutex *mutex); @@ -294,6 +295,14 @@ size_t mtk_ddp_comp_get_num_formats(struct mtk_ddp_comp *comp) return 0; } +static inline bool mtk_ddp_comp_is_afbc_supported(struct mtk_ddp_comp *comp) +{ + if (comp->funcs && comp->funcs->is_afbc_supported) + return comp->funcs->is_afbc_supported(comp->dev); + + return false; +} + static inline bool mtk_ddp_comp_add(struct mtk_ddp_comp *comp, struct mtk_mutex *mutex) { if (comp->funcs && comp->funcs->add) { diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h index 04217a36939c..679d413bf10b 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h @@ -106,6 +106,7 @@ void mtk_ovl_disable_vblank(struct device *dev); u32 mtk_ovl_get_blend_modes(struct device *dev); const u32 *mtk_ovl_get_formats(struct device *dev); size_t mtk_ovl_get_num_formats(struct device *dev); +bool mtk_ovl_is_afbc_supported(struct device *dev); void mtk_ovl_adaptor_add_comp(struct device *dev, struct mtk_mutex *mutex); void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index d0581c4e3c99..e0236353d499 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -236,6 +236,13 @@ size_t mtk_ovl_get_num_formats(struct device *dev) return ovl->data->num_formats; } +bool mtk_ovl_is_afbc_supported(struct device *dev) +{ + struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); + + return ovl->data->supports_afbc; +} + int mtk_ovl_clk_enable(struct device *dev) { struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index 6fb85bc6487a..a2fdceadf209 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -1095,7 +1095,6 @@ static const u32 mt8183_output_fmts[] = { }; static const u32 mt8195_dpi_output_fmts[] = { - MEDIA_BUS_FMT_BGR888_1X24, MEDIA_BUS_FMT_RGB888_1X24, MEDIA_BUS_FMT_RGB888_2X12_LE, MEDIA_BUS_FMT_RGB888_2X12_BE, @@ -1103,18 +1102,19 @@ static const u32 mt8195_dpi_output_fmts[] = { MEDIA_BUS_FMT_YUYV8_1X16, MEDIA_BUS_FMT_YUYV10_1X20, MEDIA_BUS_FMT_YUYV12_1X24, + MEDIA_BUS_FMT_BGR888_1X24, MEDIA_BUS_FMT_YUV8_1X24, MEDIA_BUS_FMT_YUV10_1X30, }; static const u32 mt8195_dp_intf_output_fmts[] = { - MEDIA_BUS_FMT_BGR888_1X24, MEDIA_BUS_FMT_RGB888_1X24, MEDIA_BUS_FMT_RGB888_2X12_LE, MEDIA_BUS_FMT_RGB888_2X12_BE, MEDIA_BUS_FMT_RGB101010_1X30, MEDIA_BUS_FMT_YUYV8_1X16, MEDIA_BUS_FMT_YUYV10_1X20, + MEDIA_BUS_FMT_BGR888_1X24, MEDIA_BUS_FMT_YUV8_1X24, MEDIA_BUS_FMT_YUV10_1X30, }; diff --git a/drivers/gpu/drm/mediatek/mtk_plane.c b/drivers/gpu/drm/mediatek/mtk_plane.c index 655106bbb76d..cbc4f37da8ba 100644 --- a/drivers/gpu/drm/mediatek/mtk_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_plane.c @@ -285,9 +285,14 @@ static void mtk_plane_atomic_disable(struct drm_plane *plane, struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(new_state); + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, + plane); + mtk_plane_state->pending.enable = false; wmb(); /* Make sure the above parameter is set before update */ mtk_plane_state->pending.dirty = true; + + mtk_crtc_plane_disable(old_state->crtc, plane); } static void mtk_plane_atomic_update(struct drm_plane *plane, @@ -321,7 +326,8 @@ static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = { int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, unsigned long possible_crtcs, enum drm_plane_type type, unsigned int supported_rotations, const u32 blend_modes, - const u32 *formats, size_t num_formats, unsigned int plane_idx) + const u32 *formats, size_t num_formats, + bool supports_afbc, unsigned int plane_idx) { int err; @@ -332,7 +338,9 @@ int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, err = drm_universal_plane_init(dev, plane, possible_crtcs, &mtk_plane_funcs, formats, - num_formats, modifiers, type, NULL); + num_formats, + supports_afbc ? modifiers : NULL, + type, NULL); if (err) { DRM_ERROR("failed to initialize plane\n"); return err; diff --git a/drivers/gpu/drm/mediatek/mtk_plane.h b/drivers/gpu/drm/mediatek/mtk_plane.h index 3b13b89989c7..95c5fa5295d8 100644 --- a/drivers/gpu/drm/mediatek/mtk_plane.h +++ b/drivers/gpu/drm/mediatek/mtk_plane.h @@ -49,5 +49,6 @@ to_mtk_plane_state(struct drm_plane_state *state) int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, unsigned long possible_crtcs, enum drm_plane_type type, unsigned int supported_rotations, const u32 blend_modes, - const u32 *formats, size_t num_formats, unsigned int plane_idx); + const u32 *formats, size_t num_formats, + bool supports_afbc, unsigned int plane_idx); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index 200e65a7cefc..c7869a639bef 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -314,14 +314,10 @@ nouveau_debugfs_fini(struct nouveau_drm *drm) drm->debugfs = NULL; } -int +void nouveau_module_debugfs_init(void) { nouveau_debugfs_root = debugfs_create_dir("nouveau", NULL); - if (IS_ERR(nouveau_debugfs_root)) - return PTR_ERR(nouveau_debugfs_root); - - return 0; } void diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.h b/drivers/gpu/drm/nouveau/nouveau_debugfs.h index b7617b344ee2..d05ed0e641c4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.h +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.h @@ -24,7 +24,7 @@ extern void nouveau_debugfs_fini(struct nouveau_drm *); extern struct dentry *nouveau_debugfs_root; -int nouveau_module_debugfs_init(void); +void nouveau_module_debugfs_init(void); void nouveau_module_debugfs_fini(void); #else static inline void @@ -42,10 +42,9 @@ nouveau_debugfs_fini(struct nouveau_drm *drm) { } -static inline int +static inline void nouveau_module_debugfs_init(void) { - return 0; } static inline void diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 0c82a63cd49d..1527b801f013 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1461,9 +1461,7 @@ nouveau_drm_init(void) if (!nouveau_modeset) return 0; - ret = nouveau_module_debugfs_init(); - if (ret) - return ret; + nouveau_module_debugfs_init(); #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER platform_driver_register(&nouveau_platform_driver); diff --git a/drivers/gpu/drm/nouveau/nvif/chan.c b/drivers/gpu/drm/nouveau/nvif/chan.c index baa10227d51a..80c01017d642 100644 --- a/drivers/gpu/drm/nouveau/nvif/chan.c +++ b/drivers/gpu/drm/nouveau/nvif/chan.c @@ -39,6 +39,9 @@ nvif_chan_gpfifo_post(struct nvif_chan *chan) const u32 pbptr = (chan->push.cur - map) + chan->func->gpfifo.post_size; const u32 gpptr = (chan->gpfifo.cur + 1) & chan->gpfifo.max; + if (!chan->func->gpfifo.post) + return 0; + return chan->func->gpfifo.post(chan, gpptr, pbptr); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c index baf42339f93e..588cb4ab85cb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c @@ -719,7 +719,6 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, .buffer.length = 4, - .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL), }, *obj; caps->status = 0xffff; @@ -727,17 +726,22 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) if (!acpi_check_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, BIT_ULL(0x1a))) return; + argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL); + if (!argv4.buffer.pointer) + return; + obj = acpi_evaluate_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, 0x1a, &argv4); if (!obj) - return; + goto done; if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) - return; + goto done; caps->status = 0; caps->optimusCaps = *(u32 *)obj->buffer.pointer; +done: ACPI_FREE(obj); kfree(argv4.buffer.pointer); @@ -754,24 +758,28 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt) union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, .buffer.length = sizeof(caps), - .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL), }, *obj; jt->status = 0xffff; + argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL); + if (!argv4.buffer.pointer) + return; + obj = acpi_evaluate_dsm(handle, &JT_DSM_GUID, JT_DSM_REV, 0x1, &argv4); if (!obj) - return; + goto done; if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) - return; + goto done; jt->status = 0; jt->jtCaps = *(u32 *)obj->buffer.pointer; jt->jtRevId = (jt->jtCaps & 0xfff00000) >> 20; jt->bSBIOSCaps = 0; +done: ACPI_FREE(obj); kfree(argv4.buffer.pointer); @@ -1744,6 +1752,13 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend) nvkm_gsp_sg_free(gsp->subdev.device, &gsp->sr.sgt); return ret; } + + /* + * TODO: Debug the GSP firmware / RPC handling to find out why + * without this Turing (but none of the other architectures) + * ends up resetting all channels after resume. + */ + msleep(50); } ret = r535_gsp_rpc_unloading_guest_driver(gsp, suspend); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 5657106c2f7d..15e2d505550f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -841,7 +841,6 @@ int panfrost_job_init(struct panfrost_device *pfdev) .num_rqs = DRM_SCHED_PRIORITY_COUNT, .credit_limit = 2, .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS), - .timeout_wq = pfdev->reset.wq, .name = "pan_js", .dev = pfdev->dev, }; @@ -879,6 +878,7 @@ int panfrost_job_init(struct panfrost_device *pfdev) pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0); if (!pfdev->reset.wq) return -ENOMEM; + args.timeout_wq = pfdev->reset.wq; for (j = 0; j < NUM_JOB_SLOTS; j++) { js->queue[j].fence_context = dma_fence_context_alloc(1); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index bbd39348a7ab..7a3e510327b7 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -26,7 +26,6 @@ * Jerome Glisse */ -#include <linux/console.h> #include <linux/efi.h> #include <linux/pci.h> #include <linux/pm_runtime.h> @@ -1635,11 +1634,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, pci_set_power_state(pdev, PCI_D3hot); } - if (notify_clients) { - console_lock(); - drm_client_dev_suspend(dev, true); - console_unlock(); - } + if (notify_clients) + drm_client_dev_suspend(dev, false); + return 0; } @@ -1661,17 +1658,11 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool notify_clients) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - if (notify_clients) { - console_lock(); - } if (resume) { pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - if (pci_enable_device(pdev)) { - if (notify_clients) - console_unlock(); + if (pci_enable_device(pdev)) return -1; - } } /* resume AGP if in use */ radeon_agp_resume(rdev); @@ -1747,10 +1738,8 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool notify_clients) if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) radeon_pm_compute_clocks(rdev); - if (notify_clients) { - drm_client_dev_resume(dev, true); - console_unlock(); - } + if (notify_clients) + drm_client_dev_resume(dev, false); return 0; } diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index e671aa241720..ac678de7fe5e 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -355,17 +355,6 @@ void drm_sched_entity_destroy(struct drm_sched_entity *entity) } EXPORT_SYMBOL(drm_sched_entity_destroy); -/* drm_sched_entity_clear_dep - callback to clear the entities dependency */ -static void drm_sched_entity_clear_dep(struct dma_fence *f, - struct dma_fence_cb *cb) -{ - struct drm_sched_entity *entity = - container_of(cb, struct drm_sched_entity, cb); - - entity->dependency = NULL; - dma_fence_put(f); -} - /* * drm_sched_entity_wakeup - callback to clear the entity's dependency and * wake up the scheduler @@ -376,7 +365,8 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, struct drm_sched_entity *entity = container_of(cb, struct drm_sched_entity, cb); - drm_sched_entity_clear_dep(f, cb); + entity->dependency = NULL; + dma_fence_put(f); drm_sched_wakeup(entity->rq->sched); } @@ -429,13 +419,6 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) fence = dma_fence_get(&s_fence->scheduled); dma_fence_put(entity->dependency); entity->dependency = fence; - if (!dma_fence_add_callback(fence, &entity->cb, - drm_sched_entity_clear_dep)) - return true; - - /* Ignore it when it is already scheduled */ - dma_fence_put(fence); - return false; } if (!dma_fence_add_callback(entity->dependency, &entity->cb, diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c index 2d9a0a3f6c38..7a38664e890e 100644 --- a/drivers/gpu/drm/tegra/nvdec.c +++ b/drivers/gpu/drm/tegra/nvdec.c @@ -261,10 +261,8 @@ static int nvdec_load_falcon_firmware(struct nvdec *nvdec) if (!client->group) { virt = dma_alloc_coherent(nvdec->dev, size, &iova, GFP_KERNEL); - - err = dma_mapping_error(nvdec->dev, iova); - if (err < 0) - return err; + if (!virt) + return -ENOMEM; } else { virt = tegra_drm_alloc(tegra, size, &iova); if (IS_ERR(virt)) diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index 1118a0250279..ce49282198cb 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@ -204,15 +204,16 @@ static void virtgpu_dma_buf_free_obj(struct drm_gem_object *obj) { struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj); struct virtio_gpu_device *vgdev = obj->dev->dev_private; + struct dma_buf_attachment *attach = obj->import_attach; if (drm_gem_is_imported(obj)) { - struct dma_buf *dmabuf = obj->dma_buf; + struct dma_buf *dmabuf = attach->dmabuf; dma_resv_lock(dmabuf->resv, NULL); virtgpu_dma_buf_unmap(bo); dma_resv_unlock(dmabuf->resv); - dma_buf_detach(dmabuf, obj->import_attach); + dma_buf_detach(dmabuf, attach); dma_buf_put(dmabuf); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index c55382167c1b..e417921af584 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -85,10 +85,10 @@ static int vmw_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) int ret; if (drm_gem_is_imported(obj)) { - ret = dma_buf_vmap(obj->dma_buf, map); + ret = dma_buf_vmap(obj->import_attach->dmabuf, map); if (!ret) { if (drm_WARN_ON(obj->dev, map->is_iomem)) { - dma_buf_vunmap(obj->dma_buf, map); + dma_buf_vunmap(obj->import_attach->dmabuf, map); return -EIO; } } @@ -102,7 +102,7 @@ static int vmw_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) static void vmw_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) { if (drm_gem_is_imported(obj)) - dma_buf_vunmap(obj->dma_buf, map); + dma_buf_vunmap(obj->import_attach->dmabuf, map); else drm_gem_ttm_vunmap(obj, map); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 7a8af2311318..11e60d687572 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -171,14 +171,32 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) #define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) +/** + * xe_devcoredump_read() - Read data from the Xe device coredump snapshot + * @buffer: Destination buffer to copy the coredump data into + * @offset: Offset in the coredump data to start reading from + * @count: Number of bytes to read + * @data: Pointer to the xe_devcoredump structure + * @datalen: Length of the data (unused) + * + * Reads a chunk of the coredump snapshot data into the provided buffer. + * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX), + * it is read directly from a pre-written buffer. For larger devcoredumps, + * the pre-written buffer must be periodically repopulated from the snapshot + * state due to kmalloc size limitations. + * + * Return: Number of bytes copied on success, or a negative error code on failure. + */ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; struct xe_devcoredump_snapshot *ss; - ssize_t byte_copied; + ssize_t byte_copied = 0; u32 chunk_offset; ssize_t new_chunk_position; + bool pm_needed = false; + int ret = 0; if (!coredump) return -ENODEV; @@ -188,20 +206,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX; + if (pm_needed) xe_pm_runtime_get(gt_to_xe(ss->gt)); mutex_lock(&coredump->lock); if (!ss->read.buffer) { - mutex_unlock(&coredump->lock); - return -ENODEV; + ret = -ENODEV; + goto unlock; } - if (offset >= ss->read.size) { - mutex_unlock(&coredump->lock); - return 0; - } + if (offset >= ss->read.size) + goto unlock; new_chunk_position = div_u64_rem(offset, XE_DEVCOREDUMP_CHUNK_MAX, @@ -221,12 +238,13 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, ss->read.size - offset; memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied); +unlock: mutex_unlock(&coredump->lock); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + if (pm_needed) xe_pm_runtime_put(gt_to_xe(ss->gt)); - return byte_copied; + return byte_copied ? byte_copied : ret; } static void xe_devcoredump_free(void *data) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 6c4cb9576fb6..e3517ce2e18c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -417,6 +417,8 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; + xe_mocs_init_early(gt); + return 0; } @@ -630,17 +632,15 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - err = xe_gt_pagefault_init(gt); + err = xe_gt_sysfs_init(gt); if (err) return err; - xe_mocs_init_early(gt); - - err = xe_gt_sysfs_init(gt); + err = gt_fw_domain_init(gt); if (err) return err; - err = gt_fw_domain_init(gt); + err = xe_gt_pagefault_init(gt); if (err) return err; @@ -839,6 +839,9 @@ static int gt_reset(struct xe_gt *gt) goto err_out; } + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_stop_prepare(gt); + xe_uc_gucrc_disable(>->uc); xe_uc_stop_prepare(>->uc); xe_gt_pagefault_reset(gt); diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 187fa6490eaf..6357325f3939 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -24,7 +24,7 @@ extern struct fault_attr gt_reset_failure; static inline bool xe_fault_inject_gt_reset(void) { - return should_fail(>_reset_failure, 1); + return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(>_reset_failure, 1); } struct xe_gt *xe_gt_alloc(struct xe_tile *tile); diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 10622ca471a2..6717a636b1d9 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -444,6 +444,7 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) #define PF_MULTIPLIER 8 pf_queue->num_dw = (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; + pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); #undef PF_MULTIPLIER pf_queue->gt = gt; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index c08efca6420e..35489fa81825 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -172,6 +172,25 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) pf_clear_vf_scratch_regs(gt, vfid); } +static void pf_cancel_restart(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + if (cancel_work_sync(>->sriov.pf.workers.restart)) + xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n"); +} + +/** + * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support. + * @gt: the &xe_gt + * + * This function can only be called on the PF. + */ +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ + pf_cancel_restart(gt); +} + static void pf_restart(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index f474509411c0..e2b2ff8132dc 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -13,6 +13,7 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt); int xe_gt_sriov_pf_init(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt); void xe_gt_sriov_pf_restart(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) @@ -29,6 +30,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } +static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ +} + static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt) { } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 2420a548cacc..53a44702c04a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -2364,6 +2364,21 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return err; } +static int pf_push_self_config(struct xe_gt *gt) +{ + int err; + + err = pf_push_full_vf_config(gt, PFID); + if (err) { + xe_gt_sriov_err(gt, "Failed to push self configuration (%pe)\n", + ERR_PTR(err)); + return err; + } + + xe_gt_sriov_dbg_verbose(gt, "self configuration completed\n"); + return 0; +} + static void fini_config(void *arg) { struct xe_gt *gt = arg; @@ -2387,9 +2402,17 @@ static void fini_config(void *arg) int xe_gt_sriov_pf_config_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + int err; xe_gt_assert(gt, IS_SRIOV_PF(xe)); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (err) + return err; + return devm_add_action_or_reset(xe->drm.dev, fini_config, gt); } @@ -2407,6 +2430,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt) unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); unsigned int fail = 0, skip = 0; + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = 1; n <= total_vfs; n++) { if (xe_gt_sriov_pf_config_is_empty(gt, n)) skip++; diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 63db66df064b..023ed6a6b49d 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -78,6 +78,9 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level } lmtt_assert(lmtt, xe_bo_is_vram(bo)); + lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE)); + + xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, bo->size); pt->level = level; pt->bo = bo; @@ -91,6 +94,9 @@ out: static void lmtt_pt_free(struct xe_lmtt_pt *pt) { + lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n", + pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE)); + xe_bo_unpin_map_no_vm(pt->bo); kfree(pt); } @@ -226,9 +232,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, switch (lmtt->ops->lmtt_pte_size(level)) { case sizeof(u32): + lmtt_assert(lmtt, !overflows_type(pte, u32)); + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); break; case sizeof(u64): + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); break; default: diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7acdc4c78866..07a5161c7d5b 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -863,7 +863,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat, &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) @@ -1817,8 +1817,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, xe_bo_assert_held(bo); /* Use bounce buffer for small access and unaligned access */ - if (len & XE_CACHELINE_MASK || - ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) { + if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) || + !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) { int buf_offset = 0; /* @@ -1848,7 +1848,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, (void *)ptr, - sizeof(bounce), 0); + sizeof(bounce), write); if (err) return err; } else { diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e4742e27e2cd..da6793c2f991 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -20,7 +20,7 @@ struct xe_modparam xe_modparam = { .probe_display = true, - .guc_log_level = 3, + .guc_log_level = IS_ENABLED(CONFIG_DRM_XE_DEBUG) ? 3 : 1, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, .wedged_mode = 1, .svm_notifier_size = 512, diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index ac4beaed58ff..278af53c74dc 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -140,7 +140,6 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ - .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index ff749edc005b..ad263de44111 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -134,7 +134,7 @@ int xe_pm_suspend(struct xe_device *xe) /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) - goto err_pxp; + goto err_display; for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); @@ -151,7 +151,6 @@ int xe_pm_suspend(struct xe_device *xe) err_display: xe_display_pm_resume(xe); -err_pxp: xe_pxp_pm_resume(xe->pxp); err: drm_dbg(&xe->drm, "Device suspend failed %d\n", err); @@ -753,11 +752,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } /** - * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold * @xe: xe device instance - * @threshold: VRAM size in bites for the D3cold threshold + * @threshold: VRAM size in MiB for the D3cold threshold * - * Returns 0 for success, negative error code otherwise. + * Return: + * * 0 - success + * * -EINVAL - invalid argument */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index bc1689db4cd7..7b50c7c1ee21 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -110,13 +110,14 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) return i; } -static int emit_flush_invalidate(u32 *dw, int i) +static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i) { dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | - MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX; - dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR; - dw[i++] = 0; + MI_FLUSH_IMM_DW; + + dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; + dw[i++] = val; return i; } @@ -397,23 +398,20 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, static void emit_migration_job_gen12(struct xe_sched_job *job, struct xe_lrc *lrc, u32 seqno) { + u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc); u32 dw[MAX_JOB_SIZE_DW], i = 0; i = emit_copy_timestamp(lrc, dw, i); - i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, dw, i); + i = emit_store_imm_ggtt(saddr, seqno, dw, i); dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */ i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i); - if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { - /* XXX: Do we need this? Leaving for now. */ - dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(dw, i); - dw[i++] = preparser_disable(false); - } + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(saddr, seqno, dw, i); + dw[i++] = preparser_disable(false); i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2741849bbf4d..a6612105201a 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -114,10 +114,10 @@ struct fw_blobs_by_type { #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \ - fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ - fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \ + fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \ fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \ fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \ diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 69c1d7fc695e..6d70109fcc43 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -38,10 +38,10 @@ GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) 22019338487 MEDIA_VERSION(2000) - GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) 22019338487_display PLATFORM(LUNARLAKE) -16023588340 GRAPHICS_VERSION(2001) +16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) 14019789679 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 2004) no_media_l3 MEDIA_VERSION(3000) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index b348d0464314..b31b8a2fd540 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1883,9 +1883,12 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags) /* * 7 extra bytes are necessary to achieve proper functionality * of implement() working on 8 byte chunks + * 1 extra byte for the report ID if it is null (not used) so + * we can reserve that extra byte in the first position of the buffer + * when sending it to .raw_request() */ - u32 len = hid_report_len(report) + 7; + u32 len = hid_report_len(report) + 7 + (report->id == 0); return kzalloc(len, flags); } @@ -1973,7 +1976,7 @@ static struct hid_report *hid_get_report(struct hid_report_enum *report_enum, int __hid_request(struct hid_device *hid, struct hid_report *report, enum hid_class_request reqtype) { - char *buf; + char *buf, *data_buf; int ret; u32 len; @@ -1981,13 +1984,19 @@ int __hid_request(struct hid_device *hid, struct hid_report *report, if (!buf) return -ENOMEM; + data_buf = buf; len = hid_report_len(report); + if (report->id == 0) { + /* reserve the first byte for the report ID */ + data_buf++; + len++; + } + if (reqtype == HID_REQ_SET_REPORT) - hid_output_report(report, buf); + hid_output_report(report, data_buf); - ret = hid->ll_driver->raw_request(hid, report->id, buf, len, - report->type, reqtype); + ret = hid_hw_raw_request(hid, report->id, buf, len, report->type, reqtype); if (ret < 0) { dbg_hid("unable to complete request: %d\n", ret); goto out; diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index c6b6b1029540..4424c0512bae 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -3299,7 +3299,7 @@ static const char *keys[KEY_MAX + 1] = { [BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "ToolDoubleTap", [BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_TOOL_QUADTAP] = "ToolQuadrupleTap", [BTN_GEAR_DOWN] = "BtnGearDown", [BTN_GEAR_UP] = "BtnGearUp", - [BTN_WHEEL] = "BtnWheel", [KEY_OK] = "Ok", + [KEY_OK] = "Ok", [KEY_SELECT] = "Select", [KEY_GOTO] = "Goto", [KEY_CLEAR] = "Clear", [KEY_POWER2] = "Power2", [KEY_OPTION] = "Option", [KEY_INFO] = "Info", diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 1cd188b73b74..57623ca7f350 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -9,7 +9,7 @@ config HYPERV select PARAVIRT select X86_HV_CALLBACK_VECTOR if X86 select OF_EARLY_FLATTREE if OF - select SYSFB if !HYPERV_VTL_MODE + select SYSFB if EFI && !HYPERV_VTL_MODE help Select this option to run Linux as a Hyper-V client operating system. diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 35f26fa1ffe7..7c7c66e0dc3f 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -18,6 +18,7 @@ #include <linux/uio.h> #include <linux/interrupt.h> #include <linux/set_memory.h> +#include <linux/export.h> #include <asm/page.h> #include <asm/mshyperv.h> diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 6e084c207414..65dd299e2944 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -20,6 +20,7 @@ #include <linux/delay.h> #include <linux/cpu.h> #include <linux/hyperv.h> +#include <linux/export.h> #include <asm/mshyperv.h> #include <linux/sched/isolation.h> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index be490c598785..1fe3573ae52a 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -519,7 +519,10 @@ void vmbus_set_event(struct vmbus_channel *channel) else WARN_ON_ONCE(1); } else { - hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event); + u64 control = HVCALL_SIGNAL_EVENT; + + control |= hv_nested ? HV_HYPERCALL_NESTED : 0; + hv_do_fast_hypercall8(control, channel->sig_event); } } EXPORT_SYMBOL_GPL(vmbus_set_event); diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 308c8f279df8..b14c5f9e0ef2 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -85,8 +85,10 @@ int hv_post_message(union hv_connection_id connection_id, else status = HV_STATUS_INVALID_PARAMETER; } else { - status = hv_do_hypercall(HVCALL_POST_MESSAGE, - aligned_msg, NULL); + u64 control = HVCALL_POST_MESSAGE; + + control |= hv_nested ? HV_HYPERCALL_NESTED : 0; + status = hv_do_hypercall(control, aligned_msg, NULL); } local_irq_restore(flags); diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c index 7d7ecb6f6137..fbb4eb3901bb 100644 --- a/drivers/hv/hv_proc.c +++ b/drivers/hv/hv_proc.c @@ -6,6 +6,7 @@ #include <linux/slab.h> #include <linux/cpuhotplug.h> #include <linux/minmax.h> +#include <linux/export.h> #include <asm/mshyperv.h> /* diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c index 2575e6d7a71f..6f227a8a5af7 100644 --- a/drivers/hv/mshv_common.c +++ b/drivers/hv/mshv_common.c @@ -13,6 +13,7 @@ #include <linux/mm.h> #include <asm/mshyperv.h> #include <linux/resume_user_mode.h> +#include <linux/export.h> #include "mshv.h" diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c index a222a16107f6..c9c274f29c3c 100644 --- a/drivers/hv/mshv_root_hv_call.c +++ b/drivers/hv/mshv_root_hv_call.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/export.h> #include <asm/mshyperv.h> #include "mshv_root.h" diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 3c9b02471760..23ce1fb70de1 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/prefetch.h> #include <linux/io.h> +#include <linux/export.h> #include <asm/mshyperv.h> #include "hyperv_vmbus.h" diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 33b524b4eb5e..2ed5a1e89d69 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2509,7 +2509,7 @@ static int vmbus_acpi_add(struct platform_device *pdev) return 0; } #endif - +#ifndef HYPERVISOR_CALLBACK_VECTOR static int vmbus_set_irq(struct platform_device *pdev) { struct irq_data *data; @@ -2534,6 +2534,7 @@ static int vmbus_set_irq(struct platform_device *pdev) return 0; } +#endif static int vmbus_device_add(struct platform_device *pdev) { @@ -2549,11 +2550,11 @@ static int vmbus_device_add(struct platform_device *pdev) if (ret) return ret; - if (!__is_defined(HYPERVISOR_CALLBACK_VECTOR)) - ret = vmbus_set_irq(pdev); +#ifndef HYPERVISOR_CALLBACK_VECTOR + ret = vmbus_set_irq(pdev); if (ret) return ret; - +#endif for_each_of_range(&parser, &range) { struct resource *res; diff --git a/drivers/hwmon/corsair-cpro.c b/drivers/hwmon/corsair-cpro.c index e1a7f7aa7f80..b7b911f8359c 100644 --- a/drivers/hwmon/corsair-cpro.c +++ b/drivers/hwmon/corsair-cpro.c @@ -89,6 +89,7 @@ struct ccp_device { struct mutex mutex; /* whenever buffer is used, lock before send_usb_cmd */ u8 *cmd_buffer; u8 *buffer; + int buffer_recv_size; /* number of received bytes in buffer */ int target[6]; DECLARE_BITMAP(temp_cnct, NUM_TEMP_SENSORS); DECLARE_BITMAP(fan_cnct, NUM_FANS); @@ -146,6 +147,9 @@ static int send_usb_cmd(struct ccp_device *ccp, u8 command, u8 byte1, u8 byte2, if (!t) return -ETIMEDOUT; + if (ccp->buffer_recv_size != IN_BUFFER_SIZE) + return -EPROTO; + return ccp_get_errno(ccp); } @@ -157,6 +161,7 @@ static int ccp_raw_event(struct hid_device *hdev, struct hid_report *report, u8 spin_lock(&ccp->wait_input_report_lock); if (!completion_done(&ccp->wait_input_report)) { memcpy(ccp->buffer, data, min(IN_BUFFER_SIZE, size)); + ccp->buffer_recv_size = size; complete_all(&ccp->wait_input_report); } spin_unlock(&ccp->wait_input_report_lock); diff --git a/drivers/hwmon/ina238.c b/drivers/hwmon/ina238.c index a4a41742786b..9a5fd16a4ec2 100644 --- a/drivers/hwmon/ina238.c +++ b/drivers/hwmon/ina238.c @@ -97,7 +97,7 @@ * Power (mW) = 0.2 * register value * 20000 / rshunt / 4 * gain * (Specific for SQ52206) * Power (mW) = 0.24 * register value * 20000 / rshunt / 4 * gain - * Energy (mJ) = 16 * 0.24 * register value * 20000 / rshunt / 4 * gain + * Energy (uJ) = 16 * 0.24 * register value * 20000 / rshunt / 4 * gain * 1000 */ #define INA238_CALIBRATION_VALUE 16384 #define INA238_FIXED_SHUNT 20000 @@ -500,9 +500,9 @@ static ssize_t energy1_input_show(struct device *dev, if (ret) return ret; - /* result in mJ */ - energy = div_u64(regval * INA238_FIXED_SHUNT * data->gain * 16 * - data->config->power_calculate_factor, 4 * 100 * data->rshunt); + /* result in uJ */ + energy = div_u64(regval * INA238_FIXED_SHUNT * data->gain * 16 * 10 * + data->config->power_calculate_factor, 4 * data->rshunt); return sysfs_emit(buf, "%llu\n", energy); } diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c index 2bc8cccb01fd..52d4000902d5 100644 --- a/drivers/hwmon/pmbus/ucd9000.c +++ b/drivers/hwmon/pmbus/ucd9000.c @@ -226,15 +226,15 @@ static int ucd9000_gpio_set(struct gpio_chip *gc, unsigned int offset, } if (value) { - if (ret & UCD9000_GPIO_CONFIG_STATUS) + if (ret & UCD9000_GPIO_CONFIG_OUT_VALUE) return 0; - ret |= UCD9000_GPIO_CONFIG_STATUS; + ret |= UCD9000_GPIO_CONFIG_OUT_VALUE; } else { - if (!(ret & UCD9000_GPIO_CONFIG_STATUS)) + if (!(ret & UCD9000_GPIO_CONFIG_OUT_VALUE)) return 0; - ret &= ~UCD9000_GPIO_CONFIG_STATUS; + ret &= ~UCD9000_GPIO_CONFIG_OUT_VALUE; } ret |= UCD9000_GPIO_CONFIG_ENABLE; diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 8b01df3cc8e9..5fcc9f6c33e5 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1472,7 +1472,9 @@ omap_i2c_probe(struct platform_device *pdev) } /* reset ASAP, clearing any IRQs */ - omap_i2c_init(omap); + r = omap_i2c_init(omap); + if (r) + goto err_mux_state_deselect; if (omap->rev < OMAP_I2C_OMAP1_REV_2) r = devm_request_irq(&pdev->dev, omap->irq, omap_i2c_omap1_isr, @@ -1515,12 +1517,13 @@ omap_i2c_probe(struct platform_device *pdev) err_unuse_clocks: omap_i2c_write_reg(omap, OMAP_I2C_CON_REG, 0); +err_mux_state_deselect: if (omap->mux_state) mux_state_deselect(omap->mux_state); err_put_pm: - pm_runtime_dont_use_autosuspend(omap->dev); pm_runtime_put_sync(omap->dev); err_disable_pm: + pm_runtime_dont_use_autosuspend(omap->dev); pm_runtime_disable(&pdev->dev); return r; diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index 6059f585843e..fc348924d522 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -452,8 +452,10 @@ static int qup_i2c_bus_active(struct qup_i2c_dev *qup, int len) if (!(status & I2C_STATUS_BUS_ACTIVE)) break; - if (time_after(jiffies, timeout)) + if (time_after(jiffies, timeout)) { ret = -ETIMEDOUT; + break; + } usleep_range(len, len * 2); } diff --git a/drivers/i2c/busses/i2c-stm32.c b/drivers/i2c/busses/i2c-stm32.c index 157c64e27d0b..f84ec056e36d 100644 --- a/drivers/i2c/busses/i2c-stm32.c +++ b/drivers/i2c/busses/i2c-stm32.c @@ -102,7 +102,6 @@ int stm32_i2c_prep_dma_xfer(struct device *dev, struct stm32_i2c_dma *dma, void *dma_async_param) { struct dma_async_tx_descriptor *txdesc; - struct device *chan_dev; int ret; if (rd_wr) { @@ -116,11 +115,10 @@ int stm32_i2c_prep_dma_xfer(struct device *dev, struct stm32_i2c_dma *dma, } dma->dma_len = len; - chan_dev = dma->chan_using->device->dev; - dma->dma_buf = dma_map_single(chan_dev, buf, dma->dma_len, + dma->dma_buf = dma_map_single(dev, buf, dma->dma_len, dma->dma_data_dir); - if (dma_mapping_error(chan_dev, dma->dma_buf)) { + if (dma_mapping_error(dev, dma->dma_buf)) { dev_err(dev, "DMA mapping failed\n"); return -EINVAL; } @@ -150,7 +148,7 @@ int stm32_i2c_prep_dma_xfer(struct device *dev, struct stm32_i2c_dma *dma, return 0; err: - dma_unmap_single(chan_dev, dma->dma_buf, dma->dma_len, + dma_unmap_single(dev, dma->dma_buf, dma->dma_len, dma->dma_data_dir); return ret; } diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index e4aaeb2262d0..73a7b8894c0d 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -739,12 +739,13 @@ static void stm32f7_i2c_disable_dma_req(struct stm32f7_i2c_dev *i2c_dev) static void stm32f7_i2c_dma_callback(void *arg) { - struct stm32f7_i2c_dev *i2c_dev = (struct stm32f7_i2c_dev *)arg; + struct stm32f7_i2c_dev *i2c_dev = arg; struct stm32_i2c_dma *dma = i2c_dev->dma; - struct device *dev = dma->chan_using->device->dev; stm32f7_i2c_disable_dma_req(i2c_dev); - dma_unmap_single(dev, dma->dma_buf, dma->dma_len, dma->dma_data_dir); + dmaengine_terminate_async(dma->chan_using); + dma_unmap_single(i2c_dev->dev, dma->dma_buf, dma->dma_len, + dma->dma_data_dir); complete(&dma->dma_complete); } @@ -1510,7 +1511,6 @@ static irqreturn_t stm32f7_i2c_handle_isr_errs(struct stm32f7_i2c_dev *i2c_dev, u16 addr = f7_msg->addr; void __iomem *base = i2c_dev->base; struct device *dev = i2c_dev->dev; - struct stm32_i2c_dma *dma = i2c_dev->dma; /* Bus error */ if (status & STM32F7_I2C_ISR_BERR) { @@ -1551,10 +1551,8 @@ static irqreturn_t stm32f7_i2c_handle_isr_errs(struct stm32f7_i2c_dev *i2c_dev, } /* Disable dma */ - if (i2c_dev->use_dma) { - stm32f7_i2c_disable_dma_req(i2c_dev); - dmaengine_terminate_async(dma->chan_using); - } + if (i2c_dev->use_dma) + stm32f7_i2c_dma_callback(i2c_dev); i2c_dev->master_mode = false; complete(&i2c_dev->complete); @@ -1600,7 +1598,6 @@ static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data) { struct stm32f7_i2c_dev *i2c_dev = data; struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; - struct stm32_i2c_dma *dma = i2c_dev->dma; void __iomem *base = i2c_dev->base; u32 status, mask; int ret; @@ -1619,10 +1616,8 @@ static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data) dev_dbg(i2c_dev->dev, "<%s>: Receive NACK (addr %x)\n", __func__, f7_msg->addr); writel_relaxed(STM32F7_I2C_ICR_NACKCF, base + STM32F7_I2C_ICR); - if (i2c_dev->use_dma) { - stm32f7_i2c_disable_dma_req(i2c_dev); - dmaengine_terminate_async(dma->chan_using); - } + if (i2c_dev->use_dma) + stm32f7_i2c_dma_callback(i2c_dev); f7_msg->result = -ENXIO; } @@ -1640,8 +1635,7 @@ static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data) ret = wait_for_completion_timeout(&i2c_dev->dma->dma_complete, HZ); if (!ret) { dev_dbg(i2c_dev->dev, "<%s>: Timed out\n", __func__); - stm32f7_i2c_disable_dma_req(i2c_dev); - dmaengine_terminate_async(dma->chan_using); + stm32f7_i2c_dma_callback(i2c_dev); f7_msg->result = -ETIMEDOUT; } } diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 0862b98007f5..6316360475e7 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -607,7 +607,6 @@ static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) { u32 val, clk_divisor, clk_multiplier, tsu_thd, tlow, thigh, non_hs_mode; - acpi_handle handle = ACPI_HANDLE(i2c_dev->dev); struct i2c_timings *t = &i2c_dev->timings; int err; @@ -619,11 +618,7 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) * emit a noisy warning on error, which won't stay unnoticed and * won't hose machine entirely. */ - if (handle) - err = acpi_evaluate_object(handle, "_RST", NULL, NULL); - else - err = reset_control_reset(i2c_dev->rst); - + err = device_reset(i2c_dev->dev); WARN_ON_ONCE(err); if (IS_DVC(i2c_dev)) @@ -1666,19 +1661,6 @@ static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev) i2c_dev->is_vi = true; } -static int tegra_i2c_init_reset(struct tegra_i2c_dev *i2c_dev) -{ - if (ACPI_HANDLE(i2c_dev->dev)) - return 0; - - i2c_dev->rst = devm_reset_control_get_exclusive(i2c_dev->dev, "i2c"); - if (IS_ERR(i2c_dev->rst)) - return dev_err_probe(i2c_dev->dev, PTR_ERR(i2c_dev->rst), - "failed to get reset control\n"); - - return 0; -} - static int tegra_i2c_init_clocks(struct tegra_i2c_dev *i2c_dev) { int err; @@ -1788,10 +1770,6 @@ static int tegra_i2c_probe(struct platform_device *pdev) tegra_i2c_parse_dt(i2c_dev); - err = tegra_i2c_init_reset(i2c_dev); - if (err) - return err; - err = tegra_i2c_init_clocks(i2c_dev); if (err) return err; diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c index 9b05ff53d3d7..af1381949f50 100644 --- a/drivers/i2c/busses/i2c-virtio.c +++ b/drivers/i2c/busses/i2c-virtio.c @@ -116,15 +116,16 @@ static int virtio_i2c_complete_reqs(struct virtqueue *vq, for (i = 0; i < num; i++) { struct virtio_i2c_req *req = &reqs[i]; - wait_for_completion(&req->completion); - - if (!failed && req->in_hdr.status != VIRTIO_I2C_MSG_OK) - failed = true; + if (!failed) { + if (wait_for_completion_interruptible(&req->completion)) + failed = true; + else if (req->in_hdr.status != VIRTIO_I2C_MSG_OK) + failed = true; + else + j++; + } i2c_put_dma_safe_msg_buf(reqs[i].buf, &msgs[i], !failed); - - if (!failed) - j++; } return j; diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c index 12598feaa693..b10a30960e1e 100644 --- a/drivers/iio/accel/fxls8962af-core.c +++ b/drivers/iio/accel/fxls8962af-core.c @@ -877,6 +877,8 @@ static int fxls8962af_buffer_predisable(struct iio_dev *indio_dev) if (ret) return ret; + synchronize_irq(data->irq); + ret = __fxls8962af_fifo_set_mode(data, false); if (data->enable_event) diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 99cb661fabb2..a7961c610ed2 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -1353,6 +1353,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev) union acpi_object *ont; union acpi_object *elements; acpi_status status; + struct device *parent = indio_dev->dev.parent; int ret = -EINVAL; unsigned int val; int i, j; @@ -1371,7 +1372,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev) }; - adev = ACPI_COMPANION(indio_dev->dev.parent); + adev = ACPI_COMPANION(parent); if (!adev) return -ENXIO; @@ -1380,8 +1381,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev) if (status == AE_NOT_FOUND) { return -ENXIO; } else if (ACPI_FAILURE(status)) { - dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n", - status); + dev_warn(parent, "failed to execute _ONT: %d\n", status); return status; } @@ -1457,12 +1457,12 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev) } ret = 0; - dev_info(&indio_dev->dev, "computed mount matrix from ACPI\n"); + dev_info(parent, "computed mount matrix from ACPI\n"); out: kfree(buffer.pointer); if (ret) - dev_dbg(&indio_dev->dev, + dev_dbg(parent, "failed to apply ACPI orientation data: %d\n", ret); return ret; diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index d96bd12dfea6..cabf5511d116 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -1953,8 +1953,9 @@ static int ad7380_probe(struct spi_device *spi) if (st->chip_info->has_hardware_gain) { device_for_each_child_node_scoped(dev, node) { - unsigned int channel, gain; + unsigned int channel; int gain_idx; + u16 gain; ret = fwnode_property_read_u32(node, "reg", &channel); if (ret) @@ -1966,7 +1967,7 @@ static int ad7380_probe(struct spi_device *spi) "Invalid channel number %i\n", channel); - ret = fwnode_property_read_u32(node, "adi,gain-milli", + ret = fwnode_property_read_u16(node, "adi,gain-milli", &gain); if (ret && ret != -EINVAL) return dev_err_probe(dev, ret, diff --git a/drivers/iio/adc/ad7949.c b/drivers/iio/adc/ad7949.c index edd0c3a35ab7..202561cad401 100644 --- a/drivers/iio/adc/ad7949.c +++ b/drivers/iio/adc/ad7949.c @@ -308,7 +308,6 @@ static void ad7949_disable_reg(void *reg) static int ad7949_spi_probe(struct spi_device *spi) { - u32 spi_ctrl_mask = spi->controller->bits_per_word_mask; struct device *dev = &spi->dev; const struct ad7949_adc_spec *spec; struct ad7949_adc_chip *ad7949_adc; @@ -337,11 +336,11 @@ static int ad7949_spi_probe(struct spi_device *spi) ad7949_adc->resolution = spec->resolution; /* Set SPI bits per word */ - if (spi_ctrl_mask & SPI_BPW_MASK(ad7949_adc->resolution)) { + if (spi_is_bpw_supported(spi, ad7949_adc->resolution)) { spi->bits_per_word = ad7949_adc->resolution; - } else if (spi_ctrl_mask == SPI_BPW_MASK(16)) { + } else if (spi_is_bpw_supported(spi, 16)) { spi->bits_per_word = 16; - } else if (spi_ctrl_mask == SPI_BPW_MASK(8)) { + } else if (spi_is_bpw_supported(spi, 8)) { spi->bits_per_word = 8; } else { dev_err(dev, "unable to find common BPW with spi controller\n"); diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index 4116c44197b8..2dbaa0b5b3d6 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -445,7 +445,7 @@ static int axi_adc_raw_read(struct iio_backend *back, u32 *val) static int ad7606_bus_reg_read(struct iio_backend *back, u32 reg, u32 *val) { struct adi_axi_adc_state *st = iio_backend_get_priv(back); - int addr; + u32 addr, reg_val; guard(mutex)(&st->lock); @@ -455,7 +455,9 @@ static int ad7606_bus_reg_read(struct iio_backend *back, u32 reg, u32 *val) */ addr = FIELD_PREP(ADI_AXI_REG_ADDRESS_MASK, reg) | ADI_AXI_REG_READ_BIT; axi_adc_raw_write(back, addr); - axi_adc_raw_read(back, val); + axi_adc_raw_read(back, ®_val); + + *val = FIELD_GET(ADI_AXI_REG_VALUE_MASK, reg_val); /* Write 0x0 on the bus to get back to ADC mode */ axi_adc_raw_write(back, 0); diff --git a/drivers/iio/adc/axp20x_adc.c b/drivers/iio/adc/axp20x_adc.c index 71584ffd3632..1b49325ec1ce 100644 --- a/drivers/iio/adc/axp20x_adc.c +++ b/drivers/iio/adc/axp20x_adc.c @@ -187,6 +187,7 @@ static struct iio_map axp717_maps[] = { .consumer_channel = "batt_chrg_i", .adc_channel_label = "batt_chrg_i", }, + { } }; /* diff --git a/drivers/iio/adc/max1363.c b/drivers/iio/adc/max1363.c index a7e9912fb44a..9dd547e62b6c 100644 --- a/drivers/iio/adc/max1363.c +++ b/drivers/iio/adc/max1363.c @@ -511,10 +511,10 @@ static const struct iio_event_spec max1363_events[] = { MAX1363_CHAN_U(1, _s1, 1, bits, ev_spec, num_ev_spec), \ MAX1363_CHAN_U(2, _s2, 2, bits, ev_spec, num_ev_spec), \ MAX1363_CHAN_U(3, _s3, 3, bits, ev_spec, num_ev_spec), \ - MAX1363_CHAN_B(0, 1, d0m1, 4, bits, ev_spec, num_ev_spec), \ - MAX1363_CHAN_B(2, 3, d2m3, 5, bits, ev_spec, num_ev_spec), \ - MAX1363_CHAN_B(1, 0, d1m0, 6, bits, ev_spec, num_ev_spec), \ - MAX1363_CHAN_B(3, 2, d3m2, 7, bits, ev_spec, num_ev_spec), \ + MAX1363_CHAN_B(0, 1, d0m1, 12, bits, ev_spec, num_ev_spec), \ + MAX1363_CHAN_B(2, 3, d2m3, 13, bits, ev_spec, num_ev_spec), \ + MAX1363_CHAN_B(1, 0, d1m0, 18, bits, ev_spec, num_ev_spec), \ + MAX1363_CHAN_B(3, 2, d3m2, 19, bits, ev_spec, num_ev_spec), \ IIO_CHAN_SOFT_TIMESTAMP(8) \ } @@ -532,23 +532,23 @@ static const struct iio_chan_spec max1363_channels[] = /* Applies to max1236, max1237 */ static const enum max1363_modes max1236_mode_list[] = { _s0, _s1, _s2, _s3, - s0to1, s0to2, s0to3, + s0to1, s0to2, s2to3, s0to3, d0m1, d2m3, d1m0, d3m2, d0m1to2m3, d1m0to3m2, - s2to3, }; /* Applies to max1238, max1239 */ static const enum max1363_modes max1238_mode_list[] = { _s0, _s1, _s2, _s3, _s4, _s5, _s6, _s7, _s8, _s9, _s10, _s11, s0to1, s0to2, s0to3, s0to4, s0to5, s0to6, + s6to7, s6to8, s6to9, s6to10, s6to11, s0to7, s0to8, s0to9, s0to10, s0to11, d0m1, d2m3, d4m5, d6m7, d8m9, d10m11, d1m0, d3m2, d5m4, d7m6, d9m8, d11m10, - d0m1to2m3, d0m1to4m5, d0m1to6m7, d0m1to8m9, d0m1to10m11, - d1m0to3m2, d1m0to5m4, d1m0to7m6, d1m0to9m8, d1m0to11m10, - s6to7, s6to8, s6to9, s6to10, s6to11, - d6m7to8m9, d6m7to10m11, d7m6to9m8, d7m6to11m10, + d0m1to2m3, d0m1to4m5, d0m1to6m7, d6m7to8m9, + d0m1to8m9, d6m7to10m11, d0m1to10m11, d1m0to3m2, + d1m0to5m4, d1m0to7m6, d7m6to9m8, d1m0to9m8, + d7m6to11m10, d1m0to11m10, }; #define MAX1363_12X_CHANS(bits) { \ @@ -584,16 +584,15 @@ static const struct iio_chan_spec max1238_channels[] = MAX1363_12X_CHANS(12); static const enum max1363_modes max11607_mode_list[] = { _s0, _s1, _s2, _s3, - s0to1, s0to2, s0to3, - s2to3, + s0to1, s0to2, s2to3, + s0to3, d0m1, d2m3, d1m0, d3m2, d0m1to2m3, d1m0to3m2, }; static const enum max1363_modes max11608_mode_list[] = { _s0, _s1, _s2, _s3, _s4, _s5, _s6, _s7, - s0to1, s0to2, s0to3, s0to4, s0to5, s0to6, s0to7, - s6to7, + s0to1, s0to2, s0to3, s0to4, s0to5, s0to6, s6to7, s0to7, d0m1, d2m3, d4m5, d6m7, d1m0, d3m2, d5m4, d7m6, d0m1to2m3, d0m1to4m5, d0m1to6m7, @@ -609,14 +608,14 @@ static const enum max1363_modes max11608_mode_list[] = { MAX1363_CHAN_U(5, _s5, 5, bits, NULL, 0), \ MAX1363_CHAN_U(6, _s6, 6, bits, NULL, 0), \ MAX1363_CHAN_U(7, _s7, 7, bits, NULL, 0), \ - MAX1363_CHAN_B(0, 1, d0m1, 8, bits, NULL, 0), \ - MAX1363_CHAN_B(2, 3, d2m3, 9, bits, NULL, 0), \ - MAX1363_CHAN_B(4, 5, d4m5, 10, bits, NULL, 0), \ - MAX1363_CHAN_B(6, 7, d6m7, 11, bits, NULL, 0), \ - MAX1363_CHAN_B(1, 0, d1m0, 12, bits, NULL, 0), \ - MAX1363_CHAN_B(3, 2, d3m2, 13, bits, NULL, 0), \ - MAX1363_CHAN_B(5, 4, d5m4, 14, bits, NULL, 0), \ - MAX1363_CHAN_B(7, 6, d7m6, 15, bits, NULL, 0), \ + MAX1363_CHAN_B(0, 1, d0m1, 12, bits, NULL, 0), \ + MAX1363_CHAN_B(2, 3, d2m3, 13, bits, NULL, 0), \ + MAX1363_CHAN_B(4, 5, d4m5, 14, bits, NULL, 0), \ + MAX1363_CHAN_B(6, 7, d6m7, 15, bits, NULL, 0), \ + MAX1363_CHAN_B(1, 0, d1m0, 18, bits, NULL, 0), \ + MAX1363_CHAN_B(3, 2, d3m2, 19, bits, NULL, 0), \ + MAX1363_CHAN_B(5, 4, d5m4, 20, bits, NULL, 0), \ + MAX1363_CHAN_B(7, 6, d7m6, 21, bits, NULL, 0), \ IIO_CHAN_SOFT_TIMESTAMP(16) \ } static const struct iio_chan_spec max11602_channels[] = MAX1363_8X_CHANS(8); diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index bd3458965bff..21c04a98b3b6 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -430,10 +430,9 @@ static int stm32_adc_irq_probe(struct platform_device *pdev, return -ENOMEM; } - for (i = 0; i < priv->cfg->num_irqs; i++) { - irq_set_chained_handler(priv->irq[i], stm32_adc_irq_handler); - irq_set_handler_data(priv->irq[i], priv); - } + for (i = 0; i < priv->cfg->num_irqs; i++) + irq_set_chained_handler_and_data(priv->irq[i], + stm32_adc_irq_handler, priv); return 0; } diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 8ce1dccfea4f..dac593be5695 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -154,7 +154,7 @@ static int st_sensors_set_fullscale(struct iio_dev *indio_dev, unsigned int fs) return err; st_accel_set_fullscale_error: - dev_err(&indio_dev->dev, "failed to set new fullscale.\n"); + dev_err(indio_dev->dev.parent, "failed to set new fullscale.\n"); return err; } @@ -231,8 +231,7 @@ int st_sensors_power_enable(struct iio_dev *indio_dev) ARRAY_SIZE(regulator_names), regulator_names); if (err) - return dev_err_probe(&indio_dev->dev, err, - "unable to enable supplies\n"); + return dev_err_probe(parent, err, "unable to enable supplies\n"); return 0; } @@ -241,13 +240,14 @@ EXPORT_SYMBOL_NS(st_sensors_power_enable, "IIO_ST_SENSORS"); static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev, struct st_sensors_platform_data *pdata) { + struct device *parent = indio_dev->dev.parent; struct st_sensor_data *sdata = iio_priv(indio_dev); /* Sensor does not support interrupts */ if (!sdata->sensor_settings->drdy_irq.int1.addr && !sdata->sensor_settings->drdy_irq.int2.addr) { if (pdata->drdy_int_pin) - dev_info(&indio_dev->dev, + dev_info(parent, "DRDY on pin INT%d specified, but sensor does not support interrupts\n", pdata->drdy_int_pin); return 0; @@ -256,29 +256,27 @@ static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev, switch (pdata->drdy_int_pin) { case 1: if (!sdata->sensor_settings->drdy_irq.int1.mask) { - dev_err(&indio_dev->dev, - "DRDY on INT1 not available.\n"); + dev_err(parent, "DRDY on INT1 not available.\n"); return -EINVAL; } sdata->drdy_int_pin = 1; break; case 2: if (!sdata->sensor_settings->drdy_irq.int2.mask) { - dev_err(&indio_dev->dev, - "DRDY on INT2 not available.\n"); + dev_err(parent, "DRDY on INT2 not available.\n"); return -EINVAL; } sdata->drdy_int_pin = 2; break; default: - dev_err(&indio_dev->dev, "DRDY on pdata not valid.\n"); + dev_err(parent, "DRDY on pdata not valid.\n"); return -EINVAL; } if (pdata->open_drain) { if (!sdata->sensor_settings->drdy_irq.int1.addr_od && !sdata->sensor_settings->drdy_irq.int2.addr_od) - dev_err(&indio_dev->dev, + dev_err(parent, "open drain requested but unsupported.\n"); else sdata->int_pin_open_drain = true; @@ -336,6 +334,7 @@ EXPORT_SYMBOL_NS(st_sensors_dev_name_probe, "IIO_ST_SENSORS"); int st_sensors_init_sensor(struct iio_dev *indio_dev, struct st_sensors_platform_data *pdata) { + struct device *parent = indio_dev->dev.parent; struct st_sensor_data *sdata = iio_priv(indio_dev); struct st_sensors_platform_data *of_pdata; int err = 0; @@ -343,7 +342,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, mutex_init(&sdata->odr_lock); /* If OF/DT pdata exists, it will take precedence of anything else */ - of_pdata = st_sensors_dev_probe(indio_dev->dev.parent, pdata); + of_pdata = st_sensors_dev_probe(parent, pdata); if (IS_ERR(of_pdata)) return PTR_ERR(of_pdata); if (of_pdata) @@ -370,7 +369,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, if (err < 0) return err; } else - dev_info(&indio_dev->dev, "Full-scale not possible\n"); + dev_info(parent, "Full-scale not possible\n"); err = st_sensors_set_odr(indio_dev, sdata->odr); if (err < 0) @@ -405,7 +404,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, mask = sdata->sensor_settings->drdy_irq.int2.mask_od; } - dev_info(&indio_dev->dev, + dev_info(parent, "set interrupt line to open drain mode on pin %d\n", sdata->drdy_int_pin); err = st_sensors_write_data_with_mask(indio_dev, addr, @@ -593,21 +592,20 @@ EXPORT_SYMBOL_NS(st_sensors_get_settings_index, "IIO_ST_SENSORS"); int st_sensors_verify_id(struct iio_dev *indio_dev) { struct st_sensor_data *sdata = iio_priv(indio_dev); + struct device *parent = indio_dev->dev.parent; int wai, err; if (sdata->sensor_settings->wai_addr) { err = regmap_read(sdata->regmap, sdata->sensor_settings->wai_addr, &wai); if (err < 0) { - dev_err(&indio_dev->dev, - "failed to read Who-Am-I register.\n"); - return err; + return dev_err_probe(parent, err, + "failed to read Who-Am-I register.\n"); } if (sdata->sensor_settings->wai != wai) { - dev_warn(&indio_dev->dev, - "%s: WhoAmI mismatch (0x%x).\n", - indio_dev->name, wai); + dev_warn(parent, "%s: WhoAmI mismatch (0x%x).\n", + indio_dev->name, wai); } } diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c index 9d4bf822a15d..8a8ab688d798 100644 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c @@ -127,7 +127,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, sdata->trig = devm_iio_trigger_alloc(parent, "%s-trigger", indio_dev->name); if (sdata->trig == NULL) { - dev_err(&indio_dev->dev, "failed to allocate iio trigger.\n"); + dev_err(parent, "failed to allocate iio trigger.\n"); return -ENOMEM; } @@ -143,7 +143,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, case IRQF_TRIGGER_FALLING: case IRQF_TRIGGER_LOW: if (!sdata->sensor_settings->drdy_irq.addr_ihl) { - dev_err(&indio_dev->dev, + dev_err(parent, "falling/low specified for IRQ but hardware supports only rising/high: will request rising/high\n"); if (irq_trig == IRQF_TRIGGER_FALLING) irq_trig = IRQF_TRIGGER_RISING; @@ -156,21 +156,19 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, sdata->sensor_settings->drdy_irq.mask_ihl, 1); if (err < 0) return err; - dev_info(&indio_dev->dev, + dev_info(parent, "interrupts on the falling edge or active low level\n"); } break; case IRQF_TRIGGER_RISING: - dev_info(&indio_dev->dev, - "interrupts on the rising edge\n"); + dev_info(parent, "interrupts on the rising edge\n"); break; case IRQF_TRIGGER_HIGH: - dev_info(&indio_dev->dev, - "interrupts active high level\n"); + dev_info(parent, "interrupts active high level\n"); break; default: /* This is the most preferred mode, if possible */ - dev_err(&indio_dev->dev, + dev_err(parent, "unsupported IRQ trigger specified (%lx), enforce rising edge\n", irq_trig); irq_trig = IRQF_TRIGGER_RISING; } @@ -179,7 +177,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, if (irq_trig == IRQF_TRIGGER_FALLING || irq_trig == IRQF_TRIGGER_RISING) { if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) { - dev_err(&indio_dev->dev, + dev_err(parent, "edge IRQ not supported w/o stat register.\n"); return -EOPNOTSUPP; } @@ -214,13 +212,13 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, sdata->trig->name, sdata->trig); if (err) { - dev_err(&indio_dev->dev, "failed to request trigger IRQ.\n"); + dev_err(parent, "failed to request trigger IRQ.\n"); return err; } err = devm_iio_trigger_register(parent, sdata->trig); if (err < 0) { - dev_err(&indio_dev->dev, "failed to register iio trigger.\n"); + dev_err(parent, "failed to register iio trigger.\n"); return err; } indio_dev->trig = iio_trigger_get(sdata->trig); diff --git a/drivers/iio/dac/ad3530r.c b/drivers/iio/dac/ad3530r.c index f9752a571aa5..6134613777b8 100644 --- a/drivers/iio/dac/ad3530r.c +++ b/drivers/iio/dac/ad3530r.c @@ -166,7 +166,9 @@ static ssize_t ad3530r_set_dac_powerdown(struct iio_dev *indio_dev, AD3530R_OUTPUT_OPERATING_MODE_0 : AD3530R_OUTPUT_OPERATING_MODE_1; pdmode = powerdown ? st->chan[chan->channel].powerdown_mode : 0; - mask = AD3530R_OP_MODE_CHAN_MSK(chan->channel); + mask = chan->channel < AD3531R_MAX_CHANNELS ? + AD3530R_OP_MODE_CHAN_MSK(chan->channel) : + AD3530R_OP_MODE_CHAN_MSK(chan->channel - 4); val = field_prep(mask, pdmode); ret = regmap_update_bits(st->regmap, reg, mask, val); diff --git a/drivers/iio/industrialio-backend.c b/drivers/iio/industrialio-backend.c index c1eb9ef9db08..266e1b29bf91 100644 --- a/drivers/iio/industrialio-backend.c +++ b/drivers/iio/industrialio-backend.c @@ -155,11 +155,14 @@ static ssize_t iio_backend_debugfs_write_reg(struct file *file, ssize_t rc; int ret; + if (count >= sizeof(buf)) + return -ENOSPC; + rc = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, userbuf, count); if (rc < 0) return rc; - buf[count] = '\0'; + buf[rc] = '\0'; ret = sscanf(buf, "%i %i", &back->cached_reg_addr, &val); diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 178e99b111de..5ffda104d4b2 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -411,12 +411,15 @@ static ssize_t iio_debugfs_write_reg(struct file *file, char buf[80]; int ret; + if (count >= sizeof(buf)) + return -EINVAL; + ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, userbuf, count); if (ret < 0) return ret; - buf[count] = '\0'; + buf[ret] = '\0'; ret = sscanf(buf, "%i %i", ®, &val); diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 5d9b7007a730..1d8c579b5433 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -172,12 +172,12 @@ static const struct xpad_device { { 0x046d, 0xca88, "Logitech Compact Controller for Xbox", 0, XTYPE_XBOX }, { 0x046d, 0xca8a, "Logitech Precision Vibration Feedback Wheel", 0, XTYPE_XBOX }, { 0x046d, 0xcaa3, "Logitech DriveFx Racing Wheel", 0, XTYPE_XBOX360 }, + { 0x0502, 0x1305, "Acer NGR200", 0, XTYPE_XBOX360 }, { 0x056e, 0x2004, "Elecom JC-U3613M", 0, XTYPE_XBOX360 }, { 0x05fd, 0x1007, "Mad Catz Controller (unverified)", 0, XTYPE_XBOX }, { 0x05fd, 0x107a, "InterAct 'PowerPad Pro' X-Box pad (Germany)", 0, XTYPE_XBOX }, { 0x05fe, 0x3030, "Chic Controller", 0, XTYPE_XBOX }, { 0x05fe, 0x3031, "Chic Controller", 0, XTYPE_XBOX }, - { 0x0502, 0x1305, "Acer NGR200", 0, XTYPE_XBOX }, { 0x062a, 0x0020, "Logic3 Xbox GamePad", 0, XTYPE_XBOX }, { 0x062a, 0x0033, "Competition Pro Steering Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0200, "Saitek Racing Wheel", 0, XTYPE_XBOX }, diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 1a41e59c77f8..3ebf37ddfc18 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -20,7 +20,7 @@ #include "internal.h" -#define ICC_DYN_ID_START 10000 +#define ICC_DYN_ID_START 100000 #define CREATE_TRACE_POINTS #include "trace.h" @@ -819,6 +819,9 @@ static struct icc_node *icc_node_create_nolock(int id) { struct icc_node *node; + if (id >= ICC_DYN_ID_START) + return ERR_PTR(-EINVAL); + /* check if node already exists */ node = node_find(id); if (node) @@ -906,11 +909,36 @@ void icc_node_destroy(int id) return; kfree(node->links); + if (node->id >= ICC_DYN_ID_START) + kfree(node->name); kfree(node); } EXPORT_SYMBOL_GPL(icc_node_destroy); /** + * icc_node_set_name() - set node name + * @node: node + * @provider: node provider + * @name: node name + * + * Return: 0 on success, or -ENOMEM on allocation failure + */ +int icc_node_set_name(struct icc_node *node, const struct icc_provider *provider, const char *name) +{ + if (node->id >= ICC_DYN_ID_START) { + node->name = kasprintf(GFP_KERNEL, "%s@%s", name, + dev_name(provider->dev)); + if (!node->name) + return -ENOMEM; + } else { + node->name = name; + } + + return 0; +} +EXPORT_SYMBOL_GPL(icc_node_set_name); + +/** * icc_link_nodes() - create link between two nodes * @src_node: source node * @dst_node: destination node @@ -1038,10 +1066,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) node->avg_bw = node->init_avg; node->peak_bw = node->init_peak; - if (node->id >= ICC_DYN_ID_START) - node->name = devm_kasprintf(provider->dev, GFP_KERNEL, "%s@%s", - node->name, dev_name(provider->dev)); - if (node->avg_bw || node->peak_bw) { if (provider->pre_aggregate) provider->pre_aggregate(node); diff --git a/drivers/interconnect/icc-clk.c b/drivers/interconnect/icc-clk.c index 88f311c11020..93c030608d3e 100644 --- a/drivers/interconnect/icc-clk.c +++ b/drivers/interconnect/icc-clk.c @@ -117,6 +117,7 @@ struct icc_provider *icc_clk_register(struct device *dev, node->name = devm_kasprintf(dev, GFP_KERNEL, "%s_master", data[i].name); if (!node->name) { + icc_node_destroy(node->id); ret = -ENOMEM; goto err; } @@ -135,6 +136,7 @@ struct icc_provider *icc_clk_register(struct device *dev, node->name = devm_kasprintf(dev, GFP_KERNEL, "%s_slave", data[i].name); if (!node->name) { + icc_node_destroy(node->id); ret = -ENOMEM; goto err; } diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c index 41bfc6e7ee1d..001404e91041 100644 --- a/drivers/interconnect/qcom/icc-rpmh.c +++ b/drivers/interconnect/qcom/icc-rpmh.c @@ -293,7 +293,12 @@ int qcom_icc_rpmh_probe(struct platform_device *pdev) goto err_remove_nodes; } - node->name = qn->name; + ret = icc_node_set_name(node, provider, qn->name); + if (ret) { + icc_node_destroy(node->id); + goto err_remove_nodes; + } + node->data = qn; icc_node_add(node, provider); diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c index baecbf2533f7..b33f00da1880 100644 --- a/drivers/interconnect/qcom/osm-l3.c +++ b/drivers/interconnect/qcom/osm-l3.c @@ -236,7 +236,12 @@ static int qcom_osm_l3_probe(struct platform_device *pdev) goto err; } - node->name = qnodes[i]->name; + ret = icc_node_set_name(node, provider, qnodes[i]->name); + if (ret) { + icc_node_destroy(node->id); + goto err; + } + /* Cast away const and add it back in qcom_osm_l3_set() */ node->data = (void *)qnodes[i]; icc_node_add(node, provider); diff --git a/drivers/interconnect/qcom/sc7280.c b/drivers/interconnect/qcom/sc7280.c index 346f18d70e9e..905403a3a930 100644 --- a/drivers/interconnect/qcom/sc7280.c +++ b/drivers/interconnect/qcom/sc7280.c @@ -238,6 +238,7 @@ static struct qcom_icc_node xm_pcie3_1 = { .id = SC7280_MASTER_PCIE_1, .channels = 1, .buswidth = 8, + .num_links = 1, .links = { SC7280_SLAVE_ANOC_PCIE_GEM_NOC }, }; diff --git a/drivers/interconnect/samsung/exynos.c b/drivers/interconnect/samsung/exynos.c index 9e041365d909..8e8f56186a36 100644 --- a/drivers/interconnect/samsung/exynos.c +++ b/drivers/interconnect/samsung/exynos.c @@ -134,6 +134,11 @@ static int exynos_generic_icc_probe(struct platform_device *pdev) priv->node = icc_node; icc_node->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%pOFn", bus_dev->of_node); + if (!icc_node->name) { + icc_node_destroy(pdev->id); + return -ENOMEM; + } + if (of_property_read_u32(bus_dev->of_node, "samsung,data-clock-ratio", &priv->bus_clk_ratio)) priv->bus_clk_ratio = EXYNOS_ICC_DEFAULT_BUS_CLK_RATIO; diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index 761ab647f372..0961ac805944 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -193,15 +193,13 @@ struct hyperv_root_ir_data { static void hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) { - u64 status; - u32 vector; - struct irq_cfg *cfg; - int ioapic_id; - const struct cpumask *affinity; - int cpu; - struct hv_interrupt_entry entry; struct hyperv_root_ir_data *data = irq_data->chip_data; + struct hv_interrupt_entry entry; + const struct cpumask *affinity; struct IO_APIC_route_entry e; + struct irq_cfg *cfg; + int cpu, ioapic_id; + u32 vector; cfg = irqd_cfg(irq_data); affinity = irq_data_get_effective_affinity_mask(irq_data); @@ -214,23 +212,16 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) && data->entry.ioapic_rte.as_uint64) { entry = data->entry; - status = hv_unmap_ioapic_interrupt(ioapic_id, &entry); - - if (status != HV_STATUS_SUCCESS) - hv_status_debug(status, "failed to unmap\n"); + (void)hv_unmap_ioapic_interrupt(ioapic_id, &entry); data->entry.ioapic_rte.as_uint64 = 0; data->entry.source = 0; /* Invalid source */ } - status = hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu, - vector, &entry); - - if (status != HV_STATUS_SUCCESS) { - hv_status_err(status, "map failed\n"); + if (hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu, + vector, &entry)) return; - } data->entry = entry; @@ -322,10 +313,10 @@ static void hyperv_root_irq_remapping_free(struct irq_domain *domain, data = irq_data->chip_data; e = &data->entry; - if (e->source == HV_DEVICE_TYPE_IOAPIC - && e->ioapic_rte.as_uint64) - hv_unmap_ioapic_interrupt(data->ioapic_id, - &data->entry); + if (e->source == HV_DEVICE_TYPE_IOAPIC && + e->ioapic_rte.as_uint64) + (void)hv_unmap_ioapic_interrupt(data->ioapic_id, + &data->entry); kfree(data); } diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index ec84ba5e93e5..ff7595caf440 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -2742,7 +2742,11 @@ static unsigned long __evict_a_few(unsigned long nr_buffers) __make_buffer_clean(b); __free_buffer_wake(b); - cond_resched(); + if (need_resched()) { + dm_bufio_unlock(c); + cond_resched(); + dm_bufio_lock(c); + } } dm_bufio_unlock(c); diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index bd694910b01b..7f524a26cebc 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2366,8 +2366,7 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) if (!bitmap) return -ENOENT; - if (!bitmap->mddev->bitmap_info.external && - !bitmap->storage.sb_page) + if (!bitmap->storage.sb_page) return -EINVAL; sb = kmap_local_page(bitmap->storage.sb_page); stats->sync_size = le64_to_cpu(sb->sync_size); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 19c5a0ce5a40..64b8176907a9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1399,7 +1399,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, } read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp, &mddev->bio_set); - + read_bio->bi_opf &= ~REQ_NOWAIT; r1_bio->bios[rdisk] = read_bio; read_bio->bi_iter.bi_sector = r1_bio->sector + @@ -1649,6 +1649,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, wait_for_serialization(rdev, r1_bio); } + mbio->bi_opf &= ~REQ_NOWAIT; r1_bio->bios[i] = mbio; mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset); @@ -3428,6 +3429,7 @@ static int raid1_reshape(struct mddev *mddev) /* ok, everything is stopped */ oldpool = conf->r1bio_pool; conf->r1bio_pool = newpool; + init_waitqueue_head(&conf->r1bio_pool.wait); for (d = d2 = 0; d < conf->raid_disks; d++) { struct md_rdev *rdev = conf->mirrors[d].rdev; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index b74780af4c22..c9bd2005bfd0 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1182,8 +1182,11 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, } } - if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) + if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) { + raid_end_bio_io(r10_bio); return; + } + rdev = read_balance(conf, r10_bio, &max_sectors); if (!rdev) { if (err_rdev) { @@ -1221,6 +1224,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, r10_bio->master_bio = bio; } read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set); + read_bio->bi_opf &= ~REQ_NOWAIT; r10_bio->devs[slot].bio = read_bio; r10_bio->devs[slot].rdev = rdev; @@ -1256,6 +1260,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, conf->mirrors[devnum].rdev; mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set); + mbio->bi_opf &= ~REQ_NOWAIT; if (replacement) r10_bio->devs[n_copy].repl_bio = mbio; else @@ -1370,8 +1375,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, } sectors = r10_bio->sectors; - if (!regular_request_wait(mddev, conf, bio, sectors)) + if (!regular_request_wait(mddev, conf, bio, sectors)) { + raid_end_bio_io(r10_bio); return; + } + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && (mddev->reshape_backwards ? (bio->bi_iter.bi_sector < conf->reshape_safe && diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 043b9ec756ff..7f3f47db4c98 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -324,7 +324,7 @@ EXPORT_SYMBOL(memstick_init_req); static int h_memstick_read_dev_id(struct memstick_dev *card, struct memstick_request **mrq) { - struct ms_id_register id_reg; + struct ms_id_register id_reg = {}; if (!(*mrq)) { memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, &id_reg, diff --git a/drivers/misc/amd-sbi/rmi-core.c b/drivers/misc/amd-sbi/rmi-core.c index b653a21a909e..3dec2fc00124 100644 --- a/drivers/misc/amd-sbi/rmi-core.c +++ b/drivers/misc/amd-sbi/rmi-core.c @@ -42,7 +42,6 @@ #define RD_MCA_CMD 0x86 /* CPUID MCAMSR mask & index */ -#define CPUID_MCA_THRD_MASK GENMASK(15, 0) #define CPUID_MCA_THRD_INDEX 32 #define CPUID_MCA_FUNC_MASK GENMASK(31, 0) #define CPUID_EXT_FUNC_INDEX 56 @@ -129,7 +128,7 @@ static int rmi_cpuid_read(struct sbrmi_data *data, goto exit_unlock; } - thread = msg->cpu_in_out << CPUID_MCA_THRD_INDEX & CPUID_MCA_THRD_MASK; + thread = msg->cpu_in_out >> CPUID_MCA_THRD_INDEX; /* Thread > 127, Thread128 CS register, 1'b1 needs to be set to 1 */ if (thread > 127) { @@ -210,7 +209,7 @@ static int rmi_mca_msr_read(struct sbrmi_data *data, goto exit_unlock; } - thread = msg->mcamsr_in_out << CPUID_MCA_THRD_INDEX & CPUID_MCA_THRD_MASK; + thread = msg->mcamsr_in_out >> CPUID_MCA_THRD_INDEX; /* Thread > 127, Thread128 CS register, 1'b1 needs to be set to 1 */ if (thread > 127) { @@ -321,6 +320,10 @@ int rmi_mailbox_xfer(struct sbrmi_data *data, ret = regmap_read(data->regmap, SBRMI_OUTBNDMSG7, &ec); if (ret || ec) goto exit_clear_alert; + + /* Clear the input value before updating the output data */ + msg->mb_in_out = 0; + /* * For a read operation, the initiator (BMC) reads the firmware * response Command Data Out[31:0] from SBRMI::OutBndMsg_inst[4:1] @@ -373,7 +376,8 @@ static int apml_rmi_reg_xfer(struct sbrmi_data *data, mutex_unlock(&data->lock); if (msg.rflag && !ret) - return copy_to_user(arg, &msg, sizeof(struct apml_reg_xfer_msg)); + if (copy_to_user(arg, &msg, sizeof(struct apml_reg_xfer_msg))) + return -EFAULT; return ret; } @@ -391,7 +395,9 @@ static int apml_mailbox_xfer(struct sbrmi_data *data, struct apml_mbox_msg __use if (ret && ret != -EPROTOTYPE) return ret; - return copy_to_user(arg, &msg, sizeof(struct apml_mbox_msg)); + if (copy_to_user(arg, &msg, sizeof(struct apml_mbox_msg))) + return -EFAULT; + return ret; } static int apml_cpuid_xfer(struct sbrmi_data *data, struct apml_cpuid_msg __user *arg) @@ -408,7 +414,9 @@ static int apml_cpuid_xfer(struct sbrmi_data *data, struct apml_cpuid_msg __user if (ret && ret != -EPROTOTYPE) return ret; - return copy_to_user(arg, &msg, sizeof(struct apml_cpuid_msg)); + if (copy_to_user(arg, &msg, sizeof(struct apml_cpuid_msg))) + return -EFAULT; + return ret; } static int apml_mcamsr_xfer(struct sbrmi_data *data, struct apml_mcamsr_msg __user *arg) @@ -425,7 +433,9 @@ static int apml_mcamsr_xfer(struct sbrmi_data *data, struct apml_mcamsr_msg __us if (ret && ret != -EPROTOTYPE) return ret; - return copy_to_user(arg, &msg, sizeof(struct apml_mcamsr_msg)); + if (copy_to_user(arg, &msg, sizeof(struct apml_mcamsr_msg))) + return -EFAULT; + return ret; } static long sbrmi_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c index def054ddd256..4fced9b36c80 100644 --- a/drivers/mmc/host/bcm2835.c +++ b/drivers/mmc/host/bcm2835.c @@ -503,7 +503,8 @@ void bcm2835_prepare_dma(struct bcm2835_host *host, struct mmc_data *data) DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!desc) { - dma_unmap_sg(dma_chan->device->dev, data->sg, sg_len, dir_data); + dma_unmap_sg(dma_chan->device->dev, data->sg, data->sg_len, + dir_data); return; } diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 13a84b9309e0..e3877a1c72a9 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -913,7 +913,8 @@ static bool glk_broken_cqhci(struct sdhci_pci_slot *slot) { return slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_GLK_EMMC && (dmi_match(DMI_BIOS_VENDOR, "LENOVO") || - dmi_match(DMI_SYS_VENDOR, "IRBIS")); + dmi_match(DMI_SYS_VENDOR, "IRBIS") || + dmi_match(DMI_SYS_VENDOR, "Positivo Tecnologia SA")); } static bool jsl_broken_hs400es(struct sdhci_pci_slot *slot) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index 73385ff4c0f3..9e94998e8df7 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -613,7 +613,8 @@ static const struct sdhci_ops sdhci_am654_ops = { static const struct sdhci_pltfm_data sdhci_am654_pdata = { .ops = &sdhci_am654_ops, .quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | + SDHCI_QUIRK2_DISABLE_HW_TIMEOUT, }; static const struct sdhci_am654_driver_data sdhci_am654_sr1_drvdata = { @@ -643,7 +644,8 @@ static const struct sdhci_ops sdhci_j721e_8bit_ops = { static const struct sdhci_pltfm_data sdhci_j721e_8bit_pdata = { .ops = &sdhci_j721e_8bit_ops, .quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | + SDHCI_QUIRK2_DISABLE_HW_TIMEOUT, }; static const struct sdhci_am654_driver_data sdhci_j721e_8bit_drvdata = { @@ -667,7 +669,8 @@ static const struct sdhci_ops sdhci_j721e_4bit_ops = { static const struct sdhci_pltfm_data sdhci_j721e_4bit_pdata = { .ops = &sdhci_j721e_4bit_ops, .quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | + SDHCI_QUIRK2_DISABLE_HW_TIMEOUT, }; static const struct sdhci_am654_driver_data sdhci_j721e_4bit_drvdata = { diff --git a/drivers/mux/Kconfig b/drivers/mux/Kconfig index 80f015cf6e54..c68132e38138 100644 --- a/drivers/mux/Kconfig +++ b/drivers/mux/Kconfig @@ -48,6 +48,7 @@ config MUX_GPIO config MUX_MMIO tristate "MMIO/Regmap register bitfield-controlled Multiplexer" depends on OF + select REGMAP_MMIO help MMIO/Regmap register bitfield-controlled Multiplexer controller. diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index ea8c807af4d8..3913971125de 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -145,13 +145,16 @@ void can_change_state(struct net_device *dev, struct can_frame *cf, EXPORT_SYMBOL_GPL(can_change_state); /* CAN device restart for bus-off recovery */ -static void can_restart(struct net_device *dev) +static int can_restart(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); struct sk_buff *skb; struct can_frame *cf; int err; + if (!priv->do_set_mode) + return -EOPNOTSUPP; + if (netif_carrier_ok(dev)) netdev_err(dev, "Attempt to restart for bus-off recovery, but carrier is OK?\n"); @@ -173,10 +176,14 @@ static void can_restart(struct net_device *dev) if (err) { netdev_err(dev, "Restart failed, error %pe\n", ERR_PTR(err)); netif_carrier_off(dev); + + return err; } else { netdev_dbg(dev, "Restarted\n"); priv->can_stats.restarts++; } + + return 0; } static void can_restart_work(struct work_struct *work) @@ -201,9 +208,8 @@ int can_restart_now(struct net_device *dev) return -EBUSY; cancel_delayed_work_sync(&priv->restart_work); - can_restart(dev); - return 0; + return can_restart(dev); } /* CAN bus-off diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index a36842ace084..f0e3f0d538fb 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -285,6 +285,12 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], } if (data[IFLA_CAN_RESTART_MS]) { + if (!priv->do_set_mode) { + NL_SET_ERR_MSG(extack, + "Device doesn't support restart from Bus Off"); + return -EOPNOTSUPP; + } + /* Do not allow changing restart delay while running */ if (dev->flags & IFF_UP) return -EBUSY; @@ -292,6 +298,12 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], } if (data[IFLA_CAN_RESTART]) { + if (!priv->do_set_mode) { + NL_SET_ERR_MSG(extack, + "Device doesn't support restart from Bus Off"); + return -EOPNOTSUPP; + } + /* Do not allow a restart while not running */ if (!(dev->flags & IFF_UP)) return -EINVAL; diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 6c656bfdb323..fe74dbd2c966 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -665,7 +665,7 @@ static int m_can_handle_lost_msg(struct net_device *dev) struct can_frame *frame; u32 timestamp = 0; - netdev_err(dev, "msg lost in rxf0\n"); + netdev_dbg(dev, "msg lost in rxf0\n"); stats->rx_errors++; stats->rx_over_errors++; diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index 8edaa339d590..39b0b5277b11 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -343,21 +343,19 @@ static void tcan4x5x_get_dt_data(struct m_can_classdev *cdev) of_property_read_bool(cdev->dev->of_node, "ti,nwkrq-voltage-vio"); } -static int tcan4x5x_get_gpios(struct m_can_classdev *cdev, - const struct tcan4x5x_version_info *version_info) +static int tcan4x5x_get_gpios(struct m_can_classdev *cdev) { struct tcan4x5x_priv *tcan4x5x = cdev_to_priv(cdev); int ret; - if (version_info->has_wake_pin) { - tcan4x5x->device_wake_gpio = devm_gpiod_get(cdev->dev, "device-wake", - GPIOD_OUT_HIGH); - if (IS_ERR(tcan4x5x->device_wake_gpio)) { - if (PTR_ERR(tcan4x5x->device_wake_gpio) == -EPROBE_DEFER) - return -EPROBE_DEFER; + tcan4x5x->device_wake_gpio = devm_gpiod_get_optional(cdev->dev, + "device-wake", + GPIOD_OUT_HIGH); + if (IS_ERR(tcan4x5x->device_wake_gpio)) { + if (PTR_ERR(tcan4x5x->device_wake_gpio) == -EPROBE_DEFER) + return -EPROBE_DEFER; - tcan4x5x_disable_wake(cdev); - } + tcan4x5x->device_wake_gpio = NULL; } tcan4x5x->reset_gpio = devm_gpiod_get_optional(cdev->dev, "reset", @@ -369,14 +367,31 @@ static int tcan4x5x_get_gpios(struct m_can_classdev *cdev, if (ret) return ret; - if (version_info->has_state_pin) { - tcan4x5x->device_state_gpio = devm_gpiod_get_optional(cdev->dev, - "device-state", - GPIOD_IN); - if (IS_ERR(tcan4x5x->device_state_gpio)) { - tcan4x5x->device_state_gpio = NULL; - tcan4x5x_disable_state(cdev); - } + tcan4x5x->device_state_gpio = devm_gpiod_get_optional(cdev->dev, + "device-state", + GPIOD_IN); + if (IS_ERR(tcan4x5x->device_state_gpio)) + tcan4x5x->device_state_gpio = NULL; + + return 0; +} + +static int tcan4x5x_check_gpios(struct m_can_classdev *cdev, + const struct tcan4x5x_version_info *version_info) +{ + struct tcan4x5x_priv *tcan4x5x = cdev_to_priv(cdev); + int ret; + + if (version_info->has_wake_pin && !tcan4x5x->device_wake_gpio) { + ret = tcan4x5x_disable_wake(cdev); + if (ret) + return ret; + } + + if (version_info->has_state_pin && !tcan4x5x->device_state_gpio) { + ret = tcan4x5x_disable_state(cdev); + if (ret) + return ret; } return 0; @@ -468,15 +483,21 @@ static int tcan4x5x_can_probe(struct spi_device *spi) goto out_m_can_class_free_dev; } + ret = tcan4x5x_get_gpios(mcan_class); + if (ret) { + dev_err(&spi->dev, "Getting gpios failed %pe\n", ERR_PTR(ret)); + goto out_power; + } + version_info = tcan4x5x_find_version(priv); if (IS_ERR(version_info)) { ret = PTR_ERR(version_info); goto out_power; } - ret = tcan4x5x_get_gpios(mcan_class, version_info); + ret = tcan4x5x_check_gpios(mcan_class, version_info); if (ret) { - dev_err(&spi->dev, "Getting gpios failed %pe\n", ERR_PTR(ret)); + dev_err(&spi->dev, "Checking gpios failed %pe\n", ERR_PTR(ret)); goto out_power; } diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 06dea3a13e77..9057180051df 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2984,6 +2984,7 @@ static int airoha_probe(struct platform_device *pdev) error_napi_stop: for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_qdma_stop_napi(ð->qdma[i]); + airoha_ppe_deinit(eth); error_hw_cleanup: for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_hw_cleanup(ð->qdma[i]); diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index 0e5b8c21b9aa..1e58a4aeb9a0 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -401,12 +401,13 @@ struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr) return ERR_PTR(-ENODEV); pdev = of_find_device_by_node(np); - of_node_put(np); if (!pdev) { dev_err(dev, "cannot find device node %s\n", np->name); + of_node_put(np); return ERR_PTR(-ENODEV); } + of_node_put(np); if (!try_module_get(THIS_MODULE)) { dev_err(dev, "failed to get the device driver module\n"); diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index 0d61b8580d72..f832562bd7a3 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -818,6 +818,9 @@ static void bcmasp_init_tx(struct bcmasp_intf *intf) /* Tx SPB */ tx_spb_ctrl_wl(intf, ((intf->channel + 8) << TX_SPB_CTRL_XF_BID_SHIFT), TX_SPB_CTRL_XF_CTRL2); + + if (intf->parent->tx_chan_offset) + tx_pause_ctrl_wl(intf, (1 << (intf->channel + 8)), TX_PAUSE_MAP_VECTOR); tx_spb_top_wl(intf, 0x1e, TX_SPB_TOP_BLKOUT); tx_spb_dma_wq(intf, intf->tx_spb_dma_addr, TX_SPB_DMA_READ); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ae89a981e052..243cb13cb01c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11607,11 +11607,9 @@ static void bnxt_free_irq(struct bnxt *bp) static int bnxt_request_irq(struct bnxt *bp) { + struct cpu_rmap *rmap = NULL; int i, j, rc = 0; unsigned long flags = 0; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif rc = bnxt_setup_int_mode(bp); if (rc) { @@ -11632,15 +11630,15 @@ static int bnxt_request_irq(struct bnxt *bp) int map_idx = bnxt_cp_num_to_irq_num(bp, i); struct bnxt_irq *irq = &bp->irq_tbl[map_idx]; -#ifdef CONFIG_RFS_ACCEL - if (rmap && bp->bnapi[i]->rx_ring) { + if (IS_ENABLED(CONFIG_RFS_ACCEL) && + rmap && bp->bnapi[i]->rx_ring) { rc = irq_cpu_rmap_add(rmap, irq->vector); if (rc) netdev_warn(bp->dev, "failed adding irq rmap for ring %d\n", j); j++; } -#endif + rc = request_irq(irq->vector, irq->handler, flags, irq->name, bp->bnapi[i]); if (rc) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index ce97befd3cb3..67e70d3d0980 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -368,23 +368,27 @@ static u32 bnxt_get_ctx_coredump(struct bnxt *bp, void *buf, u32 offset, if (!ctxm->mem_valid || !seg_id) continue; - if (trace) + if (trace) { extra_hlen = BNXT_SEG_RCD_LEN; + if (buf) { + u16 trace_type = bnxt_bstore_to_trace[type]; + + bnxt_fill_drv_seg_record(bp, &record, ctxm, + trace_type); + } + } + if (buf) data = buf + BNXT_SEG_HDR_LEN + extra_hlen; + seg_len = bnxt_copy_ctx_mem(bp, ctxm, data, 0) + extra_hlen; if (buf) { bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, NULL, seg_len, 0, 0, 0, comp_id, seg_id); memcpy(buf, &seg_hdr, BNXT_SEG_HDR_LEN); buf += BNXT_SEG_HDR_LEN; - if (trace) { - u16 trace_type = bnxt_bstore_to_trace[type]; - - bnxt_fill_drv_seg_record(bp, &record, ctxm, - trace_type); + if (trace) memcpy(buf, &record, BNXT_SEG_RCD_LEN); - } buf += seg_len; } len += BNXT_SEG_HDR_LEN + seg_len; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 0dbb880a7aa0..71e14be2507e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -487,7 +487,9 @@ static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc) if ((ets->tc_tx_bw[i] || ets->tc_tsa[i]) && i > bp->max_tc) return -EINVAL; + } + for (i = 0; i < max_tc; i++) { switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_STRICT: break; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 4a6d8cb9f970..09e7e8efa6fa 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -115,7 +115,7 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp, tx_buf->action = XDP_REDIRECT; tx_buf->xdpf = xdpf; dma_unmap_addr_set(tx_buf, mapping, mapping); - dma_unmap_len_set(tx_buf, len, 0); + dma_unmap_len_set(tx_buf, len, len); } void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index fa0077bc67b7..97585c160de3 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -4092,6 +4092,12 @@ static int bcmgenet_probe(struct platform_device *pdev) for (i = 0; i <= priv->hw_params->rx_queues; i++) priv->rx_rings[i].rx_max_coalesced_frames = 1; + /* Initialize u64 stats seq counter for 32bit machines */ + for (i = 0; i <= priv->hw_params->rx_queues; i++) + u64_stats_init(&priv->rx_rings[i].stats64.syncp); + for (i = 0; i <= priv->hw_params->tx_queues; i++) + u64_stats_init(&priv->tx_rings[i].stats64.syncp); + /* libphy will determine the link state */ netif_carrier_off(dev); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index aebb9fef3f6e..1be2dc40a1a6 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1578,7 +1578,6 @@ napi_del: static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) { struct nicvf *nic = netdev_priv(netdev); - int orig_mtu = netdev->mtu; /* For now just support only the usual MTU sized frames, * plus some headroom for VLAN, QinQ. @@ -1589,15 +1588,10 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } - WRITE_ONCE(netdev->mtu, new_mtu); - - if (!netif_running(netdev)) - return 0; - - if (nicvf_update_hw_max_frs(nic, new_mtu)) { - netdev->mtu = orig_mtu; + if (netif_running(netdev) && nicvf_update_hw_max_frs(nic, new_mtu)) return -EINVAL; - } + + WRITE_ONCE(netdev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index b82f121cadad..0f4efd505332 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4666,12 +4666,19 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv) return PTR_ERR(dpmac_dev); } - if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type) + if (IS_ERR(dpmac_dev)) return 0; + if (dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type) { + err = 0; + goto out_put_device; + } + mac = kzalloc(sizeof(struct dpaa2_mac), GFP_KERNEL); - if (!mac) - return -ENOMEM; + if (!mac) { + err = -ENOMEM; + goto out_put_device; + } mac->mc_dev = dpmac_dev; mac->mc_io = priv->mc_io; @@ -4705,6 +4712,8 @@ err_close_mac: dpaa2_mac_close(mac); err_free_mac: kfree(mac); +out_put_device: + put_device(&dpmac_dev->dev); return err; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 147a93bf9fa9..4643a3380618 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -1448,12 +1448,19 @@ static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv) if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER) return PTR_ERR(dpmac_dev); - if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type) + if (IS_ERR(dpmac_dev)) return 0; + if (dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type) { + err = 0; + goto out_put_device; + } + mac = kzalloc(sizeof(*mac), GFP_KERNEL); - if (!mac) - return -ENOMEM; + if (!mac) { + err = -ENOMEM; + goto out_put_device; + } mac->mc_dev = dpmac_dev; mac->mc_io = port_priv->ethsw_data->mc_io; @@ -1483,6 +1490,8 @@ err_close_mac: dpaa2_mac_close(mac); err_free_mac: kfree(mac); +out_put_device: + put_device(&dpmac_dev->dev); return err; } diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index dc35a23ec47f..d1aeb722d48f 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1917,49 +1917,56 @@ static void gve_turnup_and_check_status(struct gve_priv *priv) gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); } -static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) +static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv, + unsigned int txqueue) { - struct gve_notify_block *block; - struct gve_tx_ring *tx = NULL; - struct gve_priv *priv; - u32 last_nic_done; - u32 current_time; u32 ntfy_idx; - netdev_info(dev, "Timeout on tx queue, %d", txqueue); - priv = netdev_priv(dev); if (txqueue > priv->tx_cfg.num_queues) - goto reset; + return NULL; ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); if (ntfy_idx >= priv->num_ntfy_blks) - goto reset; + return NULL; + + return &priv->ntfy_blocks[ntfy_idx]; +} + +static bool gve_tx_timeout_try_q_kick(struct gve_priv *priv, + unsigned int txqueue) +{ + struct gve_notify_block *block; + u32 current_time; - block = &priv->ntfy_blocks[ntfy_idx]; - tx = block->tx; + block = gve_get_tx_notify_block(priv, txqueue); + + if (!block) + return false; current_time = jiffies_to_msecs(jiffies); - if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) - goto reset; + if (block->tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) + return false; - /* Check to see if there are missed completions, which will allow us to - * kick the queue. - */ - last_nic_done = gve_tx_load_event_counter(priv, tx); - if (last_nic_done - tx->done) { - netdev_info(dev, "Kicking queue %d", txqueue); - iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); - napi_schedule(&block->napi); - tx->last_kick_msec = current_time; - goto out; - } // Else reset. + netdev_info(priv->dev, "Kicking queue %d", txqueue); + napi_schedule(&block->napi); + block->tx->last_kick_msec = current_time; + return true; +} -reset: - gve_schedule_reset(priv); +static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct gve_notify_block *block; + struct gve_priv *priv; -out: - if (tx) - tx->queue_timeout++; + netdev_info(dev, "Timeout on tx queue, %d", txqueue); + priv = netdev_priv(dev); + + if (!gve_tx_timeout_try_q_kick(priv, txqueue)) + gve_schedule_reset(priv); + + block = gve_get_tx_notify_block(priv, txqueue); + if (block) + block->tx->queue_timeout++; priv->tx_timeo_cnt++; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b03b8758c777..aaa803563bd2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -11,6 +11,7 @@ #include <linux/irq.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/iommu.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/skbuff.h> @@ -1039,6 +1040,8 @@ static bool hns3_can_use_tx_sgl(struct hns3_enet_ring *ring, static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) { u32 alloc_size = ring->tqp->handle->kinfo.tx_spare_buf_size; + struct net_device *netdev = ring_to_netdev(ring); + struct hns3_nic_priv *priv = netdev_priv(netdev); struct hns3_tx_spare *tx_spare; struct page *page; dma_addr_t dma; @@ -1080,6 +1083,7 @@ static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) tx_spare->buf = page_address(page); tx_spare->len = PAGE_SIZE << order; ring->tx_spare = tx_spare; + ring->tx_copybreak = priv->tx_copybreak; return; dma_mapping_error: @@ -4874,6 +4878,30 @@ static void hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv) devm_kfree(&pdev->dev, priv->tqp_vector); } +static void hns3_update_tx_spare_buf_config(struct hns3_nic_priv *priv) +{ +#define HNS3_MIN_SPARE_BUF_SIZE (2 * 1024 * 1024) +#define HNS3_MAX_PACKET_SIZE (64 * 1024) + + struct iommu_domain *domain = iommu_get_domain_for_dev(priv->dev); + struct hnae3_ae_dev *ae_dev = hns3_get_ae_dev(priv->ae_handle); + struct hnae3_handle *handle = priv->ae_handle; + + if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3) + return; + + if (!(domain && iommu_is_dma_domain(domain))) + return; + + priv->min_tx_copybreak = HNS3_MAX_PACKET_SIZE; + priv->min_tx_spare_buf_size = HNS3_MIN_SPARE_BUF_SIZE; + + if (priv->tx_copybreak < priv->min_tx_copybreak) + priv->tx_copybreak = priv->min_tx_copybreak; + if (handle->kinfo.tx_spare_buf_size < priv->min_tx_spare_buf_size) + handle->kinfo.tx_spare_buf_size = priv->min_tx_spare_buf_size; +} + static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, unsigned int ring_type) { @@ -5107,6 +5135,7 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv) int i, j; int ret; + hns3_update_tx_spare_buf_config(priv); for (i = 0; i < ring_num; i++) { ret = hns3_alloc_ring_memory(&priv->ring[i]); if (ret) { @@ -5311,6 +5340,8 @@ static int hns3_client_init(struct hnae3_handle *handle) priv->ae_handle = handle; priv->tx_timeout_count = 0; priv->max_non_tso_bd_num = ae_dev->dev_specs.max_non_tso_bd_num; + priv->min_tx_copybreak = 0; + priv->min_tx_spare_buf_size = 0; set_bit(HNS3_NIC_STATE_DOWN, &priv->state); handle->msg_enable = netif_msg_init(debug, DEFAULT_MSG_LEVEL); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index d36c4ed16d8d..caf7a4df8585 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -596,6 +596,8 @@ struct hns3_nic_priv { struct hns3_enet_coalesce rx_coal; u32 tx_copybreak; u32 rx_copybreak; + u32 min_tx_copybreak; + u32 min_tx_spare_buf_size; }; union l3_hdr_info { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index a7de67699a01..30bdec1acb57 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9576,33 +9576,36 @@ static bool hclge_need_enable_vport_vlan_filter(struct hclge_vport *vport) return false; } -int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en) +static int __hclge_enable_vport_vlan_filter(struct hclge_vport *vport, + bool request_en) { - struct hclge_dev *hdev = vport->back; bool need_en; int ret; - mutex_lock(&hdev->vport_lock); - - vport->req_vlan_fltr_en = request_en; - need_en = hclge_need_enable_vport_vlan_filter(vport); - if (need_en == vport->cur_vlan_fltr_en) { - mutex_unlock(&hdev->vport_lock); + if (need_en == vport->cur_vlan_fltr_en) return 0; - } ret = hclge_set_vport_vlan_filter(vport, need_en); - if (ret) { - mutex_unlock(&hdev->vport_lock); + if (ret) return ret; - } vport->cur_vlan_fltr_en = need_en; + return 0; +} + +int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en) +{ + struct hclge_dev *hdev = vport->back; + int ret; + + mutex_lock(&hdev->vport_lock); + vport->req_vlan_fltr_en = request_en; + ret = __hclge_enable_vport_vlan_filter(vport, request_en); mutex_unlock(&hdev->vport_lock); - return 0; + return ret; } static int hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable) @@ -10623,16 +10626,19 @@ static void hclge_sync_vlan_fltr_state(struct hclge_dev *hdev) &vport->state)) continue; - ret = hclge_enable_vport_vlan_filter(vport, - vport->req_vlan_fltr_en); + mutex_lock(&hdev->vport_lock); + ret = __hclge_enable_vport_vlan_filter(vport, + vport->req_vlan_fltr_en); if (ret) { dev_err(&hdev->pdev->dev, "failed to sync vlan filter state for vport%u, ret = %d\n", vport->vport_id, ret); set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE, &vport->state); + mutex_unlock(&hdev->vport_lock); return; } + mutex_unlock(&hdev->vport_lock); } } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index ec581d4b696f..4bd52eab3914 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -497,14 +497,14 @@ int hclge_ptp_init(struct hclge_dev *hdev) if (ret) { dev_err(&hdev->pdev->dev, "failed to init freq, ret = %d\n", ret); - goto out; + goto out_clear_int; } ret = hclge_ptp_set_ts_mode(hdev, &hdev->ptp->ts_cfg); if (ret) { dev_err(&hdev->pdev->dev, "failed to init ts mode, ret = %d\n", ret); - goto out; + goto out_clear_int; } ktime_get_real_ts64(&ts); @@ -512,7 +512,7 @@ int hclge_ptp_init(struct hclge_dev *hdev) if (ret) { dev_err(&hdev->pdev->dev, "failed to init ts time, ret = %d\n", ret); - goto out; + goto out_clear_int; } set_bit(HCLGE_STATE_PTP_EN, &hdev->state); @@ -520,6 +520,9 @@ int hclge_ptp_init(struct hclge_dev *hdev) return 0; +out_clear_int: + clear_bit(HCLGE_PTP_FLAG_EN, &hdev->ptp->flags); + hclge_ptp_int_en(hdev, false); out: hclge_ptp_destroy_clock(hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index c4f35e8e2177..4cf6ac04662a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3094,11 +3094,7 @@ static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) static u32 hclgevf_get_max_channels(struct hclgevf_dev *hdev) { - struct hnae3_handle *nic = &hdev->nic; - struct hnae3_knic_private_info *kinfo = &nic->kinfo; - - return min_t(u32, hdev->rss_size_max, - hdev->num_tqps / kinfo->tc_info.num_tc); + return min(hdev->rss_size_max, hdev->num_tqps); } /** diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index a189038d88df..246ddce753f9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -211,7 +211,6 @@ struct ibmvnic_statistics { u8 reserved[72]; } __packed __aligned(8); -#define NUM_TX_STATS 3 struct ibmvnic_tx_queue_stats { u64 batched_packets; u64 direct_packets; @@ -219,13 +218,18 @@ struct ibmvnic_tx_queue_stats { u64 dropped_packets; }; -#define NUM_RX_STATS 3 +#define NUM_TX_STATS \ + (sizeof(struct ibmvnic_tx_queue_stats) / sizeof(u64)) + struct ibmvnic_rx_queue_stats { u64 packets; u64 bytes; u64 interrupts; }; +#define NUM_RX_STATS \ + (sizeof(struct ibmvnic_rx_queue_stats) / sizeof(u64)) + struct ibmvnic_acl_buffer { __be32 len; __be32 version; diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index 8294a7c4f122..ba331899d186 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -638,6 +638,9 @@ /* For checksumming, the sum of all words in the NVM should equal 0xBABA. */ #define NVM_SUM 0xBABA +/* Uninitialized ("empty") checksum word value */ +#define NVM_CHECKSUM_UNINITIALIZED 0xFFFF + /* PBA (printed board assembly) number words */ #define NVM_PBA_OFFSET_0 8 #define NVM_PBA_OFFSET_1 9 diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 364378133526..df4e7d781cb1 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -4274,6 +4274,8 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) ret_val = e1000e_update_nvm_checksum(hw); if (ret_val) return ret_val; + } else if (hw->mac.type == e1000_pch_tgp) { + return 0; } } diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c index e609f4df86f4..16369e6d245a 100644 --- a/drivers/net/ethernet/intel/e1000e/nvm.c +++ b/drivers/net/ethernet/intel/e1000e/nvm.c @@ -558,6 +558,12 @@ s32 e1000e_validate_nvm_checksum_generic(struct e1000_hw *hw) checksum += nvm_data; } + if (hw->mac.type == e1000_pch_tgp && + nvm_data == NVM_CHECKSUM_UNINITIALIZED) { + e_dbg("Uninitialized NVM Checksum on TGP platform - ignoring\n"); + return 0; + } + if (checksum != (u16)NVM_SUM) { e_dbg("NVM Checksum Invalid\n"); return -E1000_ERR_NVM; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 6119a4108838..65a2816142d9 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -189,13 +189,14 @@ struct fm10k_q_vector { struct fm10k_ring_container rx, tx; struct napi_struct napi; + struct rcu_head rcu; /* to avoid race with update stats on free */ + cpumask_t affinity_mask; char name[IFNAMSIZ + 9]; #ifdef CONFIG_DEBUG_FS struct dentry *dbg_q_vector; #endif /* CONFIG_DEBUG_FS */ - struct rcu_head rcu; /* to avoid race with update stats on free */ /* for dynamic allocation of rings associated with this q_vector */ struct fm10k_ring ring[] ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index c67963bfe14e..7c600d6e66ba 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -945,6 +945,7 @@ struct i40e_q_vector { u16 reg_idx; /* register index of the interrupt */ struct napi_struct napi; + struct rcu_head rcu; /* to avoid race with update stats on free */ struct i40e_ring_container rx; struct i40e_ring_container tx; @@ -955,7 +956,6 @@ struct i40e_q_vector { cpumask_t affinity_mask; struct irq_affinity_notify affinity_notify; - struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; bool in_busy_poll; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 88e6bef69342..7ccfc1191ae5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3137,10 +3137,10 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) const u8 *addr = al->list[i].addr; /* Allow to delete VF primary MAC only if it was not set - * administratively by PF or if VF is trusted. + * administratively by PF. */ if (ether_addr_equal(addr, vf->default_lan_addr.addr)) { - if (i40e_can_vf_change_mac(vf)) + if (!vf->pf_set_mac) was_unimac_deleted = true; else continue; @@ -5006,7 +5006,7 @@ int i40e_get_vf_stats(struct net_device *netdev, int vf_id, vf_stats->broadcast = stats->rx_broadcast; vf_stats->multicast = stats->rx_multicast; vf_stats->rx_dropped = stats->rx_discards + stats->rx_discards_other; - vf_stats->tx_dropped = stats->tx_discards; + vf_stats->tx_dropped = stats->tx_errors; return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index 59323c019544..351824dc3c62 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -2301,6 +2301,8 @@ enum ice_ddp_state ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, return ICE_DDP_PKG_ERR; buf_copy = devm_kmemdup(ice_hw_to_dev(hw), buf, len, GFP_KERNEL); + if (!buf_copy) + return ICE_DDP_PKG_ERR; state = ice_init_pkg(hw, buf_copy, len); if (!ice_is_init_pkg_successful(state)) { diff --git a/drivers/net/ethernet/intel/ice/ice_debugfs.c b/drivers/net/ethernet/intel/ice/ice_debugfs.c index 9fc0fd95a13d..cb71eca6a85b 100644 --- a/drivers/net/ethernet/intel/ice/ice_debugfs.c +++ b/drivers/net/ethernet/intel/ice/ice_debugfs.c @@ -606,7 +606,7 @@ void ice_debugfs_fwlog_init(struct ice_pf *pf) pf->ice_debugfs_pf_fwlog = debugfs_create_dir("fwlog", pf->ice_debugfs_pf); - if (IS_ERR(pf->ice_debugfs_pf)) + if (IS_ERR(pf->ice_debugfs_pf_fwlog)) goto err_create_module_files; fw_modules_dir = debugfs_create_dir("modules", diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 2410aee59fb2..d132eb477551 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -2226,7 +2226,8 @@ bool ice_lag_is_switchdev_running(struct ice_pf *pf) struct ice_lag *lag = pf->lag; struct net_device *tmp_nd; - if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) || !lag) + if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) || + !lag || !lag->upper_netdev) return false; rcu_read_lock(); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 47311b134a7a..e6acd791bf64 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -507,9 +507,10 @@ struct ixgbe_q_vector { struct ixgbe_ring_container rx, tx; struct napi_struct napi; + struct rcu_head rcu; /* to avoid race with update stats on free */ + cpumask_t affinity_mask; int numa_node; - struct rcu_head rcu; /* to avoid race with update stats on free */ char name[IFNAMSIZ + 9]; /* for dynamic allocation of rings associated with this q_vector */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index b1aeea7c4a91..e395ef5f356e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1947,8 +1947,8 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, pages_queue, token, force_polling); - if (callback) - return err; + if (callback && !err) + return 0; if (err > 0) /* Failed in FW, command didn't execute */ err = deliv_status_to_err(err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index b5c3a2a9d2a5..9560fcba643f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -18,7 +18,8 @@ enum { enum { MLX5E_TC_PRIO = 0, - MLX5E_NIC_PRIO + MLX5E_PROMISC_PRIO, + MLX5E_NIC_PRIO, }; struct mlx5e_flow_table { @@ -68,9 +69,13 @@ struct mlx5e_l2_table { MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) -/* NIC prio FTS */ +/* NIC promisc FT level */ enum { MLX5E_PROMISC_FT_LEVEL, +}; + +/* NIC prio FTS */ +enum { MLX5E_VLAN_FT_LEVEL, MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index 298bb74ec5e9..d1d629697e28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -113,7 +113,7 @@ int mlx5e_dim_rx_change(struct mlx5e_rq *rq, bool enable) __set_bit(MLX5E_RQ_STATE_DIM, &rq->state); } else { __clear_bit(MLX5E_RQ_STATE_DIM, &rq->state); - + synchronize_net(); mlx5e_dim_disable(rq->dim); rq->dim = NULL; } @@ -140,7 +140,7 @@ int mlx5e_dim_tx_change(struct mlx5e_txqsq *sq, bool enable) __set_bit(MLX5E_SQ_STATE_DIM, &sq->state); } else { __clear_bit(MLX5E_SQ_STATE_DIM, &sq->state); - + synchronize_net(); mlx5e_dim_disable(sq->dim); sq->dim = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 04a969128161..265c4ca85f7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -780,7 +780,7 @@ static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs) ft_attr.max_fte = MLX5E_PROMISC_TABLE_SIZE; ft_attr.autogroup.max_num_groups = 1; ft_attr.level = MLX5E_PROMISC_FT_LEVEL; - ft_attr.prio = MLX5E_NIC_PRIO; + ft_attr.prio = MLX5E_PROMISC_PRIO; ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr); if (IS_ERR(ft->t)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 84b1ab8233b8..7462514c7f3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1154,8 +1154,9 @@ static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp) } } -static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, - u32 cqe_bcnt) +static unsigned int mlx5e_lro_update_hdr(struct sk_buff *skb, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); struct tcphdr *tcp; @@ -1205,6 +1206,8 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, tcp->check = tcp_v6_check(payload_len, &ipv6->saddr, &ipv6->daddr, check); } + + return (unsigned int)((unsigned char *)tcp + tcp->doff * 4 - skb->data); } static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) @@ -1561,8 +1564,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe); if (lro_num_seg > 1) { - mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); - skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); + unsigned int hdrlen = mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); + + skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt - hdrlen, lro_num_seg); /* Subtract one since we already counted this as one * "regular" packet in mlx5e_complete_rx_cqe() */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index b6ae384396b3..ad9f6fca9b6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -1076,6 +1076,7 @@ static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node, return err; } esw_qos_node_set_parent(node, parent); + node->bw_share = 0; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 0e3a977d5332..bee906661282 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1182,19 +1182,19 @@ static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw, static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { + struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_handle **flows; - /* total vports is the same for both e-switches */ - int nvports = esw->total_vports; struct mlx5_flow_handle *flow; + struct mlx5_vport *peer_vport; struct mlx5_flow_spec *spec; - struct mlx5_vport *vport; int err, pfindex; unsigned long i; void *misc; - if (!MLX5_VPORT_MANAGER(esw->dev) && !mlx5_core_is_ecpf_esw_manager(esw->dev)) + if (!MLX5_VPORT_MANAGER(peer_dev) && + !mlx5_core_is_ecpf_esw_manager(peer_dev)) return 0; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); @@ -1203,7 +1203,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, peer_miss_rules_setup(esw, peer_dev, spec, &dest); - flows = kvcalloc(nvports, sizeof(*flows), GFP_KERNEL); + flows = kvcalloc(peer_esw->total_vports, sizeof(*flows), GFP_KERNEL); if (!flows) { err = -ENOMEM; goto alloc_flows_err; @@ -1213,10 +1213,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, - spec, MLX5_VPORT_PF); + if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); + esw_set_peer_miss_rule_source_port(esw, peer_esw, spec, + MLX5_VPORT_PF); flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); @@ -1224,11 +1224,11 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, err = PTR_ERR(flow); goto add_pf_flow_err; } - flows[vport->index] = flow; + flows[peer_vport->index] = flow; } - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + if (mlx5_ecpf_vport_exists(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF); MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); @@ -1236,13 +1236,14 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, err = PTR_ERR(flow); goto add_ecpf_flow_err; } - flows[vport->index] = flow; + flows[peer_vport->index] = flow; } - mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) { esw_set_peer_miss_rule_source_port(esw, - peer_dev->priv.eswitch, - spec, vport->vport); + peer_esw, + spec, peer_vport->vport); flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); @@ -1250,22 +1251,22 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, err = PTR_ERR(flow); goto add_vf_flow_err; } - flows[vport->index] = flow; + flows[peer_vport->index] = flow; } - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { - if (i >= mlx5_core_max_ec_vfs(peer_dev)) - break; - esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, - spec, vport->vport); + if (mlx5_core_ec_sriov_enabled(peer_dev)) { + mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_ec_vfs(peer_dev)) { + esw_set_peer_miss_rule_source_port(esw, peer_esw, + spec, + peer_vport->vport); flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { err = PTR_ERR(flow); goto add_ec_vf_flow_err; } - flows[vport->index] = flow; + flows[peer_vport->index] = flow; } } @@ -1282,25 +1283,27 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, return 0; add_ec_vf_flow_err: - mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { - if (!flows[vport->index]) + mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_ec_vfs(peer_dev)) { + if (!flows[peer_vport->index]) continue; - mlx5_del_flow_rules(flows[vport->index]); + mlx5_del_flow_rules(flows[peer_vport->index]); } add_vf_flow_err: - mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { - if (!flows[vport->index]) + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) { + if (!flows[peer_vport->index]) continue; - mlx5_del_flow_rules(flows[vport->index]); + mlx5_del_flow_rules(flows[peer_vport->index]); } - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - mlx5_del_flow_rules(flows[vport->index]); + if (mlx5_ecpf_vport_exists(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF); + mlx5_del_flow_rules(flows[peer_vport->index]); } add_ecpf_flow_err: - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - mlx5_del_flow_rules(flows[vport->index]); + if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); + mlx5_del_flow_rules(flows[peer_vport->index]); } add_pf_flow_err: esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); @@ -1313,37 +1316,34 @@ alloc_flows_err: static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { + struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch; u16 peer_index = mlx5_get_dev_index(peer_dev); struct mlx5_flow_handle **flows; - struct mlx5_vport *vport; + struct mlx5_vport *peer_vport; unsigned long i; flows = esw->fdb_table.offloads.peer_miss_rules[peer_index]; if (!flows) return; - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { - /* The flow for a particular vport could be NULL if the other ECPF - * has fewer or no VFs enabled - */ - if (!flows[vport->index]) - continue; - mlx5_del_flow_rules(flows[vport->index]); - } + if (mlx5_core_ec_sriov_enabled(peer_dev)) { + mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_ec_vfs(peer_dev)) + mlx5_del_flow_rules(flows[peer_vport->index]); } - mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) - mlx5_del_flow_rules(flows[vport->index]); + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) + mlx5_del_flow_rules(flows[peer_vport->index]); - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - mlx5_del_flow_rules(flows[vport->index]); + if (mlx5_ecpf_vport_exists(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF); + mlx5_del_flow_rules(flows[peer_vport->index]); } - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - mlx5_del_flow_rules(flows[vport->index]); + if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); + mlx5_del_flow_rules(flows[peer_vport->index]); } kvfree(flows); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index a8046200d376..3dd9a6f40709 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -113,13 +113,16 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, +/* Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, * {IPsec RoCE MPV,Alias table},IPsec RoCE policy */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 11 +#define KERNEL_NIC_PRIO_NUM_LEVELS 10 #define KERNEL_NIC_NUM_PRIOS 1 -/* One more level for tc */ -#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) +/* One more level for tc, and one more for promisc */ +#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 2) + +#define KERNEL_NIC_PROMISC_NUM_PRIOS 1 +#define KERNEL_NIC_PROMISC_NUM_LEVELS 1 #define KERNEL_NIC_TC_NUM_PRIOS 1 #define KERNEL_NIC_TC_NUM_LEVELS 3 @@ -187,6 +190,8 @@ static struct init_tree_node { ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS), + ADD_MULTIPLE_PRIO(KERNEL_NIC_PROMISC_NUM_PRIOS, + KERNEL_NIC_PROMISC_NUM_LEVELS), ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, KERNEL_NIC_PRIO_NUM_LEVELS))), ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 41e8660c819c..9c1504d29d34 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -2257,6 +2257,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */ { PCI_VDEVICE(MELLANOX, 0x1025) }, /* ConnectX-9 */ + { PCI_VDEVICE(MELLANOX, 0x1027) }, /* ConnectX-10 */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 52cf7112762c..58f8ee710912 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -6,6 +6,7 @@ #include <linux/pci.h> #include <linux/utsname.h> #include <linux/version.h> +#include <linux/export.h> #include <net/mana/mana.h> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index ccd2885c939e..faad1cb880f8 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -10,6 +10,7 @@ #include <linux/filter.h> #include <linux/mm.h> #include <linux/pci.h> +#include <linux/export.h> #include <net/checksum.h> #include <net/ip6_checksum.h> diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index 6b3f7fca8d15..05c4b6c8c9c3 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c @@ -1259,7 +1259,12 @@ static int rtsn_probe(struct platform_device *pdev) priv = netdev_priv(ndev); priv->pdev = pdev; priv->ndev = ndev; + priv->ptp_priv = rcar_gen4_ptp_alloc(pdev); + if (!priv->ptp_priv) { + ret = -ENOMEM; + goto error_free; + } spin_lock_init(&priv->lock); platform_set_drvdata(pdev, priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 9a47015254bb..ea33ae39be6b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -433,6 +433,12 @@ static int intel_crosststamp(ktime_t *device, return -ETIMEDOUT; } + *system = (struct system_counterval_t) { + .cycles = 0, + .cs_id = CSID_X86_ART, + .use_nsecs = false, + }; + num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) & GMAC_TIMESTAMP_ATSNS_MASK) >> GMAC_TIMESTAMP_ATSNS_SHIFT; @@ -448,7 +454,7 @@ static int intel_crosststamp(ktime_t *device, } system->cycles *= intel_priv->crossts_adj; - system->cs_id = CSID_X86_ART; + priv->plat->flags &= ~STMMAC_FLAG_INT_SNAPSHOT_EN; return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 7840bc403788..5dcc95bc0ad2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -364,19 +364,17 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, } /* TX/RX NORMAL interrupts */ - if (likely(intr_status & XGMAC_NIS)) { - if (likely(intr_status & XGMAC_RI)) { - u64_stats_update_begin(&stats->syncp); - u64_stats_inc(&stats->rx_normal_irq_n[chan]); - u64_stats_update_end(&stats->syncp); - ret |= handle_rx; - } - if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { - u64_stats_update_begin(&stats->syncp); - u64_stats_inc(&stats->tx_normal_irq_n[chan]); - u64_stats_update_end(&stats->syncp); - ret |= handle_tx; - } + if (likely(intr_status & XGMAC_RI)) { + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->rx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); + ret |= handle_rx; + } + if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->tx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); + ret |= handle_tx; } /* Clear interrupts */ diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index f20d1ff192ef..231ca141331f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -856,8 +856,6 @@ static struct sk_buff *am65_cpsw_build_skb(void *page_addr, { struct sk_buff *skb; - len += AM65_CPSW_HEADROOM; - skb = build_skb(page_addr, len); if (unlikely(!skb)) return NULL; @@ -1344,7 +1342,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, } skb = am65_cpsw_build_skb(page_addr, ndev, - AM65_CPSW_MAX_PACKET_SIZE, headroom); + PAGE_SIZE, headroom); if (unlikely(!skb)) { new_page = page; goto requeue; diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c index ddfd1c02a885..da53eb04b0a4 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.c +++ b/drivers/net/ethernet/ti/icssg/icssg_config.c @@ -288,8 +288,12 @@ static int prueth_fw_offload_buffer_setup(struct prueth_emac *emac) int i; addr = lower_32_bits(prueth->msmcram.pa); - if (slice) - addr += PRUETH_NUM_BUF_POOLS * PRUETH_EMAC_BUF_POOL_SIZE; + if (slice) { + if (prueth->pdata.banked_ms_ram) + addr += MSMC_RAM_BANK_SIZE; + else + addr += PRUETH_SW_TOTAL_BUF_SIZE_PER_SLICE; + } if (addr % SZ_64K) { dev_warn(prueth->dev, "buffer pool needs to be 64KB aligned\n"); @@ -297,43 +301,66 @@ static int prueth_fw_offload_buffer_setup(struct prueth_emac *emac) } bpool_cfg = emac->dram.va + BUFFER_POOL_0_ADDR_OFFSET; - /* workaround for f/w bug. bpool 0 needs to be initialized */ - for (i = 0; i < PRUETH_NUM_BUF_POOLS; i++) { + + /* Configure buffer pools for forwarding buffers + * - used by firmware to store packets to be forwarded to other port + * - 8 total pools per slice + */ + for (i = 0; i < PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE; i++) { writel(addr, &bpool_cfg[i].addr); - writel(PRUETH_EMAC_BUF_POOL_SIZE, &bpool_cfg[i].len); - addr += PRUETH_EMAC_BUF_POOL_SIZE; + writel(PRUETH_SW_FWD_BUF_POOL_SIZE, &bpool_cfg[i].len); + addr += PRUETH_SW_FWD_BUF_POOL_SIZE; } - if (!slice) - addr += PRUETH_NUM_BUF_POOLS * PRUETH_EMAC_BUF_POOL_SIZE; - else - addr += PRUETH_SW_NUM_BUF_POOLS_HOST * PRUETH_SW_BUF_POOL_SIZE_HOST; - - for (i = PRUETH_NUM_BUF_POOLS; - i < 2 * PRUETH_SW_NUM_BUF_POOLS_HOST + PRUETH_NUM_BUF_POOLS; - i++) { - /* The driver only uses first 4 queues per PRU so only initialize them */ - if (i % PRUETH_SW_NUM_BUF_POOLS_HOST < PRUETH_SW_NUM_BUF_POOLS_PER_PRU) { - writel(addr, &bpool_cfg[i].addr); - writel(PRUETH_SW_BUF_POOL_SIZE_HOST, &bpool_cfg[i].len); - addr += PRUETH_SW_BUF_POOL_SIZE_HOST; + /* Configure buffer pools for Local Injection buffers + * - used by firmware to store packets received from host core + * - 16 total pools per slice + */ + for (i = 0; i < PRUETH_NUM_LI_BUF_POOLS_PER_SLICE; i++) { + int cfg_idx = i + PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE; + + /* The driver only uses first 4 queues per PRU, + * so only initialize buffer for them + */ + if ((i % PRUETH_NUM_LI_BUF_POOLS_PER_PORT_PER_SLICE) + < PRUETH_SW_USED_LI_BUF_POOLS_PER_PORT_PER_SLICE) { + writel(addr, &bpool_cfg[cfg_idx].addr); + writel(PRUETH_SW_LI_BUF_POOL_SIZE, + &bpool_cfg[cfg_idx].len); + addr += PRUETH_SW_LI_BUF_POOL_SIZE; } else { - writel(0, &bpool_cfg[i].addr); - writel(0, &bpool_cfg[i].len); + writel(0, &bpool_cfg[cfg_idx].addr); + writel(0, &bpool_cfg[cfg_idx].len); } } - if (!slice) - addr += PRUETH_SW_NUM_BUF_POOLS_HOST * PRUETH_SW_BUF_POOL_SIZE_HOST; - else - addr += PRUETH_EMAC_RX_CTX_BUF_SIZE; + /* Express RX buffer queue + * - used by firmware to store express packets to be transmitted + * to the host core + */ + rxq_ctx = emac->dram.va + HOST_RX_Q_EXP_CONTEXT_OFFSET; + for (i = 0; i < 3; i++) + writel(addr, &rxq_ctx->start[i]); + + addr += PRUETH_SW_HOST_EXP_BUF_POOL_SIZE; + writel(addr, &rxq_ctx->end); + /* Pre-emptible RX buffer queue + * - used by firmware to store preemptible packets to be transmitted + * to the host core + */ rxq_ctx = emac->dram.va + HOST_RX_Q_PRE_CONTEXT_OFFSET; for (i = 0; i < 3; i++) writel(addr, &rxq_ctx->start[i]); - addr += PRUETH_EMAC_RX_CTX_BUF_SIZE; - writel(addr - SZ_2K, &rxq_ctx->end); + addr += PRUETH_SW_HOST_PRE_BUF_POOL_SIZE; + writel(addr, &rxq_ctx->end); + + /* Set pointer for default dropped packet write + * - used by firmware to temporarily store packet to be dropped + */ + rxq_ctx = emac->dram.va + DEFAULT_MSMC_Q_OFFSET; + writel(addr, &rxq_ctx->start[0]); return 0; } @@ -347,13 +374,13 @@ static int prueth_emac_buffer_setup(struct prueth_emac *emac) u32 addr; int i; - /* Layout to have 64KB aligned buffer pool - * |BPOOL0|BPOOL1|RX_CTX0|RX_CTX1| - */ - addr = lower_32_bits(prueth->msmcram.pa); - if (slice) - addr += PRUETH_NUM_BUF_POOLS * PRUETH_EMAC_BUF_POOL_SIZE; + if (slice) { + if (prueth->pdata.banked_ms_ram) + addr += MSMC_RAM_BANK_SIZE; + else + addr += PRUETH_EMAC_TOTAL_BUF_SIZE_PER_SLICE; + } if (addr % SZ_64K) { dev_warn(prueth->dev, "buffer pool needs to be 64KB aligned\n"); @@ -361,39 +388,66 @@ static int prueth_emac_buffer_setup(struct prueth_emac *emac) } bpool_cfg = emac->dram.va + BUFFER_POOL_0_ADDR_OFFSET; - /* workaround for f/w bug. bpool 0 needs to be initilalized */ - writel(addr, &bpool_cfg[0].addr); - writel(0, &bpool_cfg[0].len); - for (i = PRUETH_EMAC_BUF_POOL_START; - i < PRUETH_EMAC_BUF_POOL_START + PRUETH_NUM_BUF_POOLS; - i++) { - writel(addr, &bpool_cfg[i].addr); - writel(PRUETH_EMAC_BUF_POOL_SIZE, &bpool_cfg[i].len); - addr += PRUETH_EMAC_BUF_POOL_SIZE; + /* Configure buffer pools for forwarding buffers + * - in mac mode - no forwarding so initialize all pools to 0 + * - 8 total pools per slice + */ + for (i = 0; i < PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE; i++) { + writel(0, &bpool_cfg[i].addr); + writel(0, &bpool_cfg[i].len); } - if (!slice) - addr += PRUETH_NUM_BUF_POOLS * PRUETH_EMAC_BUF_POOL_SIZE; - else - addr += PRUETH_EMAC_RX_CTX_BUF_SIZE * 2; + /* Configure buffer pools for Local Injection buffers + * - used by firmware to store packets received from host core + * - 16 total pools per slice + */ + bpool_cfg = emac->dram.va + BUFFER_POOL_0_ADDR_OFFSET; + for (i = 0; i < PRUETH_NUM_LI_BUF_POOLS_PER_SLICE; i++) { + int cfg_idx = i + PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE; + + /* In EMAC mode, only first 4 buffers are used, + * as 1 slice needs to handle only 1 port + */ + if (i < PRUETH_EMAC_USED_LI_BUF_POOLS_PER_PORT_PER_SLICE) { + writel(addr, &bpool_cfg[cfg_idx].addr); + writel(PRUETH_EMAC_LI_BUF_POOL_SIZE, + &bpool_cfg[cfg_idx].len); + addr += PRUETH_EMAC_LI_BUF_POOL_SIZE; + } else { + writel(0, &bpool_cfg[cfg_idx].addr); + writel(0, &bpool_cfg[cfg_idx].len); + } + } - /* Pre-emptible RX buffer queue */ - rxq_ctx = emac->dram.va + HOST_RX_Q_PRE_CONTEXT_OFFSET; + /* Express RX buffer queue + * - used by firmware to store express packets to be transmitted + * to host core + */ + rxq_ctx = emac->dram.va + HOST_RX_Q_EXP_CONTEXT_OFFSET; for (i = 0; i < 3; i++) writel(addr, &rxq_ctx->start[i]); - addr += PRUETH_EMAC_RX_CTX_BUF_SIZE; + addr += PRUETH_EMAC_HOST_EXP_BUF_POOL_SIZE; writel(addr, &rxq_ctx->end); - /* Express RX buffer queue */ - rxq_ctx = emac->dram.va + HOST_RX_Q_EXP_CONTEXT_OFFSET; + /* Pre-emptible RX buffer queue + * - used by firmware to store preemptible packets to be transmitted + * to host core + */ + rxq_ctx = emac->dram.va + HOST_RX_Q_PRE_CONTEXT_OFFSET; for (i = 0; i < 3; i++) writel(addr, &rxq_ctx->start[i]); - addr += PRUETH_EMAC_RX_CTX_BUF_SIZE; + addr += PRUETH_EMAC_HOST_PRE_BUF_POOL_SIZE; writel(addr, &rxq_ctx->end); + /* Set pointer for default dropped packet write + * - used by firmware to temporarily store packet to be dropped + */ + rxq_ctx = emac->dram.va + DEFAULT_MSMC_Q_OFFSET; + writel(addr, &rxq_ctx->start[0]); + return 0; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.h b/drivers/net/ethernet/ti/icssg/icssg_config.h index c884e9fa099e..60d69744ffae 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.h +++ b/drivers/net/ethernet/ti/icssg/icssg_config.h @@ -26,21 +26,71 @@ struct icssg_flow_cfg { #define PRUETH_MAX_RX_FLOWS 1 /* excluding default flow */ #define PRUETH_RX_FLOW_DATA 0 -#define PRUETH_EMAC_BUF_POOL_SIZE SZ_8K -#define PRUETH_EMAC_POOLS_PER_SLICE 24 -#define PRUETH_EMAC_BUF_POOL_START 8 -#define PRUETH_NUM_BUF_POOLS 8 -#define PRUETH_EMAC_RX_CTX_BUF_SIZE SZ_16K /* per slice */ -#define MSMC_RAM_SIZE \ - (2 * (PRUETH_EMAC_BUF_POOL_SIZE * PRUETH_NUM_BUF_POOLS + \ - PRUETH_EMAC_RX_CTX_BUF_SIZE * 2)) - -#define PRUETH_SW_BUF_POOL_SIZE_HOST SZ_4K -#define PRUETH_SW_NUM_BUF_POOLS_HOST 8 -#define PRUETH_SW_NUM_BUF_POOLS_PER_PRU 4 -#define MSMC_RAM_SIZE_SWITCH_MODE \ - (MSMC_RAM_SIZE + \ - (2 * PRUETH_SW_BUF_POOL_SIZE_HOST * PRUETH_SW_NUM_BUF_POOLS_HOST)) +/* Defines for forwarding path buffer pools: + * - used by firmware to store packets to be forwarded to other port + * - 8 total pools per slice + * - only used in switch mode (as no forwarding in mac mode) + */ +#define PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE 8 +#define PRUETH_SW_FWD_BUF_POOL_SIZE (SZ_8K) + +/* Defines for local injection path buffer pools: + * - used by firmware to store packets received from host core + * - 16 total pools per slice + * - 8 pools per port per slice and each slice handles both ports + * - only 4 out of 8 pools used per port (as only 4 real QoS levels in ICSSG) + * - switch mode: 8 total pools used + * - mac mode: 4 total pools used + */ +#define PRUETH_NUM_LI_BUF_POOLS_PER_SLICE 16 +#define PRUETH_NUM_LI_BUF_POOLS_PER_PORT_PER_SLICE 8 +#define PRUETH_SW_LI_BUF_POOL_SIZE SZ_4K +#define PRUETH_SW_USED_LI_BUF_POOLS_PER_SLICE 8 +#define PRUETH_SW_USED_LI_BUF_POOLS_PER_PORT_PER_SLICE 4 +#define PRUETH_EMAC_LI_BUF_POOL_SIZE SZ_8K +#define PRUETH_EMAC_USED_LI_BUF_POOLS_PER_SLICE 4 +#define PRUETH_EMAC_USED_LI_BUF_POOLS_PER_PORT_PER_SLICE 4 + +/* Defines for host egress path - express and preemptible buffers + * - used by firmware to store express and preemptible packets + * to be transmitted to host core + * - used by both mac/switch modes + */ +#define PRUETH_SW_HOST_EXP_BUF_POOL_SIZE SZ_16K +#define PRUETH_SW_HOST_PRE_BUF_POOL_SIZE (SZ_16K - SZ_2K) +#define PRUETH_EMAC_HOST_EXP_BUF_POOL_SIZE PRUETH_SW_HOST_EXP_BUF_POOL_SIZE +#define PRUETH_EMAC_HOST_PRE_BUF_POOL_SIZE PRUETH_SW_HOST_PRE_BUF_POOL_SIZE + +/* Buffer used by firmware to temporarily store packet to be dropped */ +#define PRUETH_SW_DROP_PKT_BUF_SIZE SZ_2K +#define PRUETH_EMAC_DROP_PKT_BUF_SIZE PRUETH_SW_DROP_PKT_BUF_SIZE + +/* Total switch mode memory usage for buffers per slice */ +#define PRUETH_SW_TOTAL_BUF_SIZE_PER_SLICE \ + (PRUETH_SW_FWD_BUF_POOL_SIZE * PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE + \ + PRUETH_SW_LI_BUF_POOL_SIZE * PRUETH_SW_USED_LI_BUF_POOLS_PER_SLICE + \ + PRUETH_SW_HOST_EXP_BUF_POOL_SIZE + \ + PRUETH_SW_HOST_PRE_BUF_POOL_SIZE + \ + PRUETH_SW_DROP_PKT_BUF_SIZE) + +/* Total switch mode memory usage for all buffers */ +#define PRUETH_SW_TOTAL_BUF_SIZE \ + (2 * PRUETH_SW_TOTAL_BUF_SIZE_PER_SLICE) + +/* Total mac mode memory usage for buffers per slice */ +#define PRUETH_EMAC_TOTAL_BUF_SIZE_PER_SLICE \ + (PRUETH_EMAC_LI_BUF_POOL_SIZE * \ + PRUETH_EMAC_USED_LI_BUF_POOLS_PER_SLICE + \ + PRUETH_EMAC_HOST_EXP_BUF_POOL_SIZE + \ + PRUETH_EMAC_HOST_PRE_BUF_POOL_SIZE + \ + PRUETH_EMAC_DROP_PKT_BUF_SIZE) + +/* Total mac mode memory usage for all buffers */ +#define PRUETH_EMAC_TOTAL_BUF_SIZE \ + (2 * PRUETH_EMAC_TOTAL_BUF_SIZE_PER_SLICE) + +/* Size of 1 bank of MSMC/OC_SRAM memory */ +#define MSMC_RAM_BANK_SIZE SZ_256K #define PRUETH_SWITCH_FDB_MASK ((SIZE_OF_FDB / NUMBER_OF_FDB_BUCKET_ENTRIES) - 1) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 86fc1278127c..2f5c4335dec3 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1764,10 +1764,15 @@ static int prueth_probe(struct platform_device *pdev) goto put_mem; } - msmc_ram_size = MSMC_RAM_SIZE; prueth->is_switchmode_supported = prueth->pdata.switch_mode; - if (prueth->is_switchmode_supported) - msmc_ram_size = MSMC_RAM_SIZE_SWITCH_MODE; + if (prueth->pdata.banked_ms_ram) { + /* Reserve 2 MSMC RAM banks for buffers to avoid arbitration */ + msmc_ram_size = (2 * MSMC_RAM_BANK_SIZE); + } else { + msmc_ram_size = PRUETH_EMAC_TOTAL_BUF_SIZE; + if (prueth->is_switchmode_supported) + msmc_ram_size = PRUETH_SW_TOTAL_BUF_SIZE; + } /* NOTE: FW bug needs buffer base to be 64KB aligned */ prueth->msmcram.va = @@ -1924,7 +1929,8 @@ put_iep0: free_pool: gen_pool_free(prueth->sram_pool, - (unsigned long)prueth->msmcram.va, msmc_ram_size); + (unsigned long)prueth->msmcram.va, + prueth->msmcram.size); put_mem: pruss_release_mem_region(prueth->pruss, &prueth->shram); @@ -1976,8 +1982,8 @@ static void prueth_remove(struct platform_device *pdev) icss_iep_put(prueth->iep0); gen_pool_free(prueth->sram_pool, - (unsigned long)prueth->msmcram.va, - MSMC_RAM_SIZE); + (unsigned long)prueth->msmcram.va, + prueth->msmcram.size); pruss_release_mem_region(prueth->pruss, &prueth->shram); @@ -1994,12 +2000,14 @@ static const struct prueth_pdata am654_icssg_pdata = { .fdqring_mode = K3_RINGACC_RING_MODE_MESSAGE, .quirk_10m_link_issue = 1, .switch_mode = 1, + .banked_ms_ram = 0, }; static const struct prueth_pdata am64x_icssg_pdata = { .fdqring_mode = K3_RINGACC_RING_MODE_RING, .quirk_10m_link_issue = 1, .switch_mode = 1, + .banked_ms_ram = 1, }; static const struct of_device_id prueth_dt_match[] = { diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 23c465f1ce7f..3bb9fd8c3804 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -251,11 +251,13 @@ struct prueth_emac { * @fdqring_mode: Free desc queue mode * @quirk_10m_link_issue: 10M link detect errata * @switch_mode: switch firmware support + * @banked_ms_ram: banked memory support */ struct prueth_pdata { enum k3_ring_mode fdqring_mode; u32 quirk_10m_link_issue:1; u32 switch_mode:1; + u32 banked_ms_ram:1; }; struct icssg_firmwares { diff --git a/drivers/net/ethernet/ti/icssg/icssg_switch_map.h b/drivers/net/ethernet/ti/icssg/icssg_switch_map.h index 490a9cc06fb0..7e053b8af3ec 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_switch_map.h +++ b/drivers/net/ethernet/ti/icssg/icssg_switch_map.h @@ -180,6 +180,9 @@ /* Used to notify the FW of the current link speed */ #define PORT_LINK_SPEED_OFFSET 0x00A8 +/* 2k memory pointer reserved for default writes by PRU0*/ +#define DEFAULT_MSMC_Q_OFFSET 0x00AC + /* TAS gate mask for windows list0 */ #define TAS_GATE_MASK_LIST0 0x0100 diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 0f4be72116b8..f0823aa1ede6 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1912,7 +1912,6 @@ static void wx_configure_rx_ring(struct wx *wx, struct wx_ring *ring) { u16 reg_idx = ring->reg_idx; - union wx_rx_desc *rx_desc; u64 rdba = ring->dma; u32 rxdctl; @@ -1942,9 +1941,9 @@ static void wx_configure_rx_ring(struct wx *wx, memset(ring->rx_buffer_info, 0, sizeof(struct wx_rx_buffer) * ring->count); - /* initialize Rx descriptor 0 */ - rx_desc = WX_RX_DESC(ring, 0); - rx_desc->wb.upper.length = 0; + /* reset ntu and ntc to place SW in sync with hardware */ + ring->next_to_clean = 0; + ring->next_to_use = 0; /* enable receive descriptor ring */ wr32m(wx, WX_PX_RR_CFG(reg_idx), @@ -2778,6 +2777,8 @@ void wx_update_stats(struct wx *wx) hwstats->fdirmiss += rd32(wx, WX_RDB_FDIR_MISS); } + /* qmprc is not cleared on read, manual reset it */ + hwstats->qmprc = 0; for (i = wx->num_vfs * wx->num_rx_queues_per_pool; i < wx->mac.max_rx_queues; i++) hwstats->qmprc += rd32(wx, WX_PX_MPRC(i)); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 55e252789db3..0213ad5a6ceb 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -174,10 +174,6 @@ static void wx_dma_sync_frag(struct wx_ring *rx_ring, skb_frag_off(frag), skb_frag_size(frag), DMA_FROM_DEVICE); - - /* If the page was released, just unmap it. */ - if (unlikely(WX_CB(skb)->page_released)) - page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false); } static struct wx_rx_buffer *wx_get_rx_buffer(struct wx_ring *rx_ring, @@ -227,10 +223,6 @@ static void wx_put_rx_buffer(struct wx_ring *rx_ring, struct sk_buff *skb, int rx_buffer_pgcnt) { - if (!IS_ERR(skb) && WX_CB(skb)->dma == rx_buffer->dma) - /* the page has been released from the ring */ - WX_CB(skb)->page_released = true; - /* clear contents of rx_buffer */ rx_buffer->page = NULL; rx_buffer->skb = NULL; @@ -315,7 +307,7 @@ static bool wx_alloc_mapped_page(struct wx_ring *rx_ring, return false; dma = page_pool_get_dma_addr(page); - bi->page_dma = dma; + bi->dma = dma; bi->page = page; bi->page_offset = 0; @@ -352,7 +344,7 @@ void wx_alloc_rx_buffers(struct wx_ring *rx_ring, u16 cleaned_count) DMA_FROM_DEVICE); rx_desc->read.pkt_addr = - cpu_to_le64(bi->page_dma + bi->page_offset); + cpu_to_le64(bi->dma + bi->page_offset); rx_desc++; bi++; @@ -365,6 +357,8 @@ void wx_alloc_rx_buffers(struct wx_ring *rx_ring, u16 cleaned_count) /* clear the status bits for the next_to_use descriptor */ rx_desc->wb.upper.status_error = 0; + /* clear the length for the next_to_use descriptor */ + rx_desc->wb.upper.length = 0; cleaned_count--; } while (cleaned_count); @@ -2423,9 +2417,6 @@ static void wx_clean_rx_ring(struct wx_ring *rx_ring) if (rx_buffer->skb) { struct sk_buff *skb = rx_buffer->skb; - if (WX_CB(skb)->page_released) - page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false); - dev_kfree_skb(skb); } @@ -2449,6 +2440,9 @@ static void wx_clean_rx_ring(struct wx_ring *rx_ring) } } + /* Zero out the descriptor ring */ + memset(rx_ring->desc, 0, rx_ring->size); + rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index c363379126c0..1a90fcede86b 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -909,7 +909,6 @@ enum wx_reset_type { struct wx_cb { dma_addr_t dma; u16 append_cnt; /* number of skb's appended */ - bool page_released; bool dma_released; }; @@ -998,7 +997,6 @@ struct wx_tx_buffer { struct wx_rx_buffer { struct sk_buff *skb; dma_addr_t dma; - dma_addr_t page_dma; struct page *page; unsigned int page_offset; }; diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index edb36ff07a0c..6f82203a414c 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1309,7 +1309,7 @@ ll_temac_ethtools_set_ringparam(struct net_device *ndev, if (ering->rx_pending > RX_BD_NUM_MAX || ering->rx_mini_pending || ering->rx_jumbo_pending || - ering->rx_pending > TX_BD_NUM_MAX) + ering->tx_pending > TX_BD_NUM_MAX) return -EINVAL; if (netif_running(ndev)) diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index ecf47107146d..4719d40a63ba 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -286,7 +286,7 @@ static void xemaclite_aligned_read(u32 *src_ptr, u8 *dest_ptr, /* Read the remaining data */ for (; length > 0; length--) - *to_u8_ptr = *from_u8_ptr; + *to_u8_ptr++ = *from_u8_ptr++; } } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c41a025c66f0..8be9bce66a4e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2317,8 +2317,11 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev) if (!ndev) return NOTIFY_DONE; - /* set slave flag before open to prevent IPv6 addrconf */ + /* Set slave flag and no addrconf flag before open + * to prevent IPv6 addrconf. + */ vf_netdev->flags |= IFF_SLAVE; + vf_netdev->priv_flags |= IFF_NO_ADDRCONF; return NOTIFY_DONE; } diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c index ebf1e849506b..3e9e7f8444b3 100644 --- a/drivers/net/ovpn/io.c +++ b/drivers/net/ovpn/io.c @@ -62,6 +62,13 @@ static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb) unsigned int pkt_len; int ret; + /* + * GSO state from the transport layer is not valid for the tunnel/data + * path. Reset all GSO fields to prevent any further GSO processing + * from entering an inconsistent state. + */ + skb_gso_reset(skb); + /* we can't guarantee the packet wasn't corrupted before entering the * VPN, therefore we give other layers a chance to check that */ diff --git a/drivers/net/ovpn/netlink-gen.c b/drivers/net/ovpn/netlink-gen.c index 58e1a4342378..14298188c5f1 100644 --- a/drivers/net/ovpn/netlink-gen.c +++ b/drivers/net/ovpn/netlink-gen.c @@ -29,6 +29,22 @@ const struct nla_policy ovpn_keyconf_nl_policy[OVPN_A_KEYCONF_DECRYPT_DIR + 1] = [OVPN_A_KEYCONF_DECRYPT_DIR] = NLA_POLICY_NESTED(ovpn_keydir_nl_policy), }; +const struct nla_policy ovpn_keyconf_del_input_nl_policy[OVPN_A_KEYCONF_SLOT + 1] = { + [OVPN_A_KEYCONF_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_keyconf_peer_id_range), + [OVPN_A_KEYCONF_SLOT] = NLA_POLICY_MAX(NLA_U32, 1), +}; + +const struct nla_policy ovpn_keyconf_get_nl_policy[OVPN_A_KEYCONF_CIPHER_ALG + 1] = { + [OVPN_A_KEYCONF_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_keyconf_peer_id_range), + [OVPN_A_KEYCONF_SLOT] = NLA_POLICY_MAX(NLA_U32, 1), + [OVPN_A_KEYCONF_KEY_ID] = NLA_POLICY_MAX(NLA_U32, 7), + [OVPN_A_KEYCONF_CIPHER_ALG] = NLA_POLICY_MAX(NLA_U32, 2), +}; + +const struct nla_policy ovpn_keyconf_swap_input_nl_policy[OVPN_A_KEYCONF_PEER_ID + 1] = { + [OVPN_A_KEYCONF_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_keyconf_peer_id_range), +}; + const struct nla_policy ovpn_keydir_nl_policy[OVPN_A_KEYDIR_NONCE_TAIL + 1] = { [OVPN_A_KEYDIR_CIPHER_KEY] = NLA_POLICY_MAX_LEN(256), [OVPN_A_KEYDIR_NONCE_TAIL] = NLA_POLICY_EXACT_LEN(OVPN_NONCE_TAIL_SIZE), @@ -60,16 +76,49 @@ const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_LINK_TX_PACKETS + 1] = { [OVPN_A_PEER_LINK_TX_PACKETS] = { .type = NLA_UINT, }, }; +const struct nla_policy ovpn_peer_del_input_nl_policy[OVPN_A_PEER_ID + 1] = { + [OVPN_A_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_id_range), +}; + +const struct nla_policy ovpn_peer_new_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIMEOUT + 1] = { + [OVPN_A_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_id_range), + [OVPN_A_PEER_REMOTE_IPV4] = { .type = NLA_BE32, }, + [OVPN_A_PEER_REMOTE_IPV6] = NLA_POLICY_EXACT_LEN(16), + [OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID] = { .type = NLA_U32, }, + [OVPN_A_PEER_REMOTE_PORT] = NLA_POLICY_MIN(NLA_BE16, 1), + [OVPN_A_PEER_SOCKET] = { .type = NLA_U32, }, + [OVPN_A_PEER_VPN_IPV4] = { .type = NLA_BE32, }, + [OVPN_A_PEER_VPN_IPV6] = NLA_POLICY_EXACT_LEN(16), + [OVPN_A_PEER_LOCAL_IPV4] = { .type = NLA_BE32, }, + [OVPN_A_PEER_LOCAL_IPV6] = NLA_POLICY_EXACT_LEN(16), + [OVPN_A_PEER_KEEPALIVE_INTERVAL] = { .type = NLA_U32, }, + [OVPN_A_PEER_KEEPALIVE_TIMEOUT] = { .type = NLA_U32, }, +}; + +const struct nla_policy ovpn_peer_set_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIMEOUT + 1] = { + [OVPN_A_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_id_range), + [OVPN_A_PEER_REMOTE_IPV4] = { .type = NLA_BE32, }, + [OVPN_A_PEER_REMOTE_IPV6] = NLA_POLICY_EXACT_LEN(16), + [OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID] = { .type = NLA_U32, }, + [OVPN_A_PEER_REMOTE_PORT] = NLA_POLICY_MIN(NLA_BE16, 1), + [OVPN_A_PEER_VPN_IPV4] = { .type = NLA_BE32, }, + [OVPN_A_PEER_VPN_IPV6] = NLA_POLICY_EXACT_LEN(16), + [OVPN_A_PEER_LOCAL_IPV4] = { .type = NLA_BE32, }, + [OVPN_A_PEER_LOCAL_IPV6] = NLA_POLICY_EXACT_LEN(16), + [OVPN_A_PEER_KEEPALIVE_INTERVAL] = { .type = NLA_U32, }, + [OVPN_A_PEER_KEEPALIVE_TIMEOUT] = { .type = NLA_U32, }, +}; + /* OVPN_CMD_PEER_NEW - do */ static const struct nla_policy ovpn_peer_new_nl_policy[OVPN_A_PEER + 1] = { [OVPN_A_IFINDEX] = { .type = NLA_U32, }, - [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy), + [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_new_input_nl_policy), }; /* OVPN_CMD_PEER_SET - do */ static const struct nla_policy ovpn_peer_set_nl_policy[OVPN_A_PEER + 1] = { [OVPN_A_IFINDEX] = { .type = NLA_U32, }, - [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy), + [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_set_input_nl_policy), }; /* OVPN_CMD_PEER_GET - do */ @@ -86,7 +135,7 @@ static const struct nla_policy ovpn_peer_get_dump_nl_policy[OVPN_A_IFINDEX + 1] /* OVPN_CMD_PEER_DEL - do */ static const struct nla_policy ovpn_peer_del_nl_policy[OVPN_A_PEER + 1] = { [OVPN_A_IFINDEX] = { .type = NLA_U32, }, - [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy), + [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_del_input_nl_policy), }; /* OVPN_CMD_KEY_NEW - do */ @@ -98,19 +147,19 @@ static const struct nla_policy ovpn_key_new_nl_policy[OVPN_A_KEYCONF + 1] = { /* OVPN_CMD_KEY_GET - do */ static const struct nla_policy ovpn_key_get_nl_policy[OVPN_A_KEYCONF + 1] = { [OVPN_A_IFINDEX] = { .type = NLA_U32, }, - [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy), + [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_get_nl_policy), }; /* OVPN_CMD_KEY_SWAP - do */ static const struct nla_policy ovpn_key_swap_nl_policy[OVPN_A_KEYCONF + 1] = { [OVPN_A_IFINDEX] = { .type = NLA_U32, }, - [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy), + [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_swap_input_nl_policy), }; /* OVPN_CMD_KEY_DEL - do */ static const struct nla_policy ovpn_key_del_nl_policy[OVPN_A_KEYCONF + 1] = { [OVPN_A_IFINDEX] = { .type = NLA_U32, }, - [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy), + [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_del_input_nl_policy), }; /* Ops table for ovpn */ diff --git a/drivers/net/ovpn/netlink-gen.h b/drivers/net/ovpn/netlink-gen.h index 66a4e4a0a055..220b5b2fdd4f 100644 --- a/drivers/net/ovpn/netlink-gen.h +++ b/drivers/net/ovpn/netlink-gen.h @@ -13,8 +13,14 @@ /* Common nested types */ extern const struct nla_policy ovpn_keyconf_nl_policy[OVPN_A_KEYCONF_DECRYPT_DIR + 1]; +extern const struct nla_policy ovpn_keyconf_del_input_nl_policy[OVPN_A_KEYCONF_SLOT + 1]; +extern const struct nla_policy ovpn_keyconf_get_nl_policy[OVPN_A_KEYCONF_CIPHER_ALG + 1]; +extern const struct nla_policy ovpn_keyconf_swap_input_nl_policy[OVPN_A_KEYCONF_PEER_ID + 1]; extern const struct nla_policy ovpn_keydir_nl_policy[OVPN_A_KEYDIR_NONCE_TAIL + 1]; extern const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_LINK_TX_PACKETS + 1]; +extern const struct nla_policy ovpn_peer_del_input_nl_policy[OVPN_A_PEER_ID + 1]; +extern const struct nla_policy ovpn_peer_new_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIMEOUT + 1]; +extern const struct nla_policy ovpn_peer_set_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIMEOUT + 1]; int ovpn_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c index a4ec53def46e..c7f382437630 100644 --- a/drivers/net/ovpn/netlink.c +++ b/drivers/net/ovpn/netlink.c @@ -352,7 +352,7 @@ int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info) return -EINVAL; ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER], - ovpn_peer_nl_policy, info->extack); + ovpn_peer_new_input_nl_policy, info->extack); if (ret) return ret; @@ -476,7 +476,7 @@ int ovpn_nl_peer_set_doit(struct sk_buff *skb, struct genl_info *info) return -EINVAL; ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER], - ovpn_peer_nl_policy, info->extack); + ovpn_peer_set_input_nl_policy, info->extack); if (ret) return ret; @@ -654,7 +654,7 @@ int ovpn_nl_peer_get_doit(struct sk_buff *skb, struct genl_info *info) struct ovpn_peer *peer; struct sk_buff *msg; u32 peer_id; - int ret; + int ret, i; if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER)) return -EINVAL; @@ -668,6 +668,23 @@ int ovpn_nl_peer_get_doit(struct sk_buff *skb, struct genl_info *info) OVPN_A_PEER_ID)) return -EINVAL; + /* OVPN_CMD_PEER_GET expects only the PEER_ID, therefore + * ensure that the user hasn't specified any other attribute. + * + * Unfortunately this check cannot be performed via netlink + * spec/policy and must be open-coded. + */ + for (i = 0; i < OVPN_A_PEER_MAX + 1; i++) { + if (i == OVPN_A_PEER_ID) + continue; + + if (attrs[i]) { + NL_SET_ERR_MSG_FMT_MOD(info->extack, + "unexpected attribute %u", i); + return -EINVAL; + } + } + peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]); peer = ovpn_peer_get_by_id(ovpn, peer_id); if (!peer) { @@ -768,7 +785,7 @@ int ovpn_nl_peer_del_doit(struct sk_buff *skb, struct genl_info *info) return -EINVAL; ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER], - ovpn_peer_nl_policy, info->extack); + ovpn_peer_del_input_nl_policy, info->extack); if (ret) return ret; @@ -969,14 +986,14 @@ int ovpn_nl_key_get_doit(struct sk_buff *skb, struct genl_info *info) struct ovpn_peer *peer; struct sk_buff *msg; u32 peer_id; - int ret; + int ret, i; if (GENL_REQ_ATTR_CHECK(info, OVPN_A_KEYCONF)) return -EINVAL; ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX, info->attrs[OVPN_A_KEYCONF], - ovpn_keyconf_nl_policy, info->extack); + ovpn_keyconf_get_nl_policy, info->extack); if (ret) return ret; @@ -988,6 +1005,24 @@ int ovpn_nl_key_get_doit(struct sk_buff *skb, struct genl_info *info) OVPN_A_KEYCONF_SLOT)) return -EINVAL; + /* OVPN_CMD_KEY_GET expects only the PEER_ID and the SLOT, therefore + * ensure that the user hasn't specified any other attribute. + * + * Unfortunately this check cannot be performed via netlink + * spec/policy and must be open-coded. + */ + for (i = 0; i < OVPN_A_KEYCONF_MAX + 1; i++) { + if (i == OVPN_A_KEYCONF_PEER_ID || + i == OVPN_A_KEYCONF_SLOT) + continue; + + if (attrs[i]) { + NL_SET_ERR_MSG_FMT_MOD(info->extack, + "unexpected attribute %u", i); + return -EINVAL; + } + } + peer_id = nla_get_u32(attrs[OVPN_A_KEYCONF_PEER_ID]); peer = ovpn_peer_get_by_id(ovpn, peer_id); if (!peer) { @@ -1037,7 +1072,7 @@ int ovpn_nl_key_swap_doit(struct sk_buff *skb, struct genl_info *info) ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX, info->attrs[OVPN_A_KEYCONF], - ovpn_keyconf_nl_policy, info->extack); + ovpn_keyconf_swap_input_nl_policy, info->extack); if (ret) return ret; @@ -1074,7 +1109,7 @@ int ovpn_nl_key_del_doit(struct sk_buff *skb, struct genl_info *info) ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX, info->attrs[OVPN_A_KEYCONF], - ovpn_keyconf_nl_policy, info->extack); + ovpn_keyconf_del_input_nl_policy, info->extack); if (ret) return ret; diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c index bff00946eae2..60435a21f29c 100644 --- a/drivers/net/ovpn/udp.c +++ b/drivers/net/ovpn/udp.c @@ -344,6 +344,7 @@ void ovpn_udp_send_skb(struct ovpn_peer *peer, struct sock *sk, int ret; skb->dev = peer->ovpn->dev; + skb->mark = READ_ONCE(sk->sk_mark); /* no checksum performed at this layer */ skb->ip_summed = CHECKSUM_NONE; diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 13570f628aa5..dc8634e7bcbe 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -332,7 +332,7 @@ static void lan88xx_link_change_notify(struct phy_device *phydev) * As workaround, set to 10 before setting to 100 * at forced 100 F/H mode. */ - if (!phydev->autoneg && phydev->speed == 100) { + if (phydev->state == PHY_NOLINK && !phydev->autoneg && phydev->speed == 100) { /* disable phy interrupt */ temp = phy_read(phydev, LAN88XX_INT_MASK); temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; @@ -488,6 +488,7 @@ static struct phy_driver microchip_phy_driver[] = { .config_init = lan88xx_config_init, .config_aneg = lan88xx_config_aneg, .link_change_notify = lan88xx_link_change_notify, + .soft_reset = genphy_soft_reset, /* Interrupt handling is broken, do not define related * functions to force polling. diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 73f9cb2e2844..f76ee8489504 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3416,7 +3416,8 @@ static int phy_probe(struct device *dev) /* Get the LEDs from the device tree, and instantiate standard * LEDs for them. */ - if (IS_ENABLED(CONFIG_PHYLIB_LEDS)) + if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev) && + !phy_driver_is_genphy_10g(phydev)) err = of_phy_leds(phydev); out: @@ -3433,7 +3434,8 @@ static int phy_remove(struct device *dev) cancel_delayed_work_sync(&phydev->state_queue); - if (IS_ENABLED(CONFIG_PHYLIB_LEDS)) + if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev) && + !phy_driver_is_genphy_10g(phydev)) phy_leds_unregister(phydev); phydev->state = PHY_DOWN; diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c index 26350b962890..8f26e395e39f 100644 --- a/drivers/net/phy/qcom/at803x.c +++ b/drivers/net/phy/qcom/at803x.c @@ -26,9 +26,6 @@ #define AT803X_LED_CONTROL 0x18 -#define AT803X_PHY_MMD3_WOL_CTRL 0x8012 -#define AT803X_WOL_EN BIT(5) - #define AT803X_REG_CHIP_CONFIG 0x1f #define AT803X_BT_BX_REG_SEL 0x8000 @@ -866,30 +863,6 @@ static int at8031_config_init(struct phy_device *phydev) return at803x_config_init(phydev); } -static int at8031_set_wol(struct phy_device *phydev, - struct ethtool_wolinfo *wol) -{ - int ret; - - /* First setup MAC address and enable WOL interrupt */ - ret = at803x_set_wol(phydev, wol); - if (ret) - return ret; - - if (wol->wolopts & WAKE_MAGIC) - /* Enable WOL function for 1588 */ - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, - AT803X_PHY_MMD3_WOL_CTRL, - 0, AT803X_WOL_EN); - else - /* Disable WoL function for 1588 */ - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, - AT803X_PHY_MMD3_WOL_CTRL, - AT803X_WOL_EN, 0); - - return ret; -} - static int at8031_config_intr(struct phy_device *phydev) { struct at803x_priv *priv = phydev->priv; diff --git a/drivers/net/phy/qcom/qca808x.c b/drivers/net/phy/qcom/qca808x.c index 71498c518f0f..6de16c0eaa08 100644 --- a/drivers/net/phy/qcom/qca808x.c +++ b/drivers/net/phy/qcom/qca808x.c @@ -633,7 +633,7 @@ static struct phy_driver qca808x_driver[] = { .handle_interrupt = at803x_handle_interrupt, .get_tunable = at803x_get_tunable, .set_tunable = at803x_set_tunable, - .set_wol = at803x_set_wol, + .set_wol = at8031_set_wol, .get_wol = at803x_get_wol, .get_features = qca808x_get_features, .config_aneg = qca808x_config_aneg, diff --git a/drivers/net/phy/qcom/qcom-phy-lib.c b/drivers/net/phy/qcom/qcom-phy-lib.c index d28815ef56bb..af7d0d8e81be 100644 --- a/drivers/net/phy/qcom/qcom-phy-lib.c +++ b/drivers/net/phy/qcom/qcom-phy-lib.c @@ -115,6 +115,31 @@ int at803x_set_wol(struct phy_device *phydev, } EXPORT_SYMBOL_GPL(at803x_set_wol); +int at8031_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + /* First setup MAC address and enable WOL interrupt */ + ret = at803x_set_wol(phydev, wol); + if (ret) + return ret; + + if (wol->wolopts & WAKE_MAGIC) + /* Enable WOL function for 1588 */ + ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, + AT803X_PHY_MMD3_WOL_CTRL, + 0, AT803X_WOL_EN); + else + /* Disable WoL function for 1588 */ + ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, + AT803X_PHY_MMD3_WOL_CTRL, + AT803X_WOL_EN, 0); + + return ret; +} +EXPORT_SYMBOL_GPL(at8031_set_wol); + void at803x_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) { diff --git a/drivers/net/phy/qcom/qcom.h b/drivers/net/phy/qcom/qcom.h index 4bb541728846..7f7151c8baca 100644 --- a/drivers/net/phy/qcom/qcom.h +++ b/drivers/net/phy/qcom/qcom.h @@ -172,6 +172,9 @@ #define AT803X_LOC_MAC_ADDR_16_31_OFFSET 0x804B #define AT803X_LOC_MAC_ADDR_32_47_OFFSET 0x804A +#define AT803X_PHY_MMD3_WOL_CTRL 0x8012 +#define AT803X_WOL_EN BIT(5) + #define AT803X_DEBUG_ADDR 0x1D #define AT803X_DEBUG_DATA 0x1E @@ -215,6 +218,8 @@ int at803x_debug_reg_mask(struct phy_device *phydev, u16 reg, int at803x_debug_reg_write(struct phy_device *phydev, u16 reg, u16 data); int at803x_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); +int at8031_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol); void at803x_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); int at803x_ack_interrupt(struct phy_device *phydev); diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 31463b9e5697..b6489da5cfcd 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -155,10 +155,29 @@ static int smsc_phy_reset(struct phy_device *phydev) static int lan87xx_config_aneg(struct phy_device *phydev) { - int rc; + u8 mdix_ctrl; int val; + int rc; + + /* When auto-negotiation is disabled (forced mode), the PHY's + * Auto-MDIX will continue toggling the TX/RX pairs. + * + * To establish a stable link, we must select a fixed MDI mode. + * If the user has not specified a fixed MDI mode (i.e., mdix_ctrl is + * 'auto'), we default to ETH_TP_MDI. This choice of a ETH_TP_MDI mode + * mirrors the behavior the hardware would exhibit if the AUTOMDIX_EN + * strap were configured for a fixed MDI connection. + */ + if (phydev->autoneg == AUTONEG_DISABLE) { + if (phydev->mdix_ctrl == ETH_TP_MDI_AUTO) + mdix_ctrl = ETH_TP_MDI; + else + mdix_ctrl = phydev->mdix_ctrl; + } else { + mdix_ctrl = phydev->mdix_ctrl; + } - switch (phydev->mdix_ctrl) { + switch (mdix_ctrl) { case ETH_TP_MDI: val = SPECIAL_CTRL_STS_OVRRD_AMDIX_; break; @@ -167,7 +186,8 @@ static int lan87xx_config_aneg(struct phy_device *phydev) SPECIAL_CTRL_STS_AMDIX_STATE_; break; case ETH_TP_MDI_AUTO: - val = SPECIAL_CTRL_STS_AMDIX_ENABLE_; + val = SPECIAL_CTRL_STS_OVRRD_AMDIX_ | + SPECIAL_CTRL_STS_AMDIX_ENABLE_; break; default: return genphy_config_aneg(phydev); @@ -183,7 +203,7 @@ static int lan87xx_config_aneg(struct phy_device *phydev) rc |= val; phy_write(phydev, SPECIAL_CTRL_STS, rc); - phydev->mdix = phydev->mdix_ctrl; + phydev->mdix = mdix_ctrl; return genphy_config_aneg(phydev); } @@ -261,6 +281,33 @@ int lan87xx_read_status(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(lan87xx_read_status); +static int lan87xx_phy_config_init(struct phy_device *phydev) +{ + int rc; + + /* The LAN87xx PHY's initial MDI-X mode is determined by the AUTOMDIX_EN + * hardware strap, but the driver cannot read the strap's status. This + * creates an unpredictable initial state. + * + * To ensure consistent and reliable behavior across all boards, + * override the strap configuration on initialization and force the PHY + * into a known state with Auto-MDIX enabled, which is the expected + * default for modern hardware. + */ + rc = phy_modify(phydev, SPECIAL_CTRL_STS, + SPECIAL_CTRL_STS_OVRRD_AMDIX_ | + SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_, + SPECIAL_CTRL_STS_OVRRD_AMDIX_ | + SPECIAL_CTRL_STS_AMDIX_ENABLE_); + if (rc < 0) + return rc; + + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + + return smsc_phy_config_init(phydev); +} + static int lan874x_phy_config_init(struct phy_device *phydev) { u16 val; @@ -695,7 +742,7 @@ static struct phy_driver smsc_phy_driver[] = { /* basic functions */ .read_status = lan87xx_read_status, - .config_init = smsc_phy_config_init, + .config_init = lan87xx_phy_config_init, .soft_reset = smsc_phy_reset, .config_aneg = lan87xx_config_aneg, diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index c30ca415d1d3..36c73db44f77 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -689,6 +689,10 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf) status); return -ENODEV; } + if (!dev->status) { + dev_err(&dev->udev->dev, "No status endpoint found"); + return -ENODEV; + } /* Initialize sierra private data */ priv = kzalloc(sizeof *priv, GFP_KERNEL); if (!priv) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5d674eb9a0f2..82b4a2a2b8c4 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -7059,7 +7059,7 @@ static int virtnet_probe(struct virtio_device *vdev) otherwise get link status from config. */ netif_carrier_off(dev); if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { - virtnet_config_changed_work(&vi->config_work); + virtio_config_changed(vi->vdev); } else { vi->status = VIRTIO_NET_S_LINK_UP; virtnet_update_settings(vi); diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 57648febc4a4..bd95dc88f9b2 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1060,7 +1060,6 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_ } rx_tid = &peer->rx_tid[tid]; - paddr_aligned = rx_tid->qbuf.paddr_aligned; /* Update the tid queue if it is already setup */ if (rx_tid->active) { ret = ath12k_peer_rx_tid_reo_update(ar, peer, rx_tid, @@ -1072,6 +1071,7 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_ } if (!ab->hw_params->reoq_lut_support) { + paddr_aligned = rx_tid->qbuf.paddr_aligned; ret = ath12k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac, paddr_aligned, tid, @@ -1098,6 +1098,7 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_ return ret; } + paddr_aligned = rx_tid->qbuf.paddr_aligned; if (ab->hw_params->reoq_lut_support) { /* Update the REO queue LUT at the corresponding peer id * and tid with qaddr. diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h index 5cdc09d465d4..e90f3187e55c 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018-2024 Intel Corporation + * Copyright (C) 2012-2014, 2018-2025 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -754,7 +754,7 @@ struct iwl_lari_config_change_cmd_v10 { * according to the BIOS definitions. * For LARI cmd version 11 - bits 0:4 are supported. * For LARI cmd version 12 - bits 0:6 are supported and bits 7:31 are - * reserved. No need to mask out the reserved bits. + * reserved. * @force_disable_channels_bitmap: Bitmap of disabled bands/channels. * Each bit represents a set of channels in a specific band that should be * disabled @@ -787,6 +787,7 @@ struct iwl_lari_config_change_cmd { /* Activate UNII-1 (5.2GHz) for World Wide */ #define ACTIVATE_5G2_IN_WW_MASK BIT(4) #define CHAN_STATE_ACTIVE_BITMAP_CMD_V11 0x1F +#define CHAN_STATE_ACTIVE_BITMAP_CMD_V12 0x7F /** * struct iwl_pnvm_init_complete_ntfy - PNVM initialization complete diff --git a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c index 74b90bd92c48..ebfba981cf89 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c @@ -614,6 +614,7 @@ int iwl_fill_lari_config(struct iwl_fw_runtime *fwrt, ret = iwl_bios_get_dsm(fwrt, DSM_FUNC_ACTIVATE_CHANNEL, &value); if (!ret) { + value &= CHAN_STATE_ACTIVE_BITMAP_CMD_V12; if (cmd_ver < 8) value &= ~ACTIVATE_5G2_IN_WW_MASK; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c index 326c300470ea..436219d1ec5e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c @@ -251,8 +251,10 @@ void iwl_mld_configure_lari(struct iwl_mld *mld) cpu_to_le32(value &= DSM_UNII4_ALLOW_BITMAP); ret = iwl_bios_get_dsm(fwrt, DSM_FUNC_ACTIVATE_CHANNEL, &value); - if (!ret) + if (!ret) { + value &= CHAN_STATE_ACTIVE_BITMAP_CMD_V12; cmd.chan_state_active_bitmap = cpu_to_le32(value); + } ret = iwl_bios_get_dsm(fwrt, DSM_FUNC_ENABLE_6E, &value); if (!ret) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c index c8f4f3a1d2eb..5a9c3b7976a1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c @@ -546,8 +546,10 @@ again: } if (WARN_ON(trans->do_top_reset && - trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_SC)) - return -EINVAL; + trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_SC)) { + ret = -EINVAL; + goto out; + } /* we need to wait later - set state */ if (trans->do_top_reset) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index bb467e2b1779..eee55428749c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -2101,10 +2101,10 @@ static void iwl_txq_gen1_update_byte_cnt_tbl(struct iwl_trans *trans, bc_ent = cpu_to_le16(len | (sta_id << 12)); - scd_bc_tbl[txq_id * BC_TABLE_SIZE + write_ptr].tfd_offset = bc_ent; + scd_bc_tbl[txq_id * TFD_QUEUE_BC_SIZE + write_ptr].tfd_offset = bc_ent; if (write_ptr < TFD_QUEUE_SIZE_BC_DUP) - scd_bc_tbl[txq_id * BC_TABLE_SIZE + TFD_QUEUE_SIZE_MAX + write_ptr].tfd_offset = + scd_bc_tbl[txq_id * TFD_QUEUE_BC_SIZE + TFD_QUEUE_SIZE_MAX + write_ptr].tfd_offset = bc_ent; } @@ -2328,10 +2328,10 @@ static void iwl_txq_gen1_inval_byte_cnt_tbl(struct iwl_trans *trans, bc_ent = cpu_to_le16(1 | (sta_id << 12)); - scd_bc_tbl[txq_id * BC_TABLE_SIZE + read_ptr].tfd_offset = bc_ent; + scd_bc_tbl[txq_id * TFD_QUEUE_BC_SIZE + read_ptr].tfd_offset = bc_ent; if (read_ptr < TFD_QUEUE_SIZE_BC_DUP) - scd_bc_tbl[txq_id * BC_TABLE_SIZE + TFD_QUEUE_SIZE_MAX + read_ptr].tfd_offset = + scd_bc_tbl[txq_id * TFD_QUEUE_BC_SIZE + TFD_QUEUE_SIZE_MAX + read_ptr].tfd_offset = bc_ent; } diff --git a/drivers/net/wireless/marvell/mwifiex/util.c b/drivers/net/wireless/marvell/mwifiex/util.c index 4c5b1de0e936..6882e90e90b2 100644 --- a/drivers/net/wireless/marvell/mwifiex/util.c +++ b/drivers/net/wireless/marvell/mwifiex/util.c @@ -459,7 +459,9 @@ mwifiex_process_mgmt_packet(struct mwifiex_private *priv, "auth: receive authentication from %pM\n", ieee_hdr->addr3); } else { - if (!priv->wdev.connected) + if (!priv->wdev.connected || + !ether_addr_equal(ieee_hdr->addr3, + priv->curr_bss_params.bss_descriptor.mac_address)) return 0; if (ieee80211_is_deauth(ieee_hdr->frame_control)) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 5f8d81cda6cd..74b75035d361 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1224,6 +1224,16 @@ static inline int mt76_wed_dma_setup(struct mt76_dev *dev, struct mt76_queue *q, #define mt76_dereference(p, dev) \ rcu_dereference_protected(p, lockdep_is_held(&(dev)->mutex)) +static inline struct mt76_wcid * +__mt76_wcid_ptr(struct mt76_dev *dev, u16 idx) +{ + if (idx >= ARRAY_SIZE(dev->wcid)) + return NULL; + return rcu_dereference(dev->wcid[idx]); +} + +#define mt76_wcid_ptr(dev, idx) __mt76_wcid_ptr(&(dev)->mt76, idx) + struct mt76_dev *mt76_alloc_device(struct device *pdev, unsigned int size, const struct ieee80211_ops *ops, const struct mt76_driver_ops *drv_ops); diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c index 863e5770df51..e26cc78fff94 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c @@ -44,7 +44,7 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) if (idx >= MT7603_WTBL_STA - 1) goto free; - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (!wcid) goto free; diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c index 413973d05b43..6387f9e61060 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c @@ -487,10 +487,7 @@ mt7603_rx_get_wcid(struct mt7603_dev *dev, u8 idx, bool unicast) struct mt7603_sta *sta; struct mt76_wcid *wcid; - if (idx >= MT7603_WTBL_SIZE) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (unicast || !wcid) return wcid; @@ -1266,12 +1263,9 @@ void mt7603_mac_add_txs(struct mt7603_dev *dev, void *data) if (pid == MT_PACKET_ID_NO_ACK) return; - if (wcidx >= MT7603_WTBL_SIZE) - return; - rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index 3ca4fae7c4b0..f8d2cc94b742 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -90,10 +90,7 @@ static struct mt76_wcid *mt7615_rx_get_wcid(struct mt7615_dev *dev, struct mt7615_sta *sta; struct mt76_wcid *wcid; - if (idx >= MT7615_WTBL_SIZE) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (unicast || !wcid) return wcid; @@ -1504,7 +1501,7 @@ static void mt7615_mac_add_txs(struct mt7615_dev *dev, void *data) rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c index e9ac8a7317a1..0db00efe88b0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c @@ -1172,7 +1172,7 @@ void mt76_connac2_txwi_free(struct mt76_dev *dev, struct mt76_txwi_cache *t, wcid_idx = wcid->idx; } else { wcid_idx = le32_get_bits(txwi[1], MT_TXD1_WLAN_IDX); - wcid = rcu_dereference(dev->wcid[wcid_idx]); + wcid = __mt76_wcid_ptr(dev, wcid_idx); if (wcid && wcid->sta) { sta = container_of((void *)wcid, struct ieee80211_sta, diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index cb13d0a76878..16db0f2082d1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -287,7 +287,7 @@ __mt76_connac_mcu_alloc_sta_req(struct mt76_dev *dev, struct mt76_vif_link *mvif mt76_connac_mcu_get_wlan_idx(dev, wcid, &hdr.wlan_idx_lo, &hdr.wlan_idx_hi); - skb = mt76_mcu_msg_alloc(dev, NULL, len); + skb = __mt76_mcu_msg_alloc(dev, NULL, len, len, GFP_ATOMIC); if (!skb) return ERR_PTR(-ENOMEM); @@ -1740,8 +1740,8 @@ int mt76_connac_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, if (!sreq->ssids[i].ssid_len) continue; - req->ssids[i].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); - memcpy(req->ssids[i].ssid, sreq->ssids[i].ssid, + req->ssids[n_ssids].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); + memcpy(req->ssids[n_ssids].ssid, sreq->ssids[i].ssid, sreq->ssids[i].ssid_len); n_ssids++; } diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index 4cd63bacd742..9d7ee09b6cc9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -262,10 +262,7 @@ mt76x02_rx_get_sta(struct mt76_dev *dev, u8 idx) { struct mt76_wcid *wcid; - if (idx >= MT76x02_N_WCIDS) - return NULL; - - wcid = rcu_dereference(dev->wcid[idx]); + wcid = __mt76_wcid_ptr(dev, idx); if (!wcid) return NULL; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index d5db6ffd6d36..83488b2d6efb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -564,9 +564,7 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, rcu_read_lock(); - if (stat->wcid < MT76x02_N_WCIDS) - wcid = rcu_dereference(dev->mt76.wcid[stat->wcid]); - + wcid = mt76_wcid_ptr(dev, stat->wcid); if (wcid && wcid->sta) { void *priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 9400e4af2a04..6639976afcee 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -56,10 +56,7 @@ static struct mt76_wcid *mt7915_rx_get_wcid(struct mt7915_dev *dev, struct mt7915_sta *sta; struct mt76_wcid *wcid; - if (idx >= ARRAY_SIZE(dev->mt76.wcid)) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (unicast || !wcid) return wcid; @@ -917,7 +914,7 @@ mt7915_mac_tx_free(struct mt7915_dev *dev, void *data, int len) u16 idx; idx = FIELD_GET(MT_TX_FREE_WLAN_ID, info); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); if (!sta) continue; @@ -1013,12 +1010,9 @@ static void mt7915_mac_add_txs(struct mt7915_dev *dev, void *data) if (pid < MT_PACKET_ID_WED) return; - if (wcidx >= mt7915_wtbl_size(dev)) - return; - rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 427542777abc..c6584d2b7509 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -3986,7 +3986,7 @@ int mt7915_mcu_wed_wa_tx_stats(struct mt7915_dev *dev, u16 wlan_idx) rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); + wcid = mt76_wcid_ptr(dev, wlan_idx); if (wcid) wcid->stats.tx_packets += le32_to_cpu(res->tx_packets); else diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c index 9c4d5cea0c42..4a82f8e4c118 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c @@ -587,12 +587,9 @@ static void mt7915_mmio_wed_update_rx_stats(struct mtk_wed_device *wed, dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed); - if (idx >= mt7915_wtbl_size(dev)) - return; - rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (wcid) { wcid->stats.rx_bytes += le32_to_cpu(stats->rx_byte_cnt); wcid->stats.rx_packets += le32_to_cpu(stats->rx_pkt_cnt); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c index 5dd57de59f27..f1f76506b0a5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c @@ -465,7 +465,7 @@ void mt7921_mac_add_txs(struct mt792x_dev *dev, void *data) rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; @@ -516,7 +516,7 @@ static void mt7921_mac_tx_free(struct mt792x_dev *dev, void *data, int len) count++; idx = FIELD_GET(MT_TX_FREE_WLAN_ID, info); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); if (!sta) continue; @@ -816,7 +816,7 @@ void mt7921_usb_sdio_tx_complete_skb(struct mt76_dev *mdev, u16 idx; idx = le32_get_bits(txwi[1], MT_TXD1_WLAN_IDX); - wcid = rcu_dereference(mdev->wcid[idx]); + wcid = __mt76_wcid_ptr(mdev, idx); sta = wcid_to_sta(wcid); if (sta && likely(e->skb->protocol != cpu_to_be16(ETH_P_PAE))) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 1fffa43379b2..77f73ae1d7ec 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -1180,6 +1180,9 @@ static void mt7921_sta_set_decap_offload(struct ieee80211_hw *hw, struct mt792x_sta *msta = (struct mt792x_sta *)sta->drv_priv; struct mt792x_dev *dev = mt792x_hw_dev(hw); + if (!msta->deflink.wcid.sta) + return; + mt792x_mutex_acquire(dev); if (enabled) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/init.c b/drivers/net/wireless/mediatek/mt76/mt7925/init.c index 2a83ff59a968..4249bad83c93 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/init.c @@ -52,6 +52,8 @@ static int mt7925_thermal_init(struct mt792x_phy *phy) name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7925_%s", wiphy_name(wiphy)); + if (!name) + return -ENOMEM; hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev, name, phy, mt7925_hwmon_groups); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index c871d2f9688b..75823c9fd3a1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c @@ -1040,7 +1040,7 @@ void mt7925_mac_add_txs(struct mt792x_dev *dev, void *data) rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; @@ -1122,7 +1122,7 @@ mt7925_mac_tx_free(struct mt792x_dev *dev, void *data, int len) u16 idx; idx = FIELD_GET(MT_TXFREE_INFO_WLAN_ID, info); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); if (!sta) continue; @@ -1445,7 +1445,7 @@ void mt7925_usb_sdio_tx_complete_skb(struct mt76_dev *mdev, u16 idx; idx = le32_get_bits(txwi[1], MT_TXD1_WLAN_IDX); - wcid = rcu_dereference(mdev->wcid[idx]); + wcid = __mt76_wcid_ptr(mdev, idx); sta = wcid_to_sta(wcid); if (sta && likely(e->skb->protocol != cpu_to_be16(ETH_P_PAE))) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 94b0099dcd41..5b001548dffc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1481,7 +1481,7 @@ mt7925_start_sched_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, mt792x_mutex_acquire(dev); - err = mt7925_mcu_sched_scan_req(mphy, vif, req); + err = mt7925_mcu_sched_scan_req(mphy, vif, req, ies); if (err < 0) goto out; @@ -1603,6 +1603,9 @@ static void mt7925_sta_set_decap_offload(struct ieee80211_hw *hw, unsigned long valid = mvif->valid_links; u8 i; + if (!msta->vif) + return; + mt792x_mutex_acquire(dev); valid = ieee80211_vif_is_mld(vif) ? mvif->valid_links : BIT(0); @@ -1617,6 +1620,9 @@ static void mt7925_sta_set_decap_offload(struct ieee80211_hw *hw, else clear_bit(MT_WCID_FLAG_HDR_TRANS, &mlink->wcid.flags); + if (!mlink->wcid.sta) + continue; + mt7925_mcu_wtbl_update_hdr_trans(dev, vif, sta, i); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index b8542be0d945..8ac6fbb736ab 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -164,6 +164,7 @@ mt7925_connac_mcu_set_wow_ctrl(struct mt76_phy *phy, struct ieee80211_vif *vif, bool suspend, struct cfg80211_wowlan *wowlan) { struct mt76_vif_link *mvif = (struct mt76_vif_link *)vif->drv_priv; + struct ieee80211_scan_ies ies = {}; struct mt76_dev *dev = phy->dev; struct { struct { @@ -194,7 +195,7 @@ mt7925_connac_mcu_set_wow_ctrl(struct mt76_phy *phy, struct ieee80211_vif *vif, req.wow_ctrl_tlv.trigger |= (UNI_WOW_DETECT_TYPE_DISCONNECT | UNI_WOW_DETECT_TYPE_BCN_LOST); if (wowlan->nd_config) { - mt7925_mcu_sched_scan_req(phy, vif, wowlan->nd_config); + mt7925_mcu_sched_scan_req(phy, vif, wowlan->nd_config, &ies); req.wow_ctrl_tlv.trigger |= UNI_WOW_DETECT_TYPE_SCH_SCAN_HIT; mt7925_mcu_sched_scan_enable(phy, vif, suspend); } @@ -2818,6 +2819,54 @@ int mt7925_mcu_set_dbdc(struct mt76_phy *phy, bool enable) return err; } +static void +mt7925_mcu_build_scan_ie_tlv(struct mt76_dev *mdev, + struct sk_buff *skb, + struct ieee80211_scan_ies *scan_ies) +{ + u32 max_len = sizeof(struct scan_ie_tlv) + MT76_CONNAC_SCAN_IE_LEN; + struct scan_ie_tlv *ie; + enum nl80211_band i; + struct tlv *tlv; + const u8 *ies; + u16 ies_len; + + for (i = 0; i <= NL80211_BAND_6GHZ; i++) { + if (i == NL80211_BAND_60GHZ) + continue; + + ies = scan_ies->ies[i]; + ies_len = scan_ies->len[i]; + + if (!ies || !ies_len) + continue; + + if (ies_len > max_len) + return; + + tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_IE, + sizeof(*ie) + ies_len); + ie = (struct scan_ie_tlv *)tlv; + + memcpy(ie->ies, ies, ies_len); + ie->ies_len = cpu_to_le16(ies_len); + + switch (i) { + case NL80211_BAND_2GHZ: + ie->band = 1; + break; + case NL80211_BAND_6GHZ: + ie->band = 3; + break; + default: + ie->band = 2; + break; + } + + max_len -= (sizeof(*ie) + ies_len); + } +} + int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, struct ieee80211_scan_request *scan_req) { @@ -2843,7 +2892,8 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, max_len = sizeof(*hdr) + sizeof(*req) + sizeof(*ssid) + sizeof(*bssid) * MT7925_RNR_SCAN_MAX_BSSIDS + - sizeof(*chan_info) + sizeof(*misc) + sizeof(*ie); + sizeof(*chan_info) + sizeof(*misc) + sizeof(*ie) + + MT76_CONNAC_SCAN_IE_LEN; skb = mt76_mcu_msg_alloc(mdev, NULL, max_len); if (!skb) @@ -2869,8 +2919,8 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, if (i > MT7925_RNR_SCAN_MAX_BSSIDS) break; - ssid->ssids[i].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); - memcpy(ssid->ssids[i].ssid, sreq->ssids[i].ssid, + ssid->ssids[n_ssids].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); + memcpy(ssid->ssids[n_ssids].ssid, sreq->ssids[i].ssid, sreq->ssids[i].ssid_len); n_ssids++; } @@ -2925,13 +2975,6 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, } chan_info->channel_type = sreq->n_channels ? 4 : 0; - tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_IE, sizeof(*ie)); - ie = (struct scan_ie_tlv *)tlv; - if (sreq->ie_len > 0) { - memcpy(ie->ies, sreq->ie, sreq->ie_len); - ie->ies_len = cpu_to_le16(sreq->ie_len); - } - req->scan_func |= SCAN_FUNC_SPLIT_SCAN; tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_MISC, sizeof(*misc)); @@ -2942,6 +2985,9 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, req->scan_func |= SCAN_FUNC_RANDOM_MAC; } + /* Append scan probe IEs as the last tlv */ + mt7925_mcu_build_scan_ie_tlv(mdev, skb, &scan_req->ies); + err = mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ), true); if (err < 0) @@ -2953,7 +2999,8 @@ EXPORT_SYMBOL_GPL(mt7925_mcu_hw_scan); int mt7925_mcu_sched_scan_req(struct mt76_phy *phy, struct ieee80211_vif *vif, - struct cfg80211_sched_scan_request *sreq) + struct cfg80211_sched_scan_request *sreq, + struct ieee80211_scan_ies *ies) { struct mt76_vif_link *mvif = (struct mt76_vif_link *)vif->drv_priv; struct ieee80211_channel **scan_list = sreq->channels; @@ -3041,12 +3088,8 @@ int mt7925_mcu_sched_scan_req(struct mt76_phy *phy, } chan_info->channel_type = sreq->n_channels ? 4 : 0; - tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_IE, sizeof(*ie)); - ie = (struct scan_ie_tlv *)tlv; - if (sreq->ie_len > 0) { - memcpy(ie->ies, sreq->ie, sreq->ie_len); - ie->ies_len = cpu_to_le16(sreq->ie_len); - } + /* Append scan probe IEs as the last tlv */ + mt7925_mcu_build_scan_ie_tlv(mdev, skb, ies); return mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ), true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h index ee6fb16e83c5..a40764d89a1f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h @@ -269,7 +269,7 @@ struct scan_ie_tlv { __le16 ies_len; u8 band; u8 pad; - u8 ies[MT76_CONNAC_SCAN_IE_LEN]; + u8 ies[]; }; struct scan_misc_tlv { @@ -673,7 +673,8 @@ int mt7925_mcu_cancel_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif); int mt7925_mcu_sched_scan_req(struct mt76_phy *phy, struct ieee80211_vif *vif, - struct cfg80211_sched_scan_request *sreq); + struct cfg80211_sched_scan_request *sreq, + struct ieee80211_scan_ies *ies); int mt7925_mcu_sched_scan_enable(struct mt76_phy *phy, struct ieee80211_vif *vif, bool enable); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/regs.h b/drivers/net/wireless/mediatek/mt76/mt7925/regs.h index 547489092c29..341987e47f67 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt7925/regs.h @@ -58,7 +58,7 @@ #define MT_INT_TX_DONE_MCU (MT_INT_TX_DONE_MCU_WM | \ MT_INT_TX_DONE_FWDL) -#define MT_INT_TX_DONE_ALL (MT_INT_TX_DONE_MCU_WM | \ +#define MT_INT_TX_DONE_ALL (MT_INT_TX_DONE_MCU | \ MT_INT_TX_DONE_BAND0 | \ GENMASK(18, 4)) diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index a50c1723ca29..05130ec1e5f7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -28,7 +28,7 @@ static const struct ieee80211_iface_combination if_comb[] = { }, }; -static const struct ieee80211_iface_limit if_limits_chanctx[] = { +static const struct ieee80211_iface_limit if_limits_chanctx_mcc[] = { { .max = 2, .types = BIT(NL80211_IFTYPE_STATION) | @@ -36,8 +36,23 @@ static const struct ieee80211_iface_limit if_limits_chanctx[] = { }, { .max = 1, - .types = BIT(NL80211_IFTYPE_AP) | - BIT(NL80211_IFTYPE_P2P_GO) + .types = BIT(NL80211_IFTYPE_P2P_GO) + }, + { + .max = 1, + .types = BIT(NL80211_IFTYPE_P2P_DEVICE) + } +}; + +static const struct ieee80211_iface_limit if_limits_chanctx_scc[] = { + { + .max = 2, + .types = BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_P2P_CLIENT) + }, + { + .max = 1, + .types = BIT(NL80211_IFTYPE_AP) }, { .max = 1, @@ -47,11 +62,18 @@ static const struct ieee80211_iface_limit if_limits_chanctx[] = { static const struct ieee80211_iface_combination if_comb_chanctx[] = { { - .limits = if_limits_chanctx, - .n_limits = ARRAY_SIZE(if_limits_chanctx), + .limits = if_limits_chanctx_mcc, + .n_limits = ARRAY_SIZE(if_limits_chanctx_mcc), .max_interfaces = 3, .num_different_channels = 2, .beacon_int_infra_match = false, + }, + { + .limits = if_limits_chanctx_scc, + .n_limits = ARRAY_SIZE(if_limits_chanctx_scc), + .max_interfaces = 3, + .num_different_channels = 1, + .beacon_int_infra_match = false, } }; diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c index 05978d9c7b91..3f1d9ba49076 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c @@ -142,10 +142,7 @@ struct mt76_wcid *mt792x_rx_get_wcid(struct mt792x_dev *dev, u16 idx, struct mt792x_sta *sta; struct mt76_wcid *wcid; - if (idx >= ARRAY_SIZE(dev->mt76.wcid)) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (unicast || !wcid) return wcid; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 0dbd4662bc84..92148518f6a5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -61,10 +61,7 @@ static struct mt76_wcid *mt7996_rx_get_wcid(struct mt7996_dev *dev, struct mt76_wcid *wcid; int i; - if (idx >= ARRAY_SIZE(dev->mt76.wcid)) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (!wcid) return NULL; @@ -1249,7 +1246,7 @@ mt7996_mac_tx_free(struct mt7996_dev *dev, void *data, int len) u16 idx; idx = FIELD_GET(MT_TXFREE_INFO_WLAN_ID, info); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); if (!sta) goto next; @@ -1471,12 +1468,9 @@ static void mt7996_mac_add_txs(struct mt7996_dev *dev, void *data) if (pid < MT_PACKET_ID_NO_SKB) return; - if (wcidx >= mt7996_wtbl_size(dev)) - return; - rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; @@ -2353,20 +2347,12 @@ void mt7996_mac_update_stats(struct mt7996_phy *phy) void mt7996_mac_sta_rc_work(struct work_struct *work) { struct mt7996_dev *dev = container_of(work, struct mt7996_dev, rc_work); - struct ieee80211_bss_conf *link_conf; - struct ieee80211_link_sta *link_sta; struct mt7996_sta_link *msta_link; - struct mt7996_vif_link *link; - struct mt76_vif_link *mlink; - struct ieee80211_sta *sta; struct ieee80211_vif *vif; - struct mt7996_sta *msta; struct mt7996_vif *mvif; LIST_HEAD(list); u32 changed; - u8 link_id; - rcu_read_lock(); spin_lock_bh(&dev->mt76.sta_poll_lock); list_splice_init(&dev->sta_rc_list, &list); @@ -2377,46 +2363,28 @@ void mt7996_mac_sta_rc_work(struct work_struct *work) changed = msta_link->changed; msta_link->changed = 0; - - sta = wcid_to_sta(&msta_link->wcid); - link_id = msta_link->wcid.link_id; - msta = msta_link->sta; - mvif = msta->vif; - vif = container_of((void *)mvif, struct ieee80211_vif, drv_priv); - - mlink = rcu_dereference(mvif->mt76.link[link_id]); - if (!mlink) - continue; - - link_sta = rcu_dereference(sta->link[link_id]); - if (!link_sta) - continue; - - link_conf = rcu_dereference(vif->link_conf[link_id]); - if (!link_conf) - continue; + mvif = msta_link->sta->vif; + vif = container_of((void *)mvif, struct ieee80211_vif, + drv_priv); spin_unlock_bh(&dev->mt76.sta_poll_lock); - link = (struct mt7996_vif_link *)mlink; - if (changed & (IEEE80211_RC_SUPP_RATES_CHANGED | IEEE80211_RC_NSS_CHANGED | IEEE80211_RC_BW_CHANGED)) - mt7996_mcu_add_rate_ctrl(dev, vif, link_conf, - link_sta, link, msta_link, + mt7996_mcu_add_rate_ctrl(dev, msta_link->sta, vif, + msta_link->wcid.link_id, true); if (changed & IEEE80211_RC_SMPS_CHANGED) - mt7996_mcu_set_fixed_field(dev, link_sta, link, - msta_link, NULL, + mt7996_mcu_set_fixed_field(dev, msta_link->sta, NULL, + msta_link->wcid.link_id, RATE_PARAM_MMPS_UPDATE); spin_lock_bh(&dev->mt76.sta_poll_lock); } spin_unlock_bh(&dev->mt76.sta_poll_lock); - rcu_read_unlock(); } void mt7996_mac_work(struct work_struct *work) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 78ae9f5cb176..07dd75ce94a5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -1112,9 +1112,8 @@ mt7996_mac_sta_event(struct mt7996_dev *dev, struct ieee80211_vif *vif, if (err) return err; - err = mt7996_mcu_add_rate_ctrl(dev, vif, link_conf, - link_sta, link, - msta_link, false); + err = mt7996_mcu_add_rate_ctrl(dev, msta_link->sta, vif, + link_id, false); if (err) return err; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index f0adc0b4b8b6..994526c65bfc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -555,7 +555,7 @@ mt7996_mcu_rx_all_sta_info_event(struct mt7996_dev *dev, struct sk_buff *skb) switch (le16_to_cpu(res->tag)) { case UNI_ALL_STA_TXRX_RATE: wlan_idx = le16_to_cpu(res->rate[i].wlan_idx); - wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); + wcid = mt76_wcid_ptr(dev, wlan_idx); if (!wcid) break; @@ -565,7 +565,7 @@ mt7996_mcu_rx_all_sta_info_event(struct mt7996_dev *dev, struct sk_buff *skb) break; case UNI_ALL_STA_TXRX_ADM_STAT: wlan_idx = le16_to_cpu(res->adm_stat[i].wlan_idx); - wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); + wcid = mt76_wcid_ptr(dev, wlan_idx); if (!wcid) break; @@ -579,7 +579,7 @@ mt7996_mcu_rx_all_sta_info_event(struct mt7996_dev *dev, struct sk_buff *skb) break; case UNI_ALL_STA_TXRX_MSDU_COUNT: wlan_idx = le16_to_cpu(res->msdu_cnt[i].wlan_idx); - wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); + wcid = mt76_wcid_ptr(dev, wlan_idx); if (!wcid) break; @@ -676,10 +676,7 @@ mt7996_mcu_wed_rro_event(struct mt7996_dev *dev, struct sk_buff *skb) e = (void *)skb->data; idx = le16_to_cpu(e->wlan_id); - if (idx >= ARRAY_SIZE(dev->mt76.wcid)) - break; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (!wcid || !wcid->sta) break; @@ -1905,22 +1902,35 @@ int mt7996_mcu_set_fixed_rate_ctrl(struct mt7996_dev *dev, MCU_WM_UNI_CMD(RA), true); } -int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link, - void *data, u32 field) +int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, struct mt7996_sta *msta, + void *data, u8 link_id, u32 field) { - struct sta_phy_uni *phy = data; + struct mt7996_vif *mvif = msta->vif; + struct mt7996_sta_link *msta_link; struct sta_rec_ra_fixed_uni *ra; + struct sta_phy_uni *phy = data; + struct mt76_vif_link *mlink; struct sk_buff *skb; + int err = -ENODEV; struct tlv *tlv; - skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, &link->mt76, + rcu_read_lock(); + + mlink = rcu_dereference(mvif->mt76.link[link_id]); + if (!mlink) + goto error_unlock; + + msta_link = rcu_dereference(msta->link[link_id]); + if (!msta_link) + goto error_unlock; + + skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, mlink, &msta_link->wcid, MT7996_STA_UPDATE_MAX_SIZE); - if (IS_ERR(skb)) - return PTR_ERR(skb); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto error_unlock; + } tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_RA_UPDATE, sizeof(*ra)); ra = (struct sta_rec_ra_fixed_uni *)tlv; @@ -1935,106 +1945,149 @@ int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, if (phy) ra->phy = *phy; break; - case RATE_PARAM_MMPS_UPDATE: + case RATE_PARAM_MMPS_UPDATE: { + struct ieee80211_sta *sta = wcid_to_sta(&msta_link->wcid); + struct ieee80211_link_sta *link_sta; + + link_sta = rcu_dereference(sta->link[link_id]); + if (!link_sta) { + dev_kfree_skb(skb); + goto error_unlock; + } + ra->mmps_mode = mt7996_mcu_get_mmps_mode(link_sta->smps_mode); break; + } default: break; } ra->field = cpu_to_le32(field); + rcu_read_unlock(); + return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true); +error_unlock: + rcu_read_unlock(); + + return err; } static int -mt7996_mcu_add_rate_ctrl_fixed(struct mt7996_dev *dev, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link) +mt7996_mcu_add_rate_ctrl_fixed(struct mt7996_dev *dev, struct mt7996_sta *msta, + struct ieee80211_vif *vif, u8 link_id) { - struct cfg80211_chan_def *chandef = &link->phy->mt76->chandef; - struct cfg80211_bitrate_mask *mask = &link->bitrate_mask; - enum nl80211_band band = chandef->chan->band; + struct ieee80211_link_sta *link_sta; + struct cfg80211_bitrate_mask mask; + struct mt7996_sta_link *msta_link; + struct mt7996_vif_link *link; struct sta_phy_uni phy = {}; - int ret, nrates = 0; + struct ieee80211_sta *sta; + int ret, nrates = 0, idx; + enum nl80211_band band; + bool has_he; #define __sta_phy_bitrate_mask_check(_mcs, _gi, _ht, _he) \ do { \ - u8 i, gi = mask->control[band]._gi; \ + u8 i, gi = mask.control[band]._gi; \ gi = (_he) ? gi : gi == NL80211_TXRATE_FORCE_SGI; \ phy.sgi = gi; \ - phy.he_ltf = mask->control[band].he_ltf; \ - for (i = 0; i < ARRAY_SIZE(mask->control[band]._mcs); i++) { \ - if (!mask->control[band]._mcs[i]) \ + phy.he_ltf = mask.control[band].he_ltf; \ + for (i = 0; i < ARRAY_SIZE(mask.control[band]._mcs); i++) { \ + if (!mask.control[band]._mcs[i]) \ continue; \ - nrates += hweight16(mask->control[band]._mcs[i]); \ - phy.mcs = ffs(mask->control[band]._mcs[i]) - 1; \ + nrates += hweight16(mask.control[band]._mcs[i]); \ + phy.mcs = ffs(mask.control[band]._mcs[i]) - 1; \ if (_ht) \ phy.mcs += 8 * i; \ } \ } while (0) - if (link_sta->he_cap.has_he) { + rcu_read_lock(); + + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + goto error_unlock; + + msta_link = rcu_dereference(msta->link[link_id]); + if (!msta_link) + goto error_unlock; + + sta = wcid_to_sta(&msta_link->wcid); + link_sta = rcu_dereference(sta->link[link_id]); + if (!link_sta) + goto error_unlock; + + band = link->phy->mt76->chandef.chan->band; + has_he = link_sta->he_cap.has_he; + mask = link->bitrate_mask; + idx = msta_link->wcid.idx; + + if (has_he) { __sta_phy_bitrate_mask_check(he_mcs, he_gi, 0, 1); } else if (link_sta->vht_cap.vht_supported) { __sta_phy_bitrate_mask_check(vht_mcs, gi, 0, 0); } else if (link_sta->ht_cap.ht_supported) { __sta_phy_bitrate_mask_check(ht_mcs, gi, 1, 0); } else { - nrates = hweight32(mask->control[band].legacy); - phy.mcs = ffs(mask->control[band].legacy) - 1; + nrates = hweight32(mask.control[band].legacy); + phy.mcs = ffs(mask.control[band].legacy) - 1; } + + rcu_read_unlock(); + #undef __sta_phy_bitrate_mask_check /* fall back to auto rate control */ - if (mask->control[band].gi == NL80211_TXRATE_DEFAULT_GI && - mask->control[band].he_gi == GENMASK(7, 0) && - mask->control[band].he_ltf == GENMASK(7, 0) && + if (mask.control[band].gi == NL80211_TXRATE_DEFAULT_GI && + mask.control[band].he_gi == GENMASK(7, 0) && + mask.control[band].he_ltf == GENMASK(7, 0) && nrates != 1) return 0; /* fixed single rate */ if (nrates == 1) { - ret = mt7996_mcu_set_fixed_field(dev, link_sta, link, - msta_link, &phy, + ret = mt7996_mcu_set_fixed_field(dev, msta, &phy, link_id, RATE_PARAM_FIXED_MCS); if (ret) return ret; } /* fixed GI */ - if (mask->control[band].gi != NL80211_TXRATE_DEFAULT_GI || - mask->control[band].he_gi != GENMASK(7, 0)) { + if (mask.control[band].gi != NL80211_TXRATE_DEFAULT_GI || + mask.control[band].he_gi != GENMASK(7, 0)) { u32 addr; /* firmware updates only TXCMD but doesn't take WTBL into * account, so driver should update here to reflect the * actual txrate hardware sends out. */ - addr = mt7996_mac_wtbl_lmac_addr(dev, msta_link->wcid.idx, 7); - if (link_sta->he_cap.has_he) + addr = mt7996_mac_wtbl_lmac_addr(dev, idx, 7); + if (has_he) mt76_rmw_field(dev, addr, GENMASK(31, 24), phy.sgi); else mt76_rmw_field(dev, addr, GENMASK(15, 12), phy.sgi); - ret = mt7996_mcu_set_fixed_field(dev, link_sta, link, - msta_link, &phy, + ret = mt7996_mcu_set_fixed_field(dev, msta, &phy, link_id, RATE_PARAM_FIXED_GI); if (ret) return ret; } /* fixed HE_LTF */ - if (mask->control[band].he_ltf != GENMASK(7, 0)) { - ret = mt7996_mcu_set_fixed_field(dev, link_sta, link, - msta_link, &phy, + if (mask.control[band].he_ltf != GENMASK(7, 0)) { + ret = mt7996_mcu_set_fixed_field(dev, msta, &phy, link_id, RATE_PARAM_FIXED_HE_LTF); if (ret) return ret; } return 0; + +error_unlock: + rcu_read_unlock(); + + return -ENODEV; } static void @@ -2145,21 +2198,44 @@ mt7996_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7996_dev *dev, memset(ra->rx_rcpi, INIT_RCPI, sizeof(ra->rx_rcpi)); } -int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, - struct ieee80211_vif *vif, - struct ieee80211_bss_conf *link_conf, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link, bool changed) +int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, struct mt7996_sta *msta, + struct ieee80211_vif *vif, u8 link_id, + bool changed) { + struct ieee80211_bss_conf *link_conf; + struct ieee80211_link_sta *link_sta; + struct mt7996_sta_link *msta_link; + struct mt7996_vif_link *link; + struct ieee80211_sta *sta; struct sk_buff *skb; - int ret; + int ret = -ENODEV; + + rcu_read_lock(); + + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + goto error_unlock; + + msta_link = rcu_dereference(msta->link[link_id]); + if (!msta_link) + goto error_unlock; + + sta = wcid_to_sta(&msta_link->wcid); + link_sta = rcu_dereference(sta->link[link_id]); + if (!link_sta) + goto error_unlock; + + link_conf = rcu_dereference(vif->link_conf[link_id]); + if (!link_conf) + goto error_unlock; skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, &link->mt76, &msta_link->wcid, MT7996_STA_UPDATE_MAX_SIZE); - if (IS_ERR(skb)) - return PTR_ERR(skb); + if (IS_ERR(skb)) { + ret = PTR_ERR(skb); + goto error_unlock; + } /* firmware rc algorithm refers to sta_rec_he for HE control. * once dev->rc_work changes the settings driver should also @@ -2173,12 +2249,19 @@ int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, */ mt7996_mcu_sta_rate_ctrl_tlv(skb, dev, vif, link_conf, link_sta, link); + rcu_read_unlock(); + ret = mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true); if (ret) return ret; - return mt7996_mcu_add_rate_ctrl_fixed(dev, link_sta, link, msta_link); + return mt7996_mcu_add_rate_ctrl_fixed(dev, msta, vif, link_id); + +error_unlock: + rcu_read_unlock(); + + return ret; } static int diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 1ad6bc046f7c..33ac16b64ef1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -620,23 +620,17 @@ int mt7996_mcu_beacon_inband_discov(struct mt7996_dev *dev, int mt7996_mcu_add_obss_spr(struct mt7996_phy *phy, struct mt7996_vif_link *link, struct ieee80211_he_obss_pd *he_obss_pd); -int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, - struct ieee80211_vif *vif, - struct ieee80211_bss_conf *link_conf, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link, bool changed); +int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, struct mt7996_sta *msta, + struct ieee80211_vif *vif, u8 link_id, + bool changed); int mt7996_set_channel(struct mt76_phy *mphy); int mt7996_mcu_set_chan_info(struct mt7996_phy *phy, u16 tag); int mt7996_mcu_set_tx(struct mt7996_dev *dev, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf); int mt7996_mcu_set_fixed_rate_ctrl(struct mt7996_dev *dev, void *data, u16 version); -int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link, - void *data, u32 field); +int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, struct mt7996_sta *msta, + void *data, u8 link_id, u32 field); int mt7996_mcu_set_eeprom(struct mt7996_dev *dev); int mt7996_mcu_get_eeprom(struct mt7996_dev *dev, u32 offset, u8 *buf, u32 buf_len); int mt7996_mcu_get_eeprom_free_block(struct mt7996_dev *dev, u8 *block_num); diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 513916469ca2..e6cf16706667 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -64,7 +64,7 @@ mt76_tx_status_unlock(struct mt76_dev *dev, struct sk_buff_head *list) struct mt76_tx_cb *cb = mt76_tx_skb_cb(skb); struct mt76_wcid *wcid; - wcid = rcu_dereference(dev->wcid[cb->wcid]); + wcid = __mt76_wcid_ptr(dev, cb->wcid); if (wcid) { status.sta = wcid_to_sta(wcid); if (status.sta && (wcid->rate.flags || wcid->rate.legacy)) { @@ -251,9 +251,7 @@ void __mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff * rcu_read_lock(); - if (wcid_idx < ARRAY_SIZE(dev->wcid)) - wcid = rcu_dereference(dev->wcid[wcid_idx]); - + wcid = __mt76_wcid_ptr(dev, wcid_idx); mt76_tx_check_non_aql(dev, wcid, skb); #ifdef CONFIG_NL80211_TESTMODE @@ -538,7 +536,7 @@ mt76_txq_schedule_list(struct mt76_phy *phy, enum mt76_txq_id qid) break; mtxq = (struct mt76_txq *)txq->drv_priv; - wcid = rcu_dereference(dev->wcid[mtxq->wcid]); + wcid = __mt76_wcid_ptr(dev, mtxq->wcid); if (!wcid || test_bit(MT_WCID_FLAG_PS, &wcid->flags)) continue; @@ -617,7 +615,8 @@ mt76_txq_schedule_pending_wcid(struct mt76_phy *phy, struct mt76_wcid *wcid, if ((dev->drv->drv_flags & MT_DRV_HW_MGMT_TXQ) && !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) && !ieee80211_is_data(hdr->frame_control) && - !ieee80211_is_bufferable_mmpdu(skb)) + (!ieee80211_is_bufferable_mmpdu(skb) || + ieee80211_is_deauth(hdr->frame_control))) qid = MT_TXQ_PSD; q = phy->q_tx[qid]; diff --git a/drivers/net/wireless/mediatek/mt76/util.c b/drivers/net/wireless/mediatek/mt76/util.c index 95b3dc96e4c4..97249ebb4bc8 100644 --- a/drivers/net/wireless/mediatek/mt76/util.c +++ b/drivers/net/wireless/mediatek/mt76/util.c @@ -83,7 +83,7 @@ int mt76_get_min_avg_rssi(struct mt76_dev *dev, u8 phy_idx) if (!(mask & 1)) continue; - wcid = rcu_dereference(dev->wcid[j]); + wcid = __mt76_wcid_ptr(dev, j); if (!wcid || wcid->phy_idx != phy_idx) continue; diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c index eface610178d..f7f3a2340c39 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c @@ -108,7 +108,7 @@ exit_free_device: } EXPORT_SYMBOL_GPL(rt2x00soc_probe); -int rt2x00soc_remove(struct platform_device *pdev) +void rt2x00soc_remove(struct platform_device *pdev) { struct ieee80211_hw *hw = platform_get_drvdata(pdev); struct rt2x00_dev *rt2x00dev = hw->priv; @@ -119,8 +119,6 @@ int rt2x00soc_remove(struct platform_device *pdev) rt2x00lib_remove_dev(rt2x00dev); rt2x00soc_free_reg(rt2x00dev); ieee80211_free_hw(hw); - - return 0; } EXPORT_SYMBOL_GPL(rt2x00soc_remove); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h index 021fd06b3627..d6226b8a10e0 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h @@ -17,7 +17,7 @@ * SoC driver handlers. */ int rt2x00soc_probe(struct platform_device *pdev, const struct rt2x00_ops *ops); -int rt2x00soc_remove(struct platform_device *pdev); +void rt2x00soc_remove(struct platform_device *pdev); #ifdef CONFIG_PM int rt2x00soc_suspend(struct platform_device *pdev, pm_message_t state); int rt2x00soc_resume(struct platform_device *pdev); diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c index 9653dbaac3c0..781510a3ec6d 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c @@ -583,7 +583,11 @@ void zd_mac_tx_to_dev(struct sk_buff *skb, int error) skb_queue_tail(q, skb); while (skb_queue_len(q) > ZD_MAC_MAX_ACK_WAITERS) { - zd_mac_tx_status(hw, skb_dequeue(q), + skb = skb_dequeue(q); + if (!skb) + break; + + zd_mac_tx_status(hw, skb, mac->ack_pending ? mac->ack_signal : 0, NULL); mac->ack_pending = 0; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7493e5aa984c..895fb163d48e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -381,12 +381,12 @@ static void nvme_log_err_passthru(struct request *req) nr->status & NVME_SC_MASK, /* Status Code */ nr->status & NVME_STATUS_MORE ? "MORE " : "", nr->status & NVME_STATUS_DNR ? "DNR " : "", - nr->cmd->common.cdw10, - nr->cmd->common.cdw11, - nr->cmd->common.cdw12, - nr->cmd->common.cdw13, - nr->cmd->common.cdw14, - nr->cmd->common.cdw15); + le32_to_cpu(nr->cmd->common.cdw10), + le32_to_cpu(nr->cmd->common.cdw11), + le32_to_cpu(nr->cmd->common.cdw12), + le32_to_cpu(nr->cmd->common.cdw13), + le32_to_cpu(nr->cmd->common.cdw14), + le32_to_cpu(nr->cmd->common.cdw15)); } enum nvme_disposition { @@ -764,6 +764,10 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) && !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE; + + if (!(rq->rq_flags & RQF_DONTPREP)) + nvme_clear_nvme_request(rq); + return nvme_host_path_error(rq); } EXPORT_SYMBOL_GPL(nvme_fail_nonready_command); @@ -3537,15 +3541,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret) goto out_free; } - - if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) { - dev_err_ratelimited(ctrl->device, - "inconsistent AWUPF, controller not added (%u/%u).\n", - le16_to_cpu(id->awupf), ctrl->subsys->awupf); - ret = -EINVAL; - goto out_free; - } - memcpy(ctrl->subsys->firmware_rev, id->fr, sizeof(ctrl->subsys->firmware_rev)); @@ -4077,7 +4072,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns) return; } } - list_add(&ns->list, &ns->ctrl->namespaces); + list_add_rcu(&ns->list, &ns->ctrl->namespaces); } static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 688033b88d38..470bf37e5a63 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1928,10 +1928,10 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, struct sock *sk = queue->sock->sk; /* Restore the default callbacks before starting upcall */ - read_lock_bh(&sk->sk_callback_lock); + write_lock_bh(&sk->sk_callback_lock); sk->sk_user_data = NULL; sk->sk_data_ready = port->data_ready; - read_unlock_bh(&sk->sk_callback_lock); + write_unlock_bh(&sk->sk_callback_lock); if (!nvmet_tcp_try_peek_pdu(queue)) { if (!nvmet_tcp_tls_handshake(queue)) return; diff --git a/drivers/nvmem/imx-ocotp-ele.c b/drivers/nvmem/imx-ocotp-ele.c index ca6dd71d8a2e..7807ec0e2d18 100644 --- a/drivers/nvmem/imx-ocotp-ele.c +++ b/drivers/nvmem/imx-ocotp-ele.c @@ -12,6 +12,7 @@ #include <linux/of.h> #include <linux/platform_device.h> #include <linux/slab.h> +#include <linux/if_ether.h> /* ETH_ALEN */ enum fuse_type { FUSE_FSB = BIT(0), @@ -118,9 +119,11 @@ static int imx_ocotp_cell_pp(void *context, const char *id, int index, int i; /* Deal with some post processing of nvmem cell data */ - if (id && !strcmp(id, "mac-address")) + if (id && !strcmp(id, "mac-address")) { + bytes = min(bytes, ETH_ALEN); for (i = 0; i < bytes / 2; i++) swap(buf[i], buf[bytes - i - 1]); + } return 0; } diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c index 79dd4fda0329..7bf7656d4f96 100644 --- a/drivers/nvmem/imx-ocotp.c +++ b/drivers/nvmem/imx-ocotp.c @@ -23,6 +23,7 @@ #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/delay.h> +#include <linux/if_ether.h> /* ETH_ALEN */ #define IMX_OCOTP_OFFSET_B0W0 0x400 /* Offset from base address of the * OTP Bank0 Word0 @@ -227,9 +228,11 @@ static int imx_ocotp_cell_pp(void *context, const char *id, int index, int i; /* Deal with some post processing of nvmem cell data */ - if (id && !strcmp(id, "mac-address")) + if (id && !strcmp(id, "mac-address")) { + bytes = min(bytes, ETH_ALEN); for (i = 0; i < bytes / 2; i++) swap(buf[i], buf[bytes - i - 1]); + } return 0; } diff --git a/drivers/nvmem/layouts/u-boot-env.c b/drivers/nvmem/layouts/u-boot-env.c index 436426d4e8f9..8571aac56295 100644 --- a/drivers/nvmem/layouts/u-boot-env.c +++ b/drivers/nvmem/layouts/u-boot-env.c @@ -92,7 +92,7 @@ int u_boot_env_parse(struct device *dev, struct nvmem_device *nvmem, size_t crc32_data_offset; size_t crc32_data_len; size_t crc32_offset; - __le32 *crc32_addr; + uint32_t *crc32_addr; size_t data_offset; size_t data_len; size_t dev_size; @@ -143,8 +143,8 @@ int u_boot_env_parse(struct device *dev, struct nvmem_device *nvmem, goto err_kfree; } - crc32_addr = (__le32 *)(buf + crc32_offset); - crc32 = le32_to_cpu(*crc32_addr); + crc32_addr = (uint32_t *)(buf + crc32_offset); + crc32 = *crc32_addr; crc32_data_len = dev_size - crc32_data_offset; data_len = dev_size - data_offset; diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c index b0992325dd65..b37052863847 100644 --- a/drivers/pci/controller/pci-host-common.c +++ b/drivers/pci/controller/pci-host-common.c @@ -64,13 +64,13 @@ int pci_host_common_init(struct platform_device *pdev, of_pci_check_probe_only(); + platform_set_drvdata(pdev, bridge); + /* Parse and map our Configuration Space windows */ cfg = gen_pci_init(dev, bridge, ops); if (IS_ERR(cfg)) return PTR_ERR(cfg); - platform_set_drvdata(pdev, bridge); - bridge->sysdata = cfg; bridge->ops = (struct pci_ops *)&ops->pci_ops; bridge->enable_device = ops->enable_device; diff --git a/drivers/pci/controller/pci-hyperv-intf.c b/drivers/pci/controller/pci-hyperv-intf.c index cc96be450360..28b3e93d31c0 100644 --- a/drivers/pci/controller/pci-hyperv-intf.c +++ b/drivers/pci/controller/pci-hyperv-intf.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/hyperv.h> +#include <linux/export.h> struct hyperv_pci_block_ops hvpci_block_ops; EXPORT_SYMBOL_GPL(hvpci_block_ops); diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index ef5d655a0052..13680363ff19 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -600,7 +600,7 @@ static unsigned int hv_msi_get_int_vector(struct irq_data *data) #define hv_msi_prepare pci_msi_prepare /** - * hv_arch_irq_unmask() - "Unmask" the IRQ by setting its current + * hv_irq_retarget_interrupt() - "Unmask" the IRQ by setting its current * affinity. * @data: Describes the IRQ * @@ -609,7 +609,7 @@ static unsigned int hv_msi_get_int_vector(struct irq_data *data) * is built out of this PCI bus's instance GUID and the function * number of the device. */ -static void hv_arch_irq_unmask(struct irq_data *data) +static void hv_irq_retarget_interrupt(struct irq_data *data) { struct msi_desc *msi_desc = irq_data_get_msi_desc(data); struct hv_retarget_device_interrupt *params; @@ -714,6 +714,20 @@ out: dev_err(&hbus->hdev->device, "%s() failed: %#llx", __func__, res); } + +static void hv_arch_irq_unmask(struct irq_data *data) +{ + if (hv_root_partition()) + /* + * In case of the nested root partition, the nested hypervisor + * is taking care of interrupt remapping and thus the + * MAP_DEVICE_INTERRUPT hypercall is required instead of + * RETARGET_INTERRUPT. + */ + (void)hv_map_msi_interrupt(data, NULL); + else + hv_irq_retarget_interrupt(data); +} #elif defined(CONFIG_ARM64) /* * SPI vectors to use for vPCI; arch SPIs range is [32, 1019], but leaving a bit @@ -4144,6 +4158,9 @@ static int __init init_hv_pci_drv(void) if (!hv_is_hyperv_initialized()) return -ENODEV; + if (hv_root_partition() && !hv_nested) + return -ENODEV; + ret = hv_pci_irqchip_init(); if (ret) return ret; diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index 77fe73976654..0380d300adca 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -187,6 +187,7 @@ struct apple_pcie { const struct hw_info *hw; unsigned long *bitmap; struct list_head ports; + struct list_head entry; struct completion event; struct irq_fwspec fwspec; u32 nvecs; @@ -205,6 +206,9 @@ struct apple_pcie_port { int idx; }; +static LIST_HEAD(pcie_list); +static DEFINE_MUTEX(pcie_list_lock); + static void rmw_set(u32 set, void __iomem *addr) { writel_relaxed(readl_relaxed(addr) | set, addr); @@ -720,13 +724,45 @@ static int apple_msi_init(struct apple_pcie *pcie) return 0; } +static void apple_pcie_register(struct apple_pcie *pcie) +{ + guard(mutex)(&pcie_list_lock); + + list_add_tail(&pcie->entry, &pcie_list); +} + +static void apple_pcie_unregister(struct apple_pcie *pcie) +{ + guard(mutex)(&pcie_list_lock); + + list_del(&pcie->entry); +} + +static struct apple_pcie *apple_pcie_lookup(struct device *dev) +{ + struct apple_pcie *pcie; + + guard(mutex)(&pcie_list_lock); + + list_for_each_entry(pcie, &pcie_list, entry) { + if (pcie->dev == dev) + return pcie; + } + + return NULL; +} + static struct apple_pcie_port *apple_pcie_get_port(struct pci_dev *pdev) { struct pci_config_window *cfg = pdev->sysdata; - struct apple_pcie *pcie = cfg->priv; + struct apple_pcie *pcie; struct pci_dev *port_pdev; struct apple_pcie_port *port; + pcie = apple_pcie_lookup(cfg->parent); + if (WARN_ON(!pcie)) + return NULL; + /* Find the root port this device is on */ port_pdev = pcie_find_root_port(pdev); @@ -806,10 +842,14 @@ static void apple_pcie_disable_device(struct pci_host_bridge *bridge, struct pci static int apple_pcie_init(struct pci_config_window *cfg) { - struct apple_pcie *pcie = cfg->priv; struct device *dev = cfg->parent; + struct apple_pcie *pcie; int ret; + pcie = apple_pcie_lookup(dev); + if (WARN_ON(!pcie)) + return -ENOENT; + for_each_available_child_of_node_scoped(dev->of_node, of_port) { ret = apple_pcie_setup_port(pcie, of_port); if (ret) { @@ -852,13 +892,18 @@ static int apple_pcie_probe(struct platform_device *pdev) mutex_init(&pcie->lock); INIT_LIST_HEAD(&pcie->ports); - dev_set_drvdata(dev, pcie); ret = apple_msi_init(pcie); if (ret) return ret; - return pci_host_common_init(pdev, &apple_pcie_cfg_ecam_ops); + apple_pcie_register(pcie); + + ret = pci_host_common_init(pdev, &apple_pcie_cfg_ecam_ops); + if (ret) + apple_pcie_unregister(pcie); + + return ret; } static const struct of_device_id apple_pcie_of_match[] = { diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c index 2c5e6446e00e..260b7de2dbd5 100644 --- a/drivers/pci/ecam.c +++ b/drivers/pci/ecam.c @@ -84,8 +84,6 @@ struct pci_config_window *pci_ecam_create(struct device *dev, goto err_exit_iomap; } - cfg->priv = dev_get_drvdata(dev); - if (ops->init) { err = ops->init(cfg); if (err) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 6ede55a7c5e6..d686488f4111 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -934,10 +934,12 @@ int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag) if (!pdev->msix_enabled) return -ENXIO; - guard(msi_descs_lock)(&pdev->dev); virq = msi_get_virq(&pdev->dev, index); if (!virq) return -ENXIO; + + guard(msi_descs_lock)(&pdev->dev); + /* * This is a horrible hack, but short of implementing a PCI * specific interrupt chip callback and a huge pile of diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4b8693ec9e4c..e6a34db77826 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2508,6 +2508,7 @@ bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, } EXPORT_SYMBOL(pci_bus_read_dev_vendor_id); +#if IS_ENABLED(CONFIG_PCI_PWRCTRL) static struct platform_device *pci_pwrctrl_create_device(struct pci_bus *bus, int devfn) { struct pci_host_bridge *host = pci_find_host_bridge(bus); @@ -2537,6 +2538,12 @@ static struct platform_device *pci_pwrctrl_create_device(struct pci_bus *bus, in return pdev; } +#else +static struct platform_device *pci_pwrctrl_create_device(struct pci_bus *bus, int devfn) +{ + return NULL; +} +#endif /* * Read the config data for a PCI device, sanity-check it, diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 8e2daea81666..04a5a34e7a95 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -994,7 +994,8 @@ struct phy *phy_create(struct device *dev, struct device_node *node, } device_initialize(&phy->dev); - mutex_init(&phy->mutex); + lockdep_register_key(&phy->lockdep_key); + mutex_init_with_key(&phy->mutex, &phy->lockdep_key); phy->dev.class = &phy_class; phy->dev.parent = dev; @@ -1259,6 +1260,8 @@ static void phy_release(struct device *dev) dev_vdbg(dev, "releasing '%s'\n", dev_name(dev)); debugfs_remove_recursive(phy->debugfs); regulator_put(phy->pwr); + mutex_destroy(&phy->mutex); + lockdep_unregister_key(&phy->lockdep_key); ida_free(&phy_ida, phy->id); kfree(phy); } diff --git a/drivers/phy/phy-snps-eusb2.c b/drivers/phy/phy-snps-eusb2.c index b73a1d7e57b3..751b6d8ba2be 100644 --- a/drivers/phy/phy-snps-eusb2.c +++ b/drivers/phy/phy-snps-eusb2.c @@ -567,9 +567,11 @@ static int snps_eusb2_hsphy_probe(struct platform_device *pdev) } } - if (IS_ERR_OR_NULL(phy->ref_clk)) - return dev_err_probe(dev, PTR_ERR(phy->ref_clk), + if (IS_ERR_OR_NULL(phy->ref_clk)) { + ret = phy->ref_clk ? PTR_ERR(phy->ref_clk) : -ENOENT; + return dev_err_probe(dev, ret, "failed to get ref clk\n"); + } num = ARRAY_SIZE(phy->vregs); for (i = 0; i < num; i++) diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c index 23a23f2d64e5..e818f6c3980e 100644 --- a/drivers/phy/tegra/xusb-tegra186.c +++ b/drivers/phy/tegra/xusb-tegra186.c @@ -648,14 +648,15 @@ static void tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl) udelay(100); } - if (padctl->soc->trk_hw_mode) { - value = padctl_readl(padctl, XUSB_PADCTL_USB2_BIAS_PAD_CTL2); - value |= USB2_TRK_HW_MODE; + value = padctl_readl(padctl, XUSB_PADCTL_USB2_BIAS_PAD_CTL2); + if (padctl->soc->trk_update_on_idle) value &= ~CYA_TRK_CODE_UPDATE_ON_IDLE; - padctl_writel(padctl, value, XUSB_PADCTL_USB2_BIAS_PAD_CTL2); - } else { + if (padctl->soc->trk_hw_mode) + value |= USB2_TRK_HW_MODE; + padctl_writel(padctl, value, XUSB_PADCTL_USB2_BIAS_PAD_CTL2); + + if (!padctl->soc->trk_hw_mode) clk_disable_unprepare(priv->usb2_trk_clk); - } } static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) @@ -782,13 +783,15 @@ static int tegra186_xusb_padctl_vbus_override(struct tegra_xusb_padctl *padctl, } static int tegra186_xusb_padctl_id_override(struct tegra_xusb_padctl *padctl, - bool status) + struct tegra_xusb_usb2_port *port, bool status) { - u32 value; + u32 value, id_override; + int err = 0; dev_dbg(padctl->dev, "%s id override\n", status ? "set" : "clear"); value = padctl_readl(padctl, USB2_VBUS_ID); + id_override = value & ID_OVERRIDE(~0); if (status) { if (value & VBUS_OVERRIDE) { @@ -799,15 +802,35 @@ static int tegra186_xusb_padctl_id_override(struct tegra_xusb_padctl *padctl, value = padctl_readl(padctl, USB2_VBUS_ID); } - value &= ~ID_OVERRIDE(~0); - value |= ID_OVERRIDE_GROUNDED; + if (id_override != ID_OVERRIDE_GROUNDED) { + value &= ~ID_OVERRIDE(~0); + value |= ID_OVERRIDE_GROUNDED; + padctl_writel(padctl, value, USB2_VBUS_ID); + + err = regulator_enable(port->supply); + if (err) { + dev_err(padctl->dev, "Failed to enable regulator: %d\n", err); + return err; + } + } } else { - value &= ~ID_OVERRIDE(~0); - value |= ID_OVERRIDE_FLOATING; + if (id_override == ID_OVERRIDE_GROUNDED) { + /* + * The regulator is disabled only when the role transitions + * from USB_ROLE_HOST to USB_ROLE_NONE. + */ + err = regulator_disable(port->supply); + if (err) { + dev_err(padctl->dev, "Failed to disable regulator: %d\n", err); + return err; + } + + value &= ~ID_OVERRIDE(~0); + value |= ID_OVERRIDE_FLOATING; + padctl_writel(padctl, value, USB2_VBUS_ID); + } } - padctl_writel(padctl, value, USB2_VBUS_ID); - return 0; } @@ -826,27 +849,20 @@ static int tegra186_utmi_phy_set_mode(struct phy *phy, enum phy_mode mode, if (mode == PHY_MODE_USB_OTG) { if (submode == USB_ROLE_HOST) { - tegra186_xusb_padctl_id_override(padctl, true); - - err = regulator_enable(port->supply); + err = tegra186_xusb_padctl_id_override(padctl, port, true); + if (err) + goto out; } else if (submode == USB_ROLE_DEVICE) { tegra186_xusb_padctl_vbus_override(padctl, true); } else if (submode == USB_ROLE_NONE) { - /* - * When port is peripheral only or role transitions to - * USB_ROLE_NONE from USB_ROLE_DEVICE, regulator is not - * enabled. - */ - if (regulator_is_enabled(port->supply)) - regulator_disable(port->supply); - - tegra186_xusb_padctl_id_override(padctl, false); + err = tegra186_xusb_padctl_id_override(padctl, port, false); + if (err) + goto out; tegra186_xusb_padctl_vbus_override(padctl, false); } } - +out: mutex_unlock(&padctl->lock); - return err; } @@ -1710,7 +1726,8 @@ const struct tegra_xusb_padctl_soc tegra234_xusb_padctl_soc = { .num_supplies = ARRAY_SIZE(tegra194_xusb_padctl_supply_names), .supports_gen2 = true, .poll_trk_completed = true, - .trk_hw_mode = true, + .trk_hw_mode = false, + .trk_update_on_idle = true, .supports_lp_cfg_en = true, }; EXPORT_SYMBOL_GPL(tegra234_xusb_padctl_soc); diff --git a/drivers/phy/tegra/xusb.h b/drivers/phy/tegra/xusb.h index 6e45d194c689..d2b5f9565132 100644 --- a/drivers/phy/tegra/xusb.h +++ b/drivers/phy/tegra/xusb.h @@ -434,6 +434,7 @@ struct tegra_xusb_padctl_soc { bool need_fake_usb3_port; bool poll_trk_completed; bool trk_hw_mode; + bool trk_update_on_idle; bool supports_lp_cfg_en; }; diff --git a/drivers/pinctrl/nuvoton/pinctrl-ma35.c b/drivers/pinctrl/nuvoton/pinctrl-ma35.c index 06ae1fe8b8c5..b51704bafd81 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-ma35.c +++ b/drivers/pinctrl/nuvoton/pinctrl-ma35.c @@ -1074,7 +1074,10 @@ static int ma35_pinctrl_probe_dt(struct platform_device *pdev, struct ma35_pinct u32 idx = 0; int ret; - for_each_gpiochip_node(dev, child) { + device_for_each_child_node(dev, child) { + if (fwnode_property_present(child, "gpio-controller")) + continue; + npctl->nfunctions++; npctl->ngroups += of_get_child_count(to_of_node(child)); } @@ -1092,7 +1095,10 @@ static int ma35_pinctrl_probe_dt(struct platform_device *pdev, struct ma35_pinct if (!npctl->groups) return -ENOMEM; - for_each_gpiochip_node(dev, child) { + device_for_each_child_node(dev, child) { + if (fwnode_property_present(child, "gpio-controller")) + continue; + ret = ma35_pinctrl_parse_functions(child, npctl, idx++); if (ret) { fwnode_handle_put(child); diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 5cf3db6d78b7..b3f0d02aeeb3 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -979,6 +979,17 @@ static int amd_gpio_suspend_hibernate_common(struct device *dev, bool is_suspend pin, is_suspend ? "suspend" : "hibernate"); } + /* + * debounce enabled over suspend has shown issues with a GPIO + * being unable to wake the system, as we're only interested in + * the actual wakeup event, clear it. + */ + if (gpio_dev->saved_regs[i] & (DB_CNTRl_MASK << DB_CNTRL_OFF)) { + amd_gpio_set_debounce(gpio_dev, pin, 0); + pm_pr_dbg("Clearing debounce for GPIO #%d during %s.\n", + pin, is_suspend ? "suspend" : "hibernate"); + } + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); } diff --git a/drivers/pinctrl/pinctrl-aw9523.c b/drivers/pinctrl/pinctrl-aw9523.c index 9bf53de20be8..04afb344e9e5 100644 --- a/drivers/pinctrl/pinctrl-aw9523.c +++ b/drivers/pinctrl/pinctrl-aw9523.c @@ -784,7 +784,7 @@ static int aw9523_init_gpiochip(struct aw9523 *awi, unsigned int npins) gc->set_config = gpiochip_generic_config; gc->parent = dev; gc->owner = THIS_MODULE; - gc->can_sleep = false; + gc->can_sleep = true; return 0; } diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 5c4687de1464..f713c80d7f3e 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -1038,6 +1038,25 @@ static bool msm_gpio_needs_dual_edge_parent_workaround(struct irq_data *d, test_bit(d->hwirq, pctrl->skip_wake_irqs); } +static void msm_gpio_irq_init_valid_mask(struct gpio_chip *gc, + unsigned long *valid_mask, + unsigned int ngpios) +{ + struct msm_pinctrl *pctrl = gpiochip_get_data(gc); + const struct msm_pingroup *g; + int i; + + bitmap_fill(valid_mask, ngpios); + + for (i = 0; i < ngpios; i++) { + g = &pctrl->soc->groups[i]; + + if (g->intr_detection_width != 1 && + g->intr_detection_width != 2) + clear_bit(i, valid_mask); + } +} + static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); @@ -1441,6 +1460,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; girq->parents[0] = pctrl->irq; + girq->init_valid_mask = msm_gpio_irq_init_valid_mask; ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl); if (ret) { diff --git a/drivers/platform/arm64/huawei-gaokun-ec.c b/drivers/platform/arm64/huawei-gaokun-ec.c index 7e5aa7ca2403..7170f8eb76f7 100644 --- a/drivers/platform/arm64/huawei-gaokun-ec.c +++ b/drivers/platform/arm64/huawei-gaokun-ec.c @@ -662,6 +662,7 @@ static void gaokun_aux_release(struct device *dev) { struct auxiliary_device *adev = to_auxiliary_dev(dev); + of_node_put(dev->of_node); kfree(adev); } @@ -693,6 +694,7 @@ static int gaokun_aux_init(struct device *parent, const char *name, ret = auxiliary_device_init(adev); if (ret) { + of_node_put(adev->dev.of_node); kfree(adev); return ret; } diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index a1c529f1ff1a..4776013e0764 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -15,6 +15,7 @@ #include <linux/hwmon.h> #include <linux/platform_device.h> #include <linux/string.h> +#include <linux/string_helpers.h> #include <uapi/linux/psci.h> #define MLXBF_PMC_WRITE_REG_32 0x82000009 @@ -1222,7 +1223,7 @@ static int mlxbf_pmc_get_event_num(const char *blk, const char *evt) return -ENODEV; } -/* Get the event number given the name */ +/* Get the event name given the number */ static char *mlxbf_pmc_get_event_name(const char *blk, u32 evt) { const struct mlxbf_pmc_events *events; @@ -1784,6 +1785,7 @@ static ssize_t mlxbf_pmc_event_store(struct device *dev, attr, struct mlxbf_pmc_attribute, dev_attr); unsigned int blk_num, cnt_num; bool is_l3 = false; + char *evt_name; int evt_num; int err; @@ -1791,14 +1793,23 @@ static ssize_t mlxbf_pmc_event_store(struct device *dev, cnt_num = attr_event->index; if (isalpha(buf[0])) { + /* Remove the trailing newline character if present */ + evt_name = kstrdup_and_replace(buf, '\n', '\0', GFP_KERNEL); + if (!evt_name) + return -ENOMEM; + evt_num = mlxbf_pmc_get_event_num(pmc->block_name[blk_num], - buf); + evt_name); + kfree(evt_name); if (evt_num < 0) return -EINVAL; } else { err = kstrtouint(buf, 0, &evt_num); if (err < 0) return err; + + if (!mlxbf_pmc_get_event_name(pmc->block_name[blk_num], evt_num)) + return -EINVAL; } if (strstr(pmc->block_name[blk_num], "l3cache")) @@ -1879,13 +1890,14 @@ static ssize_t mlxbf_pmc_enable_store(struct device *dev, { struct mlxbf_pmc_attribute *attr_enable = container_of( attr, struct mlxbf_pmc_attribute, dev_attr); - unsigned int en, blk_num; + unsigned int blk_num; u32 word; int err; + bool en; blk_num = attr_enable->nr; - err = kstrtouint(buf, 0, &en); + err = kstrtobool(buf, &en); if (err < 0) return err; @@ -1905,14 +1917,11 @@ static ssize_t mlxbf_pmc_enable_store(struct device *dev, MLXBF_PMC_CRSPACE_PERFMON_CTL(pmc->block[blk_num].counters), MLXBF_PMC_WRITE_REG_32, word); } else { - if (en && en != 1) - return -EINVAL; - err = mlxbf_pmc_config_l3_counters(blk_num, false, !!en); if (err) return err; - if (en == 1) { + if (en) { err = mlxbf_pmc_config_l3_counters(blk_num, true, false); if (err) return err; diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index abbc2644ff6d..bea87a85ae75 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -58,6 +58,8 @@ obj-$(CONFIG_X86_PLATFORM_DRIVERS_HP) += hp/ # Hewlett Packard Enterprise obj-$(CONFIG_UV_SYSFS) += uv_sysfs.o +obj-$(CONFIG_FW_ATTR_CLASS) += firmware_attributes_class.o + # IBM Thinkpad and Lenovo obj-$(CONFIG_IBM_RTL) += ibm_rtl.o obj-$(CONFIG_IDEAPAD_LAPTOP) += ideapad-laptop.o @@ -128,7 +130,6 @@ obj-$(CONFIG_SYSTEM76_ACPI) += system76_acpi.o obj-$(CONFIG_TOPSTAR_LAPTOP) += topstar-laptop.o # Platform drivers -obj-$(CONFIG_FW_ATTR_CLASS) += firmware_attributes_class.o obj-$(CONFIG_SERIAL_MULTI_INSTANTIATE) += serial-multi-instantiate.o obj-$(CONFIG_TOUCHSCREEN_DMI) += touchscreen_dmi.o obj-$(CONFIG_WIRELESS_HOTKEY) += wireless-hotkey.o diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index 20ec122a9fe0..b58cf74197f0 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -90,6 +90,14 @@ static struct awcc_quirks empty_quirks; static const struct dmi_system_id awcc_dmi_table[] __initconst = { { + .ident = "Alienware Area-51m", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware Area-51m"), + }, + .driver_data = &generic_quirks, + }, + { .ident = "Alienware Area-51m R2", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), @@ -98,6 +106,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { .driver_data = &generic_quirks, }, { + .ident = "Alienware m15 R5", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m15 R5"), + }, + .driver_data = &generic_quirks, + }, + { .ident = "Alienware m15 R7", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), @@ -233,6 +249,7 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { }, .driver_data = &g_series_quirks, }, + {} }; enum AWCC_GET_FAN_SENSORS_OPERATIONS { diff --git a/drivers/platform/x86/dell/dell-lis3lv02d.c b/drivers/platform/x86/dell/dell-lis3lv02d.c index 0791118dd6b7..732de5f556f8 100644 --- a/drivers/platform/x86/dell/dell-lis3lv02d.c +++ b/drivers/platform/x86/dell/dell-lis3lv02d.c @@ -49,6 +49,7 @@ static const struct dmi_system_id lis3lv02d_devices[] __initconst = { DELL_LIS3LV02D_DMI_ENTRY("Latitude E6330", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Latitude E6430", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Precision 3540", 0x29), + DELL_LIS3LV02D_DMI_ENTRY("Precision 3551", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Precision M6800", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Vostro V131", 0x1d), DELL_LIS3LV02D_DMI_ENTRY("Vostro 5568", 0x29), diff --git a/drivers/platform/x86/dell/dell-wmi-ddv.c b/drivers/platform/x86/dell/dell-wmi-ddv.c index 67f3d7158403..62e3d060f038 100644 --- a/drivers/platform/x86/dell/dell-wmi-ddv.c +++ b/drivers/platform/x86/dell/dell-wmi-ddv.c @@ -689,9 +689,13 @@ static int dell_wmi_ddv_battery_translate(struct dell_wmi_ddv_data *data, dev_dbg(&data->wdev->dev, "Translation cache miss\n"); - /* Perform a translation between a ACPI battery and a battery index */ - - ret = power_supply_get_property(battery, POWER_SUPPLY_PROP_SERIAL_NUMBER, &val); + /* + * Perform a translation between a ACPI battery and a battery index. + * We have to use power_supply_get_property_direct() here because this + * function will also get called from the callbacks of the power supply + * extension. + */ + ret = power_supply_get_property_direct(battery, POWER_SUPPLY_PROP_SERIAL_NUMBER, &val); if (ret < 0) return ret; diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index b5e4da6a6779..edb9d2fb02ec 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1669,7 +1669,7 @@ static int ideapad_kbd_bl_init(struct ideapad_private *priv) priv->kbd_bl.led.name = "platform::" LED_FUNCTION_KBD_BACKLIGHT; priv->kbd_bl.led.brightness_get = ideapad_kbd_bl_led_cdev_brightness_get; priv->kbd_bl.led.brightness_set_blocking = ideapad_kbd_bl_led_cdev_brightness_set; - priv->kbd_bl.led.flags = LED_BRIGHT_HW_CHANGED; + priv->kbd_bl.led.flags = LED_BRIGHT_HW_CHANGED | LED_RETAIN_AT_SHUTDOWN; err = led_classdev_register(&priv->platform_device->dev, &priv->kbd_bl.led); if (err) @@ -1728,7 +1728,7 @@ static int ideapad_fn_lock_led_init(struct ideapad_private *priv) priv->fn_lock.led.name = "platform::" LED_FUNCTION_FNLOCK; priv->fn_lock.led.brightness_get = ideapad_fn_lock_led_cdev_get; priv->fn_lock.led.brightness_set_blocking = ideapad_fn_lock_led_cdev_set; - priv->fn_lock.led.flags = LED_BRIGHT_HW_CHANGED; + priv->fn_lock.led.flags = LED_BRIGHT_HW_CHANGED | LED_RETAIN_AT_SHUTDOWN; err = led_classdev_register(&priv->platform_device->dev, &priv->fn_lock.led); if (err) diff --git a/drivers/platform/x86/lenovo-wmi-hotkey-utilities.c b/drivers/platform/x86/lenovo-wmi-hotkey-utilities.c index 89153afd7015..7b9bad1978ff 100644 --- a/drivers/platform/x86/lenovo-wmi-hotkey-utilities.c +++ b/drivers/platform/x86/lenovo-wmi-hotkey-utilities.c @@ -122,26 +122,35 @@ static int lenovo_super_hotkey_wmi_led_init(enum mute_led_type led_type, struct return -EIO; union acpi_object *obj __free(kfree) = output.pointer; - if (obj && obj->type == ACPI_TYPE_INTEGER) - led_version = obj->integer.value; - else + if (!obj || obj->type != ACPI_TYPE_INTEGER) return -EIO; - wpriv->cdev[led_type].max_brightness = LED_ON; - wpriv->cdev[led_type].flags = LED_CORE_SUSPENDRESUME; + led_version = obj->integer.value; + + /* + * Output parameters define: 0 means mute LED is not supported, Non-zero means + * mute LED can be supported. + */ + if (led_version == 0) + return 0; + switch (led_type) { case MIC_MUTE: - if (led_version != WMI_LUD_SUPPORT_MICMUTE_LED_VER) - return -EIO; + if (led_version != WMI_LUD_SUPPORT_MICMUTE_LED_VER) { + pr_warn("The MIC_MUTE LED of this device isn't supported.\n"); + return 0; + } wpriv->cdev[led_type].name = "platform::micmute"; wpriv->cdev[led_type].brightness_set_blocking = &lsh_wmi_micmute_led_set; wpriv->cdev[led_type].default_trigger = "audio-micmute"; break; case AUDIO_MUTE: - if (led_version != WMI_LUD_SUPPORT_AUDIOMUTE_LED_VER) - return -EIO; + if (led_version != WMI_LUD_SUPPORT_AUDIOMUTE_LED_VER) { + pr_warn("The AUDIO_MUTE LED of this device isn't supported.\n"); + return 0; + } wpriv->cdev[led_type].name = "platform::mute"; wpriv->cdev[led_type].brightness_set_blocking = &lsh_wmi_audiomute_led_set; @@ -152,6 +161,9 @@ static int lenovo_super_hotkey_wmi_led_init(enum mute_led_type led_type, struct return -EINVAL; } + wpriv->cdev[led_type].max_brightness = LED_ON; + wpriv->cdev[led_type].flags = LED_CORE_SUSPENDRESUME; + err = devm_led_classdev_register(dev, &wpriv->cdev[led_type]); if (err < 0) { dev_err(dev, "Could not register mute LED %d : %d\n", led_type, err); diff --git a/drivers/pmdomain/governor.c b/drivers/pmdomain/governor.c index c1e148657c87..39359811a930 100644 --- a/drivers/pmdomain/governor.c +++ b/drivers/pmdomain/governor.c @@ -8,6 +8,7 @@ #include <linux/pm_domain.h> #include <linux/pm_qos.h> #include <linux/hrtimer.h> +#include <linux/cpu.h> #include <linux/cpuidle.h> #include <linux/cpumask.h> #include <linux/ktime.h> @@ -349,6 +350,8 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd) struct cpuidle_device *dev; ktime_t domain_wakeup, next_hrtimer; ktime_t now = ktime_get(); + struct device *cpu_dev; + s64 cpu_constraint, global_constraint; s64 idle_duration_ns; int cpu, i; @@ -359,6 +362,7 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd) if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN)) return true; + global_constraint = cpu_latency_qos_limit(); /* * Find the next wakeup for any of the online CPUs within the PM domain * and its subdomains. Note, we only need the genpd->cpus, as it already @@ -372,8 +376,16 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd) if (ktime_before(next_hrtimer, domain_wakeup)) domain_wakeup = next_hrtimer; } + + cpu_dev = get_cpu_device(cpu); + if (cpu_dev) { + cpu_constraint = dev_pm_qos_raw_resume_latency(cpu_dev); + if (cpu_constraint < global_constraint) + global_constraint = cpu_constraint; + } } + global_constraint *= NSEC_PER_USEC; /* The minimum idle duration is from now - until the next wakeup. */ idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, now)); if (idle_duration_ns <= 0) @@ -389,8 +401,10 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd) */ i = genpd->state_idx; do { - if (idle_duration_ns >= (genpd->states[i].residency_ns + - genpd->states[i].power_off_latency_ns)) { + if ((idle_duration_ns >= (genpd->states[i].residency_ns + + genpd->states[i].power_off_latency_ns)) && + (global_constraint >= (genpd->states[i].power_on_latency_ns + + genpd->states[i].power_off_latency_ns))) { genpd->state_idx = i; genpd->gd->last_enter = now; genpd->gd->reflect_residency = true; diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 33a5bfce4604..cfb0e3e0d4aa 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -1235,9 +1235,8 @@ bool power_supply_has_property(struct power_supply *psy, return false; } -int power_supply_get_property(struct power_supply *psy, - enum power_supply_property psp, - union power_supply_propval *val) +static int __power_supply_get_property(struct power_supply *psy, enum power_supply_property psp, + union power_supply_propval *val, bool use_extensions) { struct power_supply_ext_registration *reg; @@ -1247,10 +1246,14 @@ int power_supply_get_property(struct power_supply *psy, return -ENODEV; } - scoped_guard(rwsem_read, &psy->extensions_sem) { - power_supply_for_each_extension(reg, psy) { - if (power_supply_ext_has_property(reg->ext, psp)) + if (use_extensions) { + scoped_guard(rwsem_read, &psy->extensions_sem) { + power_supply_for_each_extension(reg, psy) { + if (!power_supply_ext_has_property(reg->ext, psp)) + continue; + return reg->ext->get_property(psy, reg->ext, reg->data, psp, val); + } } } @@ -1261,20 +1264,49 @@ int power_supply_get_property(struct power_supply *psy, else return -EINVAL; } + +int power_supply_get_property(struct power_supply *psy, enum power_supply_property psp, + union power_supply_propval *val) +{ + return __power_supply_get_property(psy, psp, val, true); +} EXPORT_SYMBOL_GPL(power_supply_get_property); -int power_supply_set_property(struct power_supply *psy, - enum power_supply_property psp, - const union power_supply_propval *val) +/** + * power_supply_get_property_direct - Read a power supply property without checking for extensions + * @psy: The power supply + * @psp: The power supply property to read + * @val: The resulting value of the power supply property + * + * Read a power supply property without taking into account any power supply extensions registered + * on the given power supply. This is mostly useful for power supply extensions that want to access + * their own power supply as using power_supply_get_property() directly will result in a potential + * deadlock. + * + * Return: 0 on success or negative error code on failure. + */ +int power_supply_get_property_direct(struct power_supply *psy, enum power_supply_property psp, + union power_supply_propval *val) +{ + return __power_supply_get_property(psy, psp, val, false); +} +EXPORT_SYMBOL_GPL(power_supply_get_property_direct); + + +static int __power_supply_set_property(struct power_supply *psy, enum power_supply_property psp, + const union power_supply_propval *val, bool use_extensions) { struct power_supply_ext_registration *reg; if (atomic_read(&psy->use_cnt) <= 0) return -ENODEV; - scoped_guard(rwsem_read, &psy->extensions_sem) { - power_supply_for_each_extension(reg, psy) { - if (power_supply_ext_has_property(reg->ext, psp)) { + if (use_extensions) { + scoped_guard(rwsem_read, &psy->extensions_sem) { + power_supply_for_each_extension(reg, psy) { + if (!power_supply_ext_has_property(reg->ext, psp)) + continue; + if (reg->ext->set_property) return reg->ext->set_property(psy, reg->ext, reg->data, psp, val); @@ -1289,8 +1321,34 @@ int power_supply_set_property(struct power_supply *psy, return psy->desc->set_property(psy, psp, val); } + +int power_supply_set_property(struct power_supply *psy, enum power_supply_property psp, + const union power_supply_propval *val) +{ + return __power_supply_set_property(psy, psp, val, true); +} EXPORT_SYMBOL_GPL(power_supply_set_property); +/** + * power_supply_set_property_direct - Write a power supply property without checking for extensions + * @psy: The power supply + * @psp: The power supply property to write + * @val: The value to write to the power supply property + * + * Write a power supply property without taking into account any power supply extensions registered + * on the given power supply. This is mostly useful for power supply extensions that want to access + * their own power supply as using power_supply_set_property() directly will result in a potential + * deadlock. + * + * Return: 0 on success or negative error code on failure. + */ +int power_supply_set_property_direct(struct power_supply *psy, enum power_supply_property psp, + const union power_supply_propval *val) +{ + return __power_supply_set_property(psy, psp, val, false); +} +EXPORT_SYMBOL_GPL(power_supply_set_property_direct); + int power_supply_property_is_writeable(struct power_supply *psy, enum power_supply_property psp) { diff --git a/drivers/power/supply/test_power.c b/drivers/power/supply/test_power.c index 5bfdfcf6013b..2c0e9ad820c0 100644 --- a/drivers/power/supply/test_power.c +++ b/drivers/power/supply/test_power.c @@ -259,6 +259,7 @@ static const struct power_supply_config test_power_configs[] = { static int test_power_battery_extmanufacture_year = 1234; static int test_power_battery_exttemp_max = 1000; static const enum power_supply_property test_power_battery_extprops[] = { + POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, POWER_SUPPLY_PROP_MANUFACTURE_YEAR, POWER_SUPPLY_PROP_TEMP_MAX, }; @@ -270,6 +271,9 @@ static int test_power_battery_extget_property(struct power_supply *psy, union power_supply_propval *val) { switch (psp) { + case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW: + return power_supply_get_property_direct(psy, POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, + val); case POWER_SUPPLY_PROP_MANUFACTURE_YEAR: val->intval = test_power_battery_extmanufacture_year; break; diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 4d842c692194..edf776b8ad53 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -596,7 +596,7 @@ static bool pwm_state_valid(const struct pwm_state *state) * and supposed to be ignored. So also ignore any strange values and * consider the state ok. */ - if (state->enabled) + if (!state->enabled) return true; if (!state->period) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 7eaab5831499..33d3554b9197 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -130,8 +130,10 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, return ret; clk_rate = clk_get_rate(pc->clk_pwms[pwm->hwpwm]); - if (!clk_rate) - return -EINVAL; + if (!clk_rate) { + ret = -EINVAL; + goto out; + } /* Make sure we use the bus clock and not the 26MHz clock */ if (pc->soc->has_ck_26m_sel) @@ -150,9 +152,9 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, } if (clkdiv > PWM_CLK_DIV_MAX) { - pwm_mediatek_clk_disable(chip, pwm); dev_err(pwmchip_parent(chip), "period of %d ns not supported\n", period_ns); - return -EINVAL; + ret = -EINVAL; + goto out; } if (pc->soc->pwm45_fixup && pwm->hwpwm > 2) { @@ -169,9 +171,10 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, pwm_mediatek_writel(pc, pwm->hwpwm, reg_width, cnt_period); pwm_mediatek_writel(pc, pwm->hwpwm, reg_thres, cnt_duty); +out: pwm_mediatek_clk_disable(chip, pwm); - return 0; + return ret; } static int pwm_mediatek_enable(struct pwm_chip *chip, struct pwm_device *pwm) diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index b7f15f303ea2..967dc4f9eea8 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -130,6 +130,7 @@ static int ism_cmd(struct ism_dev *ism, void *cmd) struct ism_req_hdr *req = cmd; struct ism_resp_hdr *resp = cmd; + spin_lock(&ism->cmd_lock); __ism_write_cmd(ism, req + 1, sizeof(*req), req->len - sizeof(*req)); __ism_write_cmd(ism, req, 0, sizeof(*req)); @@ -143,6 +144,7 @@ static int ism_cmd(struct ism_dev *ism, void *cmd) } __ism_read_cmd(ism, resp + 1, sizeof(*resp), resp->len - sizeof(*resp)); out: + spin_unlock(&ism->cmd_lock); return resp->ret; } @@ -606,6 +608,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) return -ENOMEM; spin_lock_init(&ism->lock); + spin_lock_init(&ism->cmd_lock); dev_set_drvdata(&pdev->dev, ism); ism->pdev = pdev; ism->dev.parent = &pdev->dev; diff --git a/drivers/soc/aspeed/aspeed-lpc-snoop.c b/drivers/soc/aspeed/aspeed-lpc-snoop.c index ef8f355589a5..fc3a2c41cc10 100644 --- a/drivers/soc/aspeed/aspeed-lpc-snoop.c +++ b/drivers/soc/aspeed/aspeed-lpc-snoop.c @@ -58,6 +58,7 @@ struct aspeed_lpc_snoop_model_data { }; struct aspeed_lpc_snoop_channel { + bool enabled; struct kfifo fifo; wait_queue_head_t wq; struct miscdevice miscdev; @@ -190,6 +191,9 @@ static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop, const struct aspeed_lpc_snoop_model_data *model_data = of_device_get_match_data(dev); + if (WARN_ON(lpc_snoop->chan[channel].enabled)) + return -EBUSY; + init_waitqueue_head(&lpc_snoop->chan[channel].wq); /* Create FIFO datastructure */ rc = kfifo_alloc(&lpc_snoop->chan[channel].fifo, @@ -236,6 +240,8 @@ static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop, regmap_update_bits(lpc_snoop->regmap, HICRB, hicrb_en, hicrb_en); + lpc_snoop->chan[channel].enabled = true; + return 0; err_misc_deregister: @@ -248,6 +254,9 @@ err_free_fifo: static void aspeed_lpc_disable_snoop(struct aspeed_lpc_snoop *lpc_snoop, int channel) { + if (!lpc_snoop->chan[channel].enabled) + return; + switch (channel) { case 0: regmap_update_bits(lpc_snoop->regmap, HICR5, @@ -263,8 +272,10 @@ static void aspeed_lpc_disable_snoop(struct aspeed_lpc_snoop *lpc_snoop, return; } - kfifo_free(&lpc_snoop->chan[channel].fifo); + lpc_snoop->chan[channel].enabled = false; + /* Consider improving safety wrt concurrent reader(s) */ misc_deregister(&lpc_snoop->chan[channel].miscdev); + kfifo_free(&lpc_snoop->chan[channel].fifo); } static int aspeed_lpc_snoop_probe(struct platform_device *pdev) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index a12c68b93b1c..7a671a786197 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -238,7 +238,7 @@ static u64 amd_sdw_send_cmd_get_resp(struct amd_sdw_manager *amd_manager, u32 lo if (sts & AMD_SDW_IMM_RES_VALID) { dev_err(amd_manager->dev, "SDW%x manager is in bad state\n", amd_manager->instance); - writel(0x00, amd_manager->mmio + ACP_SW_IMM_CMD_STS); + writel(AMD_SDW_IMM_RES_VALID, amd_manager->mmio + ACP_SW_IMM_CMD_STS); } writel(upper_data, amd_manager->mmio + ACP_SW_IMM_CMD_UPPER_WORD); writel(lower_data, amd_manager->mmio + ACP_SW_IMM_CMD_LOWER_QWORD); @@ -1209,6 +1209,7 @@ static int __maybe_unused amd_suspend(struct device *dev) } if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) { + cancel_work_sync(&amd_manager->amd_sdw_work); amd_sdw_wake_enable(amd_manager, false); if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) { ret = amd_sdw_host_wake_enable(amd_manager, false); @@ -1219,6 +1220,7 @@ static int __maybe_unused amd_suspend(struct device *dev) if (ret) return ret; } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { + cancel_work_sync(&amd_manager->amd_sdw_work); amd_sdw_wake_enable(amd_manager, false); if (amd_manager->acp_rev >= ACP70_PCI_REV_ID) { ret = amd_sdw_host_wake_enable(amd_manager, false); diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index 295a46dc2be7..0f45e3404756 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -156,7 +156,6 @@ struct qcom_swrm_port_config { u8 word_length; u8 blk_group_count; u8 lane_control; - u8 ch_mask; }; /* @@ -1049,13 +1048,9 @@ static int qcom_swrm_port_enable(struct sdw_bus *bus, { u32 reg = SWRM_DP_PORT_CTRL_BANK(enable_ch->port_num, bank); struct qcom_swrm_ctrl *ctrl = to_qcom_sdw(bus); - struct qcom_swrm_port_config *pcfg; u32 val; - pcfg = &ctrl->pconfig[enable_ch->port_num]; ctrl->reg_read(ctrl, reg, &val); - if (pcfg->ch_mask != SWR_INVALID_PARAM && pcfg->ch_mask != 0) - enable_ch->ch_mask = pcfg->ch_mask; if (enable_ch->enable) val |= (enable_ch->ch_mask << SWRM_DP_PORT_CTRL_EN_CHAN_SHFT); @@ -1275,26 +1270,6 @@ static void *qcom_swrm_get_sdw_stream(struct snd_soc_dai *dai, int direction) return ctrl->sruntime[dai->id]; } -static int qcom_swrm_set_channel_map(struct snd_soc_dai *dai, - unsigned int tx_num, const unsigned int *tx_slot, - unsigned int rx_num, const unsigned int *rx_slot) -{ - struct qcom_swrm_ctrl *ctrl = dev_get_drvdata(dai->dev); - int i; - - if (tx_slot) { - for (i = 0; i < tx_num; i++) - ctrl->pconfig[i].ch_mask = tx_slot[i]; - } - - if (rx_slot) { - for (i = 0; i < rx_num; i++) - ctrl->pconfig[i].ch_mask = rx_slot[i]; - } - - return 0; -} - static int qcom_swrm_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { @@ -1331,7 +1306,6 @@ static const struct snd_soc_dai_ops qcom_swrm_pdm_dai_ops = { .shutdown = qcom_swrm_shutdown, .set_stream = qcom_swrm_set_sdw_stream, .get_stream = qcom_swrm_get_sdw_stream, - .set_channel_map = qcom_swrm_set_channel_map, }; static const struct snd_soc_component_driver qcom_swrm_dai_component = { diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index f2e1a27b410d..3b757e3d00c0 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -308,7 +308,7 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) ecc_cfg->bch_enabled = true; ecc_cfg->bytes = ecc_cfg->ecc_bytes_hw + ecc_cfg->spare_bytes + ecc_cfg->bbm_size; - ecc_cfg->steps = 4; + ecc_cfg->steps = cwperpage; ecc_cfg->cw_data = 516; ecc_cfg->cw_size = ecc_cfg->cw_data + ecc_cfg->bytes; bad_block_byte = mtd->writesize - ecc_cfg->cw_size * (cwperpage - 1) + 1; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 1bc0fdbb1bd7..0ffa3f9f2870 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -4138,10 +4138,13 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) xfer->tx_nbits != SPI_NBITS_OCTAL) return -EINVAL; if ((xfer->tx_nbits == SPI_NBITS_DUAL) && - !(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD))) + !(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL))) return -EINVAL; if ((xfer->tx_nbits == SPI_NBITS_QUAD) && - !(spi->mode & SPI_TX_QUAD)) + !(spi->mode & (SPI_TX_QUAD | SPI_TX_OCTAL))) + return -EINVAL; + if ((xfer->tx_nbits == SPI_NBITS_OCTAL) && + !(spi->mode & SPI_TX_OCTAL)) return -EINVAL; } /* Check transfer rx_nbits */ @@ -4154,10 +4157,13 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) xfer->rx_nbits != SPI_NBITS_OCTAL) return -EINVAL; if ((xfer->rx_nbits == SPI_NBITS_DUAL) && - !(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD))) + !(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL))) return -EINVAL; if ((xfer->rx_nbits == SPI_NBITS_QUAD) && - !(spi->mode & SPI_RX_QUAD)) + !(spi->mode & (SPI_RX_QUAD | SPI_RX_OCTAL))) + return -EINVAL; + if ((xfer->rx_nbits == SPI_NBITS_OCTAL) && + !(spi->mode & SPI_RX_OCTAL)) return -EINVAL; } diff --git a/drivers/staging/gpib/common/gpib_os.c b/drivers/staging/gpib/common/gpib_os.c index a193d64db033..38c6abc2ffd2 100644 --- a/drivers/staging/gpib/common/gpib_os.c +++ b/drivers/staging/gpib/common/gpib_os.c @@ -610,7 +610,7 @@ int ibclose(struct inode *inode, struct file *filep) long ibioctl(struct file *filep, unsigned int cmd, unsigned long arg) { - unsigned int minor = iminor(filep->f_path.dentry->d_inode); + unsigned int minor = iminor(file_inode(filep)); struct gpib_board *board; struct gpib_file_private *file_priv = filep->private_data; long retval = -ENOTTY; diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 5dbf8d53db09..721b15b7e13b 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -97,6 +97,13 @@ struct vchiq_arm_state { * tracked separately with the state. */ int peer_use_count; + + /* + * Flag to indicate that the first vchiq connect has made it through. + * This means that both sides should be fully ready, and we should + * be able to suspend after this point. + */ + int first_connect; }; static int @@ -273,6 +280,29 @@ static int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state return 0; } +int +vchiq_platform_init_state(struct vchiq_state *state) +{ + struct vchiq_arm_state *platform_state; + + platform_state = devm_kzalloc(state->dev, sizeof(*platform_state), GFP_KERNEL); + if (!platform_state) + return -ENOMEM; + + rwlock_init(&platform_state->susp_res_lock); + + init_completion(&platform_state->ka_evt); + atomic_set(&platform_state->ka_use_count, 0); + atomic_set(&platform_state->ka_use_ack_count, 0); + atomic_set(&platform_state->ka_release_count, 0); + + platform_state->state = state; + + state->platform_state = (struct opaque_platform_state *)platform_state; + + return 0; +} + static struct vchiq_arm_state *vchiq_platform_get_arm_state(struct vchiq_state *state) { return (struct vchiq_arm_state *)state->platform_state; @@ -363,8 +393,7 @@ int vchiq_shutdown(struct vchiq_instance *instance) struct vchiq_state *state = instance->state; int ret = 0; - if (mutex_lock_killable(&state->mutex)) - return -EAGAIN; + mutex_lock(&state->mutex); /* Remove all services */ vchiq_shutdown_internal(state, instance); @@ -982,39 +1011,6 @@ exit: } int -vchiq_platform_init_state(struct vchiq_state *state) -{ - struct vchiq_arm_state *platform_state; - char threadname[16]; - - platform_state = devm_kzalloc(state->dev, sizeof(*platform_state), GFP_KERNEL); - if (!platform_state) - return -ENOMEM; - - snprintf(threadname, sizeof(threadname), "vchiq-keep/%d", - state->id); - platform_state->ka_thread = kthread_create(&vchiq_keepalive_thread_func, - (void *)state, threadname); - if (IS_ERR(platform_state->ka_thread)) { - dev_err(state->dev, "couldn't create thread %s\n", threadname); - return PTR_ERR(platform_state->ka_thread); - } - - rwlock_init(&platform_state->susp_res_lock); - - init_completion(&platform_state->ka_evt); - atomic_set(&platform_state->ka_use_count, 0); - atomic_set(&platform_state->ka_use_ack_count, 0); - atomic_set(&platform_state->ka_release_count, 0); - - platform_state->state = state; - - state->platform_state = (struct opaque_platform_state *)platform_state; - - return 0; -} - -int vchiq_use_internal(struct vchiq_state *state, struct vchiq_service *service, enum USE_TYPE_E use_type) { @@ -1329,19 +1325,37 @@ out: return ret; } -void vchiq_platform_connected(struct vchiq_state *state) -{ - struct vchiq_arm_state *arm_state = vchiq_platform_get_arm_state(state); - - wake_up_process(arm_state->ka_thread); -} - void vchiq_platform_conn_state_changed(struct vchiq_state *state, enum vchiq_connstate oldstate, enum vchiq_connstate newstate) { + struct vchiq_arm_state *arm_state = vchiq_platform_get_arm_state(state); + char threadname[16]; + dev_dbg(state->dev, "suspend: %d: %s->%s\n", state->id, get_conn_state_name(oldstate), get_conn_state_name(newstate)); + if (state->conn_state != VCHIQ_CONNSTATE_CONNECTED) + return; + + write_lock_bh(&arm_state->susp_res_lock); + if (arm_state->first_connect) { + write_unlock_bh(&arm_state->susp_res_lock); + return; + } + + arm_state->first_connect = 1; + write_unlock_bh(&arm_state->susp_res_lock); + snprintf(threadname, sizeof(threadname), "vchiq-keep/%d", + state->id); + arm_state->ka_thread = kthread_create(&vchiq_keepalive_thread_func, + (void *)state, + threadname); + if (IS_ERR(arm_state->ka_thread)) { + dev_err(state->dev, "suspend: Couldn't create thread %s\n", + threadname); + } else { + wake_up_process(arm_state->ka_thread); + } } static const struct of_device_id vchiq_of_match[] = { diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c index e7b0c800a205..e2cac0898b8f 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c @@ -3343,7 +3343,6 @@ vchiq_connect_internal(struct vchiq_state *state, struct vchiq_instance *instanc return -EAGAIN; vchiq_set_conn_state(state, VCHIQ_CONNSTATE_CONNECTED); - vchiq_platform_connected(state); complete(&state->connect); } diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h index 3b5c0618e567..9b4e766990a4 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h @@ -575,8 +575,6 @@ int vchiq_send_remote_use(struct vchiq_state *state); int vchiq_send_remote_use_active(struct vchiq_state *state); -void vchiq_platform_connected(struct vchiq_state *state); - void vchiq_platform_conn_state_changed(struct vchiq_state *state, enum vchiq_connstate oldstate, enum vchiq_connstate newstate); diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 28febb95f8fa..b6c58a7e7b6a 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -1450,7 +1450,7 @@ int tb_dp_port_set_hops(struct tb_port *port, unsigned int video, return ret; data[0] &= ~ADP_DP_CS_0_VIDEO_HOPID_MASK; - data[1] &= ~ADP_DP_CS_1_AUX_RX_HOPID_MASK; + data[1] &= ~ADP_DP_CS_1_AUX_TX_HOPID_MASK; data[1] &= ~ADP_DP_CS_1_AUX_RX_HOPID_MASK; data[0] |= (video << ADP_DP_CS_0_VIDEO_HOPID_SHIFT) & @@ -3437,7 +3437,7 @@ void tb_sw_set_unplugged(struct tb_switch *sw) } } -static int tb_switch_set_wake(struct tb_switch *sw, unsigned int flags) +static int tb_switch_set_wake(struct tb_switch *sw, unsigned int flags, bool runtime) { if (flags) tb_sw_dbg(sw, "enabling wakeup: %#x\n", flags); @@ -3445,7 +3445,7 @@ static int tb_switch_set_wake(struct tb_switch *sw, unsigned int flags) tb_sw_dbg(sw, "disabling wakeup\n"); if (tb_switch_is_usb4(sw)) - return usb4_switch_set_wake(sw, flags); + return usb4_switch_set_wake(sw, flags, runtime); return tb_lc_set_wake(sw, flags); } @@ -3521,7 +3521,7 @@ int tb_switch_resume(struct tb_switch *sw, bool runtime) tb_switch_check_wakes(sw); /* Disable wakes */ - tb_switch_set_wake(sw, 0); + tb_switch_set_wake(sw, 0, true); err = tb_switch_tmu_init(sw); if (err) @@ -3603,7 +3603,7 @@ void tb_switch_suspend(struct tb_switch *sw, bool runtime) flags |= TB_WAKE_ON_USB4 | TB_WAKE_ON_USB3 | TB_WAKE_ON_PCIE; } - tb_switch_set_wake(sw, flags); + tb_switch_set_wake(sw, flags, runtime); if (tb_switch_is_usb4(sw)) usb4_switch_set_sleep(sw); diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index 87afd5a7c504..f503bad86413 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -1317,7 +1317,7 @@ int usb4_switch_read_uid(struct tb_switch *sw, u64 *uid); int usb4_switch_drom_read(struct tb_switch *sw, unsigned int address, void *buf, size_t size); bool usb4_switch_lane_bonding_possible(struct tb_switch *sw); -int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags); +int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags, bool runtime); int usb4_switch_set_sleep(struct tb_switch *sw); int usb4_switch_nvm_sector_size(struct tb_switch *sw); int usb4_switch_nvm_read(struct tb_switch *sw, unsigned int address, void *buf, diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index fce3c0f2354a..fdae76c8f728 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -403,12 +403,12 @@ bool usb4_switch_lane_bonding_possible(struct tb_switch *sw) * usb4_switch_set_wake() - Enabled/disable wake * @sw: USB4 router * @flags: Wakeup flags (%0 to disable) + * @runtime: Wake is being programmed during system runtime * * Enables/disables router to wake up from sleep. */ -int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags) +int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags, bool runtime) { - struct usb4_port *usb4; struct tb_port *port; u64 route = tb_route(sw); u32 val; @@ -438,13 +438,11 @@ int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags) val |= PORT_CS_19_WOU4; } else { bool configured = val & PORT_CS_19_PC; - usb4 = port->usb4; + bool wakeup = runtime || device_may_wakeup(&port->usb4->dev); - if (((flags & TB_WAKE_ON_CONNECT) && - device_may_wakeup(&usb4->dev)) && !configured) + if ((flags & TB_WAKE_ON_CONNECT) && wakeup && !configured) val |= PORT_CS_19_WOC; - if (((flags & TB_WAKE_ON_DISCONNECT) && - device_may_wakeup(&usb4->dev)) && configured) + if ((flags & TB_WAKE_ON_DISCONNECT) && wakeup && configured) val |= PORT_CS_19_WOD; if ((flags & TB_WAKE_ON_USB4) && configured) val |= PORT_CS_19_WOU4; diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index 508e8c6f01d4..884fefbfd5a1 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -954,7 +954,7 @@ static unsigned int dma_handle_tx(struct eg20t_port *priv) __func__); return 0; } - dma_sync_sg_for_device(port->dev, priv->sg_tx_p, nent, DMA_TO_DEVICE); + dma_sync_sg_for_device(port->dev, priv->sg_tx_p, num, DMA_TO_DEVICE); priv->desc_tx = desc; desc->callback = pch_dma_tx_complete; desc->callback_param = priv; diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c index cb3b127b06b6..22749ab0428a 100644 --- a/drivers/tty/serial/serial_base_bus.c +++ b/drivers/tty/serial/serial_base_bus.c @@ -13,6 +13,7 @@ #include <linux/device.h> #include <linux/idr.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/serial_core.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -93,6 +94,7 @@ static void serial_base_ctrl_release(struct device *dev) { struct serial_ctrl_device *ctrl_dev = to_serial_base_ctrl_device(dev); + of_node_put(dev->of_node); kfree(ctrl_dev); } @@ -140,6 +142,7 @@ static void serial_base_port_release(struct device *dev) { struct serial_port_device *port_dev = to_serial_base_port_device(dev); + of_node_put(dev->of_node); kfree(port_dev); } diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3e1215f7a9a0..256fe8c86828 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -5751,6 +5751,7 @@ static void port_event(struct usb_hub *hub, int port1) struct usb_device *hdev = hub->hdev; u16 portstatus, portchange; int i = 0; + int err; connect_change = test_bit(port1, hub->change_bits); clear_bit(port1, hub->event_bits); @@ -5847,8 +5848,11 @@ static void port_event(struct usb_hub *hub, int port1) } else if (!udev || !(portstatus & USB_PORT_STAT_CONNECTION) || udev->state == USB_STATE_NOTATTACHED) { dev_dbg(&port_dev->dev, "do warm reset, port only\n"); - if (hub_port_reset(hub, port1, NULL, - HUB_BH_RESET_TIME, true) < 0) + err = hub_port_reset(hub, port1, NULL, + HUB_BH_RESET_TIME, true); + if (!udev && err == -ENOTCONN) + connect_change = 0; + else if (err < 0) hub_port_disable(hub, port1, 1); } else { dev_dbg(&port_dev->dev, "do warm reset, full device\n"); diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index d5b622f78cf3..0637bfbc054e 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -5389,20 +5389,34 @@ int dwc2_gadget_enter_hibernation(struct dwc2_hsotg *hsotg) if (gusbcfg & GUSBCFG_ULPI_UTMI_SEL) { /* ULPI interface */ gpwrdn |= GPWRDN_ULPI_LATCH_EN_DURING_HIB_ENTRY; - } - dwc2_writel(hsotg, gpwrdn, GPWRDN); - udelay(10); + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); - /* Suspend the Phy Clock */ - pcgcctl = dwc2_readl(hsotg, PCGCTL); - pcgcctl |= PCGCTL_STOPPCLK; - dwc2_writel(hsotg, pcgcctl, PCGCTL); - udelay(10); + /* Suspend the Phy Clock */ + pcgcctl = dwc2_readl(hsotg, PCGCTL); + pcgcctl |= PCGCTL_STOPPCLK; + dwc2_writel(hsotg, pcgcctl, PCGCTL); + udelay(10); - gpwrdn = dwc2_readl(hsotg, GPWRDN); - gpwrdn |= GPWRDN_PMUACTV; - dwc2_writel(hsotg, gpwrdn, GPWRDN); - udelay(10); + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn |= GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + } else { + /* UTMI+ Interface */ + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + + gpwrdn = dwc2_readl(hsotg, GPWRDN); + gpwrdn |= GPWRDN_PMUACTV; + dwc2_writel(hsotg, gpwrdn, GPWRDN); + udelay(10); + + pcgcctl = dwc2_readl(hsotg, PCGCTL); + pcgcctl |= PCGCTL_STOPPCLK; + dwc2_writel(hsotg, pcgcctl, PCGCTL); + udelay(10); + } /* Set flag to indicate that we are in hibernation */ hsotg->hibernated = 1; diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 7334de85ad10..ca7e1c02773a 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -680,12 +680,12 @@ static int dwc3_qcom_probe(struct platform_device *pdev) ret = reset_control_deassert(qcom->resets); if (ret) { dev_err(&pdev->dev, "failed to deassert resets, err=%d\n", ret); - goto reset_assert; + return ret; } ret = clk_bulk_prepare_enable(qcom->num_clocks, qcom->clks); if (ret < 0) - goto reset_assert; + return ret; r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!r) { @@ -755,8 +755,6 @@ remove_core: dwc3_core_remove(&qcom->dwc); clk_disable: clk_bulk_disable_unprepare(qcom->num_clocks, qcom->clks); -reset_assert: - reset_control_assert(qcom->resets); return ret; } @@ -771,7 +769,6 @@ static void dwc3_qcom_remove(struct platform_device *pdev) clk_bulk_disable_unprepare(qcom->num_clocks, qcom->clks); dwc3_qcom_interconnect_exit(qcom); - reset_control_assert(qcom->resets); } static int dwc3_qcom_pm_suspend(struct device *dev) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index fba2a56dae97..f94ea196ce54 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1065,6 +1065,8 @@ static ssize_t webusb_landingPage_store(struct config_item *item, const char *pa unsigned int bytes_to_strip = 0; int l = len; + if (!len) + return len; if (page[l - 1] == '\n') { --l; ++bytes_to_strip; @@ -1188,6 +1190,8 @@ static ssize_t os_desc_qw_sign_store(struct config_item *item, const char *page, struct gadget_info *gi = os_desc_item_to_gadget_info(item); int res, l; + if (!len) + return len; l = min_t(int, len, OS_STRING_QW_SIGN_LEN >> 1); if (page[l - 1] == '\n') --l; diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 2dea9e42a0f8..ea5f0af1e8d2 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2369,8 +2369,7 @@ static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count) for (; count; --count, ++epfile) { BUG_ON(mutex_is_locked(&epfile->mutex)); if (epfile->dentry) { - d_delete(epfile->dentry); - dput(epfile->dentry); + simple_recursive_removal(epfile->dentry, NULL); epfile->dentry = NULL; } } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index fcce84a726f2..b51e132b0cd2 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1561,7 +1561,6 @@ static void destroy_ep_files (struct dev_data *dev) spin_lock_irq (&dev->lock); while (!list_empty(&dev->epfiles)) { struct ep_data *ep; - struct inode *parent; struct dentry *dentry; /* break link to FS */ @@ -1571,7 +1570,6 @@ static void destroy_ep_files (struct dev_data *dev) dentry = ep->dentry; ep->dentry = NULL; - parent = d_inode(dentry->d_parent); /* break link to controller */ mutex_lock(&ep->lock); @@ -1586,10 +1584,7 @@ static void destroy_ep_files (struct dev_data *dev) put_ep (ep); /* break link to dcache */ - inode_lock(parent); - d_delete (dentry); - dput (dentry); - inode_unlock(parent); + simple_recursive_removal(dentry, NULL); spin_lock_irq (&dev->lock); } diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 6869c58367f2..caf4d4cd4b75 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1913,6 +1913,7 @@ static int musb_gadget_stop(struct usb_gadget *g) * gadget driver here and have everything work; * that currently misbehaves. */ + usb_gadget_set_state(g, USB_STATE_NOTATTACHED); /* Force check of devctl register for PM runtime */ pm_runtime_mark_last_busy(musb->controller); @@ -2019,6 +2020,7 @@ void musb_g_disconnect(struct musb *musb) case OTG_STATE_B_PERIPHERAL: case OTG_STATE_B_IDLE: musb_set_state(musb, OTG_STATE_B_IDLE); + usb_gadget_set_state(&musb->g, USB_STATE_NOTATTACHED); break; case OTG_STATE_B_SRP_INIT: break; diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 6ac7a0a5cf07..abfcfca3f971 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -803,6 +803,8 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_AURORA_SCU_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, + { USB_DEVICE(FTDI_NDI_VID, FTDI_NDI_EMGUIDE_GEMINI_PID), + .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(TELLDUS_VID, TELLDUS_TELLSTICK_PID) }, { USB_DEVICE(NOVITUS_VID, NOVITUS_BONO_E_PID) }, { USB_DEVICE(FTDI_VID, RTSYSTEMS_USB_VX8_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 9acb6f837327..4cc1fae8acb9 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -204,6 +204,9 @@ #define FTDI_NDI_FUTURE_3_PID 0xDA73 /* NDI future device #3 */ #define FTDI_NDI_AURORA_SCU_PID 0xDA74 /* NDI Aurora SCU */ +#define FTDI_NDI_VID 0x23F2 +#define FTDI_NDI_EMGUIDE_GEMINI_PID 0x0003 /* NDI Emguide Gemini */ + /* * ChamSys Limited (www.chamsys.co.uk) USB wing/interface product IDs */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 27879cc57536..147ca50c94be 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1415,6 +1415,9 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(5) }, { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x60) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10c7, 0xff, 0xff, 0x30), /* Telit FE910C04 (ECM) */ + .driver_info = NCTRL(4) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10c7, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x30), /* Telit FN990B (MBIM) */ .driver_info = NCTRL(6) }, { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x40) }, @@ -2343,6 +2346,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe145, 0xff), /* Foxconn T99W651 RNDIS */ .driver_info = RSVD(5) | RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe167, 0xff), /* Foxconn T99W640 MBIM */ + .driver_info = RSVD(3) }, { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, { USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */ diff --git a/drivers/virt/coco/efi_secret/efi_secret.c b/drivers/virt/coco/efi_secret/efi_secret.c index 1864f9f80617..5946c5abeae8 100644 --- a/drivers/virt/coco/efi_secret/efi_secret.c +++ b/drivers/virt/coco/efi_secret/efi_secret.c @@ -31,8 +31,6 @@ struct efi_secret { struct dentry *secrets_dir; - struct dentry *fs_dir; - struct dentry *fs_files[EFI_SECRET_NUM_FILES]; void __iomem *secret_data; u64 secret_data_len; }; @@ -119,10 +117,8 @@ static void wipe_memory(void *addr, size_t size) static int efi_secret_unlink(struct inode *dir, struct dentry *dentry) { - struct efi_secret *s = efi_secret_get(); struct inode *inode = d_inode(dentry); struct secret_entry *e = (struct secret_entry *)inode->i_private; - int i; if (e) { /* Zero out the secret data */ @@ -132,19 +128,7 @@ static int efi_secret_unlink(struct inode *dir, struct dentry *dentry) inode->i_private = NULL; - for (i = 0; i < EFI_SECRET_NUM_FILES; i++) - if (s->fs_files[i] == dentry) - s->fs_files[i] = NULL; - - /* - * securityfs_remove tries to lock the directory's inode, but we reach - * the unlink callback when it's already locked - */ - inode_unlock(dir); - securityfs_remove(dentry); - inode_lock(dir); - - return 0; + return simple_unlink(inode, dentry); } static const struct inode_operations efi_secret_dir_inode_operations = { @@ -194,15 +178,6 @@ unmap: static void efi_secret_securityfs_teardown(struct platform_device *dev) { struct efi_secret *s = efi_secret_get(); - int i; - - for (i = (EFI_SECRET_NUM_FILES - 1); i >= 0; i--) { - securityfs_remove(s->fs_files[i]); - s->fs_files[i] = NULL; - } - - securityfs_remove(s->fs_dir); - s->fs_dir = NULL; securityfs_remove(s->secrets_dir); s->secrets_dir = NULL; @@ -217,7 +192,7 @@ static int efi_secret_securityfs_setup(struct platform_device *dev) unsigned char *ptr; struct secret_header *h; struct secret_entry *e; - struct dentry *dent; + struct dentry *dent, *dir; char guid_str[EFI_VARIABLE_GUID_LEN + 1]; ptr = (void __force *)s->secret_data; @@ -240,8 +215,6 @@ static int efi_secret_securityfs_setup(struct platform_device *dev) } s->secrets_dir = NULL; - s->fs_dir = NULL; - memset(s->fs_files, 0, sizeof(s->fs_files)); dent = securityfs_create_dir("secrets", NULL); if (IS_ERR(dent)) { @@ -251,14 +224,13 @@ static int efi_secret_securityfs_setup(struct platform_device *dev) } s->secrets_dir = dent; - dent = securityfs_create_dir("coco", s->secrets_dir); - if (IS_ERR(dent)) { + dir = securityfs_create_dir("coco", s->secrets_dir); + if (IS_ERR(dir)) { dev_err(&dev->dev, "Error creating coco securityfs directory entry err=%ld\n", - PTR_ERR(dent)); - return PTR_ERR(dent); + PTR_ERR(dir)); + return PTR_ERR(dir); } - d_inode(dent)->i_op = &efi_secret_dir_inode_operations; - s->fs_dir = dent; + d_inode(dir)->i_op = &efi_secret_dir_inode_operations; bytes_left = h->len - sizeof(*h); ptr += sizeof(*h); @@ -274,15 +246,14 @@ static int efi_secret_securityfs_setup(struct platform_device *dev) if (efi_guidcmp(e->guid, NULL_GUID)) { efi_guid_to_str(&e->guid, guid_str); - dent = securityfs_create_file(guid_str, 0440, s->fs_dir, (void *)e, + dent = securityfs_create_file(guid_str, 0440, dir, (void *)e, &efi_secret_bin_file_fops); if (IS_ERR(dent)) { dev_err(&dev->dev, "Error creating efi_secret securityfs entry\n"); ret = PTR_ERR(dent); goto err_cleanup; } - - s->fs_files[i++] = dent; + i++; } ptr += e->len; bytes_left -= e->len; diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 5061f192eafd..04795508a795 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -127,7 +127,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = { }; const struct dentry_operations v9fs_dentry_operations = { - .d_delete = always_delete_dentry, .d_release = v9fs_dentry_release, .d_unalias_trylock = v9fs_dentry_unalias_trylock, .d_unalias_unlock = v9fs_dentry_unalias_unlock, diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 489db161abc9..795c6388744c 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -134,10 +134,12 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, if (retval) goto release_sb; - if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) - sb->s_d_op = &v9fs_cached_dentry_operations; - else - sb->s_d_op = &v9fs_dentry_operations; + if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) { + set_default_d_op(sb, &v9fs_cached_dentry_operations); + } else { + set_default_d_op(sb, &v9fs_dentry_operations); + sb->s_d_flags |= DCACHE_DONTCACHE; + } inode = v9fs_get_new_inode_from_fid(v9ses, fid, sb); if (IS_ERR(inode)) { diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 017c48a80203..fdccdbbfc213 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -397,7 +397,7 @@ static int adfs_fill_super(struct super_block *sb, struct fs_context *fc) if (asb->s_ftsuffix) asb->s_namelen += 4; - sb->s_d_op = &adfs_dentry_operations; + set_default_d_op(sb, &adfs_dentry_operations); root = adfs_iget(sb, &root_obj); sb->s_root = d_make_root(root); if (!sb->s_root) { diff --git a/fs/affs/super.c b/fs/affs/super.c index 2fa40337776d..44f8aa883100 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -500,9 +500,9 @@ got_root: return PTR_ERR(root_inode); if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_INTL)) - sb->s_d_op = &affs_intl_dentry_operations; + set_default_d_op(sb, &affs_intl_dentry_operations); else - sb->s_d_op = &affs_dentry_operations; + set_default_d_op(sb, &affs_dentry_operations); sb->s_root = d_make_root(root_inode); if (!sb->s_root) { diff --git a/fs/afs/addr_prefs.c b/fs/afs/addr_prefs.c index c0384201b8fe..133736412c3d 100644 --- a/fs/afs/addr_prefs.c +++ b/fs/afs/addr_prefs.c @@ -48,7 +48,7 @@ static int afs_split_string(char **pbuf, char *strv[], unsigned int maxstrv) strv[count++] = p; /* Skip over word */ - while (!isspace(*p)) + while (!isspace(*p) && *p) p++; if (!*p) break; diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 0168bbf53fe0..f31359922e98 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -177,6 +177,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, VL_SERVICE, AFS_VL_PORT); if (IS_ERR(vllist)) { ret = PTR_ERR(vllist); + vllist = NULL; goto parse_failed; } diff --git a/fs/afs/super.c b/fs/afs/super.c index 25b306db6992..da407f2d6f0d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -483,9 +483,9 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) goto error; if (as->dyn_root) { - sb->s_d_op = &afs_dynroot_dentry_operations; + set_default_d_op(sb, &afs_dynroot_dentry_operations); } else { - sb->s_d_op = &afs_fs_dentry_operations; + set_default_d_op(sb, &afs_fs_dentry_operations); rcu_assign_pointer(as->volume->sb, sb); } diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index ee2edccaef70..f5c16ffba013 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -311,7 +311,7 @@ static int autofs_fill_super(struct super_block *s, struct fs_context *fc) s->s_blocksize_bits = 10; s->s_magic = AUTOFS_SUPER_MAGIC; s->s_op = &autofs_sops; - s->s_d_op = &autofs_dentry_operations; + set_default_d_op(s, &autofs_dentry_operations); s->s_time_gran = 1; /* diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index b375ad610acd..b58525ec7b4d 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -511,7 +511,8 @@ again: bch2_dev_usage_read_fast(ca, &req->usage); avail = dev_buckets_free(ca, req->usage, req->watermark); - if (req->usage.buckets[BCH_DATA_need_discard] > avail) + if (req->usage.buckets[BCH_DATA_need_discard] > + min(avail, ca->mi.nbuckets >> 7)) bch2_dev_do_discards(ca); if (req->usage.buckets[BCH_DATA_need_gc_gens] > avail) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 91e0aa796e6b..83c9860e6b82 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -85,7 +85,7 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) six_unlock_intent(&b->c.lock); } -static void __btree_node_data_free(struct btree_cache *bc, struct btree *b) +void __btree_node_data_free(struct btree *b) { BUG_ON(!list_empty(&b->list)); BUG_ON(btree_node_hashed(b)); @@ -112,16 +112,17 @@ static void __btree_node_data_free(struct btree_cache *bc, struct btree *b) munmap(b->aux_data, btree_aux_data_bytes(b)); #endif b->aux_data = NULL; - - btree_node_to_freedlist(bc, b); } static void btree_node_data_free(struct btree_cache *bc, struct btree *b) { BUG_ON(list_empty(&b->list)); list_del_init(&b->list); + + __btree_node_data_free(b); + --bc->nr_freeable; - __btree_node_data_free(bc, b); + btree_node_to_freedlist(bc, b); } static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, @@ -185,10 +186,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) { - struct btree_cache *bc = &c->btree_cache; - struct btree *b; - - b = __btree_node_mem_alloc(c, GFP_KERNEL); + struct btree *b = __btree_node_mem_alloc(c, GFP_KERNEL); if (!b) return NULL; @@ -198,8 +196,6 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) } bch2_btree_lock_init(&b->c, 0, GFP_KERNEL); - - __bch2_btree_node_to_freelist(bc, b); return b; } @@ -524,7 +520,8 @@ restart: --touched;; } else if (!btree_node_reclaim(c, b)) { __bch2_btree_node_hash_remove(bc, b); - __btree_node_data_free(bc, b); + __btree_node_data_free(b); + btree_node_to_freedlist(bc, b); freed++; bc->nr_freed++; @@ -652,9 +649,12 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) bch2_recalc_btree_reserve(c); - for (i = 0; i < bc->nr_reserve; i++) - if (!__bch2_btree_node_mem_alloc(c)) + for (i = 0; i < bc->nr_reserve; i++) { + struct btree *b = __bch2_btree_node_mem_alloc(c); + if (!b) goto err; + __bch2_btree_node_to_freelist(bc, b); + } list_splice_init(&bc->live[0].list, &bc->freeable); diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index ca3c1b145330..be275f87a60e 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -30,6 +30,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsig void bch2_btree_cache_cannibalize_unlock(struct btree_trans *); int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *); +void __btree_node_data_free(struct btree *); struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *); struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index e874a4357f64..590cd29f3e86 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -568,9 +568,9 @@ static int __btree_err(int ret, bch2_mark_btree_validate_failure(failed, ca->dev_idx); struct extent_ptr_decoded pick; - have_retry = !bch2_bkey_pick_read_device(c, + have_retry = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), - failed, &pick, -1); + failed, &pick, -1) == 1; } if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) @@ -615,7 +615,6 @@ static int __btree_err(int ret, goto out; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); - ret = __bch2_topology_error(c, &out); break; } @@ -644,7 +643,6 @@ static int __btree_err(int ret, goto out; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); - ret = __bch2_topology_error(c, &out); break; } print: @@ -1297,9 +1295,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted); - if (updated_range) - bch2_btree_node_drop_keys_outside_node(b); - i = &b->data->keys; for (k = i->start; k != vstruct_last(i);) { struct bkey tmp; @@ -1337,6 +1332,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_node_reset_sib_u64s(b); + if (updated_range) + bch2_btree_node_drop_keys_outside_node(b); + /* * XXX: * @@ -1408,7 +1406,7 @@ static void btree_node_read_work(struct work_struct *work) ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), &failed, &rb->pick, -1); - if (ret) { + if (ret <= 0) { set_btree_node_read_error(b); break; } diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 23d8c62ea4b6..a3fb07c60e25 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -75,39 +75,6 @@ static inline u64 bkey_journal_seq(struct bkey_s_c k) } } -static bool found_btree_node_is_readable(struct btree_trans *trans, - struct found_btree_node *f) -{ - struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp; - - found_btree_node_to_key(&tmp.k, f); - - struct btree *b = bch2_btree_node_get_noiter(trans, &tmp.k, f->btree_id, f->level, false); - bool ret = !IS_ERR_OR_NULL(b); - if (!ret) - return ret; - - f->sectors_written = b->written; - f->journal_seq = le64_to_cpu(b->data->keys.journal_seq); - - struct bkey_s_c k; - struct bkey unpacked; - struct btree_node_iter iter; - for_each_btree_node_key_unpack(b, k, &iter, &unpacked) - f->journal_seq = max(f->journal_seq, bkey_journal_seq(k)); - - six_unlock_read(&b->c.lock); - - /* - * We might update this node's range; if that happens, we need the node - * to be re-read so the read path can trim keys that are no longer in - * this node - */ - if (b != btree_node_root(trans->c, b)) - bch2_btree_node_evict(trans, &tmp.k); - return ret; -} - static int found_btree_node_cmp_cookie(const void *_l, const void *_r) { const struct found_btree_node *l = _l; @@ -159,17 +126,17 @@ static const struct min_heap_callbacks found_btree_node_heap_cbs = { }; static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, - struct bio *bio, struct btree_node *bn, u64 offset) + struct btree *b, struct bio *bio, u64 offset) { struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes); + struct btree_node *bn = b->data; bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); bio->bi_iter.bi_sector = offset; - bch2_bio_map(bio, bn, PAGE_SIZE); + bch2_bio_map(bio, b->data, c->opts.block_size); u64 submit_time = local_clock(); submit_bio_wait(bio); - bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status); if (bio->bi_status) { @@ -217,7 +184,28 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, }; rcu_read_unlock(); - if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) { + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); + bio->bi_iter.bi_sector = offset; + bch2_bio_map(bio, b->data, c->opts.btree_node_size); + + submit_time = local_clock(); + submit_bio_wait(bio); + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status); + + found_btree_node_to_key(&b->key, &n); + + CLASS(printbuf, buf)(); + if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) { + /* read_done will swap out b->data for another buffer */ + bn = b->data; + /* + * Grab journal_seq here because we want the max journal_seq of + * any bset; read_done sorts down to a single set and picks the + * max journal_seq + */ + n.journal_seq = le64_to_cpu(bn->keys.journal_seq), + n.sectors_written = b->written; + mutex_lock(&f->lock); if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) { bch_err(c, "try_read_btree_node() can't handle endian conversion"); @@ -237,12 +225,20 @@ static int read_btree_nodes_worker(void *p) struct find_btree_nodes_worker *w = p; struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes); struct bch_dev *ca = w->ca; - void *buf = (void *) __get_free_page(GFP_KERNEL); - struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL); unsigned long last_print = jiffies; + struct btree *b = NULL; + struct bio *bio = NULL; + + b = __bch2_btree_node_mem_alloc(c); + if (!b) { + bch_err(c, "read_btree_nodes_worker: error allocating buf"); + w->f->ret = -ENOMEM; + goto err; + } - if (!buf || !bio) { - bch_err(c, "read_btree_nodes_worker: error allocating bio/buf"); + bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL); + if (!bio) { + bch_err(c, "read_btree_nodes_worker: error allocating bio"); w->f->ret = -ENOMEM; goto err; } @@ -266,11 +262,13 @@ static int read_btree_nodes_worker(void *p) !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c))) continue; - try_read_btree_node(w->f, ca, bio, buf, sector); + try_read_btree_node(w->f, ca, b, bio, sector); } err: + if (b) + __btree_node_data_free(b); + kfree(b); bio_put(bio); - free_page((unsigned long) buf); enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan); closure_put(w->cl); kfree(w); diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 901f643ead83..07c2a0f73cc2 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -153,8 +153,6 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) c->verify_data = __bch2_btree_node_mem_alloc(c); if (!c->verify_data) goto out; - - list_del_init(&c->verify_data->list); } BUG_ON(b->nsets != 1); @@ -586,6 +584,8 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, i->ubuf = buf; i->size = size; i->ret = 0; + + int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); restart: seqmutex_lock(&c->btree_trans_lock); list_sort(&c->btree_trans_list, list_ptr_order_cmp); @@ -599,6 +599,11 @@ restart: if (!closure_get_not_zero(&trans->ref)) continue; + if (!trans->srcu_held) { + closure_put(&trans->ref); + continue; + } + u32 seq = seqmutex_unlock(&c->btree_trans_lock); bch2_btree_trans_to_text(&i->buf, trans); @@ -620,6 +625,8 @@ restart: } seqmutex_unlock(&c->btree_trans_lock); unlocked: + srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); + if (i->buf.allocation_failure) ret = -ENOMEM; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index a18d0f78704d..28875c5c86ad 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -13,6 +13,7 @@ #include <linux/dcache.h> +#ifdef CONFIG_UNICODE int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, const struct qstr *str, struct qstr *out_cf) { @@ -33,6 +34,7 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, *out_cf = (struct qstr) QSTR_INIT(buf, ret); return 0; } +#endif static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) { @@ -254,6 +256,7 @@ int bch2_dirent_init_name(struct bch_fs *c, if (!bch2_fs_casefold_enabled(c)) return -EOPNOTSUPP; +#ifdef CONFIG_UNICODE memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len); char *cf_out = &dirent->v.d_cf_name_block.d_names[name->len]; @@ -279,6 +282,7 @@ int bch2_dirent_init_name(struct bch_fs *c, dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len); EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_len); +#endif } unsigned u64s = dirent_val_u64s(name->len, cf_len); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 1e17199cc5c7..0417608c18d5 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -23,8 +23,16 @@ struct bch_fs; struct bch_hash_info; struct bch_inode_info; +#ifdef CONFIG_UNICODE int bch2_casefold(struct btree_trans *, const struct bch_hash_info *, const struct qstr *, struct qstr *); +#else +static inline int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) +{ + return -EOPNOTSUPP; +} +#endif static inline int bch2_maybe_casefold(struct btree_trans *trans, const struct bch_hash_info *info, diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 86a842f1e88e..acc3b7b67704 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -282,7 +282,6 @@ x(EIO, sb_not_downgraded) \ x(EIO, btree_node_write_all_failed) \ x(EIO, btree_node_read_error) \ - x(EIO, btree_node_read_validate_error) \ x(EIO, btree_need_topology_repair) \ x(EIO, bucket_ref_update) \ x(EIO, trigger_alloc) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index b2a6c041e165..267e73d9d7e6 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -103,7 +103,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) return bch_err_throw(c, btree_need_topology_repair); } else { return bch2_run_explicit_recovery_pass(c, out, BCH_RECOVERY_PASS_check_topology, 0) ?: - bch_err_throw(c, btree_node_read_validate_error); + bch_err_throw(c, btree_need_topology_repair); } } @@ -633,7 +633,9 @@ err: * log_fsck_err()s: that would require us to track for every error type * which recovery pass corrects it, to get the fsck exit status correct: */ - if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) { + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + /* nothing */ + } else if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) { set_bit(BCH_FS_errors_fixed, &c->flags); } else { set_bit(BCH_FS_errors_not_fixed, &c->flags); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 036e4ad95987..83cbd77dcb9c 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -50,19 +50,17 @@ void bch2_io_failures_to_text(struct printbuf *out, struct bch_io_failures *failed) { static const char * const error_types[] = { - "io", "checksum", "ec reconstruct", NULL + "btree validate", "io", "checksum", "ec reconstruct", NULL }; for (struct bch_dev_io_failures *f = failed->devs; f < failed->devs + failed->nr; f++) { unsigned errflags = - ((!!f->failed_io) << 0) | - ((!!f->failed_csum_nr) << 1) | - ((!!f->failed_ec) << 2); - - if (!errflags) - continue; + ((!!f->failed_btree_validate) << 0) | + ((!!f->failed_io) << 1) | + ((!!f->failed_csum_nr) << 2) | + ((!!f->failed_ec) << 3); bch2_printbuf_make_room(out, 1024); out->atomic++; @@ -77,7 +75,9 @@ void bch2_io_failures_to_text(struct printbuf *out, prt_char(out, ' '); - if (is_power_of_2(errflags)) { + if (!errflags) { + prt_str(out, "no error - confused"); + } else if (is_power_of_2(errflags)) { prt_bitflags(out, error_types, errflags); prt_str(out, " error"); } else { diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index dbf161e4311a..15c1e890d299 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -12,6 +12,7 @@ #include "fs.h" #include "fsck.h" #include "inode.h" +#include "io_misc.h" #include "keylist.h" #include "namei.h" #include "recovery_passes.h" @@ -1919,33 +1920,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, "extent type past end of inode %llu:%u, i_size %llu\n%s", i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - struct bkey_i *whiteout = bch2_trans_kmalloc(trans, sizeof(*whiteout)); - ret = PTR_ERR_OR_ZERO(whiteout); - if (ret) - goto err; - - bkey_init(&whiteout->k); - whiteout->k.p = SPOS(k.k->p.inode, - last_block, - i->inode.bi_snapshot); - bch2_key_resize(&whiteout->k, - min(KEY_SIZE_MAX & (~0 << c->block_bits), - U64_MAX - whiteout->k.p.offset)); - - - /* - * Need a normal (not BTREE_ITER_all_snapshots) - * iterator, if we're deleting in a different - * snapshot and need to emit a whiteout - */ - struct btree_iter iter2; - bch2_trans_iter_init(trans, &iter2, BTREE_ID_extents, - bkey_start_pos(&whiteout->k), - BTREE_ITER_intent); - ret = bch2_btree_iter_traverse(trans, &iter2) ?: - bch2_trans_update(trans, &iter2, whiteout, - BTREE_UPDATE_internal_snapshot_node); - bch2_trans_iter_exit(trans, &iter2); + ret = snapshots_seen_add_inorder(c, s, i->inode.bi_snapshot) ?: + bch2_fpunch_snapshot(trans, + SPOS(i->inode.bi_inum, + last_block, + i->inode.bi_snapshot), + POS(i->inode.bi_inum, U64_MAX)); if (ret) goto err; diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index bf72b1d2e2cb..07023667a475 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -135,6 +135,33 @@ err_noprint: return ret; } +/* For fsck */ +int bch2_fpunch_snapshot(struct btree_trans *trans, struct bpos start, struct bpos end) +{ + u32 restart_count = trans->restart_count; + struct bch_fs *c = trans->c; + struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); + unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); + struct bkey_i delete; + + int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents, + start, end, 0, k, + &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + bkey_init(&delete.k); + delete.k.p = iter.pos; + + /* create the biggest key we can */ + bch2_key_resize(&delete.k, max_sectors); + bch2_cut_back(end, &delete); + + bch2_extent_trim_atomic(trans, &iter, &delete) ?: + bch2_trans_update(trans, &iter, &delete, 0); + })); + + bch2_disk_reservation_put(c, &disk_res); + return ret ?: trans_was_restarted(trans, restart_count); +} + /* * Returns -BCH_ERR_transacton_restart if we had to drop locks: */ diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h index 9cb44a7c43c1..b93e4d4b3c0c 100644 --- a/fs/bcachefs/io_misc.h +++ b/fs/bcachefs/io_misc.h @@ -5,6 +5,8 @@ int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *, u64, struct bch_io_opts, s64 *, struct write_point_specifier); + +int bch2_fpunch_snapshot(struct btree_trans *, struct bpos, struct bpos); int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, subvol_inum, u64, s64 *); int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index cd184b219a65..e0874ad9a6cf 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -166,6 +166,7 @@ static noinline void promote_free(struct bch_read_bio *rbio) BUG_ON(ret); async_object_list_del(c, promote, op->list_idx); + async_object_list_del(c, rbio, rbio->list_idx); bch2_data_update_exit(&op->write); @@ -456,6 +457,10 @@ static void bch2_rbio_done(struct bch_read_bio *rbio) if (rbio->start_time) bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], rbio->start_time); +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS + if (rbio->list_idx) + async_object_list_del(rbio->c, rbio, rbio->list_idx); +#endif bio_endio(&rbio->bio); } diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index f22b05e02c1e..ddfeb0dafc9d 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1082,6 +1082,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou if (open && !*blocked) { __bch2_journal_block(j); + s.v = atomic64_read_acquire(&j->reservations.counter); *blocked = true; } diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index dd3f3434c1b0..9e028dbcc3d0 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1767,6 +1767,7 @@ static CLOSURE_CALLBACK(journal_write_done) closure_wake_up(&c->freelist_wait); bch2_reset_alloc_cursors(c); + do_discards = true; } j->seq_ondisk = seq; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index cd6201741c59..0042d43b8e57 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -170,6 +170,12 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne return (struct journal_space) { 0, 0 }; /* + * It's possible for bucket size to be misaligned w.r.t. the filesystem + * block size: + */ + min_bucket_size = round_down(min_bucket_size, block_sectors(c)); + + /* * We sorted largest to smallest, and we want the smallest out of the * @nr_devs_want largest devices: */ diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 27e68d470ad0..5e6de91a8763 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -71,7 +71,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, if (ret) return ret; - struct bch_dev *ca = bch2_dev_tryget(c, k.k->p.inode); + struct bch_dev *ca = bch2_dev_bucket_tryget(c, k.k->p); if (!ca) goto out; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d0b7e3a36a54..c94debb12d2f 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -273,24 +273,35 @@ static int bch2_journal_replay_key(struct btree_trans *trans, goto out; struct btree_path *path = btree_iter_path(trans, &iter); - if (unlikely(!btree_path_node(path, k->level) && - !k->allocated)) { + if (unlikely(!btree_path_node(path, k->level))) { struct bch_fs *c = trans->c; + CLASS(printbuf, buf)(); + prt_str(&buf, "btree="); + bch2_btree_id_to_text(&buf, k->btree_id); + prt_printf(&buf, " level=%u ", k->level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k->k)); + if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)| BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) { - bch_err(c, "have key in journal replay for btree depth that does not exist, confused"); + bch_err(c, "have key in journal replay for btree depth that does not exist, confused\n%s", + buf.buf); ret = -EINVAL; } -#if 0 + + if (!k->allocated) { + bch_notice(c, "dropping key in journal replay for depth that does not exist because we're recovering from scan\n%s", + buf.buf); + k->overwritten = true; + goto out; + } + bch2_trans_iter_exit(trans, &iter); bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, BTREE_MAX_DEPTH, 0, iter_flags); ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_increase_depth(trans, iter.path, 0) ?: -BCH_ERR_transaction_restart_nested; -#endif - k->overwritten = true; goto out; } diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index c09ed2dd4639..6a039e011064 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -360,7 +360,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, !(r->passes_complete & BIT_ULL(pass)); bool ratelimit = flags & RUN_RECOVERY_PASS_ratelimit; - if (!(in_recovery && (flags & RUN_RECOVERY_PASS_nopersistent))) { + if (!(flags & RUN_RECOVERY_PASS_nopersistent)) { struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); } diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 432fbf4fc334..a839f960cd4a 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -675,44 +675,6 @@ static void bm_evict_inode(struct inode *inode) } /** - * unlink_binfmt_dentry - remove the dentry for the binary type handler - * @dentry: dentry associated with the binary type handler - * - * Do the actual filesystem work to remove a dentry for a registered binary - * type handler. Since binfmt_misc only allows simple files to be created - * directly under the root dentry of the filesystem we ensure that we are - * indeed passed a dentry directly beneath the root dentry, that the inode - * associated with the root dentry is locked, and that it is a regular file we - * are asked to remove. - */ -static void unlink_binfmt_dentry(struct dentry *dentry) -{ - struct dentry *parent = dentry->d_parent; - struct inode *inode, *parent_inode; - - /* All entries are immediate descendants of the root dentry. */ - if (WARN_ON_ONCE(dentry->d_sb->s_root != parent)) - return; - - /* We only expect to be called on regular files. */ - inode = d_inode(dentry); - if (WARN_ON_ONCE(!S_ISREG(inode->i_mode))) - return; - - /* The parent inode must be locked. */ - parent_inode = d_inode(parent); - if (WARN_ON_ONCE(!inode_is_locked(parent_inode))) - return; - - if (simple_positive(dentry)) { - dget(dentry); - simple_unlink(parent_inode, dentry); - d_delete(dentry); - dput(dentry); - } -} - -/** * remove_binfmt_handler - remove a binary type handler * @misc: handle to binfmt_misc instance * @e: binary type handler to remove @@ -729,7 +691,7 @@ static void remove_binfmt_handler(struct binfmt_misc *misc, Node *e) write_lock(&misc->entries_lock); list_del_init(&e->list); write_unlock(&misc->entries_lock); - unlink_binfmt_dentry(e->dentry); + locked_recursive_removal(e->dentry, NULL); } /* /<entry> */ @@ -772,7 +734,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, case 3: /* Delete this handler. */ inode = d_inode(inode->i_sb->s_root); - inode_lock(inode); + inode_lock_nested(inode, I_MUTEX_PARENT); /* * In order to add new element or remove elements from the list @@ -922,7 +884,7 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer, case 3: /* Delete all handlers. */ inode = d_inode(file_inode(file)->i_sb->s_root); - inode_lock(inode); + inode_lock_nested(inode, I_MUTEX_PARENT); /* * In order to add new element or remove elements from the list diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index c352f3ae0385..ea95c90c8474 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -114,6 +114,8 @@ config BTRFS_EXPERIMENTAL - extent tree v2 - complex rework of extent tracking + - large folio support + If unsure, say N. config BTRFS_FS_REF_VERIFY diff --git a/fs/btrfs/accessors.c b/fs/btrfs/accessors.c index e3716516ca38..861c7d92c437 100644 --- a/fs/btrfs/accessors.c +++ b/fs/btrfs/accessors.c @@ -9,27 +9,24 @@ #include "fs.h" #include "accessors.h" -static bool check_setget_bounds(const struct extent_buffer *eb, - const void *ptr, unsigned off, int size) +static void __cold report_setget_bounds(const struct extent_buffer *eb, + const void *ptr, unsigned off, int size) { - const unsigned long member_offset = (unsigned long)ptr + off; + unsigned long member_offset = (unsigned long)ptr + off; - if (unlikely(member_offset + size > eb->len)) { - btrfs_warn(eb->fs_info, - "bad eb member %s: ptr 0x%lx start %llu member offset %lu size %d", - (member_offset > eb->len ? "start" : "end"), - (unsigned long)ptr, eb->start, member_offset, size); - return false; - } - - return true; + btrfs_warn(eb->fs_info, + "bad eb member %s: ptr 0x%lx start %llu member offset %lu size %d", + (member_offset > eb->len ? "start" : "end"), + (unsigned long)ptr, eb->start, member_offset, size); } -void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *eb) +/* Copy bytes from @src1 and @src2 to @dest. */ +static __always_inline void memcpy_split_src(char *dest, const char *src1, + const char *src2, const size_t len1, + const size_t total) { - token->eb = eb; - token->kaddr = folio_address(eb->folios[0]); - token->offset = 0; + memcpy(dest, src1, len1); + memcpy(dest + len1, src2, total - len1); } /* @@ -41,11 +38,6 @@ void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *e * - btrfs_set_8 (for 8/16/32/64) * - btrfs_get_8 (for 8/16/32/64) * - * Generic helpers with a token (cached address of the most recently accessed - * page): - * - btrfs_set_token_8 (for 8/16/32/64) - * - btrfs_get_token_8 (for 8/16/32/64) - * * The set/get functions handle data spanning two pages transparently, in case * metadata block size is larger than page. Every pointer to metadata items is * an offset into the extent buffer page array, cast to a specific type. This @@ -57,118 +49,66 @@ void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *e */ #define DEFINE_BTRFS_SETGET_BITS(bits) \ -u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \ - const void *ptr, unsigned long off) \ -{ \ - const unsigned long member_offset = (unsigned long)ptr + off; \ - const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \ - const unsigned long oil = get_eb_offset_in_folio(token->eb, \ - member_offset);\ - const int unit_size = token->eb->folio_size; \ - const int unit_shift = token->eb->folio_shift; \ - const int size = sizeof(u##bits); \ - u8 lebytes[sizeof(u##bits)]; \ - const int part = unit_size - oil; \ - \ - ASSERT(token); \ - ASSERT(token->kaddr); \ - ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \ - if (token->offset <= member_offset && \ - member_offset + size <= token->offset + unit_size) { \ - return get_unaligned_le##bits(token->kaddr + oil); \ - } \ - token->kaddr = folio_address(token->eb->folios[idx]); \ - token->offset = idx << unit_shift; \ - if (INLINE_EXTENT_BUFFER_PAGES == 1 || oil + size <= unit_size) \ - return get_unaligned_le##bits(token->kaddr + oil); \ - \ - memcpy(lebytes, token->kaddr + oil, part); \ - token->kaddr = folio_address(token->eb->folios[idx + 1]); \ - token->offset = (idx + 1) << unit_shift; \ - memcpy(lebytes + part, token->kaddr, size - part); \ - return get_unaligned_le##bits(lebytes); \ -} \ u##bits btrfs_get_##bits(const struct extent_buffer *eb, \ const void *ptr, unsigned long off) \ { \ const unsigned long member_offset = (unsigned long)ptr + off; \ const unsigned long idx = get_eb_folio_index(eb, member_offset);\ - const unsigned long oil = get_eb_offset_in_folio(eb, \ - member_offset);\ - const int unit_size = eb->folio_size; \ - char *kaddr = folio_address(eb->folios[idx]); \ - const int size = sizeof(u##bits); \ - const int part = unit_size - oil; \ - u8 lebytes[sizeof(u##bits)]; \ - \ - ASSERT(check_setget_bounds(eb, ptr, off, size)); \ - if (INLINE_EXTENT_BUFFER_PAGES == 1 || oil + size <= unit_size) \ - return get_unaligned_le##bits(kaddr + oil); \ - \ - memcpy(lebytes, kaddr + oil, part); \ - kaddr = folio_address(eb->folios[idx + 1]); \ - memcpy(lebytes + part, kaddr, size - part); \ - return get_unaligned_le##bits(lebytes); \ -} \ -void btrfs_set_token_##bits(struct btrfs_map_token *token, \ - const void *ptr, unsigned long off, \ - u##bits val) \ -{ \ - const unsigned long member_offset = (unsigned long)ptr + off; \ - const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \ - const unsigned long oil = get_eb_offset_in_folio(token->eb, \ + const unsigned long oif = get_eb_offset_in_folio(eb, \ member_offset);\ - const int unit_size = token->eb->folio_size; \ - const int unit_shift = token->eb->folio_shift; \ - const int size = sizeof(u##bits); \ + char *kaddr = folio_address(eb->folios[idx]) + oif; \ + const int part = eb->folio_size - oif; \ u8 lebytes[sizeof(u##bits)]; \ - const int part = unit_size - oil; \ \ - ASSERT(token); \ - ASSERT(token->kaddr); \ - ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \ - if (token->offset <= member_offset && \ - member_offset + size <= token->offset + unit_size) { \ - put_unaligned_le##bits(val, token->kaddr + oil); \ - return; \ + if (unlikely(member_offset + sizeof(u##bits) > eb->len)) { \ + report_setget_bounds(eb, ptr, off, sizeof(u##bits)); \ + return 0; \ } \ - token->kaddr = folio_address(token->eb->folios[idx]); \ - token->offset = idx << unit_shift; \ - if (INLINE_EXTENT_BUFFER_PAGES == 1 || \ - oil + size <= unit_size) { \ - put_unaligned_le##bits(val, token->kaddr + oil); \ - return; \ + if (INLINE_EXTENT_BUFFER_PAGES == 1 || sizeof(u##bits) == 1 || \ + likely(sizeof(u##bits) <= part)) \ + return get_unaligned_le##bits(kaddr); \ + \ + if (sizeof(u##bits) == 2) { \ + lebytes[0] = *kaddr; \ + kaddr = folio_address(eb->folios[idx + 1]); \ + lebytes[1] = *kaddr; \ + } else { \ + memcpy_split_src(lebytes, kaddr, \ + folio_address(eb->folios[idx + 1]), \ + part, sizeof(u##bits)); \ } \ - put_unaligned_le##bits(val, lebytes); \ - memcpy(token->kaddr + oil, lebytes, part); \ - token->kaddr = folio_address(token->eb->folios[idx + 1]); \ - token->offset = (idx + 1) << unit_shift; \ - memcpy(token->kaddr, lebytes + part, size - part); \ + return get_unaligned_le##bits(lebytes); \ } \ void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \ unsigned long off, u##bits val) \ { \ const unsigned long member_offset = (unsigned long)ptr + off; \ const unsigned long idx = get_eb_folio_index(eb, member_offset);\ - const unsigned long oil = get_eb_offset_in_folio(eb, \ + const unsigned long oif = get_eb_offset_in_folio(eb, \ member_offset);\ - const int unit_size = eb->folio_size; \ - char *kaddr = folio_address(eb->folios[idx]); \ - const int size = sizeof(u##bits); \ - const int part = unit_size - oil; \ + char *kaddr = folio_address(eb->folios[idx]) + oif; \ + const int part = eb->folio_size - oif; \ u8 lebytes[sizeof(u##bits)]; \ \ - ASSERT(check_setget_bounds(eb, ptr, off, size)); \ - if (INLINE_EXTENT_BUFFER_PAGES == 1 || \ - oil + size <= unit_size) { \ - put_unaligned_le##bits(val, kaddr + oil); \ + if (unlikely(member_offset + sizeof(u##bits) > eb->len)) { \ + report_setget_bounds(eb, ptr, off, sizeof(u##bits)); \ + return; \ + } \ + if (INLINE_EXTENT_BUFFER_PAGES == 1 || sizeof(u##bits) == 1 || \ + likely(sizeof(u##bits) <= part)) { \ + put_unaligned_le##bits(val, kaddr); \ return; \ } \ - \ put_unaligned_le##bits(val, lebytes); \ - memcpy(kaddr + oil, lebytes, part); \ - kaddr = folio_address(eb->folios[idx + 1]); \ - memcpy(kaddr, lebytes + part, size - part); \ + if (sizeof(u##bits) == 2) { \ + *kaddr = lebytes[0]; \ + kaddr = folio_address(eb->folios[idx + 1]); \ + *kaddr = lebytes[1]; \ + } else { \ + memcpy(kaddr, lebytes, part); \ + kaddr = folio_address(eb->folios[idx + 1]); \ + memcpy(kaddr, lebytes + part, sizeof(u##bits) - part); \ + } \ } DEFINE_BTRFS_SETGET_BITS(8) diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h index 15ea6348800b..99b3ced12805 100644 --- a/fs/btrfs/accessors.h +++ b/fs/btrfs/accessors.h @@ -16,14 +16,6 @@ struct extent_buffer; -struct btrfs_map_token { - struct extent_buffer *eb; - char *kaddr; - unsigned long offset; -}; - -void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *eb); - /* * Some macros to generate set/get functions for the struct fields. This * assumes there is a lefoo_to_cpu for every type, so lets make a simple one @@ -56,11 +48,6 @@ static inline void put_unaligned_le8(u8 val, void *p) sizeof_field(type, member))) #define DECLARE_BTRFS_SETGET_BITS(bits) \ -u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \ - const void *ptr, unsigned long off); \ -void btrfs_set_token_##bits(struct btrfs_map_token *token, \ - const void *ptr, unsigned long off, \ - u##bits val); \ u##bits btrfs_get_##bits(const struct extent_buffer *eb, \ const void *ptr, unsigned long off); \ void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \ @@ -83,18 +70,6 @@ static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \ { \ static_assert(sizeof(u##bits) == sizeof_field(type, member)); \ btrfs_set_##bits(eb, s, offsetof(type, member), val); \ -} \ -static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \ - const type *s) \ -{ \ - static_assert(sizeof(u##bits) == sizeof_field(type, member)); \ - return btrfs_get_token_##bits(token, s, offsetof(type, member));\ -} \ -static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\ - type *s, u##bits val) \ -{ \ - static_assert(sizeof(u##bits) == sizeof_field(type, member)); \ - btrfs_set_token_##bits(token, s, offsetof(type, member), val); \ } #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ @@ -479,18 +454,6 @@ static inline void btrfs_set_item_##member(const struct extent_buffer *eb, \ int slot, u32 val) \ { \ btrfs_set_raw_item_##member(eb, btrfs_item_nr(eb, slot), val); \ -} \ -static inline u32 btrfs_token_item_##member(struct btrfs_map_token *token, \ - int slot) \ -{ \ - struct btrfs_item *item = btrfs_item_nr(token->eb, slot); \ - return btrfs_token_raw_item_##member(token, item); \ -} \ -static inline void btrfs_set_token_item_##member(struct btrfs_map_token *token, \ - int slot, u32 val) \ -{ \ - struct btrfs_item *item = btrfs_item_nr(token->eb, slot); \ - btrfs_set_token_raw_item_##member(token, item, val); \ } BTRFS_ITEM_SETGET_FUNCS(offset) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index ed497f5f8d1b..6a450be293b1 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -733,7 +733,6 @@ static int resolve_indirect_refs(struct btrfs_backref_walk_ctx *ctx, struct preftrees *preftrees, struct share_check *sc) { - int err; int ret = 0; struct ulist *parents; struct ulist_node *node; @@ -752,6 +751,7 @@ static int resolve_indirect_refs(struct btrfs_backref_walk_ctx *ctx, */ while ((rnode = rb_first_cached(&preftrees->indirect.root))) { struct prelim_ref *ref; + int ret2; ref = rb_entry(rnode, struct prelim_ref, rbnode); if (WARN(ref->parent, @@ -773,18 +773,18 @@ static int resolve_indirect_refs(struct btrfs_backref_walk_ctx *ctx, ret = BACKREF_FOUND_SHARED; goto out; } - err = resolve_indirect_ref(ctx, path, preftrees, ref, parents); + ret2 = resolve_indirect_ref(ctx, path, preftrees, ref, parents); /* * we can only tolerate ENOENT,otherwise,we should catch error * and return directly. */ - if (err == -ENOENT) { + if (ret2 == -ENOENT) { prelim_ref_insert(ctx->fs_info, &preftrees->direct, ref, NULL); continue; - } else if (err) { + } else if (ret2) { free_pref(ref); - ret = err; + ret = ret2; goto out; } @@ -2201,7 +2201,6 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, int ret; u64 flags; u64 size = 0; - u32 item_size; const struct extent_buffer *eb; struct btrfs_extent_item *ei; struct btrfs_key key; @@ -2244,7 +2243,6 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, } eb = path->nodes[0]; - item_size = btrfs_item_size(eb, path->slots[0]); ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); flags = btrfs_extent_flags(eb, ei); @@ -2252,7 +2250,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, btrfs_debug(fs_info, "logical %llu is at position %llu within the extent (%llu EXTENT_ITEM %llu) flags %#llx size %u", logical, logical - found_key->objectid, found_key->objectid, - found_key->offset, flags, item_size); + found_key->offset, flags, btrfs_item_size(eb, path->slots[0])); WARN_ON(!flags_ret); if (flags_ret) { @@ -2548,17 +2546,20 @@ static int build_ino_list(u64 inum, u64 offset, u64 num_bytes, u64 root, void *c } int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, - struct btrfs_path *path, void *ctx, bool ignore_offset) { struct btrfs_backref_walk_ctx walk_ctx = { 0 }; int ret; u64 flags = 0; struct btrfs_key found_key; - int search_commit_root = path->search_commit_root; + struct btrfs_path *path; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; ret = extent_from_logical(fs_info, logical, path, &found_key, &flags); - btrfs_release_path(path); + btrfs_free_path(path); if (ret < 0) return ret; if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) @@ -2571,8 +2572,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, walk_ctx.extent_item_pos = logical - found_key.objectid; walk_ctx.fs_info = fs_info; - return iterate_extent_inodes(&walk_ctx, search_commit_root, - build_ino_list, ctx); + return iterate_extent_inodes(&walk_ctx, false, build_ino_list, ctx); } static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off, @@ -3161,18 +3161,14 @@ void btrfs_backref_release_cache(struct btrfs_backref_cache *cache) ASSERT(!cache->nr_edges); } -void btrfs_backref_link_edge(struct btrfs_backref_edge *edge, - struct btrfs_backref_node *lower, - struct btrfs_backref_node *upper, - int link_which) +static void btrfs_backref_link_edge(struct btrfs_backref_edge *edge, + struct btrfs_backref_node *lower, + struct btrfs_backref_node *upper) { ASSERT(upper && lower && upper->level == lower->level + 1); edge->node[LOWER] = lower; edge->node[UPPER] = upper; - if (link_which & LINK_LOWER) - list_add_tail(&edge->list[LOWER], &lower->upper); - if (link_which & LINK_UPPER) - list_add_tail(&edge->list[UPPER], &upper->lower); + list_add_tail(&edge->list[LOWER], &lower->upper); } /* * Handle direct tree backref @@ -3242,7 +3238,7 @@ static int handle_direct_tree_backref(struct btrfs_backref_cache *cache, ASSERT(upper->checked); INIT_LIST_HEAD(&edge->list[UPPER]); } - btrfs_backref_link_edge(edge, cur, upper, LINK_LOWER); + btrfs_backref_link_edge(edge, cur, upper); return 0; } @@ -3412,7 +3408,7 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans, if (!upper->owner) upper->owner = btrfs_header_owner(eb); } - btrfs_backref_link_edge(edge, lower, upper, LINK_LOWER); + btrfs_backref_link_edge(edge, lower, upper); if (rb_node) { btrfs_put_root(root); @@ -3570,7 +3566,7 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache, ASSERT(start->checked); - rb_node = rb_simple_insert(&cache->rb_root, start->bytenr, &start->rb_node); + rb_node = rb_simple_insert(&cache->rb_root, &start->simple_node); if (rb_node) btrfs_backref_panic(cache->fs_info, start->bytenr, -EEXIST); @@ -3621,8 +3617,7 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache, return -EUCLEAN; } - rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr, - &upper->rb_node); + rb_node = rb_simple_insert(&cache->rb_root, &upper->simple_node); if (unlikely(rb_node)) { btrfs_backref_panic(cache->fs_info, upper->bytenr, -EEXIST); return -EUCLEAN; diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 953637115956..34b0193a181c 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -226,8 +226,7 @@ int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx, iterate_extent_inodes_t *iterate, void *user_ctx); int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, - struct btrfs_path *path, void *ctx, - bool ignore_offset); + void *ctx, bool ignore_offset); int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); @@ -313,10 +312,15 @@ int btrfs_backref_iter_next(struct btrfs_backref_iter *iter); * Represent a tree block in the backref cache */ struct btrfs_backref_node { - struct { - struct rb_node rb_node; - u64 bytenr; - }; /* Use rb_simple_node for search/insert */ + union{ + /* Use rb_simple_node for search/insert */ + struct { + struct rb_node rb_node; + u64 bytenr; + }; + + struct rb_simple_node simple_node; + }; /* * This is a sanity check, whenever we COW a block we will update @@ -423,13 +427,6 @@ struct btrfs_backref_node *btrfs_backref_alloc_node( struct btrfs_backref_edge *btrfs_backref_alloc_edge( struct btrfs_backref_cache *cache); -#define LINK_LOWER (1U << 0) -#define LINK_UPPER (1U << 1) - -void btrfs_backref_link_edge(struct btrfs_backref_edge *edge, - struct btrfs_backref_node *lower, - struct btrfs_backref_node *upper, - int link_which); void btrfs_backref_free_node(struct btrfs_backref_cache *cache, struct btrfs_backref_node *node); void btrfs_backref_free_edge(struct btrfs_backref_cache *cache, diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index f7d8958b7327..50b5fc1c06d7 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -27,12 +27,12 @@ struct btrfs_failed_bio { }; /* Is this a data path I/O that needs storage layer checksum and repair? */ -static inline bool is_data_bbio(struct btrfs_bio *bbio) +static inline bool is_data_bbio(const struct btrfs_bio *bbio) { return bbio->inode && is_data_inode(bbio->inode); } -static bool bbio_has_ordered_extent(struct btrfs_bio *bbio) +static bool bbio_has_ordered_extent(const struct btrfs_bio *bbio) { return is_data_bbio(bbio) && btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE; } @@ -134,14 +134,14 @@ void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status) } } -static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror) +static int next_repair_mirror(const struct btrfs_failed_bio *fbio, int cur_mirror) { if (cur_mirror == fbio->num_copies) return cur_mirror + 1 - fbio->num_copies; return cur_mirror + 1; } -static int prev_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror) +static int prev_repair_mirror(const struct btrfs_failed_bio *fbio, int cur_mirror) { if (cur_mirror == 1) return fbio->num_copies; @@ -165,12 +165,6 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio, struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio); int mirror = repair_bbio->mirror_num; - /* - * We can only trigger this for data bio, which doesn't support larger - * folios yet. - */ - ASSERT(folio_order(page_folio(bv->bv_page)) == 0); - if (repair_bbio->bio.bi_status || !btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) { bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ); @@ -301,7 +295,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de btrfs_bio_end_io(bbio, bbio->bio.bi_status); } -static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev) +static void btrfs_log_dev_io_error(const struct bio *bio, struct btrfs_device *dev) { if (!dev || !dev->bdev) return; @@ -316,8 +310,8 @@ static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS); } -static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_fs_info *fs_info, - struct bio *bio) +static struct workqueue_struct *btrfs_end_io_wq(const struct btrfs_fs_info *fs_info, + const struct bio *bio) { if (bio->bi_opf & REQ_META) return fs_info->endio_meta_workers; @@ -439,7 +433,7 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio) ASSERT(btrfs_dev_is_sequential(dev, physical)); bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT; } - btrfs_debug_in_rcu(dev->fs_info, + btrfs_debug(dev->fs_info, "%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u", __func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector, (unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev), @@ -845,7 +839,7 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, goto out_bio_uninit; } - btrfs_info_rl_in_rcu(fs_info, + btrfs_info_rl(fs_info, "read error corrected: ino %llu off %llu (dev %s sector %llu)", ino, start, btrfs_dev_name(smap.dev), smap.physical >> SECTOR_SHIFT); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 5b0cb04b2b93..9bf282d2453c 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -34,6 +34,19 @@ int btrfs_should_fragment_free_space(const struct btrfs_block_group *block_group } #endif +static inline bool has_unwritten_metadata(struct btrfs_block_group *block_group) +{ + /* The meta_write_pointer is available only on the zoned setup. */ + if (!btrfs_is_zoned(block_group->fs_info)) + return false; + + if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) + return false; + + return block_group->start + block_group->alloc_offset > + block_group->meta_write_pointer; +} + /* * Return target flags in extended format or 0 if restripe for this chunk_type * is not in progress @@ -832,8 +845,8 @@ out: static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg) { - btrfs_clear_extent_bits(&bg->fs_info->excluded_extents, bg->start, - bg->start + bg->length - 1, EXTENT_DIRTY); + btrfs_clear_extent_bit(&bg->fs_info->excluded_extents, bg->start, + bg->start + bg->length - 1, EXTENT_DIRTY, NULL); } static noinline void caching_thread(struct btrfs_work *work) @@ -877,7 +890,7 @@ static noinline void caching_thread(struct btrfs_work *work) */ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && !(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags))) - ret = load_free_space_tree(caching_ctl); + ret = btrfs_load_free_space_tree(caching_ctl); else ret = load_extent_tree_free(caching_ctl); done: @@ -1235,7 +1248,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, * another task to attempt to create another block group with the same * item key (and failing with -EEXIST and a transaction abort). */ - ret = remove_block_group_free_space(trans, block_group); + ret = btrfs_remove_block_group_free_space(trans, block_group); if (ret) goto out; @@ -1244,6 +1257,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, goto out; spin_lock(&block_group->lock); + /* + * Hitting this WARN means we removed a block group with an unwritten + * region. It will cause "unable to find chunk map for logical" errors. + */ + if (WARN_ON(has_unwritten_metadata(block_group))) + btrfs_warn(fs_info, + "block group %llu is removed before metadata write out", + block_group->start); + set_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags); /* @@ -1403,7 +1425,7 @@ out: if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) { btrfs_info(cache->fs_info, "unable to make block group %llu ro", cache->start); - btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0); + btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, false); } return ret; } @@ -1436,14 +1458,14 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans, */ mutex_lock(&fs_info->unused_bg_unpin_mutex); if (prev_trans) { - ret = btrfs_clear_extent_bits(&prev_trans->pinned_extents, start, end, - EXTENT_DIRTY); + ret = btrfs_clear_extent_bit(&prev_trans->pinned_extents, start, end, + EXTENT_DIRTY, NULL); if (ret) goto out; } - ret = btrfs_clear_extent_bits(&trans->transaction->pinned_extents, start, end, - EXTENT_DIRTY); + ret = btrfs_clear_extent_bit(&trans->transaction->pinned_extents, start, end, + EXTENT_DIRTY, NULL); out: mutex_unlock(&fs_info->unused_bg_unpin_mutex); if (prev_trans) @@ -1586,8 +1608,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * needing to allocate extents from the block group. */ used = btrfs_space_info_used(space_info, true); - if (space_info->total_bytes - block_group->length < used && - block_group->zone_unusable < block_group->length) { + if ((space_info->total_bytes - block_group->length < used && + block_group->zone_unusable < block_group->length) || + has_unwritten_metadata(block_group)) { /* * Add a reference for the list, compensate for the ref * drop under the "next" label for the @@ -1616,8 +1639,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) ret = btrfs_zone_finish(block_group); if (ret < 0) { btrfs_dec_block_group_ro(block_group); - if (ret == -EAGAIN) + if (ret == -EAGAIN) { + btrfs_link_bg_list(block_group, &retry_list); ret = 0; + } goto next; } @@ -1843,7 +1868,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) */ list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp); while (!list_empty(&fs_info->reclaim_bgs)) { - u64 zone_unusable; u64 used; u64 reserved; int ret = 0; @@ -1910,16 +1934,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) goto next; } - /* - * Cache the zone_unusable value before turning the block group - * to read only. As soon as the block group is read only it's - * zone_unusable value gets moved to the block group's read-only - * bytes and isn't available for calculations anymore. We also - * cache it before unlocking the block group, to prevent races - * (reports from KCSAN and such tools) with tasks updating it. - */ - zone_unusable = bg->zone_unusable; - spin_unlock(&bg->lock); spin_unlock(&space_info->lock); @@ -1963,14 +1977,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) reserved = bg->reserved; spin_unlock(&bg->lock); - btrfs_info(fs_info, - "reclaiming chunk %llu with %llu%% used %llu%% reserved %llu%% unusable", - bg->start, - div64_u64(used * 100, bg->length), - div64_u64(reserved * 100, bg->length), - div64_u64(zone_unusable * 100, bg->length)); trace_btrfs_reclaim_block_group(bg); - ret = btrfs_relocate_chunk(fs_info, bg->start); + ret = btrfs_relocate_chunk(fs_info, bg->start, false); if (ret) { btrfs_dec_block_group_ro(bg); btrfs_err(fs_info, "error relocating chunk %llu", @@ -2372,7 +2380,7 @@ static int read_one_block_group(struct btrfs_fs_info *info, cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi); cache->space_info = btrfs_find_space_info(info, cache->flags); - set_free_space_tree_thresholds(cache); + btrfs_set_free_space_tree_thresholds(cache); if (need_clear) { /* @@ -2791,7 +2799,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) block_group->length); if (ret) btrfs_abort_transaction(trans, ret); - add_block_group_free_space(trans, block_group); + btrfs_add_block_group_free_space(trans, block_group); /* * If we restriped during balance, we may have added a new raid @@ -2889,7 +2897,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags); cache->length = size; - set_free_space_tree_thresholds(cache); + btrfs_set_free_space_tree_thresholds(cache); cache->flags = type; cache->cached = BTRFS_CACHE_FINISHED; cache->global_root_id = calculate_global_root_id(fs_info, cache->start); @@ -3636,9 +3644,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1); ret = update_block_group_item(trans, path, cache); - } - if (ret) + if (ret) + btrfs_abort_transaction(trans, ret); + } else if (ret) { btrfs_abort_transaction(trans, ret); + } } /* If its not on the io list, we need to put the block group */ @@ -4298,7 +4308,7 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans, if (left < bytes && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu", left, bytes, type); - btrfs_dump_space_info(fs_info, info, 0, 0); + btrfs_dump_space_info(fs_info, info, 0, false); } if (left < bytes) { @@ -4443,7 +4453,7 @@ static void check_removing_space_info(struct btrfs_space_info *space_info) * indicates a real bug if this happens. */ if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_may_use > 0)) - btrfs_dump_space_info(info, space_info, 0, 0); + btrfs_dump_space_info(info, space_info, 0, false); /* * If there was a failure to cleanup a log tree, very likely due to an @@ -4454,7 +4464,7 @@ static void check_removing_space_info(struct btrfs_space_info *space_info) if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) || !BTRFS_FS_LOG_CLEANUP_ERROR(info)) { if (WARN_ON(space_info->bytes_reserved > 0)) - btrfs_dump_space_info(info, space_info, 0, 0); + btrfs_dump_space_info(info, space_info, 0, false); } WARN_ON(space_info->reclaim_size > 0); diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index aa176cc9a324..a8bb8429c966 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -246,6 +246,11 @@ struct btrfs_block_group { /* Lock for free space tree operations. */ struct mutex free_space_lock; + /* Protected by @free_space_lock. */ + bool using_free_space_bitmaps; + /* Protected by @free_space_lock. */ + bool using_free_space_bitmaps_cached; + /* * Number of extents in this block group used for swap files. * All accesses protected by the spinlock 'lock'. diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index a79fa0726f1d..b99fb0273292 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -525,6 +525,19 @@ static inline void btrfs_update_inode_mapping_flags(struct btrfs_inode *inode) mapping_set_stable_writes(inode->vfs_inode.i_mapping); } +static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode) +{ + /* Metadata inode should not reach here. */ + ASSERT(is_data_inode(inode)); + + /* We only allow BITS_PER_LONGS blocks for each bitmap. */ +#ifdef CONFIG_BTRFS_EXPERIMENTAL + mapping_set_folio_order_range(inode->vfs_inode.i_mapping, 0, + ilog2(((BITS_PER_LONG << inode->root->fs_info->sectorsize_bits) + >> PAGE_SHIFT))); +#endif +} + /* Array of bytes with variable length, hexadecimal format 0x1234 */ #define CSUM_FMT "0x%*phN" #define CSUM_FMT_VALUE(size, bytes) size, bytes diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 48d07939fee4..d09d622016ef 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -282,8 +282,8 @@ static noinline void end_compressed_writeback(const struct compressed_bio *cb) { struct inode *inode = &cb->bbio.inode->vfs_inode; struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); - unsigned long index = cb->start >> PAGE_SHIFT; - unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT; + pgoff_t index = cb->start >> PAGE_SHIFT; + const pgoff_t end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT; struct folio_batch fbatch; int i; int ret; @@ -415,7 +415,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, int *memstall, unsigned long *pflags) { struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); - unsigned long end_index; + pgoff_t end_index; struct bio *orig_bio = &cb->orig_bbio->bio; u64 cur = cb->orig_bbio->file_offset + orig_bio->bi_iter.bi_size; u64 isize = i_size_read(inode); @@ -446,8 +446,8 @@ static noinline int add_ra_bio_pages(struct inode *inode, end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; while (cur < compressed_end) { - u64 page_end; - u64 pg_index = cur >> PAGE_SHIFT; + pgoff_t page_end; + pgoff_t pg_index = cur >> PAGE_SHIFT; u32 add_size; if (pg_index > end_index) @@ -789,8 +789,8 @@ static void btrfs_init_workspace_manager(int type) */ workspace = alloc_workspace(type, 0); if (IS_ERR(workspace)) { - pr_warn( - "BTRFS: cannot preallocate compression workspace, will try later\n"); + btrfs_warn(NULL, + "cannot preallocate compression workspace, will try later"); } else { atomic_set(&wsm->total_ws, 1); wsm->free_ws = 1; @@ -888,9 +888,9 @@ again: /* once per minute */ 60 * HZ, /* no burst */ 1); - if (__ratelimit(&_rs)) { - pr_warn("BTRFS: no compression workspaces, low memory, retrying\n"); - } + if (__ratelimit(&_rs)) + btrfs_warn(NULL, + "no compression workspaces, low memory, retrying"); } goto again; } @@ -975,7 +975,7 @@ static int btrfs_compress_set_level(unsigned int type, int level) if (level == 0) level = ops->default_level; else - level = min(max(level, ops->min_level), ops->max_level); + level = clamp(level, ops->min_level, ops->max_level); return level; } @@ -1482,7 +1482,7 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end, struct heuristic_ws *ws) { struct page *page; - u64 index, index_end; + pgoff_t index, index_end; u32 i, curr_sample_pos; u8 *in_data; diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index d34c4341eaf4..1b38e707bbd9 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -13,6 +13,7 @@ #include <linux/wait.h> #include <linux/pagemap.h> #include "bio.h" +#include "fs.h" #include "messages.h" struct address_space; @@ -77,12 +78,10 @@ struct compressed_bio { /* @range_end must be exclusive. */ static inline u32 btrfs_calc_input_length(struct folio *folio, u64 range_end, u64 cur) { - const u64 folio_end = folio_pos(folio) + folio_size(folio); - /* @cur must be inside the folio. */ ASSERT(folio_pos(folio) <= cur); - ASSERT(cur < folio_end); - return min(range_end, folio_end) - cur; + ASSERT(cur < folio_end(folio)); + return min(range_end, folio_end(folio)) - cur; } int __init btrfs_init_compress(void); @@ -114,6 +113,8 @@ enum btrfs_compression_type { BTRFS_COMPRESS_LZO = 2, BTRFS_COMPRESS_ZSTD = 3, BTRFS_NR_COMPRESS_TYPES = 4, + + BTRFS_DEFRAG_DONT_COMPRESS, }; struct workspace_manager { diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a2e7979372cc..74e6d7f3d266 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -198,7 +198,7 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root) * the inc_not_zero dance and if it doesn't work then * synchronize_rcu and try again. */ - if (atomic_inc_not_zero(&eb->refs)) { + if (refcount_inc_not_zero(&eb->refs)) { rcu_read_unlock(); break; } @@ -283,15 +283,26 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); - WARN_ON(btrfs_header_generation(buf) > trans->transid); - if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) + if (unlikely(btrfs_header_generation(buf) > trans->transid)) { + btrfs_tree_unlock(cow); + free_extent_buffer(cow); + ret = -EUCLEAN; + btrfs_abort_transaction(trans, ret); + return ret; + } + + if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) { ret = btrfs_inc_ref(trans, root, cow, 1); - else + if (ret) + btrfs_abort_transaction(trans, ret); + } else { ret = btrfs_inc_ref(trans, root, cow, 0); + if (ret) + btrfs_abort_transaction(trans, ret); + } if (ret) { btrfs_tree_unlock(cow); free_extent_buffer(cow); - btrfs_abort_transaction(trans, ret); return ret; } @@ -303,9 +314,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, /* * check if the tree block can be shared by multiple trees */ -bool btrfs_block_can_be_shared(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *buf) +bool btrfs_block_can_be_shared(const struct btrfs_trans_handle *trans, + const struct btrfs_root *root, + const struct extent_buffer *buf) { const u64 buf_gen = btrfs_header_generation(buf); @@ -549,7 +560,7 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, ret); goto error_unlock_cow; } - atomic_inc(&cow->refs); + refcount_inc(&cow->refs); rcu_assign_pointer(root->node, cow); ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf, @@ -602,9 +613,9 @@ error_unlock_cow: return ret; } -static inline int should_cow_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *buf) +static inline int should_cow_block(const struct btrfs_trans_handle *trans, + const struct btrfs_root *root, + const struct extent_buffer *buf) { if (btrfs_is_testing(root->fs_info)) return 0; @@ -724,7 +735,7 @@ int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_ke * Slot may point to the total number of items (i.e. one position beyond the last * key) if the key is bigger than the last key in the extent buffer. */ -int btrfs_bin_search(struct extent_buffer *eb, int first_slot, +int btrfs_bin_search(const struct extent_buffer *eb, int first_slot, const struct btrfs_key *key, int *slot) { unsigned long p; @@ -1081,7 +1092,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* update the path */ if (left) { if (btrfs_header_nritems(left) > orig_slot) { - atomic_inc(&left->refs); + refcount_inc(&left->refs); /* left was locked after cow */ path->nodes[level] = left; path->slots[level + 1] -= 1; @@ -1268,7 +1279,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, * to the block in 'slot', and triggering ra on them. */ static void reada_for_search(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, + const struct btrfs_path *path, int level, int slot, u64 objectid) { struct extent_buffer *node; @@ -1350,7 +1361,7 @@ static void reada_for_search(struct btrfs_fs_info *fs_info, } } -static noinline void reada_for_balance(struct btrfs_path *path, int level) +static noinline void reada_for_balance(const struct btrfs_path *path, int level) { struct extent_buffer *parent; int slot; @@ -1446,8 +1457,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, u64 blocknr; struct extent_buffer *tmp = NULL; int ret = 0; + int ret2; int parent_level; - int err; bool read_tmp = false; bool tmp_locked = false; bool path_released = false; @@ -1505,9 +1516,9 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, } /* Now we're allowed to do a blocking uptodate check. */ - err = btrfs_read_extent_buffer(tmp, &check); - if (err) { - ret = err; + ret2 = btrfs_read_extent_buffer(tmp, &check); + if (ret2) { + ret = ret2; goto out; } @@ -1548,9 +1559,9 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, } /* Now we're allowed to do a blocking uptodate check. */ - err = btrfs_read_extent_buffer(tmp, &check); - if (err) { - ret = err; + ret2 = btrfs_read_extent_buffer(tmp, &check); + if (ret2) { + ret = ret2; goto out; } @@ -1685,7 +1696,7 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, if (p->search_commit_root) { b = root->commit_root; - atomic_inc(&b->refs); + refcount_inc(&b->refs); level = btrfs_header_level(b); /* * Ensure that all callers have set skip_locking when @@ -1794,7 +1805,7 @@ static int finish_need_commit_sem_search(struct btrfs_path *path) return 0; } -static inline int search_for_key_slot(struct extent_buffer *eb, +static inline int search_for_key_slot(const struct extent_buffer *eb, int search_low_slot, const struct btrfs_key *key, int prev_cmp, @@ -1928,15 +1939,14 @@ static int search_leaf(struct btrfs_trans_handle *trans, ASSERT(leaf_free_space >= 0); if (leaf_free_space < ins_len) { - int err; - - err = split_leaf(trans, root, key, path, ins_len, - (ret == 0)); - ASSERT(err <= 0); - if (WARN_ON(err > 0)) - err = -EUCLEAN; - if (err) - ret = err; + int ret2; + + ret2 = split_leaf(trans, root, key, path, ins_len, (ret == 0)); + ASSERT(ret2 <= 0); + if (WARN_ON(ret2 > 0)) + ret2 = -EUCLEAN; + if (ret2) + ret = ret2; } } @@ -1982,7 +1992,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *b; int slot; int ret; - int err; int level; int lowest_unlock = 1; /* everything at write_lock_level or lower must be write locked */ @@ -2053,6 +2062,7 @@ again: while (b) { int dec = 0; + int ret2; level = btrfs_header_level(b); @@ -2081,16 +2091,15 @@ again: } if (last_level) - err = btrfs_cow_block(trans, root, b, NULL, 0, - &b, - BTRFS_NESTING_COW); + ret2 = btrfs_cow_block(trans, root, b, NULL, 0, + &b, BTRFS_NESTING_COW); else - err = btrfs_cow_block(trans, root, b, - p->nodes[level + 1], - p->slots[level + 1], &b, - BTRFS_NESTING_COW); - if (err) { - ret = err; + ret2 = btrfs_cow_block(trans, root, b, + p->nodes[level + 1], + p->slots[level + 1], &b, + BTRFS_NESTING_COW); + if (ret2) { + ret = ret2; goto done; } } @@ -2138,12 +2147,12 @@ cow_done: slot--; } p->slots[level] = slot; - err = setup_nodes_for_search(trans, root, p, b, level, ins_len, - &write_lock_level); - if (err == -EAGAIN) + ret2 = setup_nodes_for_search(trans, root, p, b, level, ins_len, + &write_lock_level); + if (ret2 == -EAGAIN) goto again; - if (err) { - ret = err; + if (ret2) { + ret = ret2; goto done; } b = p->nodes[level]; @@ -2169,11 +2178,11 @@ cow_done: goto done; } - err = read_block_for_search(root, p, &b, slot, key); - if (err == -EAGAIN && !p->nowait) + ret2 = read_block_for_search(root, p, &b, slot, key); + if (ret2 == -EAGAIN && !p->nowait) goto again; - if (err) { - ret = err; + if (ret2) { + ret = ret2; goto done; } @@ -2236,7 +2245,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, struct extent_buffer *b; int slot; int ret; - int err; int level; int lowest_unlock = 1; u8 lowest_level = 0; @@ -2261,6 +2269,7 @@ again: while (b) { int dec = 0; + int ret2; level = btrfs_header_level(b); p->nodes[level] = b; @@ -2296,11 +2305,11 @@ again: goto done; } - err = read_block_for_search(root, p, &b, slot, key); - if (err == -EAGAIN && !p->nowait) + ret2 = read_block_for_search(root, p, &b, slot, key); + if (ret2 == -EAGAIN && !p->nowait) goto again; - if (err) { - ret = err; + if (ret2) { + ret = ret2; goto done; } @@ -2872,6 +2881,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, if (ret < 0) { int ret2; + btrfs_clear_buffer_dirty(trans, c); ret2 = btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1); if (ret2 < 0) btrfs_abort_transaction(trans, ret2); @@ -2885,7 +2895,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, free_extent_buffer(old); add_root_to_dirty_list(root); - atomic_inc(&c->refs); + refcount_inc(&c->refs); path->nodes[level] = c; path->locks[level] = BTRFS_WRITE_LOCK; path->slots[level] = 0; @@ -3100,7 +3110,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = right->fs_info; struct extent_buffer *left = path->nodes[0]; struct extent_buffer *upper = path->nodes[1]; - struct btrfs_map_token token; struct btrfs_disk_key disk_key; int slot; u32 i; @@ -3174,13 +3183,12 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, copy_leaf_items(right, left, 0, left_nritems - push_items, push_items); /* update the item pointers */ - btrfs_init_map_token(&token, right); right_nritems += push_items; btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(fs_info); for (i = 0; i < right_nritems; i++) { - push_space -= btrfs_token_item_size(&token, i); - btrfs_set_token_item_offset(&token, i, push_space); + push_space -= btrfs_item_size(right, i); + btrfs_set_item_offset(right, i, push_space); } left_nritems -= push_items; @@ -3323,7 +3331,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, int ret = 0; u32 this_item_size; u32 old_left_item_size; - struct btrfs_map_token token; if (empty) nr = min(right_nritems, max_slot); @@ -3371,13 +3378,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, old_left_nritems = btrfs_header_nritems(left); BUG_ON(old_left_nritems <= 0); - btrfs_init_map_token(&token, left); old_left_item_size = btrfs_item_offset(left, old_left_nritems - 1); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { u32 ioff; - ioff = btrfs_token_item_offset(&token, i); - btrfs_set_token_item_offset(&token, i, + ioff = btrfs_item_offset(left, i); + btrfs_set_item_offset(left, i, ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size)); } btrfs_set_header_nritems(left, old_left_nritems + push_items); @@ -3398,13 +3404,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, btrfs_header_nritems(right) - push_items); } - btrfs_init_map_token(&token, right); right_nritems -= push_items; btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(fs_info); for (i = 0; i < right_nritems; i++) { - push_space = push_space - btrfs_token_item_size(&token, i); - btrfs_set_token_item_offset(&token, i, push_space); + push_space = push_space - btrfs_item_size(right, i); + btrfs_set_item_offset(right, i, push_space); } btrfs_mark_buffer_dirty(trans, left); @@ -3518,7 +3523,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans, int i; int ret; struct btrfs_disk_key disk_key; - struct btrfs_map_token token; nritems = nritems - mid; btrfs_set_header_nritems(right, nritems); @@ -3531,12 +3535,11 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans, rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_data_end(l, mid); - btrfs_init_map_token(&token, right); for (i = 0; i < nritems; i++) { u32 ioff; - ioff = btrfs_token_item_offset(&token, i); - btrfs_set_token_item_offset(&token, i, ioff + rt_data_off); + ioff = btrfs_item_offset(right, i); + btrfs_set_item_offset(right, i, ioff + rt_data_off); } btrfs_set_header_nritems(l, mid); @@ -4002,7 +4005,6 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans, unsigned int old_size; unsigned int size_diff; int i; - struct btrfs_map_token token; leaf = path->nodes[0]; slot = path->slots[0]; @@ -4025,12 +4027,11 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans, * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ - btrfs_init_map_token(&token, leaf); for (i = slot; i < nritems; i++) { u32 ioff; - ioff = btrfs_token_item_offset(&token, i); - btrfs_set_token_item_offset(&token, i, ioff + size_diff); + ioff = btrfs_item_offset(leaf, i); + btrfs_set_item_offset(leaf, i, ioff + size_diff); } /* shift the data */ @@ -4093,7 +4094,6 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans, unsigned int old_data; unsigned int old_size; int i; - struct btrfs_map_token token; leaf = path->nodes[0]; @@ -4119,12 +4119,11 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans, * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ - btrfs_init_map_token(&token, leaf); for (i = slot; i < nritems; i++) { u32 ioff; - ioff = btrfs_token_item_offset(&token, i); - btrfs_set_token_item_offset(&token, i, ioff - data_size); + ioff = btrfs_item_offset(leaf, i); + btrfs_set_item_offset(leaf, i, ioff - data_size); } /* shift the data */ @@ -4164,7 +4163,6 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans, struct btrfs_disk_key disk_key; struct extent_buffer *leaf; int slot; - struct btrfs_map_token token; u32 total_size; /* @@ -4192,7 +4190,6 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans, BUG(); } - btrfs_init_map_token(&token, leaf); if (slot != nritems) { unsigned int old_data = btrfs_item_data_end(leaf, slot); @@ -4210,8 +4207,8 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans, for (i = slot; i < nritems; i++) { u32 ioff; - ioff = btrfs_token_item_offset(&token, i); - btrfs_set_token_item_offset(&token, i, + ioff = btrfs_item_offset(leaf, i); + btrfs_set_item_offset(leaf, i, ioff - batch->total_data_size); } /* shift the items */ @@ -4228,8 +4225,8 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans, btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]); btrfs_set_item_key(leaf, &disk_key, slot + i); data_end -= batch->data_sizes[i]; - btrfs_set_token_item_offset(&token, slot + i, data_end); - btrfs_set_token_item_size(&token, slot + i, batch->data_sizes[i]); + btrfs_set_item_offset(leaf, slot + i, data_end); + btrfs_set_item_size(leaf, slot + i, batch->data_sizes[i]); } btrfs_set_header_nritems(leaf, nritems + batch->nr); @@ -4442,7 +4439,7 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, root_sub_used_bytes(root); - atomic_inc(&leaf->refs); + refcount_inc(&leaf->refs); ret = btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1); free_extent_buffer_stale(leaf); if (ret < 0) @@ -4469,7 +4466,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (slot + nr != nritems) { const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1); const int data_end = leaf_data_end(leaf); - struct btrfs_map_token token; u32 dsize = 0; int i; @@ -4479,12 +4475,11 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, memmove_leaf_data(leaf, data_end + dsize, data_end, last_off - data_end); - btrfs_init_map_token(&token, leaf); for (i = slot + nr; i < nritems; i++) { u32 ioff; - ioff = btrfs_token_item_offset(&token, i); - btrfs_set_token_item_offset(&token, i, ioff + dsize); + ioff = btrfs_item_offset(leaf, i); + btrfs_set_item_offset(leaf, i, ioff + dsize); } memmove_leaf_items(leaf, slot, slot + nr, nritems - slot - nr); @@ -4527,7 +4522,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, * for possible call to btrfs_del_ptr below */ slot = path->slots[1]; - atomic_inc(&leaf->refs); + refcount_inc(&leaf->refs); /* * We want to be able to at least push one item to the * left neighbour leaf, and that's the first item. @@ -4585,16 +4580,13 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, /* * A helper function to walk down the tree starting at min_key, and looking - * for nodes or leaves that are have a minimum transaction id. + * for leaves that have a minimum transaction id. * This is used by the btree defrag code, and tree logging * * This does not cow, but it does stuff the starting key it finds back * into min_key, so you can call btrfs_search_slot with cow=1 on the * key and get a writable path. * - * This honors path->lowest_level to prevent descent past a given level - * of the tree. - * * min_trans indicates the oldest transaction that you are interested * in walking through. Any nodes or leaves older than min_trans are * skipped over (without reading them). @@ -4615,6 +4607,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, int keep_locks = path->keep_locks; ASSERT(!path->nowait); + ASSERT(path->lowest_level == 0); path->keep_locks = 1; again: cur = btrfs_read_lock_root_node(root); @@ -4636,8 +4629,8 @@ again: goto out; } - /* at the lowest level, we're done, setup the path and exit */ - if (level == path->lowest_level) { + /* At level 0 we're done, setup the path and exit. */ + if (level == 0) { if (slot >= nritems) goto find_next_key; ret = 0; @@ -4678,12 +4671,6 @@ find_next_key: goto out; } } - if (level == path->lowest_level) { - ret = 0; - /* Save our key for returning back. */ - btrfs_node_key_to_cpu(cur, min_key, slot); - goto out; - } cur = btrfs_read_node_slot(cur, slot); if (IS_ERR(cur)) { ret = PTR_ERR(cur); @@ -4699,7 +4686,7 @@ find_next_key: out: path->keep_locks = keep_locks; if (ret == 0) - btrfs_unlock_up_safe(path, path->lowest_level + 1); + btrfs_unlock_up_safe(path, 1); return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 71fa42ca04fe..fe70b593c7cd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -224,16 +224,10 @@ struct btrfs_root { struct list_head root_list; - /* - * Xarray that keeps track of in-memory inodes, protected by the lock - * @inode_lock. - */ + /* Xarray that keeps track of in-memory inodes. */ struct xarray inodes; - /* - * Xarray that keeps track of delayed nodes of every inode, protected - * by @inode_lock. - */ + /* Xarray that keeps track of delayed nodes of every inode. */ struct xarray delayed_nodes; /* * right now this just gets used so that a root has its own devid @@ -508,7 +502,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) int __init btrfs_ctree_init(void); void __cold btrfs_ctree_exit(void); -int btrfs_bin_search(struct extent_buffer *eb, int first_slot, +int btrfs_bin_search(const struct extent_buffer *eb, int first_slot, const struct btrfs_key *key, int *slot); int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2); @@ -576,9 +570,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer **cow_ret, u64 new_root_objectid); -bool btrfs_block_can_be_shared(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *buf); +bool btrfs_block_can_be_shared(const struct btrfs_trans_handle *trans, + const struct btrfs_root *root, + const struct extent_buffer *buf); int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); void btrfs_extend_item(struct btrfs_trans_handle *trans, @@ -727,13 +721,18 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) } int btrfs_leaf_free_space(const struct extent_buffer *leaf); -static inline int is_fstree(u64 rootid) +static inline bool btrfs_is_fstree(u64 rootid) { - if (rootid == BTRFS_FS_TREE_OBJECTID || - ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID && - !btrfs_qgroup_level(rootid))) - return 1; - return 0; + if (rootid == BTRFS_FS_TREE_OBJECTID) + return true; + + if ((s64)rootid < (s64)BTRFS_FIRST_FREE_OBJECTID) + return false; + + if (btrfs_qgroup_level(rootid) != 0) + return false; + + return true; } static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root) diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c index 1831618579cb..738179a5e170 100644 --- a/fs/btrfs/defrag.c +++ b/fs/btrfs/defrag.c @@ -60,6 +60,14 @@ static int compare_inode_defrag(const struct inode_defrag *defrag1, return 0; } +static int inode_defrag_cmp(struct rb_node *new, const struct rb_node *existing) +{ + const struct inode_defrag *new_defrag = rb_entry(new, struct inode_defrag, rb_node); + const struct inode_defrag *existing_defrag = rb_entry(existing, struct inode_defrag, rb_node); + + return compare_inode_defrag(new_defrag, existing_defrag); +} + /* * Insert a record for an inode into the defrag tree. The lock must be held * already. @@ -71,37 +79,23 @@ static int btrfs_insert_inode_defrag(struct btrfs_inode *inode, struct inode_defrag *defrag) { struct btrfs_fs_info *fs_info = inode->root->fs_info; - struct inode_defrag *entry; - struct rb_node **p; - struct rb_node *parent = NULL; - int ret; + struct rb_node *node; - p = &fs_info->defrag_inodes.rb_node; - while (*p) { - parent = *p; - entry = rb_entry(parent, struct inode_defrag, rb_node); + node = rb_find_add(&defrag->rb_node, &fs_info->defrag_inodes, inode_defrag_cmp); + if (node) { + struct inode_defrag *entry; - ret = compare_inode_defrag(defrag, entry); - if (ret < 0) - p = &parent->rb_left; - else if (ret > 0) - p = &parent->rb_right; - else { - /* - * If we're reinserting an entry for an old defrag run, - * make sure to lower the transid of our existing - * record. - */ - if (defrag->transid < entry->transid) - entry->transid = defrag->transid; - entry->extent_thresh = min(defrag->extent_thresh, - entry->extent_thresh); - return -EEXIST; - } + entry = rb_entry(node, struct inode_defrag, rb_node); + /* + * If we're reinserting an entry for an old defrag run, make + * sure to lower the transid of our existing record. + */ + if (defrag->transid < entry->transid) + entry->transid = defrag->transid; + entry->extent_thresh = min(defrag->extent_thresh, entry->extent_thresh); + return -EEXIST; } set_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags); - rb_link_node(&defrag->rb_node, parent, p); - rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes); return 0; } @@ -854,8 +848,8 @@ static struct folio *defrag_prepare_one_folio(struct btrfs_inode *inode, pgoff_t { struct address_space *mapping = inode->vfs_inode.i_mapping; gfp_t mask = btrfs_alloc_write_mask(mapping); - u64 folio_start; - u64 folio_end; + u64 lock_start; + u64 lock_end; struct extent_state *cached_state = NULL; struct folio *folio; int ret; @@ -891,15 +885,15 @@ again: return ERR_PTR(ret); } - folio_start = folio_pos(folio); - folio_end = folio_pos(folio) + folio_size(folio) - 1; + lock_start = folio_pos(folio); + lock_end = folio_end(folio) - 1; /* Wait for any existing ordered extent in the range */ while (1) { struct btrfs_ordered_extent *ordered; - btrfs_lock_extent(&inode->io_tree, folio_start, folio_end, &cached_state); - ordered = btrfs_lookup_ordered_range(inode, folio_start, folio_size(folio)); - btrfs_unlock_extent(&inode->io_tree, folio_start, folio_end, &cached_state); + btrfs_lock_extent(&inode->io_tree, lock_start, lock_end, &cached_state); + ordered = btrfs_lookup_ordered_range(inode, lock_start, folio_size(folio)); + btrfs_unlock_extent(&inode->io_tree, lock_start, lock_end, &cached_state); if (!ordered) break; @@ -953,7 +947,7 @@ struct defrag_target_range { * @extent_thresh: file extent size threshold, any extent size >= this value * will be ignored * @newer_than: only defrag extents newer than this value - * @do_compress: whether the defrag is doing compression + * @do_compress: whether the defrag is doing compression or no-compression * if true, @extent_thresh will be ignored and all regular * file extents meeting @newer_than will be targets. * @locked: if the range has already held extent lock @@ -1184,8 +1178,7 @@ static int defrag_one_locked_target(struct btrfs_inode *inode, if (!folio) break; - if (start >= folio_pos(folio) + folio_size(folio) || - start + len <= folio_pos(folio)) + if (start >= folio_end(folio) || start + len <= folio_pos(folio)) continue; btrfs_folio_clamp_clear_checked(fs_info, folio, start, len); btrfs_folio_clamp_set_dirty(fs_info, folio, start, len); @@ -1226,7 +1219,7 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len, folios[i] = NULL; goto free_folios; } - cur = folio_pos(folios[i]) + folio_size(folios[i]); + cur = folio_end(folios[i]); } for (int i = 0; i < nr_pages; i++) { if (!folios[i]) @@ -1371,6 +1364,7 @@ int btrfs_defrag_file(struct btrfs_inode *inode, struct file_ra_state *ra, u64 cur; u64 last_byte; bool do_compress = (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS); + bool no_compress = (range->flags & BTRFS_DEFRAG_RANGE_NOCOMPRESS); int compress_type = BTRFS_COMPRESS_ZLIB; int compress_level = 0; int ret = 0; @@ -1401,6 +1395,9 @@ int btrfs_defrag_file(struct btrfs_inode *inode, struct file_ra_state *ra, if (range->compress_type) compress_type = range->compress_type; } + } else if (range->flags & BTRFS_DEFRAG_RANGE_NOCOMPRESS) { + compress_type = BTRFS_DEFRAG_DONT_COMPRESS; + compress_level = 1; } if (extent_thresh == 0) @@ -1451,13 +1448,14 @@ int btrfs_defrag_file(struct btrfs_inode *inode, struct file_ra_state *ra, btrfs_inode_unlock(inode, 0); break; } - if (do_compress) { + if (do_compress || no_compress) { inode->defrag_compress = compress_type; inode->defrag_compress_level = compress_level; } ret = defrag_one_cluster(inode, ra, cur, cluster_end + 1 - cur, extent_thresh, - newer_than, do_compress, §ors_defragged, + newer_than, do_compress || no_compress, + §ors_defragged, max_to_defrag, &last_scanned); if (sectors_defragged > prev_sectors_defragged) @@ -1496,7 +1494,7 @@ int btrfs_defrag_file(struct btrfs_inode *inode, struct file_ra_state *ra, btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); ret = sectors_defragged; } - if (do_compress) { + if (do_compress || no_compress) { btrfs_inode_lock(inode, 0); inode->defrag_compress = BTRFS_COMPRESS_NONE; btrfs_inode_unlock(inode, 0); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 8c597fa60523..0f8d8e275143 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -334,6 +334,20 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len, return item; } +static int delayed_item_index_cmp(const void *key, const struct rb_node *node) +{ + const u64 *index = key; + const struct btrfs_delayed_item *delayed_item = rb_entry(node, + struct btrfs_delayed_item, rb_node); + + if (delayed_item->index < *index) + return 1; + else if (delayed_item->index > *index) + return -1; + + return 0; +} + /* * Look up the delayed item by key. * @@ -347,21 +361,10 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_item( struct rb_root *root, u64 index) { - struct rb_node *node = root->rb_node; - struct btrfs_delayed_item *delayed_item = NULL; - - while (node) { - delayed_item = rb_entry(node, struct btrfs_delayed_item, - rb_node); - if (delayed_item->index < index) - node = node->rb_right; - else if (delayed_item->index > index) - node = node->rb_left; - else - return delayed_item; - } + struct rb_node *node; - return NULL; + node = rb_find(&index, root, delayed_item_index_cmp); + return rb_entry_safe(node, struct btrfs_delayed_item, rb_node); } static int btrfs_delayed_item_cmp(const struct rb_node *new, @@ -369,14 +372,8 @@ static int btrfs_delayed_item_cmp(const struct rb_node *new, { const struct btrfs_delayed_item *new_item = rb_entry(new, struct btrfs_delayed_item, rb_node); - const struct btrfs_delayed_item *exist_item = - rb_entry(exist, struct btrfs_delayed_item, rb_node); - if (new_item->index < exist_item->index) - return -1; - if (new_item->index > exist_item->index) - return 1; - return 0; + return delayed_item_index_cmp(&new_item->index, exist); } static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node, @@ -1008,8 +1005,16 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, ret = btrfs_lookup_inode(trans, root, path, &key, mod); if (ret > 0) ret = -ENOENT; - if (ret < 0) + if (ret < 0) { + /* + * If we fail to update the delayed inode we need to abort the + * transaction, because we could leave the inode with the + * improper counts behind. + */ + if (ret != -ENOENT) + btrfs_abort_transaction(trans, ret); goto out; + } leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], @@ -1034,8 +1039,10 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, btrfs_release_path(path); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) + if (ret < 0) { + btrfs_abort_transaction(trans, ret); goto err_out; + } ASSERT(ret > 0); ASSERT(path->slots[0] > 0); ret = 0; @@ -1057,21 +1064,14 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, * in the same item doesn't exist. */ ret = btrfs_del_item(trans, root, path); + if (ret < 0) + btrfs_abort_transaction(trans, ret); out: btrfs_release_delayed_iref(node); btrfs_release_path(path); err_out: btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0)); btrfs_release_delayed_inode(node); - - /* - * If we fail to update the delayed inode we need to abort the - * transaction, because we could leave the inode with the improper - * counts behind. - */ - if (ret && ret != -ENOENT) - btrfs_abort_transaction(trans, ret); - return ret; } @@ -1540,8 +1540,8 @@ release_node: return ret; } -static int btrfs_delete_delayed_insertion_item(struct btrfs_delayed_node *node, - u64 index) +static bool btrfs_delete_delayed_insertion_item(struct btrfs_delayed_node *node, + u64 index) { struct btrfs_delayed_item *item; @@ -1549,7 +1549,7 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_delayed_node *node, item = __btrfs_lookup_delayed_item(&node->ins_root.rb_root, index); if (!item) { mutex_unlock(&node->mutex); - return 1; + return false; } /* @@ -1584,7 +1584,7 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_delayed_node *node, } mutex_unlock(&node->mutex); - return 0; + return true; } int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, @@ -1598,9 +1598,10 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, if (IS_ERR(node)) return PTR_ERR(node); - ret = btrfs_delete_delayed_insertion_item(node, index); - if (!ret) + if (btrfs_delete_delayed_insertion_item(node, index)) { + ret = 0; goto end; + } item = btrfs_alloc_delayed_item(0, node, BTRFS_DELAYED_DELETION_ITEM); if (!item) { @@ -1617,7 +1618,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, */ if (ret < 0) { btrfs_err(trans->fs_info, -"metadata reservation failed for delayed dir item deltiona, should have been reserved"); +"metadata reservation failed for delayed dir item deletion, index: %llu, root: %llu, inode: %llu, error: %d", + index, btrfs_root_id(node->root), node->inode_id, ret); btrfs_release_delayed_item(item); goto end; } @@ -1626,9 +1628,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, ret = __btrfs_add_delayed_item(node, item); if (unlikely(ret)) { btrfs_err(trans->fs_info, - "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)", - index, btrfs_root_id(node->root), - node->inode_id, ret); +"failed to add delayed dir index item, root: %llu, inode: %llu, index: %llu, error: %d", + index, btrfs_root_id(node->root), node->inode_id, ret); btrfs_delayed_item_release_metadata(dir->root, item); btrfs_release_delayed_item(item); } @@ -1733,17 +1734,16 @@ void btrfs_readdir_put_delayed_items(struct btrfs_inode *inode, downgrade_write(&inode->vfs_inode.i_rwsem); } -int btrfs_should_delete_dir_index(const struct list_head *del_list, - u64 index) +bool btrfs_should_delete_dir_index(const struct list_head *del_list, u64 index) { struct btrfs_delayed_item *curr; - int ret = 0; + bool ret = false; list_for_each_entry(curr, del_list, readdir_list) { if (curr->index > index) break; if (curr->index == index) { - ret = 1; + ret = true; break; } } @@ -1753,15 +1753,14 @@ int btrfs_should_delete_dir_index(const struct list_head *del_list, /* * Read dir info stored in the delayed tree. */ -int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - const struct list_head *ins_list) +bool btrfs_readdir_delayed_dir_index(struct dir_context *ctx, + const struct list_head *ins_list) { struct btrfs_dir_item *di; struct btrfs_delayed_item *curr, *next; struct btrfs_key location; char *name; int name_len; - int over = 0; unsigned char d_type; /* @@ -1770,6 +1769,8 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, * directory, nobody can delete any directory indexes now. */ list_for_each_entry_safe(curr, next, ins_list, readdir_list) { + bool over; + list_del(&curr->readdir_list); if (curr->index < ctx->pos) { @@ -1787,17 +1788,16 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, d_type = fs_ftype_to_dtype(btrfs_dir_flags_to_ftype(di->type)); btrfs_disk_key_to_cpu(&location, &di->location); - over = !dir_emit(ctx, name, name_len, - location.objectid, d_type); + over = !dir_emit(ctx, name, name_len, location.objectid, d_type); if (refcount_dec_and_test(&curr->refs)) kfree(curr); if (over) - return 1; + return true; ctx->pos++; } - return 0; + return false; } static void fill_stack_inode_item(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index c4b4ba122beb..e6e763ad2d42 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -150,10 +150,9 @@ bool btrfs_readdir_get_delayed_items(struct btrfs_inode *inode, void btrfs_readdir_put_delayed_items(struct btrfs_inode *inode, struct list_head *ins_list, struct list_head *del_list); -int btrfs_should_delete_dir_index(const struct list_head *del_list, - u64 index); -int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - const struct list_head *ins_list); +bool btrfs_should_delete_dir_index(const struct list_head *del_list, u64 index); +bool btrfs_readdir_delayed_dir_index(struct dir_context *ctx, + const struct list_head *ins_list); /* Used during directory logging. */ void btrfs_log_get_delayed_items(struct btrfs_inode *inode, diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 739c9e29aaa3..ca382c5b186f 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -928,7 +928,7 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info, if (action == BTRFS_ADD_DELAYED_EXTENT) action = BTRFS_ADD_DELAYED_REF; - if (is_fstree(generic_ref->ref_root)) + if (btrfs_is_fstree(generic_ref->ref_root)) seq = atomic64_read(&fs_info->tree_mod_seq); refcount_set(&ref->refs, 1); @@ -958,8 +958,8 @@ void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root, #endif generic_ref->tree_ref.level = level; generic_ref->type = BTRFS_REF_METADATA; - if (skip_qgroup || !(is_fstree(generic_ref->ref_root) && - (!mod_root || is_fstree(mod_root)))) + if (skip_qgroup || !(btrfs_is_fstree(generic_ref->ref_root) && + (!mod_root || btrfs_is_fstree(mod_root)))) generic_ref->skip_qgroup = true; else generic_ref->skip_qgroup = false; @@ -976,8 +976,8 @@ void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset, generic_ref->data_ref.objectid = ino; generic_ref->data_ref.offset = offset; generic_ref->type = BTRFS_REF_DATA; - if (skip_qgroup || !(is_fstree(generic_ref->ref_root) && - (!mod_root || is_fstree(mod_root)))) + if (skip_qgroup || !(btrfs_is_fstree(generic_ref->ref_root) && + (!mod_root || btrfs_is_fstree(mod_root)))) generic_ref->skip_qgroup = true; else generic_ref->skip_qgroup = false; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 78cc23837610..552ec4fa645d 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -420,7 +420,7 @@ bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, u64 root, u64 parent); void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans); -static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node) +static inline u64 btrfs_delayed_ref_owner(const struct btrfs_delayed_ref_node *node) { if (node->type == BTRFS_EXTENT_DATA_REF_KEY || node->type == BTRFS_SHARED_DATA_REF_KEY) @@ -428,7 +428,7 @@ static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node) return node->tree_ref.level; } -static inline u64 btrfs_delayed_ref_offset(struct btrfs_delayed_ref_node *node) +static inline u64 btrfs_delayed_ref_offset(const struct btrfs_delayed_ref_node *node) { if (node->type == BTRFS_EXTENT_DATA_REF_KEY || node->type == BTRFS_SHARED_DATA_REF_KEY) @@ -436,7 +436,7 @@ static inline u64 btrfs_delayed_ref_offset(struct btrfs_delayed_ref_node *node) return 0; } -static inline u8 btrfs_ref_type(struct btrfs_ref *ref) +static inline u8 btrfs_ref_type(const struct btrfs_ref *ref) { ASSERT(ref->type == BTRFS_REF_DATA || ref->type == BTRFS_REF_METADATA); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 2decb9fff445..4675bcd5f92e 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -250,7 +250,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, } bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE, - fs_info->bdev_holder, NULL); + fs_info->sb, &fs_holder_ops); if (IS_ERR(bdev_file)) { btrfs_err(fs_info, "target device %s is invalid!", device_path); return PTR_ERR(bdev_file); @@ -327,7 +327,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, return 0; error: - fput(bdev_file); + bdev_fput(bdev_file); return ret; } @@ -600,7 +600,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, return PTR_ERR(src_device); if (btrfs_pinned_by_swapfile(fs_info, src_device)) { - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "cannot replace device %s (devid %llu) due to active swapfile", btrfs_dev_name(src_device), src_device->devid); return -ETXTBSY; @@ -647,7 +647,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, dev_replace->srcdev = src_device; dev_replace->tgtdev = tgt_device; - btrfs_info_in_rcu(fs_info, + btrfs_info(fs_info, "dev_replace from %s (devid %llu) to %s started", btrfs_dev_name(src_device), src_device->devid, @@ -943,7 +943,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, tgt_device); } else { if (scrub_ret != -ECANCELED) - btrfs_err_in_rcu(fs_info, + btrfs_err(fs_info, "btrfs_scrub_dev(%s, %llu, %s) failed %d", btrfs_dev_name(src_device), src_device->devid, @@ -961,7 +961,7 @@ error: return scrub_ret; } - btrfs_info_in_rcu(fs_info, + btrfs_info(fs_info, "dev_replace from %s (devid %llu) to %s finished", btrfs_dev_name(src_device), src_device->devid, @@ -1109,7 +1109,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) * btrfs_dev_replace_finishing() will handle the * cleanup part */ - btrfs_info_in_rcu(fs_info, + btrfs_info(fs_info, "dev_replace from %s (devid %llu) to %s canceled", btrfs_dev_name(src_device), src_device->devid, btrfs_dev_name(tgt_device)); @@ -1143,7 +1143,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) ret = btrfs_commit_transaction(trans); WARN_ON(ret); - btrfs_info_in_rcu(fs_info, + btrfs_info(fs_info, "suspended dev_replace from %s (devid %llu) to %s canceled", btrfs_dev_name(src_device), src_device->devid, btrfs_dev_name(tgt_device)); @@ -1247,7 +1247,7 @@ static int btrfs_dev_replace_kthread(void *data) progress = btrfs_dev_replace_progress(fs_info); progress = div_u64(progress, 10); - btrfs_info_in_rcu(fs_info, + btrfs_info(fs_info, "continuing dev_replace from %s (devid %llu) to target %s @%u%%", btrfs_dev_name(dev_replace->srcdev), dev_replace->srcdev->devid, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index b29cc31a7c4a..69863e398e22 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -227,7 +227,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, return di; } -int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, +int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir_ino, const struct fscrypt_str *name) { int ret; @@ -242,7 +242,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, if (!path) return -ENOMEM; - key.objectid = dir; + key.objectid = dir_ino; key.type = BTRFS_DIR_ITEM_KEY; key.offset = btrfs_name_hash(name->name, name->len); diff --git a/fs/btrfs/dir-item.h b/fs/btrfs/dir-item.h index 8462579a95f4..e52174a8baf9 100644 --- a/fs/btrfs/dir-item.h +++ b/fs/btrfs/dir-item.h @@ -14,7 +14,7 @@ struct btrfs_inode; struct btrfs_root; struct btrfs_trans_handle; -int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, +int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir_ino, const struct fscrypt_str *name); int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const struct fscrypt_str *name, struct btrfs_inode *dir, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0d6ad7512f21..9cc14ab35297 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -884,7 +884,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, btrfs_set_root_used(&root->root_item, leaf->len); btrfs_set_root_last_snapshot(&root->root_item, 0); btrfs_set_root_dirid(&root->root_item, 0); - if (is_fstree(objectid)) + if (btrfs_is_fstree(objectid)) generate_random_guid(root->root_item.uuid); else export_guid(root->root_item.uuid, &guid_null); @@ -1104,7 +1104,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) if (btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID && !btrfs_is_data_reloc_root(root) && - is_fstree(btrfs_root_id(root))) { + btrfs_is_fstree(btrfs_root_id(root))) { set_bit(BTRFS_ROOT_SHAREABLE, &root->state); btrfs_check_and_init_root_item(&root->root_item); } @@ -1113,7 +1113,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) * Don't assign anonymous block device to roots that are not exposed to * userspace, the id pool is limited to 1M */ - if (is_fstree(btrfs_root_id(root)) && + if (btrfs_is_fstree(btrfs_root_id(root)) && btrfs_root_refs(&root->root_item) > 0) { if (!anon_dev) { ret = get_anon_bdev(&root->anon_dev); @@ -1246,6 +1246,8 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) { struct percpu_counter *em_counter = &fs_info->evictable_extent_maps; + if (fs_info->fs_devices) + btrfs_close_devices(fs_info->fs_devices); percpu_counter_destroy(&fs_info->stats_read_blocks); percpu_counter_destroy(&fs_info->dirty_metadata_bytes); percpu_counter_destroy(&fs_info->delalloc_bytes); @@ -1315,7 +1317,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, * This is namely for free-space-tree and quota tree, which can change * at runtime and should only be grabbed from fs_info. */ - if (!is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) + if (!btrfs_is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) return ERR_PTR(-ENOENT); again: root = btrfs_lookup_fs_root(fs_info, objectid); @@ -1947,7 +1949,6 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) fs_info->qgroup_tree = RB_ROOT; INIT_LIST_HEAD(&fs_info->dirty_qgroups); fs_info->qgroup_seq = 1; - fs_info->qgroup_ulist = NULL; fs_info->qgroup_rescan_running = false; fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT; mutex_init(&fs_info->qgroup_rescan_lock); @@ -3396,6 +3397,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); fs_info->nodesize = nodesize; + fs_info->nodesize_bits = ilog2(nodesize); fs_info->sectorsize = sectorsize; fs_info->sectorsize_bits = ilog2(sectorsize); fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size; @@ -3561,6 +3563,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_sysfs; } + btrfs_zoned_reserve_data_reloc_bg(fs_info); btrfs_free_zone_cache(fs_info); btrfs_check_active_zone_reservation(fs_info); @@ -3681,7 +3684,6 @@ fail_alloc: iput(fs_info->btree_inode); fail: - btrfs_close_devices(fs_info->fs_devices); ASSERT(ret < 0); return ret; } @@ -3694,7 +3696,7 @@ static void btrfs_end_super_write(struct bio *bio) bio_for_each_folio_all(fi, bio) { if (bio->bi_status) { - btrfs_warn_rl_in_rcu(device->fs_info, + btrfs_warn_rl(device->fs_info, "lost super block write due to IO error on %s (%d)", btrfs_dev_name(device), blk_status_to_errno(bio->bi_status)); @@ -3992,7 +3994,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) } if (min_tolerated == INT_MAX) { - pr_warn("BTRFS: unknown raid flag: %llu", flags); + btrfs_warn(NULL, "unknown raid flag: %llu", flags); min_tolerated = 0; } @@ -4428,7 +4430,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) iput(fs_info->btree_inode); btrfs_mapping_tree_free(fs_info); - btrfs_close_devices(fs_info->fs_devices); } void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans, @@ -4640,7 +4641,7 @@ static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, while (btrfs_find_first_extent_bit(dirty_pages, start, &start, &end, mark, NULL)) { - btrfs_clear_extent_bits(dirty_pages, start, end, mark); + btrfs_clear_extent_bit(dirty_pages, start, end, mark, NULL); while (start <= end) { eb = find_extent_buffer(fs_info, start); start += fs_info->nodesize; diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index b1b96eb5f64e..66361325f6dc 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -43,7 +43,8 @@ static inline void btrfs_extent_state_leak_debug_check(void) while (!list_empty(&states)) { state = list_first_entry(&states, struct extent_state, leak_list); - pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n", + btrfs_err(NULL, + "state leak: start %llu end %llu state %u in tree %d refs %d", state->start, state->end, state->state, extent_state_in_tree(state), refcount_read(&state->refs)); @@ -1882,12 +1883,11 @@ int btrfs_clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 e bool btrfs_try_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, struct extent_state **cached) { - int err; + int ret; u64 failed_start; - err = set_extent_bit(tree, start, end, bits, &failed_start, NULL, - cached, NULL); - if (err == -EEXIST) { + ret = set_extent_bit(tree, start, end, bits, &failed_start, NULL, cached, NULL); + if (ret == -EEXIST) { if (failed_start > start) btrfs_clear_extent_bit(tree, start, failed_start - 1, bits, cached); @@ -1904,21 +1904,21 @@ int btrfs_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 struct extent_state **cached_state) { struct extent_state *failed_state = NULL; - int err; + int ret; u64 failed_start; - err = set_extent_bit(tree, start, end, bits, &failed_start, + ret = set_extent_bit(tree, start, end, bits, &failed_start, &failed_state, cached_state, NULL); - while (err == -EEXIST) { + while (ret == -EEXIST) { if (failed_start != start) btrfs_clear_extent_bit(tree, start, failed_start - 1, bits, cached_state); wait_extent_bit(tree, failed_start, end, bits, &failed_state); - err = set_extent_bit(tree, start, end, bits, &failed_start, + ret = set_extent_bit(tree, start, end, bits, &failed_start, &failed_state, cached_state, NULL); } - return err; + return ret; } /* diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h index 0a18ca9c59c3..36facca37973 100644 --- a/fs/btrfs/extent-io-tree.h +++ b/fs/btrfs/extent-io-tree.h @@ -19,7 +19,8 @@ enum { ENUM_BIT(EXTENT_DIRTY), ENUM_BIT(EXTENT_LOCKED), ENUM_BIT(EXTENT_DIO_LOCKED), - ENUM_BIT(EXTENT_NEW), + ENUM_BIT(EXTENT_DIRTY_LOG1), + ENUM_BIT(EXTENT_DIRTY_LOG2), ENUM_BIT(EXTENT_DELALLOC), ENUM_BIT(EXTENT_DEFRAG), ENUM_BIT(EXTENT_BOUNDARY), @@ -191,12 +192,6 @@ static inline int btrfs_unlock_extent(struct extent_io_tree *tree, u64 start, u6 cached, NULL); } -static inline int btrfs_clear_extent_bits(struct extent_io_tree *tree, u64 start, - u64 end, u32 bits) -{ - return btrfs_clear_extent_bit(tree, start, end, bits, NULL); -} - int btrfs_set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, struct extent_changeset *changeset); int btrfs_set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cb6128778a83..97d517cdf2df 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -46,7 +46,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *href, - struct btrfs_delayed_ref_node *node, + const struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extra_op); static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, struct extent_buffer *leaf, @@ -56,12 +56,12 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, u64 flags, u64 owner, u64 offset, struct btrfs_key *ins, int ref_mod, u64 oref_root); static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_node *node, + const struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op); -static int find_next_key(struct btrfs_path *path, int level, +static int find_next_key(const struct btrfs_path *path, int level, struct btrfs_key *key); -static int block_group_bits(struct btrfs_block_group *cache, u64 bits) +static int block_group_bits(const struct btrfs_block_group *cache, u64 bits) { return (cache->flags & bits) == bits; } @@ -329,7 +329,7 @@ search_again: * is_data == BTRFS_REF_TYPE_ANY, either type is OK. */ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, - struct btrfs_extent_inline_ref *iref, + const struct btrfs_extent_inline_ref *iref, enum btrfs_inline_ref_type is_data) { struct btrfs_fs_info *fs_info = eb->fs_info; @@ -401,16 +401,16 @@ u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) return ((u64)high_crc << 31) ^ (u64)low_crc; } -static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, - struct btrfs_extent_data_ref *ref) +static u64 hash_extent_data_ref_item(const struct extent_buffer *leaf, + const struct btrfs_extent_data_ref *ref) { return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), btrfs_extent_data_ref_objectid(leaf, ref), btrfs_extent_data_ref_offset(leaf, ref)); } -static bool match_extent_data_ref(struct extent_buffer *leaf, - struct btrfs_extent_data_ref *ref, +static bool match_extent_data_ref(const struct extent_buffer *leaf, + const struct btrfs_extent_data_ref *ref, u64 root_objectid, u64 owner, u64 offset) { if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || @@ -497,7 +497,7 @@ fail: static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, struct btrfs_path *path, - struct btrfs_delayed_ref_node *node, + const struct btrfs_delayed_ref_node *node, u64 bytenr) { struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr); @@ -617,13 +617,13 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, return ret; } -static noinline u32 extent_data_ref_count(struct btrfs_path *path, - struct btrfs_extent_inline_ref *iref) +static noinline u32 extent_data_ref_count(const struct btrfs_path *path, + const struct btrfs_extent_inline_ref *iref) { struct btrfs_key key; struct extent_buffer *leaf; - struct btrfs_extent_data_ref *ref1; - struct btrfs_shared_data_ref *ref2; + const struct btrfs_extent_data_ref *ref1; + const struct btrfs_shared_data_ref *ref2; u32 num_refs = 0; int type; @@ -638,10 +638,10 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path, type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA); ASSERT(type != BTRFS_REF_TYPE_INVALID); if (type == BTRFS_EXTENT_DATA_REF_KEY) { - ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); + ref1 = (const struct btrfs_extent_data_ref *)(&iref->offset); num_refs = btrfs_extent_data_ref_count(leaf, ref1); } else { - ref2 = (struct btrfs_shared_data_ref *)(iref + 1); + ref2 = (const struct btrfs_shared_data_ref *)(iref + 1); num_refs = btrfs_shared_data_ref_count(leaf, ref2); } } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { @@ -684,7 +684,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, struct btrfs_path *path, - struct btrfs_delayed_ref_node *node, + const struct btrfs_delayed_ref_node *node, u64 bytenr) { struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr); @@ -722,7 +722,7 @@ static inline int extent_ref_type(u64 parent, u64 owner) return type; } -static int find_next_key(struct btrfs_path *path, int level, +static int find_next_key(const struct btrfs_path *path, int level, struct btrfs_key *key) { @@ -1480,7 +1480,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, * */ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_node *node, + const struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op) { BTRFS_PATH_AUTO_FREE(path); @@ -1522,19 +1522,21 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_release_path(path); /* now insert the actual backref */ - if (owner < BTRFS_FIRST_FREE_OBJECTID) + if (owner < BTRFS_FIRST_FREE_OBJECTID) { ret = insert_tree_block_ref(trans, path, node, bytenr); - else + if (ret) + btrfs_abort_transaction(trans, ret); + } else { ret = insert_extent_data_ref(trans, path, node, bytenr); - - if (ret) - btrfs_abort_transaction(trans, ret); + if (ret) + btrfs_abort_transaction(trans, ret); + } return ret; } static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_head *href) + const struct btrfs_delayed_ref_head *href) { u64 root = href->owning_root; @@ -1543,7 +1545,7 @@ static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info, * where it has already been unset. */ if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE || - !href->is_data || !is_fstree(root)) + !href->is_data || !btrfs_is_fstree(root)) return; btrfs_qgroup_free_refroot(fs_info, root, href->reserved_bytes, @@ -1552,7 +1554,7 @@ static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info, static int run_delayed_data_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *href, - struct btrfs_delayed_ref_node *node, + const struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op, bool insert_reserved) { @@ -1620,7 +1622,7 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, } static int run_delayed_extent_op(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_head *head, + const struct btrfs_delayed_ref_head *head, struct btrfs_delayed_extent_op *extent_op) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -1707,7 +1709,7 @@ again: static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *href, - struct btrfs_delayed_ref_node *node, + const struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op, bool insert_reserved) { @@ -1754,7 +1756,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, /* helper function to actually process a single delayed ref entry */ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *href, - struct btrfs_delayed_ref_node *node, + const struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op, bool insert_reserved) { @@ -2998,7 +3000,7 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans, return ret; } - ret = add_to_free_space_tree(trans, bytenr, num_bytes); + ret = btrfs_add_to_free_space_tree(trans, bytenr, num_bytes); if (ret) { btrfs_abort_transaction(trans, ret); return ret; @@ -3079,7 +3081,7 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans, */ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *href, - struct btrfs_delayed_ref_node *node, + const struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op) { struct btrfs_fs_info *info = trans->fs_info; @@ -3649,6 +3651,21 @@ btrfs_release_block_group(struct btrfs_block_group *cache, btrfs_put_block_group(cache); } +static bool find_free_extent_check_size_class(const struct find_free_extent_ctl *ffe_ctl, + const struct btrfs_block_group *bg) +{ + if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED) + return true; + if (!btrfs_block_group_should_use_size_class(bg)) + return true; + if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS) + return true; + if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS && + bg->size_class == BTRFS_BG_SZ_NONE) + return true; + return ffe_ctl->size_class == bg->size_class; +} + /* * Helper function for find_free_extent(). * @@ -3670,7 +3687,8 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg, if (!cluster_bg) goto refill_cluster; if (cluster_bg != bg && (cluster_bg->ro || - !block_group_bits(cluster_bg, ffe_ctl->flags))) + !block_group_bits(cluster_bg, ffe_ctl->flags) || + !find_free_extent_check_size_class(ffe_ctl, cluster_bg))) goto release_cluster; offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr, @@ -4227,21 +4245,6 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, return -ENOSPC; } -static bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl, - struct btrfs_block_group *bg) -{ - if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED) - return true; - if (!btrfs_block_group_should_use_size_class(bg)) - return true; - if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS) - return true; - if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS && - bg->size_class == BTRFS_BG_SZ_NONE) - return true; - return ffe_ctl->size_class == bg->size_class; -} - static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info, struct find_free_extent_ctl *ffe_ctl, struct btrfs_space_info *space_info, @@ -4782,7 +4785,7 @@ static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr, struct btrfs_fs_info *fs_info = trans->fs_info; int ret; - ret = remove_from_free_space_tree(trans, bytenr, num_bytes); + ret = btrfs_remove_from_free_space_tree(trans, bytenr, num_bytes); if (ret) return ret; @@ -4873,7 +4876,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, } static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_node *node, + const struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -4961,7 +4964,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ASSERT(generic_ref.ref_root != BTRFS_TREE_LOG_OBJECTID); - if (btrfs_is_data_reloc_root(root) && is_fstree(root->relocation_src_root)) + if (btrfs_is_data_reloc_root(root) && btrfs_is_fstree(root->relocation_src_root)) generic_ref.owning_root = root->relocation_src_root; btrfs_init_data_ref(&generic_ref, owner, offset, 0, false); @@ -4983,7 +4986,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, int ret; struct btrfs_block_group *block_group; struct btrfs_space_info *space_info; - struct btrfs_squota_delta delta = { + const struct btrfs_squota_delta delta = { .root = root_objectid, .num_bytes = ins->offset, .generation = trans->transid, @@ -5111,11 +5114,11 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (buf->log_index == 0) btrfs_set_extent_bit(&root->dirty_log_pages, buf->start, buf->start + buf->len - 1, - EXTENT_DIRTY, NULL); + EXTENT_DIRTY_LOG1, NULL); else btrfs_set_extent_bit(&root->dirty_log_pages, buf->start, buf->start + buf->len - 1, - EXTENT_NEW, NULL); + EXTENT_DIRTY_LOG2, NULL); } else { buf->log_index = -1; btrfs_set_extent_bit(&trans->transaction->dirty_pages, buf->start, @@ -5552,7 +5555,7 @@ again: goto again; } - exists = btrfs_find_delayed_tree_ref(head, root->root_key.objectid, parent); + exists = btrfs_find_delayed_tree_ref(head, btrfs_root_id(root), parent); mutex_unlock(&head->mutex); out: spin_unlock(&delayed_refs->lock); @@ -5872,15 +5875,20 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, if (wc->refs[level] == 1) { if (level == 0) { - if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { ret = btrfs_dec_ref(trans, root, eb, 1); - else + if (ret) { + btrfs_abort_transaction(trans, ret); + return ret; + } + } else { ret = btrfs_dec_ref(trans, root, eb, 0); - if (ret) { - btrfs_abort_transaction(trans, ret); - return ret; + if (ret) { + btrfs_abort_transaction(trans, ret); + return ret; + } } - if (is_fstree(btrfs_root_id(root))) { + if (btrfs_is_fstree(btrfs_root_id(root))) { ret = btrfs_qgroup_trace_leaf_items(trans, eb); if (ret) { btrfs_err_rl(fs_info, @@ -6341,7 +6349,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, btrfs_assert_tree_write_locked(parent); parent_level = btrfs_header_level(parent); - atomic_inc(&parent->refs); + refcount_inc(&parent->refs); path->nodes[parent_level] = parent; path->slots[parent_level] = btrfs_header_nritems(parent); @@ -6442,7 +6450,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) /* Check if there are any CHUNK_* bits left */ if (start > device->total_bytes) { DEBUG_WARN(); - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu", start, end - start + 1, btrfs_dev_name(device), diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h index 72914074c304..82d3a82dc712 100644 --- a/fs/btrfs/extent-tree.h +++ b/fs/btrfs/extent-tree.h @@ -97,7 +97,7 @@ enum btrfs_inline_ref_type { }; int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, - struct btrfs_extent_inline_ref *iref, + const struct btrfs_extent_inline_ref *iref, enum btrfs_inline_ref_type is_data); u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1dc931c4937f..835b0deef9bb 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -75,9 +75,9 @@ void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info) while (!list_empty(&fs_info->allocated_ebs)) { eb = list_first_entry(&fs_info->allocated_ebs, struct extent_buffer, leak_list); - pr_err( - "BTRFS: buffer leak start %llu len %u refs %d bflags %lu owner %llu\n", - eb->start, eb->len, atomic_read(&eb->refs), eb->bflags, + btrfs_err(fs_info, + "buffer leak start %llu len %u refs %d bflags %lu owner %llu", + eb->start, eb->len, refcount_read(&eb->refs), eb->bflags, btrfs_header_owner(eb)); list_del(&eb->leak_list); WARN_ON_ONCE(1); @@ -110,6 +110,7 @@ struct btrfs_bio_ctrl { * This is to avoid touching ranges covered by compression/inline. */ unsigned long submit_bitmap; + struct readahead_control *ractl; }; static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl) @@ -266,8 +267,7 @@ static noinline int lock_delalloc_folios(struct inode *inode, goto out; } range_start = max_t(u64, folio_pos(folio), start); - range_len = min_t(u64, folio_pos(folio) + folio_size(folio), - end + 1) - range_start; + range_len = min_t(u64, folio_end(folio), end + 1) - range_start; btrfs_folio_set_lock(fs_info, folio, range_start, range_len); processed_end = range_start + range_len - 1; @@ -321,7 +321,7 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode, ASSERT(orig_end > orig_start); /* The range should at least cover part of the folio */ - ASSERT(!(orig_start >= folio_pos(locked_folio) + folio_size(locked_folio) || + ASSERT(!(orig_start >= folio_end(locked_folio) || orig_end <= folio_pos(locked_folio))); again: /* step one, find a bunch of delalloc bytes starting at start */ @@ -419,7 +419,7 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); ASSERT(folio_pos(folio) <= start && - start + len <= folio_pos(folio) + folio_size(folio)); + start + len <= folio_end(folio)); if (uptodate && btrfs_verify_folio(folio, start, len)) btrfs_folio_set_uptodate(fs_info, folio, start, len); @@ -782,7 +782,7 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl, static int attach_extent_buffer_folio(struct extent_buffer *eb, struct folio *folio, - struct btrfs_subpage *prealloc) + struct btrfs_folio_state *prealloc) { struct btrfs_fs_info *fs_info = eb->fs_info; int ret = 0; @@ -806,7 +806,7 @@ static int attach_extent_buffer_folio(struct extent_buffer *eb, /* Already mapped, just free prealloc */ if (folio_test_private(folio)) { - btrfs_free_subpage(prealloc); + btrfs_free_folio_state(prealloc); return 0; } @@ -815,7 +815,7 @@ static int attach_extent_buffer_folio(struct extent_buffer *eb, folio_attach_private(folio, prealloc); else /* Do new allocation to attach subpage */ - ret = btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA); + ret = btrfs_attach_folio_state(fs_info, folio, BTRFS_SUBPAGE_METADATA); return ret; } @@ -831,7 +831,7 @@ int set_folio_extent_mapped(struct folio *folio) fs_info = folio_to_fs_info(folio); if (btrfs_is_subpage(fs_info, folio)) - return btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA); + return btrfs_attach_folio_state(fs_info, folio, BTRFS_SUBPAGE_DATA); folio_attach_private(folio, (void *)EXTENT_FOLIO_PRIVATE); return 0; @@ -848,7 +848,7 @@ void clear_folio_extent_mapped(struct folio *folio) fs_info = folio_to_fs_info(folio); if (btrfs_is_subpage(fs_info, folio)) - return btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA); + return btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_DATA); folio_detach_private(folio); } @@ -882,6 +882,25 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode, return em; } + +static void btrfs_readahead_expand(struct readahead_control *ractl, + const struct extent_map *em) +{ + const u64 ra_pos = readahead_pos(ractl); + const u64 ra_end = ra_pos + readahead_length(ractl); + const u64 em_end = em->start + em->ram_bytes; + + /* No expansion for holes and inline extents. */ + if (em->disk_bytenr > EXTENT_MAP_LAST_BYTE) + return; + + ASSERT(em_end >= ra_pos, + "extent_map %llu %llu ends before current readahead position %llu", + em->start, em->len, ra_pos); + if (em_end > ra_end) + readahead_expand(ractl, ra_pos, em_end - ra_pos); +} + /* * basic readpage implementation. Locked extent state structs are inserted * into the tree that are removed when the IO is done (by the end_io @@ -945,6 +964,16 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, compress_type = btrfs_extent_map_compression(em); + /* + * Only expand readahead for extents which are already creating + * the pages anyway in add_ra_bio_pages, which is compressed + * extents in the non subpage case. + */ + if (bio_ctrl->ractl && + !btrfs_is_subpage(fs_info, folio) && + compress_type != BTRFS_COMPRESS_NONE) + btrfs_readahead_expand(bio_ctrl->ractl, em); + if (compress_type != BTRFS_COMPRESS_NONE) disk_bytenr = em->disk_bytenr; else @@ -1086,7 +1115,7 @@ static bool can_skip_one_ordered_range(struct btrfs_inode *inode, * finished our folio read and unlocked the folio. */ if (btrfs_folio_test_dirty(fs_info, folio, cur, blocksize)) { - u64 range_len = min(folio_pos(folio) + folio_size(folio), + u64 range_len = min(folio_end(folio), ordered->file_offset + ordered->num_bytes) - cur; ret = true; @@ -1108,7 +1137,7 @@ static bool can_skip_one_ordered_range(struct btrfs_inode *inode, * So we return true and update @next_ret to the OE/folio boundary. */ if (btrfs_folio_test_uptodate(fs_info, folio, cur, blocksize)) { - u64 range_len = min(folio_pos(folio) + folio_size(folio), + u64 range_len = min(folio_end(folio), ordered->file_offset + ordered->num_bytes) - cur; /* @@ -1663,7 +1692,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl int ret; size_t pg_offset; loff_t i_size = i_size_read(&inode->vfs_inode); - unsigned long end_index = i_size >> PAGE_SHIFT; + const pgoff_t end_index = i_size >> PAGE_SHIFT; const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); trace_extent_writepage(folio, &inode->vfs_inode, bio_ctrl->wbc); @@ -1704,7 +1733,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); btrfs_err_rl(fs_info, "root %lld ino %llu folio %llu is marked dirty without notifying the fs", - inode->root->root_key.objectid, + btrfs_root_id(inode->root), btrfs_ino(inode), folio_pos(folio)); ret = -EUCLEAN; goto done; @@ -1774,7 +1803,7 @@ static noinline_for_stack bool lock_extent_buffer_for_io(struct extent_buffer *e */ spin_lock(&eb->refs_lock); if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { - XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->sectorsize_bits); + XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->nodesize_bits); unsigned long flags; set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); @@ -1874,7 +1903,7 @@ static void set_btree_ioerr(struct extent_buffer *eb) static void buffer_tree_set_mark(const struct extent_buffer *eb, xa_mark_t mark) { struct btrfs_fs_info *fs_info = eb->fs_info; - XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->sectorsize_bits); + XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->nodesize_bits); unsigned long flags; xas_lock_irqsave(&xas, flags); @@ -1886,7 +1915,7 @@ static void buffer_tree_set_mark(const struct extent_buffer *eb, xa_mark_t mark) static void buffer_tree_clear_mark(const struct extent_buffer *eb, xa_mark_t mark) { struct btrfs_fs_info *fs_info = eb->fs_info; - XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->sectorsize_bits); + XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->nodesize_bits); unsigned long flags; xas_lock_irqsave(&xas, flags); @@ -1961,7 +1990,7 @@ retry: if (!eb) return NULL; - if (!atomic_inc_not_zero(&eb->refs)) { + if (!refcount_inc_not_zero(&eb->refs)) { xas_reset(xas); goto retry; } @@ -1986,7 +2015,7 @@ static unsigned int buffer_tree_get_ebs_tag(struct btrfs_fs_info *fs_info, rcu_read_lock(); while ((eb = find_get_eb(&xas, end, tag)) != NULL) { if (!eb_batch_add(batch, eb)) { - *start = ((eb->start + eb->len) >> fs_info->sectorsize_bits); + *start = ((eb->start + eb->len) >> fs_info->nodesize_bits); goto out; } } @@ -2008,11 +2037,11 @@ static struct extent_buffer *find_extent_buffer_nolock( struct btrfs_fs_info *fs_info, u64 start) { struct extent_buffer *eb; - unsigned long index = (start >> fs_info->sectorsize_bits); + unsigned long index = (start >> fs_info->nodesize_bits); rcu_read_lock(); eb = xa_load(&fs_info->buffer_tree, index); - if (eb && !atomic_inc_not_zero(&eb->refs)) + if (eb && !refcount_inc_not_zero(&eb->refs)) eb = NULL; rcu_read_unlock(); return eb; @@ -2031,10 +2060,7 @@ static void end_bbio_meta_write(struct btrfs_bio *bbio) } buffer_tree_clear_mark(eb, PAGECACHE_TAG_WRITEBACK); - clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); - smp_mb__after_atomic(); - wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); - + clear_and_wake_up_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); bio_put(&bbio->bio); } @@ -2085,7 +2111,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb, for (int i = 0; i < num_extent_folios(eb); i++) { struct folio *folio = eb->folios[i]; u64 range_start = max_t(u64, eb->start, folio_pos(folio)); - u32 range_len = min_t(u64, folio_pos(folio) + folio_size(folio), + u32 range_len = min_t(u64, folio_end(folio), eb->start + eb->len) - range_start; folio_lock(folio); @@ -2114,8 +2140,8 @@ void btrfs_btree_wait_writeback_range(struct btrfs_fs_info *fs_info, u64 start, u64 end) { struct eb_batch batch; - unsigned long start_index = (start >> fs_info->sectorsize_bits); - unsigned long end_index = (end >> fs_info->sectorsize_bits); + unsigned long start_index = (start >> fs_info->nodesize_bits); + unsigned long end_index = (end >> fs_info->nodesize_bits); eb_batch_init(&batch); while (start_index <= end_index) { @@ -2151,7 +2177,7 @@ int btree_write_cache_pages(struct address_space *mapping, eb_batch_init(&batch); if (wbc->range_cyclic) { - index = ((mapping->writeback_index << PAGE_SHIFT) >> fs_info->sectorsize_bits); + index = ((mapping->writeback_index << PAGE_SHIFT) >> fs_info->nodesize_bits); end = -1; /* @@ -2160,8 +2186,8 @@ int btree_write_cache_pages(struct address_space *mapping, */ scanned = (index == 0); } else { - index = (wbc->range_start >> fs_info->sectorsize_bits); - end = (wbc->range_end >> fs_info->sectorsize_bits); + index = (wbc->range_start >> fs_info->nodesize_bits); + end = (wbc->range_end >> fs_info->nodesize_bits); scanned = 1; } @@ -2489,7 +2515,7 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f continue; } - cur_end = min_t(u64, folio_pos(folio) + folio_size(folio) - 1, end); + cur_end = min_t(u64, folio_end(folio) - 1, end); cur_len = cur_end + 1 - cur; ASSERT(folio_test_locked(folio)); @@ -2541,7 +2567,10 @@ int btrfs_writepages(struct address_space *mapping, struct writeback_control *wb void btrfs_readahead(struct readahead_control *rac) { - struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ | REQ_RAHEAD }; + struct btrfs_bio_ctrl bio_ctrl = { + .opf = REQ_OP_READ | REQ_RAHEAD, + .ractl = rac + }; struct folio *folio; struct btrfs_inode *inode = BTRFS_I(rac->mapping->host); const u64 start = readahead_pos(rac); @@ -2731,13 +2760,13 @@ static int extent_buffer_under_io(const struct extent_buffer *eb) static bool folio_range_has_eb(struct folio *folio) { - struct btrfs_subpage *subpage; + struct btrfs_folio_state *bfs; lockdep_assert_held(&folio->mapping->i_private_lock); if (folio_test_private(folio)) { - subpage = folio_get_private(folio); - if (atomic_read(&subpage->eb_refs)) + bfs = folio_get_private(folio); + if (atomic_read(&bfs->eb_refs)) return true; } return false; @@ -2787,7 +2816,7 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo * attached to one dummy eb, no sharing. */ if (!mapped) { - btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA); + btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_METADATA); return; } @@ -2798,7 +2827,7 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo * page range and no unfinished IO. */ if (!folio_range_has_eb(folio)) - btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA); + btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_METADATA); spin_unlock(&mapping->i_private_lock); } @@ -2842,7 +2871,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *fs_info btrfs_leak_debug_add_eb(eb); spin_lock_init(&eb->refs_lock); - atomic_set(&eb->refs, 1); + refcount_set(&eb->refs, 1); ASSERT(eb->len <= BTRFS_MAX_METADATA_BLOCKSIZE); @@ -2975,13 +3004,13 @@ static void check_buffer_tree_ref(struct extent_buffer *eb) * once io is initiated, TREE_REF can no longer be cleared, so that is * the moment at which any such race is best fixed. */ - refs = atomic_read(&eb->refs); + refs = refcount_read(&eb->refs); if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) return; spin_lock(&eb->refs_lock); if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) - atomic_inc(&eb->refs); + refcount_inc(&eb->refs); spin_unlock(&eb->refs_lock); } @@ -3038,7 +3067,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, eb->fs_info = fs_info; again: xa_lock_irq(&fs_info->buffer_tree); - exists = __xa_cmpxchg(&fs_info->buffer_tree, start >> fs_info->sectorsize_bits, + exists = __xa_cmpxchg(&fs_info->buffer_tree, start >> fs_info->nodesize_bits, NULL, eb, GFP_NOFS); if (xa_is_err(exists)) { ret = xa_err(exists); @@ -3047,7 +3076,7 @@ again: return ERR_PTR(ret); } if (exists) { - if (!atomic_inc_not_zero(&exists->refs)) { + if (!refcount_inc_not_zero(&exists->refs)) { /* The extent buffer is being freed, retry. */ xa_unlock_irq(&fs_info->buffer_tree); goto again; @@ -3092,7 +3121,7 @@ static struct extent_buffer *grab_extent_buffer(struct btrfs_fs_info *fs_info, * just overwrite folio private. */ exists = folio_get_private(folio); - if (atomic_inc_not_zero(&exists->refs)) + if (refcount_inc_not_zero(&exists->refs)) return exists; WARN_ON(folio_test_dirty(folio)); @@ -3141,13 +3170,13 @@ static bool check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start) * The caller needs to free the existing folios and retry using the same order. */ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i, - struct btrfs_subpage *prealloc, + struct btrfs_folio_state *prealloc, struct extent_buffer **found_eb_ret) { struct btrfs_fs_info *fs_info = eb->fs_info; struct address_space *mapping = fs_info->btree_inode->i_mapping; - const unsigned long index = eb->start >> PAGE_SHIFT; + const pgoff_t index = eb->start >> PAGE_SHIFT; struct folio *existing_folio; int ret; @@ -3224,7 +3253,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, int attached = 0; struct extent_buffer *eb; struct extent_buffer *existing_eb = NULL; - struct btrfs_subpage *prealloc = NULL; + struct btrfs_folio_state *prealloc = NULL; u64 lockdep_owner = owner_root; bool page_contig = true; int uptodate = 1; @@ -3269,7 +3298,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, * manually if we exit earlier. */ if (btrfs_meta_is_subpage(fs_info)) { - prealloc = btrfs_alloc_subpage(fs_info, PAGE_SIZE, BTRFS_SUBPAGE_METADATA); + prealloc = btrfs_alloc_folio_state(fs_info, PAGE_SIZE, BTRFS_SUBPAGE_METADATA); if (IS_ERR(prealloc)) { ret = PTR_ERR(prealloc); goto out; @@ -3280,7 +3309,7 @@ reallocate: /* Allocate all pages first. */ ret = alloc_eb_folio_array(eb, true); if (ret < 0) { - btrfs_free_subpage(prealloc); + btrfs_free_folio_state(prealloc); goto out; } @@ -3354,7 +3383,7 @@ reallocate: again: xa_lock_irq(&fs_info->buffer_tree); existing_eb = __xa_cmpxchg(&fs_info->buffer_tree, - start >> fs_info->sectorsize_bits, NULL, eb, + start >> fs_info->nodesize_bits, NULL, eb, GFP_NOFS); if (xa_is_err(existing_eb)) { ret = xa_err(existing_eb); @@ -3362,7 +3391,7 @@ again: goto out; } if (existing_eb) { - if (!atomic_inc_not_zero(&existing_eb->refs)) { + if (!refcount_inc_not_zero(&existing_eb->refs)) { xa_unlock_irq(&fs_info->buffer_tree); goto again; } @@ -3391,7 +3420,7 @@ again: return eb; out: - WARN_ON(!atomic_dec_and_test(&eb->refs)); + WARN_ON(!refcount_dec_and_test(&eb->refs)); /* * Any attached folios need to be detached before we unlock them. This @@ -3437,8 +3466,7 @@ static int release_extent_buffer(struct extent_buffer *eb) { lockdep_assert_held(&eb->refs_lock); - WARN_ON(atomic_read(&eb->refs) == 0); - if (atomic_dec_and_test(&eb->refs)) { + if (refcount_dec_and_test(&eb->refs)) { struct btrfs_fs_info *fs_info = eb->fs_info; spin_unlock(&eb->refs_lock); @@ -3458,7 +3486,7 @@ static int release_extent_buffer(struct extent_buffer *eb) * in this case. */ xa_cmpxchg_irq(&fs_info->buffer_tree, - eb->start >> fs_info->sectorsize_bits, eb, NULL, + eb->start >> fs_info->nodesize_bits, eb, NULL, GFP_ATOMIC); btrfs_leak_debug_del_eb(eb); @@ -3484,22 +3512,26 @@ void free_extent_buffer(struct extent_buffer *eb) if (!eb) return; - refs = atomic_read(&eb->refs); + refs = refcount_read(&eb->refs); while (1) { - if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3) - || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && - refs == 1)) + if (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags)) { + if (refs == 1) + break; + } else if (refs <= 3) { break; - if (atomic_try_cmpxchg(&eb->refs, &refs, refs - 1)) + } + + /* Optimization to avoid locking eb->refs_lock. */ + if (atomic_try_cmpxchg(&eb->refs.refs, &refs, refs - 1)) return; } spin_lock(&eb->refs_lock); - if (atomic_read(&eb->refs) == 2 && + if (refcount_read(&eb->refs) == 2 && test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && !extent_buffer_under_io(eb) && test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) - atomic_dec(&eb->refs); + refcount_dec(&eb->refs); /* * I know this is terrible, but it's temporary until we stop tracking @@ -3516,9 +3548,9 @@ void free_extent_buffer_stale(struct extent_buffer *eb) spin_lock(&eb->refs_lock); set_bit(EXTENT_BUFFER_STALE, &eb->bflags); - if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) && + if (refcount_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) && test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) - atomic_dec(&eb->refs); + refcount_dec(&eb->refs); release_extent_buffer(eb); } @@ -3576,7 +3608,7 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans, btree_clear_folio_dirty_tag(folio); folio_unlock(folio); } - WARN_ON(atomic_read(&eb->refs) == 0); + WARN_ON(refcount_read(&eb->refs) == 0); } void set_extent_buffer_dirty(struct extent_buffer *eb) @@ -3587,7 +3619,7 @@ void set_extent_buffer_dirty(struct extent_buffer *eb) was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); - WARN_ON(atomic_read(&eb->refs) == 0); + WARN_ON(refcount_read(&eb->refs) == 0); WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags)); @@ -3646,9 +3678,7 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb) static void clear_extent_buffer_reading(struct extent_buffer *eb) { - clear_bit(EXTENT_BUFFER_READING, &eb->bflags); - smp_mb__after_atomic(); - wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING); + clear_and_wake_up_bit(EXTENT_BUFFER_READING, &eb->bflags); } static void end_bbio_meta_read(struct btrfs_bio *bbio) @@ -3713,7 +3743,7 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num, eb->read_mirror = 0; check_buffer_tree_ref(eb); - atomic_inc(&eb->refs); + refcount_inc(&eb->refs); bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES, REQ_OP_READ | REQ_META, eb->fs_info, @@ -3725,7 +3755,7 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num, for (int i = 0; i < num_extent_folios(eb); i++) { struct folio *folio = eb->folios[i]; u64 range_start = max_t(u64, eb->start, folio_pos(folio)); - u32 range_len = min_t(u64, folio_pos(folio) + folio_size(folio), + u32 range_len = min_t(u64, folio_end(folio), eb->start + eb->len) - range_start; bio_add_folio_nofail(&bbio->bio, folio, range_len, @@ -4104,8 +4134,8 @@ static inline void eb_bitmap_offset(const struct extent_buffer *eb, * @start: offset of the bitmap item in the extent buffer * @nr: bit number to test */ -int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start, - unsigned long nr) +bool extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start, + unsigned long nr) { unsigned long i; size_t offset; @@ -4296,9 +4326,9 @@ static int try_release_subpage_extent_buffer(struct folio *folio) { struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); struct extent_buffer *eb; - unsigned long start = (folio_pos(folio) >> fs_info->sectorsize_bits); + unsigned long start = (folio_pos(folio) >> fs_info->nodesize_bits); unsigned long index = start; - unsigned long end = index + (PAGE_SIZE >> fs_info->sectorsize_bits) - 1; + unsigned long end = index + (PAGE_SIZE >> fs_info->nodesize_bits) - 1; int ret; xa_lock_irq(&fs_info->buffer_tree); @@ -4308,7 +4338,7 @@ static int try_release_subpage_extent_buffer(struct folio *folio) * won't disappear out from under us. */ spin_lock(&eb->refs_lock); - if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { + if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { spin_unlock(&eb->refs_lock); continue; } @@ -4374,7 +4404,7 @@ int try_release_extent_buffer(struct folio *folio) * this page. */ spin_lock(&eb->refs_lock); - if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { + if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { spin_unlock(&eb->refs_lock); spin_unlock(&folio->mapping->i_private_lock); return 0; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index e36e8d6a00bc..61130786b9a3 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -98,7 +98,7 @@ struct extent_buffer { void *addr; spinlock_t refs_lock; - atomic_t refs; + refcount_t refs; int read_mirror; /* >= 0 if eb belongs to a log tree, -1 otherwise */ s8 log_index; @@ -345,8 +345,8 @@ void memmove_extent_buffer(const struct extent_buffer *dst, unsigned long len); void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start, unsigned long len); -int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start, - unsigned long pos); +bool extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start, + unsigned long pos); void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start, unsigned long pos, unsigned long len); void extent_buffer_bitmap_clear(const struct extent_buffer *eb, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 02bfdb976e40..57f52585a6dd 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -84,7 +84,7 @@ static void remove_em(struct btrfs_inode *inode, struct extent_map *em) rb_erase(&em->rb_node, &inode->extent_tree.root); RB_CLEAR_NODE(&em->rb_node); - if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(inode->root))) + if (!btrfs_is_testing(fs_info) && btrfs_is_fstree(btrfs_root_id(inode->root))) percpu_counter_dec(&fs_info->evictable_extent_maps); } @@ -502,7 +502,7 @@ static int add_extent_mapping(struct btrfs_inode *inode, setup_extent_mapping(inode, em, modified); - if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(root))) + if (!btrfs_is_testing(fs_info) && btrfs_is_fstree(btrfs_root_id(root))) percpu_counter_inc(&fs_info->evictable_extent_maps); return 0; @@ -1337,7 +1337,7 @@ static void btrfs_extent_map_shrinker_worker(struct work_struct *work) if (!root) continue; - if (is_fstree(btrfs_root_id(root))) + if (btrfs_is_fstree(btrfs_root_id(root))) nr_dropped += btrfs_scan_root(root, &ctx); btrfs_put_root(root); diff --git a/fs/btrfs/fiemap.c b/fs/btrfs/fiemap.c index 43bf0979fd53..7935586a9dbd 100644 --- a/fs/btrfs/fiemap.c +++ b/fs/btrfs/fiemap.c @@ -320,7 +320,7 @@ static int fiemap_next_leaf_item(struct btrfs_inode *inode, struct btrfs_path *p * the cost of allocating a new one. */ ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED, &clone->bflags)); - atomic_inc(&clone->refs); + refcount_inc(&clone->refs); ret = btrfs_next_leaf(inode->root, path); if (ret != 0) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 54d523d4f421..c09fbc257634 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -427,7 +427,7 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio) memset(csum_dst, 0, csum_size); count = 1; - if (btrfs_root_id(inode->root) == BTRFS_DATA_RELOC_TREE_OBJECTID) { + if (btrfs_is_data_reloc_root(inode->root)) { u64 file_offset = bbio->file_offset + bio_offset; btrfs_set_extent_bit(&inode->io_tree, file_offset, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8ce6f45f45e0..bc1e00db96c9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -89,8 +89,7 @@ int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos num_bytes = round_up(write_bytes + pos - start_pos, fs_info->sectorsize); ASSERT(num_bytes <= U32_MAX); - ASSERT(folio_pos(folio) <= pos && - folio_pos(folio) + folio_size(folio) >= pos + write_bytes); + ASSERT(folio_pos(folio) <= pos && folio_end(folio) >= pos + write_bytes); end_of_last_block = start_pos + num_bytes - 1; @@ -801,7 +800,7 @@ static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64 u64 len) { u64 clamp_start = max_t(u64, pos, folio_pos(folio)); - u64 clamp_end = min_t(u64, pos + len, folio_pos(folio) + folio_size(folio)); + u64 clamp_end = min_t(u64, pos + len, folio_end(folio)); const u32 blocksize = inode_to_fs_info(inode)->sectorsize; int ret = 0; @@ -857,7 +856,7 @@ static noinline int prepare_one_folio(struct inode *inode, struct folio **folio_ loff_t pos, size_t write_bytes, bool nowait) { - unsigned long index = pos >> PAGE_SHIFT; + const pgoff_t index = pos >> PAGE_SHIFT; gfp_t mask = get_prepare_gfp_flags(inode, nowait); fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN) | fgf_set_order(write_bytes); @@ -963,6 +962,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio, * @pos: File offset. * @write_bytes: The length to write, will be updated to the nocow writeable * range. + * @nowait: Indicate if we can block or not (non-blocking IO context). * * This function will flush ordered extents in the range to ensure proper * nocow checks. @@ -971,7 +971,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio, * > 0 If we can nocow, and updates @write_bytes. * 0 If we can't do a nocow write. * -EAGAIN If we can't do a nocow write because snapshoting of the inode's - * root is in progress. + * root is in progress or because we are in a non-blocking IO + * context and need to block (@nowait is true). * < 0 If an error happened. * * NOTE: Callers need to call btrfs_check_nocow_unlock() if we return > 0. @@ -983,8 +984,8 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, struct btrfs_root *root = inode->root; struct extent_state *cached_state = NULL; u64 lockstart, lockend; - u64 num_bytes; - int ret; + u64 cur_offset; + int ret = 0; if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC))) return 0; @@ -995,7 +996,6 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, lockstart = round_down(pos, fs_info->sectorsize); lockend = round_up(pos + *write_bytes, fs_info->sectorsize) - 1; - num_bytes = lockend - lockstart + 1; if (nowait) { if (!btrfs_try_lock_ordered_range(inode, lockstart, lockend, @@ -1007,14 +1007,36 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, btrfs_lock_and_flush_ordered_range(inode, lockstart, lockend, &cached_state); } - ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, nowait); - if (ret <= 0) - btrfs_drew_write_unlock(&root->snapshot_lock); - else - *write_bytes = min_t(size_t, *write_bytes , - num_bytes - pos + lockstart); + + cur_offset = lockstart; + while (cur_offset < lockend) { + u64 num_bytes = lockend - cur_offset + 1; + + ret = can_nocow_extent(inode, cur_offset, &num_bytes, NULL, nowait); + if (ret <= 0) { + /* + * If cur_offset == lockstart it means we haven't found + * any extent against which we can NOCOW, so unlock the + * snapshot lock. + */ + if (cur_offset == lockstart) + btrfs_drew_write_unlock(&root->snapshot_lock); + break; + } + cur_offset += num_bytes; + } + btrfs_unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + /* + * cur_offset > lockstart means there's at least a partial range we can + * NOCOW, and that range can cover one or more extents. + */ + if (cur_offset > lockstart) { + *write_bytes = min_t(size_t, *write_bytes, cur_offset - pos); + return 1; + } + return ret; } @@ -1233,8 +1255,8 @@ again: * The reserved range goes beyond the current folio, shrink the reserved * space to the folio boundary. */ - if (reserved_start + reserved_len > folio_pos(folio) + folio_size(folio)) { - const u64 last_block = folio_pos(folio) + folio_size(folio); + if (reserved_start + reserved_len > folio_end(folio)) { + const u64 last_block = folio_end(folio); shrink_reserved_space(inode, *data_reserved, reserved_start, reserved_len, last_block - reserved_start, @@ -1832,9 +1854,9 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; struct folio *folio = page_folio(page); - struct inode *inode = file_inode(vmf->vma->vm_file); - struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_inode *inode = BTRFS_I(file_inode(vmf->vma->vm_file)); + struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct extent_io_tree *io_tree = &inode->io_tree; struct btrfs_ordered_extent *ordered; struct extent_state *cached_state = NULL; struct extent_changeset *data_reserved = NULL; @@ -1842,6 +1864,7 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) loff_t size; size_t fsize = folio_size(folio); int ret; + bool only_release_metadata = false; u64 reserved_space; u64 page_start; u64 page_end; @@ -1849,7 +1872,7 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) reserved_space = fsize; - sb_start_pagefault(inode->i_sb); + sb_start_pagefault(inode->vfs_inode.i_sb); page_start = folio_pos(folio); page_end = page_start + folio_size(folio) - 1; end = page_end; @@ -1862,20 +1885,43 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) * end up waiting indefinitely to get a lock on the page currently * being processed by btrfs_page_mkwrite() function. */ - ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, - page_start, reserved_space); - if (ret < 0) + ret = btrfs_check_data_free_space(inode, &data_reserved, page_start, + reserved_space, false); + if (ret < 0) { + size_t write_bytes = reserved_space; + + if (btrfs_check_nocow_lock(inode, page_start, &write_bytes, false) <= 0) + goto out_noreserve; + + only_release_metadata = true; + + /* + * Can't write the whole range, there may be shared extents or + * holes in the range, bail out with @only_release_metadata set + * to true so that we unlock the nocow lock before returning the + * error. + */ + if (write_bytes < reserved_space) + goto out_noreserve; + } + ret = btrfs_delalloc_reserve_metadata(inode, reserved_space, + reserved_space, false); + if (ret < 0) { + if (!only_release_metadata) + btrfs_free_reserved_data_space(inode, data_reserved, + page_start, reserved_space); goto out_noreserve; + } ret = file_update_time(vmf->vma->vm_file); if (ret < 0) goto out; again: - down_read(&BTRFS_I(inode)->i_mmap_lock); + down_read(&inode->i_mmap_lock); folio_lock(folio); - size = i_size_read(inode); + size = i_size_read(&inode->vfs_inode); - if ((folio->mapping != inode->i_mapping) || + if ((folio->mapping != inode->vfs_inode.i_mapping) || (page_start >= size)) { /* Page got truncated out from underneath us. */ goto out_unlock; @@ -1893,11 +1939,11 @@ again: * We can't set the delalloc bits if there are pending ordered * extents. Drop our locks and wait for them to finish. */ - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, fsize); + ordered = btrfs_lookup_ordered_range(inode, page_start, fsize); if (ordered) { btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state); folio_unlock(folio); - up_read(&BTRFS_I(inode)->i_mmap_lock); + up_read(&inode->i_mmap_lock); btrfs_start_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); goto again; @@ -1906,10 +1952,14 @@ again: if (folio_contains(folio, (size - 1) >> PAGE_SHIFT)) { reserved_space = round_up(size - page_start, fs_info->sectorsize); if (reserved_space < fsize) { + const u64 to_free = fsize - reserved_space; + end = page_start + reserved_space - 1; - btrfs_delalloc_release_space(BTRFS_I(inode), - data_reserved, end + 1, - fsize - reserved_space, true); + if (only_release_metadata) + btrfs_delalloc_release_metadata(inode, to_free, true); + else + btrfs_delalloc_release_space(inode, data_reserved, + end + 1, to_free, true); } } @@ -1920,12 +1970,11 @@ again: * clear any delalloc bits within this page range since we have to * reserve data&meta space before lock_page() (see above comments). */ - btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end, + btrfs_clear_extent_bit(io_tree, page_start, end, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, &cached_state); - ret = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0, - &cached_state); + ret = btrfs_set_extent_delalloc(inode, page_start, end, 0, &cached_state); if (ret < 0) { btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state); goto out_unlock; @@ -1944,26 +1993,38 @@ again: btrfs_folio_set_dirty(fs_info, folio, page_start, end + 1 - page_start); btrfs_folio_set_uptodate(fs_info, folio, page_start, end + 1 - page_start); - btrfs_set_inode_last_sub_trans(BTRFS_I(inode)); + btrfs_set_inode_last_sub_trans(inode); + + if (only_release_metadata) + btrfs_set_extent_bit(io_tree, page_start, end, EXTENT_NORESERVE, + &cached_state); btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state); - up_read(&BTRFS_I(inode)->i_mmap_lock); + up_read(&inode->i_mmap_lock); - btrfs_delalloc_release_extents(BTRFS_I(inode), fsize); - sb_end_pagefault(inode->i_sb); + btrfs_delalloc_release_extents(inode, fsize); + if (only_release_metadata) + btrfs_check_nocow_unlock(inode); + sb_end_pagefault(inode->vfs_inode.i_sb); extent_changeset_free(data_reserved); return VM_FAULT_LOCKED; out_unlock: folio_unlock(folio); - up_read(&BTRFS_I(inode)->i_mmap_lock); + up_read(&inode->i_mmap_lock); out: - btrfs_delalloc_release_extents(BTRFS_I(inode), fsize); - btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start, - reserved_space, true); + btrfs_delalloc_release_extents(inode, fsize); + if (only_release_metadata) + btrfs_delalloc_release_metadata(inode, reserved_space, true); + else + btrfs_delalloc_release_space(inode, data_reserved, page_start, + reserved_space, true); extent_changeset_free(data_reserved); out_noreserve: - sb_end_pagefault(inode->i_sb); + if (only_release_metadata) + btrfs_check_nocow_unlock(inode); + + sb_end_pagefault(inode->vfs_inode.i_sb); if (ret < 0) return vmf_error(ret); @@ -2195,7 +2256,7 @@ static bool check_range_has_page(struct inode *inode, u64 start, u64 end) if (folio->index < start_index) continue; /* A large folio extends beyond the end. Not a target. */ - if (folio->index + folio_nr_pages(folio) > end_index) + if (folio_next_index(folio) > end_index) continue; /* A folio doesn't cover the head/tail index. Found a target. */ ret = true; @@ -2341,7 +2402,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode, u64 min_size = btrfs_calc_insert_metadata_size(fs_info, 1); u64 ino_size = round_up(inode->vfs_inode.i_size, fs_info->sectorsize); struct btrfs_trans_handle *trans = NULL; - struct btrfs_block_rsv *rsv; + struct btrfs_block_rsv rsv; unsigned int rsv_count; u64 cur_offset; u64 len = end - start; @@ -2350,13 +2411,9 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode, if (end <= start) return -EINVAL; - rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); - if (!rsv) { - ret = -ENOMEM; - goto out; - } - rsv->size = btrfs_calc_insert_metadata_size(fs_info, 1); - rsv->failfast = true; + btrfs_init_metadata_block_rsv(fs_info, &rsv, BTRFS_BLOCK_RSV_TEMP); + rsv.size = btrfs_calc_insert_metadata_size(fs_info, 1); + rsv.failfast = true; /* * 1 - update the inode @@ -2373,14 +2430,14 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode, if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; - goto out_free; + goto out_release; } - ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, + ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, &rsv, min_size, false); if (WARN_ON(ret)) goto out_trans; - trans->block_rsv = rsv; + trans->block_rsv = &rsv; cur_offset = start; drop_args.path = path; @@ -2496,10 +2553,10 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode, } ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, - rsv, min_size, false); + &rsv, min_size, false); if (WARN_ON(ret)) break; - trans->block_rsv = rsv; + trans->block_rsv = &rsv; cur_offset = drop_args.drop_end; len = end - cur_offset; @@ -2576,16 +2633,15 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode, out_trans: if (!trans) - goto out_free; + goto out_release; trans->block_rsv = &fs_info->trans_block_rsv; if (ret) btrfs_end_transaction(trans); else *trans_out = trans; -out_free: - btrfs_free_block_rsv(fs_info, rsv); -out: +out_release: + btrfs_block_rsv_release(fs_info, &rsv, (u64)-1, NULL); return ret; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 4b34ea1f01c2..5d8d1570a5c9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -366,7 +366,7 @@ fail: static void readahead_cache(struct inode *inode) { struct file_ra_state ra; - unsigned long last_index; + pgoff_t last_index; file_ra_state_init(&ra, inode->i_mapping); last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; @@ -3192,7 +3192,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group, u64 *max_extent_size) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; - int err; + int ret2; u64 search_start = cluster->window_start; u64 search_bytes = bytes; u64 ret = 0; @@ -3200,8 +3200,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group, search_start = min_start; search_bytes = bytes; - err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); - if (err) { + ret2 = search_bitmap(ctl, entry, &search_start, &search_bytes, true); + if (ret2) { *max_extent_size = max(get_max_extent_size(entry), *max_extent_size); return 0; diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index a83c268f7f87..eba7f22ae49c 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -35,7 +35,7 @@ static struct btrfs_root *btrfs_free_space_root( return btrfs_global_root(block_group->fs_info, &key); } -void set_free_space_tree_thresholds(struct btrfs_block_group *cache) +void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache) { u32 bitmap_range; size_t bitmap_size; @@ -82,22 +82,19 @@ static int add_new_free_space_info(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info)); if (ret) - goto out; + return ret; leaf = path->nodes[0]; info = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_free_space_info); btrfs_set_free_space_extent_count(leaf, info, 0); btrfs_set_free_space_flags(leaf, info, 0); - - ret = 0; -out: btrfs_release_path(path); - return ret; + return 0; } EXPORT_FOR_TESTS -struct btrfs_free_space_info *search_free_space_info( +struct btrfs_free_space_info *btrfs_search_free_space_info( struct btrfs_trans_handle *trans, struct btrfs_block_group *block_group, struct btrfs_path *path, int cow) @@ -201,9 +198,9 @@ static void le_bitmap_set(unsigned long *map, unsigned int start, int len) } EXPORT_FOR_TESTS -int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group, - struct btrfs_path *path) +int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group, + struct btrfs_path *path) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = btrfs_free_space_root(block_group); @@ -281,7 +278,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, btrfs_release_path(path); } - info = search_free_space_info(trans, block_group, path, 1); + info = btrfs_search_free_space_info(trans, block_group, path, 1); if (IS_ERR(info)) { ret = PTR_ERR(info); btrfs_abort_transaction(trans, ret); @@ -290,6 +287,8 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; flags = btrfs_free_space_flags(leaf, info); flags |= BTRFS_FREE_SPACE_USING_BITMAPS; + block_group->using_free_space_bitmaps = true; + block_group->using_free_space_bitmaps_cached = true; btrfs_set_free_space_flags(leaf, info, flags); expected_extent_count = btrfs_free_space_extent_count(leaf, info); btrfs_release_path(path); @@ -343,9 +342,9 @@ out: } EXPORT_FOR_TESTS -int convert_free_space_to_extents(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group, - struct btrfs_path *path) +int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group, + struct btrfs_path *path) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = btrfs_free_space_root(block_group); @@ -409,12 +408,12 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, data_size = free_space_bitmap_size(fs_info, found_key.offset); - ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1); + path->slots[0]--; + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); read_extent_buffer(leaf, bitmap_cursor, ptr, data_size); nr++; - path->slots[0]--; } else { ASSERT(0); } @@ -428,7 +427,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, btrfs_release_path(path); } - info = search_free_space_info(trans, block_group, path, 1); + info = btrfs_search_free_space_info(trans, block_group, path, 1); if (IS_ERR(info)) { ret = PTR_ERR(info); btrfs_abort_transaction(trans, ret); @@ -437,20 +436,22 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; flags = btrfs_free_space_flags(leaf, info); flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS; + block_group->using_free_space_bitmaps = false; + block_group->using_free_space_bitmaps_cached = true; btrfs_set_free_space_flags(leaf, info, flags); expected_extent_count = btrfs_free_space_extent_count(leaf, info); btrfs_release_path(path); - nrbits = block_group->length >> block_group->fs_info->sectorsize_bits; + nrbits = block_group->length >> fs_info->sectorsize_bits; start_bit = find_next_bit_le(bitmap, nrbits, 0); while (start_bit < nrbits) { end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit); ASSERT(start_bit < end_bit); - key.objectid = start + start_bit * block_group->fs_info->sectorsize; + key.objectid = start + start_bit * fs_info->sectorsize; key.type = BTRFS_FREE_SPACE_EXTENT_KEY; - key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize; + key.offset = (end_bit - start_bit) * fs_info->sectorsize; ret = btrfs_insert_empty_item(trans, root, path, &key, 0); if (ret) { @@ -493,11 +494,10 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans, if (new_extents == 0) return 0; - info = search_free_space_info(trans, block_group, path, 1); - if (IS_ERR(info)) { - ret = PTR_ERR(info); - goto out; - } + info = btrfs_search_free_space_info(trans, block_group, path, 1); + if (IS_ERR(info)) + return PTR_ERR(info); + flags = btrfs_free_space_flags(path->nodes[0], info); extent_count = btrfs_free_space_extent_count(path->nodes[0], info); @@ -507,19 +507,18 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans, if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) && extent_count > block_group->bitmap_high_thresh) { - ret = convert_free_space_to_bitmaps(trans, block_group, path); + ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path); } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) && extent_count < block_group->bitmap_low_thresh) { - ret = convert_free_space_to_extents(trans, block_group, path); + ret = btrfs_convert_free_space_to_extents(trans, block_group, path); } -out: return ret; } EXPORT_FOR_TESTS -int free_space_test_bit(struct btrfs_block_group *block_group, - struct btrfs_path *path, u64 offset) +bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group, + struct btrfs_path *path, u64 offset) { struct extent_buffer *leaf; struct btrfs_key key; @@ -537,13 +536,13 @@ int free_space_test_bit(struct btrfs_block_group *block_group, ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); i = div_u64(offset - found_start, block_group->fs_info->sectorsize); - return !!extent_buffer_test_bit(leaf, ptr, i); + return extent_buffer_test_bit(leaf, ptr, i); } -static void free_space_set_bits(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group, - struct btrfs_path *path, u64 *start, u64 *size, - int bit) +static void free_space_modify_bits(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group, + struct btrfs_path *path, u64 *start, u64 *size, + bool set_bits) { struct btrfs_fs_info *fs_info = block_group->fs_info; struct extent_buffer *leaf; @@ -567,7 +566,7 @@ static void free_space_set_bits(struct btrfs_trans_handle *trans, ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); first = (*start - found_start) >> fs_info->sectorsize_bits; last = (end - found_start) >> fs_info->sectorsize_bits; - if (bit) + if (set_bits) extent_buffer_bitmap_set(leaf, ptr, first, last - first); else extent_buffer_bitmap_clear(leaf, ptr, first, last - first); @@ -611,13 +610,14 @@ static int free_space_next_bitmap(struct btrfs_trans_handle *trans, static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, struct btrfs_block_group *block_group, struct btrfs_path *path, - u64 start, u64 size, int remove) + u64 start, u64 size, bool remove) { struct btrfs_root *root = btrfs_free_space_root(block_group); struct btrfs_key key; u64 end = start + size; u64 cur_start, cur_size; - int prev_bit, next_bit; + bool prev_bit_set = false; + bool next_bit_set = false; int new_extents; int ret; @@ -634,16 +634,16 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); if (ret) - goto out; + return ret; - prev_bit = free_space_test_bit(block_group, path, prev_block); + prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block); /* The previous block may have been in the previous bitmap. */ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (start >= key.objectid + key.offset) { ret = free_space_next_bitmap(trans, root, path); if (ret) - goto out; + return ret; } } else { key.objectid = start; @@ -652,9 +652,7 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); if (ret) - goto out; - - prev_bit = -1; + return ret; } /* @@ -664,13 +662,13 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, cur_start = start; cur_size = size; while (1) { - free_space_set_bits(trans, block_group, path, &cur_start, &cur_size, - !remove); + free_space_modify_bits(trans, block_group, path, &cur_start, + &cur_size, !remove); if (cur_size == 0) break; ret = free_space_next_bitmap(trans, root, path); if (ret) - goto out; + return ret; } /* @@ -683,42 +681,36 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, if (end >= key.objectid + key.offset) { ret = free_space_next_bitmap(trans, root, path); if (ret) - goto out; + return ret; } - next_bit = free_space_test_bit(block_group, path, end); - } else { - next_bit = -1; + next_bit_set = btrfs_free_space_test_bit(block_group, path, end); } if (remove) { new_extents = -1; - if (prev_bit == 1) { + if (prev_bit_set) { /* Leftover on the left. */ new_extents++; } - if (next_bit == 1) { + if (next_bit_set) { /* Leftover on the right. */ new_extents++; } } else { new_extents = 1; - if (prev_bit == 1) { + if (prev_bit_set) { /* Merging with neighbor on the left. */ new_extents--; } - if (next_bit == 1) { + if (next_bit_set) { /* Merging with neighbor on the right. */ new_extents--; } } btrfs_release_path(path); - ret = update_free_space_extent_count(trans, block_group, path, - new_extents); - -out: - return ret; + return update_free_space_extent_count(trans, block_group, path, new_extents); } static int remove_free_space_extent(struct btrfs_trans_handle *trans, @@ -739,7 +731,7 @@ static int remove_free_space_extent(struct btrfs_trans_handle *trans, ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); if (ret) - goto out; + return ret; btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); @@ -771,7 +763,7 @@ static int remove_free_space_extent(struct btrfs_trans_handle *trans, /* Delete the existing key (cases 1-4). */ ret = btrfs_del_item(trans, root, path); if (ret) - goto out; + return ret; /* Add a key for leftovers at the beginning (cases 3 and 4). */ if (start > found_start) { @@ -782,7 +774,7 @@ static int remove_free_space_extent(struct btrfs_trans_handle *trans, btrfs_release_path(path); ret = btrfs_insert_empty_item(trans, root, path, &key, 0); if (ret) - goto out; + return ret; new_extents++; } @@ -795,50 +787,58 @@ static int remove_free_space_extent(struct btrfs_trans_handle *trans, btrfs_release_path(path); ret = btrfs_insert_empty_item(trans, root, path, &key, 0); if (ret) - goto out; + return ret; new_extents++; } btrfs_release_path(path); - ret = update_free_space_extent_count(trans, block_group, path, - new_extents); - -out: - return ret; + return update_free_space_extent_count(trans, block_group, path, new_extents); } -EXPORT_FOR_TESTS -int __remove_from_free_space_tree(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group, - struct btrfs_path *path, u64 start, u64 size) +static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path) { struct btrfs_free_space_info *info; u32 flags; - int ret; - if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) { - ret = __add_block_group_free_space(trans, block_group, path); - if (ret) - return ret; - } + if (bg->using_free_space_bitmaps_cached) + return bg->using_free_space_bitmaps; - info = search_free_space_info(NULL, block_group, path, 0); + info = btrfs_search_free_space_info(NULL, bg, path, 0); if (IS_ERR(info)) return PTR_ERR(info); flags = btrfs_free_space_flags(path->nodes[0], info); btrfs_release_path(path); - if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) { + bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS); + bg->using_free_space_bitmaps_cached = true; + + return bg->using_free_space_bitmaps; +} + +EXPORT_FOR_TESTS +int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group, + struct btrfs_path *path, u64 start, u64 size) +{ + int ret; + + ret = __add_block_group_free_space(trans, block_group, path); + if (ret) + return ret; + + ret = using_bitmaps(block_group, path); + if (ret < 0) + return ret; + + if (ret) return modify_free_space_bitmap(trans, block_group, path, - start, size, 1); - } else { - return remove_free_space_extent(trans, block_group, path, - start, size); - } + start, size, true); + + return remove_free_space_extent(trans, block_group, path, start, size); } -int remove_from_free_space_tree(struct btrfs_trans_handle *trans, - u64 start, u64 size) +int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, + u64 start, u64 size) { struct btrfs_block_group *block_group; struct btrfs_path *path; @@ -863,8 +863,7 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans, } mutex_lock(&block_group->free_space_lock); - ret = __remove_from_free_space_tree(trans, block_group, path, start, - size); + ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size); mutex_unlock(&block_group->free_space_lock); if (ret) btrfs_abort_transaction(trans, ret); @@ -918,7 +917,7 @@ static int add_free_space_extent(struct btrfs_trans_handle *trans, ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); if (ret) - goto out; + return ret; btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); @@ -941,7 +940,7 @@ static int add_free_space_extent(struct btrfs_trans_handle *trans, if (found_end == start) { ret = btrfs_del_item(trans, root, path); if (ret) - goto out; + return ret; new_key.objectid = found_start; new_key.offset += key.offset; new_extents--; @@ -958,7 +957,7 @@ right: ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); if (ret) - goto out; + return ret; btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); @@ -982,7 +981,7 @@ right: if (found_start == end) { ret = btrfs_del_item(trans, root, path); if (ret) - goto out; + return ret; new_key.offset += key.offset; new_extents--; } @@ -992,48 +991,36 @@ insert: /* Insert the new key (cases 1-4). */ ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0); if (ret) - goto out; + return ret; btrfs_release_path(path); - ret = update_free_space_extent_count(trans, block_group, path, - new_extents); - -out: - return ret; + return update_free_space_extent_count(trans, block_group, path, new_extents); } EXPORT_FOR_TESTS -int __add_to_free_space_tree(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group, - struct btrfs_path *path, u64 start, u64 size) +int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group, + struct btrfs_path *path, u64 start, u64 size) { - struct btrfs_free_space_info *info; - u32 flags; int ret; - if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) { - ret = __add_block_group_free_space(trans, block_group, path); - if (ret) - return ret; - } + ret = __add_block_group_free_space(trans, block_group, path); + if (ret) + return ret; - info = search_free_space_info(NULL, block_group, path, 0); - if (IS_ERR(info)) - return PTR_ERR(info); - flags = btrfs_free_space_flags(path->nodes[0], info); - btrfs_release_path(path); + ret = using_bitmaps(block_group, path); + if (ret < 0) + return ret; - if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) { + if (ret) return modify_free_space_bitmap(trans, block_group, path, - start, size, 0); - } else { - return add_free_space_extent(trans, block_group, path, start, - size); - } + start, size, false); + + return add_free_space_extent(trans, block_group, path, start, size); } -int add_to_free_space_tree(struct btrfs_trans_handle *trans, - u64 start, u64 size) +int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, + u64 start, u64 size) { struct btrfs_block_group *block_group; struct btrfs_path *path; @@ -1058,7 +1045,7 @@ int add_to_free_space_tree(struct btrfs_trans_handle *trans, } mutex_lock(&block_group->free_space_lock); - ret = __add_to_free_space_tree(trans, block_group, path, start, size); + ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size); mutex_unlock(&block_group->free_space_lock); if (ret) btrfs_abort_transaction(trans, ret); @@ -1138,11 +1125,11 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, break; if (start < key.objectid) { - ret = __add_to_free_space_tree(trans, - block_group, - path2, start, - key.objectid - - start); + ret = __btrfs_add_to_free_space_tree(trans, + block_group, + path2, start, + key.objectid - + start); if (ret) goto out_locked; } @@ -1161,8 +1148,8 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, goto out_locked; } if (start < end) { - ret = __add_to_free_space_tree(trans, block_group, path2, - start, end - start); + ret = __btrfs_add_to_free_space_tree(trans, block_group, path2, + start, end - start); if (ret) goto out_locked; } @@ -1403,9 +1390,12 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans, struct btrfs_block_group *block_group, struct btrfs_path *path) { + bool own_path = false; int ret; - clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags); + if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, + &block_group->runtime_flags)) + return 0; /* * While rebuilding the free space tree we may allocate new metadata @@ -1430,47 +1420,49 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans, */ set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags); + if (!path) { + path = btrfs_alloc_path(); + if (!path) { + btrfs_abort_transaction(trans, -ENOMEM); + return -ENOMEM; + } + own_path = true; + } + ret = add_new_free_space_info(trans, block_group, path); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; + } + + ret = __btrfs_add_to_free_space_tree(trans, block_group, path, + block_group->start, block_group->length); if (ret) - return ret; + btrfs_abort_transaction(trans, ret); - return __add_to_free_space_tree(trans, block_group, path, - block_group->start, - block_group->length); +out: + if (own_path) + btrfs_free_path(path); + + return ret; } -int add_block_group_free_space(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group) +int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group) { - struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_path *path = NULL; - int ret = 0; + int ret; - if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) + if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) return 0; mutex_lock(&block_group->free_space_lock); - if (!test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) - goto out; - - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - - ret = __add_block_group_free_space(trans, block_group, path); - -out: - btrfs_free_path(path); + ret = __add_block_group_free_space(trans, block_group, NULL); mutex_unlock(&block_group->free_space_lock); - if (ret) - btrfs_abort_transaction(trans, ret); return ret; } -int remove_block_group_free_space(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group) +int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group) { struct btrfs_root *root = btrfs_free_space_root(block_group); struct btrfs_path *path; @@ -1491,6 +1483,7 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; + btrfs_abort_transaction(trans, ret); goto out; } @@ -1503,8 +1496,10 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans, while (!done) { ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); goto out; + } leaf = path->nodes[0]; nr = 0; @@ -1532,16 +1527,16 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans, } ret = btrfs_del_items(trans, root, path, path->slots[0], nr); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); goto out; + } btrfs_release_path(path); } ret = 0; out: btrfs_free_path(path); - if (ret) - btrfs_abort_transaction(trans, ret); return ret; } @@ -1553,7 +1548,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, struct btrfs_fs_info *fs_info; struct btrfs_root *root; struct btrfs_key key; - int prev_bit = 0, bit; + bool prev_bit_set = false; /* Initialize to silence GCC. */ u64 extent_start = 0; u64 end, offset; @@ -1570,7 +1565,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, while (1) { ret = btrfs_next_item(root, path); if (ret < 0) - goto out; + return ret; if (ret) break; @@ -1584,10 +1579,12 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, offset = key.objectid; while (offset < key.objectid + key.offset) { - bit = free_space_test_bit(block_group, path, offset); - if (prev_bit == 0 && bit == 1) { + bool bit_set; + + bit_set = btrfs_free_space_test_bit(block_group, path, offset); + if (!prev_bit_set && bit_set) { extent_start = offset; - } else if (prev_bit == 1 && bit == 0) { + } else if (prev_bit_set && !bit_set) { u64 space_added; ret = btrfs_add_new_free_space(block_group, @@ -1595,7 +1592,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, offset, &space_added); if (ret) - goto out; + return ret; total_found += space_added; if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; @@ -1603,14 +1600,14 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, } extent_count++; } - prev_bit = bit; + prev_bit_set = bit_set; offset += fs_info->sectorsize; } } - if (prev_bit == 1) { + if (prev_bit_set) { ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL); if (ret) - goto out; + return ret; extent_count++; } @@ -1620,13 +1617,10 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, block_group->start, extent_count, expected_extent_count); DEBUG_WARN(); - ret = -EIO; - goto out; + return -EIO; } - ret = 0; -out: - return ret; + return 0; } static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, @@ -1653,7 +1647,7 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, ret = btrfs_next_item(root, path); if (ret < 0) - goto out; + return ret; if (ret) break; @@ -1669,7 +1663,7 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, key.objectid + key.offset, &space_added); if (ret) - goto out; + return ret; total_found += space_added; if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; @@ -1684,16 +1678,13 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, block_group->start, extent_count, expected_extent_count); DEBUG_WARN(); - ret = -EIO; - goto out; + return -EIO; } - ret = 0; -out: - return ret; + return 0; } -int load_free_space_tree(struct btrfs_caching_control *caching_ctl) +int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl) { struct btrfs_block_group *block_group; struct btrfs_free_space_info *info; @@ -1714,7 +1705,7 @@ int load_free_space_tree(struct btrfs_caching_control *caching_ctl) path->search_commit_root = 1; path->reada = READA_FORWARD; - info = search_free_space_info(NULL, block_group, path, 0); + info = btrfs_search_free_space_info(NULL, block_group, path, 0); if (IS_ERR(info)) return PTR_ERR(info); diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h index e6c6d6f4f221..3d9a5d4477fc 100644 --- a/fs/btrfs/free-space-tree.h +++ b/fs/btrfs/free-space-tree.h @@ -22,39 +22,39 @@ struct btrfs_trans_handle; #define BTRFS_FREE_SPACE_BITMAP_SIZE 256 #define BTRFS_FREE_SPACE_BITMAP_BITS (BTRFS_FREE_SPACE_BITMAP_SIZE * BITS_PER_BYTE) -void set_free_space_tree_thresholds(struct btrfs_block_group *block_group); +void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *block_group); int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info); int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info); int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info); -int load_free_space_tree(struct btrfs_caching_control *caching_ctl); -int add_block_group_free_space(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group); -int remove_block_group_free_space(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group); -int add_to_free_space_tree(struct btrfs_trans_handle *trans, - u64 start, u64 size); -int remove_from_free_space_tree(struct btrfs_trans_handle *trans, - u64 start, u64 size); +int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl); +int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group); +int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group); +int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, + u64 start, u64 size); +int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, + u64 start, u64 size); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct btrfs_free_space_info * -search_free_space_info(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group, - struct btrfs_path *path, int cow); -int __add_to_free_space_tree(struct btrfs_trans_handle *trans, +btrfs_search_free_space_info(struct btrfs_trans_handle *trans, struct btrfs_block_group *block_group, - struct btrfs_path *path, u64 start, u64 size); -int __remove_from_free_space_tree(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group, - struct btrfs_path *path, u64 start, u64 size); -int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group, - struct btrfs_path *path); -int convert_free_space_to_extents(struct btrfs_trans_handle *trans, - struct btrfs_block_group *block_group, - struct btrfs_path *path); -int free_space_test_bit(struct btrfs_block_group *block_group, - struct btrfs_path *path, u64 offset); + struct btrfs_path *path, int cow); +int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group, + struct btrfs_path *path, u64 start, u64 size); +int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group, + struct btrfs_path *path, u64 start, u64 size); +int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group, + struct btrfs_path *path); +int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group, + struct btrfs_path *path); +bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group, + struct btrfs_path *path, u64 offset); #endif #endif diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 4394de12a767..8cc07cc70b12 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -420,6 +420,8 @@ struct btrfs_commit_stats { u64 last_commit_dur; /* The total commit duration in ns */ u64 total_commit_dur; + /* Start of the last critical section in ns. */ + u64 critical_section_start_time; }; struct btrfs_fs_info { @@ -713,8 +715,6 @@ struct btrfs_fs_info { u32 data_chunk_allocations; u32 metadata_ratio; - void *bdev_holder; - /* Private scrub information */ struct mutex scrub_lock; atomic_t scrubs_running; @@ -739,12 +739,6 @@ struct btrfs_fs_info { spinlock_t qgroup_lock; /* - * Used to avoid frequently calling ulist_alloc()/ulist_free() - * when doing qgroup accounting, it must be protected by qgroup_lock. - */ - struct ulist *qgroup_ulist; - - /* * Protect user change for quota operations. If a transaction is needed, * it must be started before locking this lock. */ @@ -779,7 +773,7 @@ struct btrfs_fs_info { struct btrfs_delayed_root *delayed_root; - /* Entries are eb->start / sectorsize */ + /* Entries are eb->start >> nodesize_bits */ struct xarray buffer_tree; /* Next backup root to be overwritten */ @@ -811,6 +805,7 @@ struct btrfs_fs_info { /* Cached block sizes */ u32 nodesize; + u32 nodesize_bits; u32 sectorsize; /* ilog2 of sectorsize, use to avoid 64bit division */ u32 sectorsize_bits; diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index a61c3540d67b..f06cf701ae5a 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -78,13 +78,10 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( } /* Returns NULL if no extref found */ -struct btrfs_inode_extref * -btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - const struct fscrypt_str *name, - u64 inode_objectid, u64 ref_objectid, int ins_len, - int cow) +struct btrfs_inode_extref *btrfs_lookup_inode_extref(struct btrfs_root *root, + struct btrfs_path *path, + const struct fscrypt_str *name, + u64 inode_objectid, u64 ref_objectid) { int ret; struct btrfs_key key; @@ -93,7 +90,7 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, key.type = BTRFS_INODE_EXTREF_KEY; key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len); - ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) return ERR_PTR(ret); if (ret > 0) @@ -720,13 +717,12 @@ delete: } out: if (ret >= 0 && pending_del_nr) { - int err; + int ret2; - err = btrfs_del_items(trans, root, path, pending_del_slot, - pending_del_nr); - if (err) { - btrfs_abort_transaction(trans, err); - ret = err; + ret2 = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); + if (ret2) { + btrfs_abort_transaction(trans, ret2); + ret = ret2; } } diff --git a/fs/btrfs/inode-item.h b/fs/btrfs/inode-item.h index c11b97fdccc4..6d9f5ad20646 100644 --- a/fs/btrfs/inode-item.h +++ b/fs/btrfs/inode-item.h @@ -101,13 +101,10 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *location, int mod); -struct btrfs_inode_extref *btrfs_lookup_inode_extref( - struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - const struct fscrypt_str *name, - u64 inode_objectid, u64 ref_objectid, int ins_len, - int cow); +struct btrfs_inode_extref *btrfs_lookup_inode_extref(struct btrfs_root *root, + struct btrfs_path *path, + const struct fscrypt_str *name, + u64 inode_objectid, u64 ref_objectid); struct btrfs_inode_ref *btrfs_find_name_in_backref(const struct extent_buffer *leaf, int slot, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fc66872b4c74..b77dd22b8cdb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -308,7 +308,7 @@ static void __cold btrfs_print_data_csum_error(struct btrfs_inode *inode, const u32 csum_size = root->fs_info->csum_size; /* For data reloc tree, it's better to do a backref lookup instead. */ - if (btrfs_root_id(root) == BTRFS_DATA_RELOC_TREE_OBJECTID) + if (btrfs_is_data_reloc_root(root)) return print_data_reloc_error(inode, logical_start, csum, csum_expected, mirror_num); @@ -395,8 +395,8 @@ void btrfs_inode_unlock(struct btrfs_inode *inode, unsigned int ilock_flags) static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode, u64 offset, u64 bytes) { - unsigned long index = offset >> PAGE_SHIFT; - unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT; + pgoff_t index = offset >> PAGE_SHIFT; + const pgoff_t end_index = (offset + bytes - 1) >> PAGE_SHIFT; struct folio *folio; while (index <= end_index) { @@ -423,18 +423,18 @@ static int btrfs_dirty_inode(struct btrfs_inode *inode); static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, struct btrfs_new_inode_args *args) { - int err; + int ret; if (args->default_acl) { - err = __btrfs_set_acl(trans, args->inode, args->default_acl, + ret = __btrfs_set_acl(trans, args->inode, args->default_acl, ACL_TYPE_DEFAULT); - if (err) - return err; + if (ret) + return ret; } if (args->acl) { - err = __btrfs_set_acl(trans, args->inode, args->acl, ACL_TYPE_ACCESS); - if (err) - return err; + ret = __btrfs_set_acl(trans, args->inode, args->acl, ACL_TYPE_ACCESS); + if (ret) + return ret; } if (!args->default_acl && !args->acl) cache_no_acl(args->inode); @@ -781,12 +781,15 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start, return 0; } + /* Defrag ioctl takes precedence over mount options and properties. */ + if (inode->defrag_compress == BTRFS_DEFRAG_DONT_COMPRESS) + return 0; + if (BTRFS_COMPRESS_NONE < inode->defrag_compress && + inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES) + return 1; /* force compress */ if (btrfs_test_opt(fs_info, FORCE_COMPRESS)) return 1; - /* defrag ioctl */ - if (inode->defrag_compress) - return 1; /* bad compression ratios */ if (inode->flags & BTRFS_INODE_NOCOMPRESS) return 0; @@ -808,12 +811,11 @@ static inline void inode_should_defrag(struct btrfs_inode *inode, static int extent_range_clear_dirty_for_io(struct btrfs_inode *inode, u64 start, u64 end) { - unsigned long end_index = end >> PAGE_SHIFT; + const pgoff_t end_index = end >> PAGE_SHIFT; struct folio *folio; int ret = 0; - for (unsigned long index = start >> PAGE_SHIFT; - index <= end_index; index++) { + for (pgoff_t index = start >> PAGE_SHIFT; index <= end_index; index++) { folio = filemap_get_folio(inode->vfs_inode.i_mapping, index); if (IS_ERR(folio)) { if (!ret) @@ -943,7 +945,7 @@ again: goto cleanup_and_bail_uncompressed; } - if (inode->defrag_compress) { + if (0 < inode->defrag_compress && inode->defrag_compress < BTRFS_NR_COMPRESS_TYPES) { compress_type = inode->defrag_compress; compress_level = inode->defrag_compress_level; } else if (inode->prop_compress) { @@ -1755,7 +1757,8 @@ static int fallback_to_cow(struct btrfs_inode *inode, spin_unlock(&sinfo->lock); if (count > 0) - btrfs_clear_extent_bits(io_tree, start, end, EXTENT_NORESERVE); + btrfs_clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE, + &cached_state); } btrfs_unlock_extent(io_tree, start, end, &cached_state); @@ -2328,8 +2331,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol * The range must cover part of the @locked_folio, or a return of 1 * can confuse the caller. */ - ASSERT(!(end <= folio_pos(locked_folio) || - start >= folio_pos(locked_folio) + folio_size(locked_folio))); + ASSERT(!(end <= folio_pos(locked_folio) || start >= folio_end(locked_folio))); if (should_nocow(inode, start, end)) { ret = run_delalloc_nocow(inode, locked_folio, start, end); @@ -2737,7 +2739,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) struct btrfs_inode *inode = fixup->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 page_start = folio_pos(folio); - u64 page_end = folio_pos(folio) + folio_size(folio) - 1; + u64 page_end = folio_end(folio) - 1; int ret = 0; bool free_delalloc_space = true; @@ -2881,7 +2883,7 @@ int btrfs_writepage_cow_fixup(struct folio *folio) DEBUG_WARN(); btrfs_err_rl(fs_info, "root %lld ino %llu folio %llu is marked dirty without notifying the fs", - BTRFS_I(inode)->root->root_key.objectid, + btrfs_root_id(BTRFS_I(inode)->root), btrfs_ino(BTRFS_I(inode)), folio_pos(folio)); return -EUCLEAN; @@ -3375,8 +3377,8 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, btrfs_test_range_bit(&inode->io_tree, file_offset, end, EXTENT_NODATASUM, NULL)) { /* Skip the range without csum for data reloc inode */ - btrfs_clear_extent_bits(&inode->io_tree, file_offset, end, - EXTENT_NODATASUM); + btrfs_clear_extent_bit(&inode->io_tree, file_offset, end, + EXTENT_NODATASUM, NULL); return true; } @@ -3946,6 +3948,7 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item), &inode->flags, &inode->ro_flags); btrfs_update_inode_mapping_flags(inode); + btrfs_set_inode_mapping_order(inode); cache_index: /* @@ -4078,45 +4081,35 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *item, struct inode *inode) { - struct btrfs_map_token token; u64 flags; - btrfs_init_map_token(&token, leaf); - - btrfs_set_token_inode_uid(&token, item, i_uid_read(inode)); - btrfs_set_token_inode_gid(&token, item, i_gid_read(inode)); - btrfs_set_token_inode_size(&token, item, BTRFS_I(inode)->disk_i_size); - btrfs_set_token_inode_mode(&token, item, inode->i_mode); - btrfs_set_token_inode_nlink(&token, item, inode->i_nlink); - - btrfs_set_token_timespec_sec(&token, &item->atime, - inode_get_atime_sec(inode)); - btrfs_set_token_timespec_nsec(&token, &item->atime, - inode_get_atime_nsec(inode)); - - btrfs_set_token_timespec_sec(&token, &item->mtime, - inode_get_mtime_sec(inode)); - btrfs_set_token_timespec_nsec(&token, &item->mtime, - inode_get_mtime_nsec(inode)); - - btrfs_set_token_timespec_sec(&token, &item->ctime, - inode_get_ctime_sec(inode)); - btrfs_set_token_timespec_nsec(&token, &item->ctime, - inode_get_ctime_nsec(inode)); - - btrfs_set_token_timespec_sec(&token, &item->otime, BTRFS_I(inode)->i_otime_sec); - btrfs_set_token_timespec_nsec(&token, &item->otime, BTRFS_I(inode)->i_otime_nsec); - - btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode)); - btrfs_set_token_inode_generation(&token, item, - BTRFS_I(inode)->generation); - btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode)); - btrfs_set_token_inode_transid(&token, item, trans->transid); - btrfs_set_token_inode_rdev(&token, item, inode->i_rdev); + btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); + btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); + btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); + btrfs_set_inode_mode(leaf, item, inode->i_mode); + btrfs_set_inode_nlink(leaf, item, inode->i_nlink); + + btrfs_set_timespec_sec(leaf, &item->atime, inode_get_atime_sec(inode)); + btrfs_set_timespec_nsec(leaf, &item->atime, inode_get_atime_nsec(inode)); + + btrfs_set_timespec_sec(leaf, &item->mtime, inode_get_mtime_sec(inode)); + btrfs_set_timespec_nsec(leaf, &item->mtime, inode_get_mtime_nsec(inode)); + + btrfs_set_timespec_sec(leaf, &item->ctime, inode_get_ctime_sec(inode)); + btrfs_set_timespec_nsec(leaf, &item->ctime, inode_get_ctime_nsec(inode)); + + btrfs_set_timespec_sec(leaf, &item->otime, BTRFS_I(inode)->i_otime_sec); + btrfs_set_timespec_nsec(leaf, &item->otime, BTRFS_I(inode)->i_otime_nsec); + + btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); + btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); + btrfs_set_inode_sequence(leaf, item, inode_peek_iversion(inode)); + btrfs_set_inode_transid(leaf, item, trans->transid); + btrfs_set_inode_rdev(leaf, item, inode->i_rdev); flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags, BTRFS_I(inode)->ro_flags); - btrfs_set_token_inode_flags(&token, item, flags); - btrfs_set_token_inode_block_group(&token, item, 0); + btrfs_set_inode_flags(leaf, item, flags); + btrfs_set_inode_block_group(leaf, item, 0); } /* @@ -4215,20 +4208,22 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, u64 dir_ino = btrfs_ino(dir); path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } + if (!path) + return -ENOMEM; di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, -1); if (IS_ERR_OR_NULL(di)) { - ret = di ? PTR_ERR(di) : -ENOENT; - goto err; + btrfs_free_path(path); + return di ? PTR_ERR(di) : -ENOENT; } ret = btrfs_delete_one_dir_name(trans, root, path, di); + /* + * Down the call chains below we'll also need to allocate a path, so no + * need to hold on to this one for longer than necessary. + */ + btrfs_free_path(path); if (ret) - goto err; - btrfs_release_path(path); + return ret; /* * If we don't have dir index, we have to get it by looking up @@ -4254,7 +4249,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, "failed to delete reference to %.*s, root %llu inode %llu parent %llu", name->len, name->name, btrfs_root_id(root), ino, dir_ino); btrfs_abort_transaction(trans, ret); - goto err; + return ret; } skip_backref: if (rename_ctx) @@ -4263,7 +4258,7 @@ skip_backref: ret = btrfs_delete_delayed_dir_index(trans, dir, index); if (ret) { btrfs_abort_transaction(trans, ret); - goto err; + return ret; } /* @@ -4287,19 +4282,14 @@ skip_backref: * holding. */ btrfs_run_delayed_iput(fs_info, inode); -err: - btrfs_free_path(path); - if (ret) - goto out; btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2); inode_inc_iversion(&inode->vfs_inode); inode_set_ctime_current(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode)); - ret = btrfs_update_inode(trans, dir); -out: - return ret; + + return btrfs_update_inode(trans, dir); } int btrfs_unlink_inode(struct btrfs_trans_handle *trans, @@ -4704,32 +4694,33 @@ out_up_write: return ret; } -static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) +static int btrfs_rmdir(struct inode *vfs_dir, struct dentry *dentry) { - struct inode *inode = d_inode(dentry); - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_inode *dir = BTRFS_I(vfs_dir); + struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); + struct btrfs_fs_info *fs_info = inode->root->fs_info; int ret = 0; struct btrfs_trans_handle *trans; struct fscrypt_name fname; - if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) + if (inode->vfs_inode.i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; - if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID) { + if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) { if (unlikely(btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))) { btrfs_err(fs_info, "extent tree v2 doesn't support snapshot deletion yet"); return -EOPNOTSUPP; } - return btrfs_delete_subvolume(BTRFS_I(dir), dentry); + return btrfs_delete_subvolume(dir, dentry); } - ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname); + ret = fscrypt_setup_filename(vfs_dir, &dentry->d_name, 1, &fname); if (ret) return ret; /* This needs to handle no-key deletions later on */ - trans = __unlink_start_trans(BTRFS_I(dir)); + trans = __unlink_start_trans(dir); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_notrans; @@ -4749,23 +4740,22 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) * This is because we can't unlink other roots when replaying the dir * deletes for directory foo. */ - if (BTRFS_I(inode)->last_unlink_trans >= trans->transid) - btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); + if (inode->last_unlink_trans >= trans->transid) + btrfs_record_snapshot_destroy(trans, dir); - if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { - ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry); + if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { + ret = btrfs_unlink_subvol(trans, dir, dentry); goto out; } - ret = btrfs_orphan_add(trans, BTRFS_I(inode)); + ret = btrfs_orphan_add(trans, inode); if (ret) goto out; /* now the directory is empty */ - ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), - &fname.disk_name); + ret = btrfs_unlink_inode(trans, dir, inode, &fname.disk_name); if (!ret) - btrfs_i_size_write(BTRFS_I(inode), 0); + btrfs_i_size_write(inode, 0); out: btrfs_end_transaction(trans); out_notrans: @@ -4821,9 +4811,9 @@ again: */ zero_start = max_t(u64, folio_pos(folio), start); - zero_end = folio_pos(folio) + folio_size(folio) - 1; + zero_end = folio_end(folio); folio_zero_range(folio, zero_start - folio_pos(folio), - zero_end - zero_start + 1); + zero_end - zero_start); out_unlock: folio_unlock(folio); @@ -4861,7 +4851,6 @@ int btrfs_truncate_block(struct btrfs_inode *inode, u64 offset, u64 start, u64 e pgoff_t index = (offset >> PAGE_SHIFT); struct folio *folio; gfp_t mask = btrfs_alloc_write_mask(mapping); - size_t write_bytes = blocksize; int ret = 0; const bool in_head_block = is_inside_block(offset, round_down(start, blocksize), blocksize); @@ -4913,8 +4902,12 @@ int btrfs_truncate_block(struct btrfs_inode *inode, u64 offset, u64 start, u64 e ret = btrfs_check_data_free_space(inode, &data_reserved, block_start, blocksize, false); if (ret < 0) { + size_t write_bytes = blocksize; + if (btrfs_check_nocow_lock(inode, block_start, &write_bytes, false) > 0) { - /* For nocow case, no need to reserve data space */ + /* For nocow case, no need to reserve data space. */ + ASSERT(write_bytes == blocksize, "write_bytes=%zu blocksize=%u", + write_bytes, blocksize); only_release_metadata = true; } else { goto out; @@ -5001,8 +4994,7 @@ again: * not reach disk, it still affects our page caches. */ zero_start = max_t(u64, folio_pos(folio), start); - zero_end = min_t(u64, folio_pos(folio) + folio_size(folio) - 1, - end); + zero_end = min_t(u64, folio_end(folio) - 1, end); } else { zero_start = max_t(u64, block_start, start); zero_end = min_t(u64, block_end, end); @@ -5014,11 +5006,12 @@ again: block_end + 1 - block_start); btrfs_folio_set_dirty(fs_info, folio, block_start, block_end + 1 - block_start); - btrfs_unlock_extent(io_tree, block_start, block_end, &cached_state); if (only_release_metadata) btrfs_set_extent_bit(&inode->io_tree, block_start, block_end, - EXTENT_NORESERVE, NULL); + EXTENT_NORESERVE, &cached_state); + + btrfs_unlock_extent(io_tree, block_start, block_end, &cached_state); out_unlock: if (ret) { @@ -5256,7 +5249,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) ret = btrfs_truncate(BTRFS_I(inode), newsize == oldsize); if (ret && inode->i_nlink) { - int err; + int ret2; /* * Truncate failed, so fix up the in-memory size. We @@ -5264,9 +5257,9 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) * wait for disk_i_size to be stable and then update the * in-memory size to match. */ - err = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1); - if (err) - return err; + ret2 = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1); + if (ret2) + return ret2; i_size_write(inode, BTRFS_I(inode)->disk_i_size); } } @@ -5279,31 +5272,31 @@ static int btrfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, { struct inode *inode = d_inode(dentry); struct btrfs_root *root = BTRFS_I(inode)->root; - int err; + int ret; if (btrfs_root_readonly(root)) return -EROFS; - err = setattr_prepare(idmap, dentry, attr); - if (err) - return err; + ret = setattr_prepare(idmap, dentry, attr); + if (ret) + return ret; if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { - err = btrfs_setsize(inode, attr); - if (err) - return err; + ret = btrfs_setsize(inode, attr); + if (ret) + return ret; } if (attr->ia_valid) { setattr_copy(idmap, inode, attr); inode_inc_iversion(inode); - err = btrfs_dirty_inode(BTRFS_I(inode)); + ret = btrfs_dirty_inode(BTRFS_I(inode)); - if (!err && attr->ia_valid & ATTR_MODE) - err = posix_acl_chmod(idmap, dentry, inode->i_mode); + if (!ret && attr->ia_valid & ATTR_MODE) + ret = posix_acl_chmod(idmap, dentry, inode->i_mode); } - return err; + return ret; } /* @@ -5437,7 +5430,7 @@ void btrfs_evict_inode(struct inode *inode) struct btrfs_fs_info *fs_info; struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_block_rsv *rsv = NULL; + struct btrfs_block_rsv rsv; int ret; trace_btrfs_inode_evict(inode); @@ -5485,11 +5478,9 @@ void btrfs_evict_inode(struct inode *inode) */ btrfs_kill_delayed_inode_items(BTRFS_I(inode)); - rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); - if (!rsv) - goto out; - rsv->size = btrfs_calc_metadata_size(fs_info, 1); - rsv->failfast = true; + btrfs_init_metadata_block_rsv(fs_info, &rsv, BTRFS_BLOCK_RSV_TEMP); + rsv.size = btrfs_calc_metadata_size(fs_info, 1); + rsv.failfast = true; btrfs_i_size_write(BTRFS_I(inode), 0); @@ -5501,11 +5492,11 @@ void btrfs_evict_inode(struct inode *inode) .min_type = 0, }; - trans = evict_refill_and_join(root, rsv); + trans = evict_refill_and_join(root, &rsv); if (IS_ERR(trans)) - goto out; + goto out_release; - trans->block_rsv = rsv; + trans->block_rsv = &rsv; ret = btrfs_truncate_inode_items(trans, root, &control); trans->block_rsv = &fs_info->trans_block_rsv; @@ -5517,7 +5508,7 @@ void btrfs_evict_inode(struct inode *inode) */ btrfs_btree_balance_dirty_nodelay(fs_info); if (ret && ret != -ENOSPC && ret != -EAGAIN) - goto out; + goto out_release; else if (!ret) break; } @@ -5531,16 +5522,17 @@ void btrfs_evict_inode(struct inode *inode) * If it turns out that we are dropping too many of these, we might want * to add a mechanism for retrying these after a commit. */ - trans = evict_refill_and_join(root, rsv); + trans = evict_refill_and_join(root, &rsv); if (!IS_ERR(trans)) { - trans->block_rsv = rsv; + trans->block_rsv = &rsv; btrfs_orphan_del(trans, BTRFS_I(inode)); trans->block_rsv = &fs_info->trans_block_rsv; btrfs_end_transaction(trans); } +out_release: + btrfs_block_rsv_release(fs_info, &rsv, (u64)-1, NULL); out: - btrfs_free_block_rsv(fs_info, rsv); /* * If we didn't successfully delete, the orphan item will still be in * the tree and we'll retry on the next mount. Again, we might also want @@ -6173,8 +6165,7 @@ again: if (ret) goto nopos; - ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); - if (ret) + if (btrfs_readdir_delayed_dir_index(ctx, &ins_list)) goto nopos; /* @@ -6467,6 +6458,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW | BTRFS_INODE_NODATASUM; btrfs_update_inode_mapping_flags(BTRFS_I(inode)); + btrfs_set_inode_mapping_order(BTRFS_I(inode)); } ret = btrfs_insert_inode_locked(inode); @@ -6610,13 +6602,17 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, if (args->orphan) { ret = btrfs_orphan_add(trans, BTRFS_I(inode)); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto discard; + } } else { ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, 0, BTRFS_I(inode)->dir_index); - } - if (ret) { - btrfs_abort_transaction(trans, ret); - goto discard; + if (ret) { + btrfs_abort_transaction(trans, ret); + goto discard; + } } return 0; @@ -6703,20 +6699,18 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, fail_dir_item: if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { u64 local_index; - int err; - err = btrfs_del_root_ref(trans, key.objectid, - btrfs_root_id(root), parent_ino, - &local_index, name); - if (err) - btrfs_abort_transaction(trans, err); + int ret2; + + ret2 = btrfs_del_root_ref(trans, key.objectid, btrfs_root_id(root), + parent_ino, &local_index, name); + if (ret2) + btrfs_abort_transaction(trans, ret2); } else if (add_backref) { - u64 local_index; - int err; + int ret2; - err = btrfs_del_inode_ref(trans, root, name, ino, parent_ino, - &local_index); - if (err) - btrfs_abort_transaction(trans, err); + ret2 = btrfs_del_inode_ref(trans, root, name, ino, parent_ino, NULL); + if (ret2) + btrfs_abort_transaction(trans, ret2); } /* Return the original error code */ @@ -6735,20 +6729,20 @@ static int btrfs_create_common(struct inode *dir, struct dentry *dentry, }; unsigned int trans_num_items; struct btrfs_trans_handle *trans; - int err; + int ret; - err = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items); - if (err) + ret = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items); + if (ret) goto out_inode; trans = btrfs_start_transaction(root, trans_num_items); if (IS_ERR(trans)) { - err = PTR_ERR(trans); + ret = PTR_ERR(trans); goto out_new_inode_args; } - err = btrfs_create_new_inode(trans, &new_inode_args); - if (!err) + ret = btrfs_create_new_inode(trans, &new_inode_args); + if (!ret) d_instantiate_new(dentry, inode); btrfs_end_transaction(trans); @@ -6756,9 +6750,9 @@ static int btrfs_create_common(struct inode *dir, struct dentry *dentry, out_new_inode_args: btrfs_new_inode_args_destroy(&new_inode_args); out_inode: - if (err) + if (ret) iput(inode); - return err; + return ret; } static int btrfs_mknod(struct mnt_idmap *idmap, struct inode *dir, @@ -6799,7 +6793,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct fscrypt_name fname; u64 index; - int err; + int ret; int drop_inode = 0; /* do not allow sys_link's with other subvols of the same device */ @@ -6809,12 +6803,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink >= BTRFS_LINK_MAX) return -EMLINK; - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); - if (err) + ret = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); + if (ret) goto fail; - err = btrfs_set_inode_index(BTRFS_I(dir), &index); - if (err) + ret = btrfs_set_inode_index(BTRFS_I(dir), &index); + if (ret) goto fail; /* @@ -6825,7 +6819,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, */ trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6); if (IS_ERR(trans)) { - err = PTR_ERR(trans); + ret = PTR_ERR(trans); trans = NULL; goto fail; } @@ -6838,24 +6832,24 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, ihold(inode); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); - err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), + ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), &fname.disk_name, 1, index); - if (err) { + if (ret) { drop_inode = 1; } else { struct dentry *parent = dentry->d_parent; - err = btrfs_update_inode(trans, BTRFS_I(inode)); - if (err) + ret = btrfs_update_inode(trans, BTRFS_I(inode)); + if (ret) goto fail; if (inode->i_nlink == 1) { /* * If new hard link count is 1, it's a file created * with open(2) O_TMPFILE flag. */ - err = btrfs_orphan_del(trans, BTRFS_I(inode)); - if (err) + ret = btrfs_orphan_del(trans, BTRFS_I(inode)); + if (ret) goto fail; } d_instantiate(dentry, inode); @@ -6871,7 +6865,7 @@ fail: iput(inode); } btrfs_btree_balance_dirty(fs_info); - return err; + return ret; } static struct dentry *btrfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, @@ -7364,13 +7358,13 @@ struct extent_map *btrfs_create_io_em(struct btrfs_inode *inode, u64 start, static void wait_subpage_spinlock(struct folio *folio) { struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); - struct btrfs_subpage *subpage; + struct btrfs_folio_state *bfs; if (!btrfs_is_subpage(fs_info, folio)) return; ASSERT(folio_test_private(folio) && folio_get_private(folio)); - subpage = folio_get_private(folio); + bfs = folio_get_private(folio); /* * This may look insane as we just acquire the spinlock and release it, @@ -7383,8 +7377,8 @@ static void wait_subpage_spinlock(struct folio *folio) * Here we just acquire the spinlock so that all existing callers * should exit and we're safe to release/invalidate the page. */ - spin_lock_irq(&subpage->lock); - spin_unlock_irq(&subpage->lock); + spin_lock_irq(&bfs->lock); + spin_unlock_irq(&bfs->lock); } static int btrfs_launder_folio(struct folio *folio) @@ -7607,7 +7601,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback) }; struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_block_rsv *rsv; + struct btrfs_block_rsv rsv; int ret; struct btrfs_trans_handle *trans; u64 mask = fs_info->sectorsize - 1; @@ -7649,11 +7643,9 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback) * 2) fs_info->trans_block_rsv - this will have 1 items worth left for * updating the inode. */ - rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); - if (!rsv) - return -ENOMEM; - rsv->size = min_size; - rsv->failfast = true; + btrfs_init_metadata_block_rsv(fs_info, &rsv, BTRFS_BLOCK_RSV_TEMP); + rsv.size = min_size; + rsv.failfast = true; /* * 1 for the truncate slack space @@ -7666,7 +7658,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback) } /* Migrate the slack space for the truncate to our reserve */ - ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, + ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, &rsv, min_size, false); /* * We have reserved 2 metadata units when we started the transaction and @@ -7678,7 +7670,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback) goto out; } - trans->block_rsv = rsv; + trans->block_rsv = &rsv; while (1) { struct extent_state *cached_state = NULL; @@ -7721,9 +7713,9 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback) break; } - btrfs_block_rsv_release(fs_info, rsv, -1, NULL); + btrfs_block_rsv_release(fs_info, &rsv, -1, NULL); ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, - rsv, min_size, false); + &rsv, min_size, false); /* * We have reserved 2 metadata units when we started the * transaction and min_size matches 1 unit, so this should never @@ -7732,7 +7724,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback) if (WARN_ON(ret)) break; - trans->block_rsv = rsv; + trans->block_rsv = &rsv; } /* @@ -7771,7 +7763,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback) btrfs_btree_balance_dirty(fs_info); } out: - btrfs_free_block_rsv(fs_info, rsv); + btrfs_block_rsv_release(fs_info, &rsv, (u64)-1, NULL); /* * So if we truncate and then write and fsync we normally would just * write the extents that changed, which is a problem if we need to @@ -8026,7 +8018,7 @@ static int btrfs_getattr(struct mnt_idmap *idmap, generic_fillattr(idmap, request_mask, inode, stat); stat->dev = BTRFS_I(inode)->root->anon_dev; - stat->subvol = BTRFS_I(inode)->root->root_key.objectid; + stat->subvol = btrfs_root_id(BTRFS_I(inode)->root); stat->result_mask |= STATX_SUBVOL; spin_lock(&BTRFS_I(inode)->lock); @@ -8823,7 +8815,7 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir, .dentry = dentry, }; unsigned int trans_num_items; - int err; + int ret; int name_len; int datasize; unsigned long ptr; @@ -8850,26 +8842,26 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir, inode_set_bytes(inode, name_len); new_inode_args.inode = inode; - err = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items); - if (err) + ret = btrfs_new_inode_prepare(&new_inode_args, &trans_num_items); + if (ret) goto out_inode; /* 1 additional item for the inline extent */ trans_num_items++; trans = btrfs_start_transaction(root, trans_num_items); if (IS_ERR(trans)) { - err = PTR_ERR(trans); + ret = PTR_ERR(trans); goto out_new_inode_args; } - err = btrfs_create_new_inode(trans, &new_inode_args); - if (err) + ret = btrfs_create_new_inode(trans, &new_inode_args); + if (ret) goto out; path = btrfs_alloc_path(); if (!path) { - err = -ENOMEM; - btrfs_abort_transaction(trans, err); + ret = -ENOMEM; + btrfs_abort_transaction(trans, ret); discard_new_inode(inode); inode = NULL; goto out; @@ -8878,10 +8870,9 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = 0; datasize = btrfs_file_extent_calc_inline_size(name_len); - err = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - if (err) { - btrfs_abort_transaction(trans, err); + ret = btrfs_insert_empty_item(trans, root, path, &key, datasize); + if (ret) { + btrfs_abort_transaction(trans, ret); btrfs_free_path(path); discard_new_inode(inode); inode = NULL; @@ -8903,16 +8894,16 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir, btrfs_free_path(path); d_instantiate_new(dentry, inode); - err = 0; + ret = 0; out: btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); out_new_inode_args: btrfs_new_inode_args_destroy(&new_inode_args); out_inode: - if (err) + if (ret) iput(inode); - return err; + return ret; } static struct btrfs_trans_handle *insert_prealloc_file_extent( diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8a60983a697c..bf561be18885 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -841,7 +841,7 @@ free_pending: static int btrfs_may_delete(struct mnt_idmap *idmap, struct inode *dir, struct dentry *victim, int isdir) { - int error; + int ret; if (d_really_is_negative(victim)) return -ENOENT; @@ -851,9 +851,9 @@ static int btrfs_may_delete(struct mnt_idmap *idmap, return -EINVAL; audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); - error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); - if (error) - return error; + ret = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); + if (ret) + return ret; if (IS_APPEND(dir)) return -EPERM; if (check_sticky(idmap, dir, d_inode(victim)) || @@ -892,39 +892,37 @@ static inline int btrfs_may_create(struct mnt_idmap *idmap, * sys_mkdirat and vfs_mkdir, but we only do a single component lookup * inside this filesystem so it's quite a bit simpler. */ -static noinline int btrfs_mksubvol(const struct path *parent, +static noinline int btrfs_mksubvol(struct dentry *parent, struct mnt_idmap *idmap, - const char *name, int namelen, - struct btrfs_root *snap_src, + struct qstr *qname, struct btrfs_root *snap_src, bool readonly, struct btrfs_qgroup_inherit *inherit) { - struct inode *dir = d_inode(parent->dentry); + struct inode *dir = d_inode(parent); struct btrfs_fs_info *fs_info = inode_to_fs_info(dir); struct dentry *dentry; - struct fscrypt_str name_str = FSTR_INIT((char *)name, namelen); - int error; + struct fscrypt_str name_str = FSTR_INIT((char *)qname->name, qname->len); + int ret; - error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); - if (error == -EINTR) - return error; + ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); + if (ret == -EINTR) + return ret; - dentry = lookup_one(idmap, &QSTR_LEN(name, namelen), parent->dentry); - error = PTR_ERR(dentry); + dentry = lookup_one(idmap, qname, parent); + ret = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_unlock; - error = btrfs_may_create(idmap, dir, dentry); - if (error) + ret = btrfs_may_create(idmap, dir, dentry); + if (ret) goto out_dput; /* * even if this name doesn't exist, we may get hash collisions. * check for them now when we can safely fail */ - error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, - dir->i_ino, &name_str); - if (error) + ret = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, dir->i_ino, &name_str); + if (ret) goto out_dput; down_read(&fs_info->subvol_sem); @@ -933,11 +931,11 @@ static noinline int btrfs_mksubvol(const struct path *parent, goto out_up_read; if (snap_src) - error = create_snapshot(snap_src, dir, dentry, readonly, inherit); + ret = create_snapshot(snap_src, dir, dentry, readonly, inherit); else - error = create_subvol(idmap, dir, dentry, inherit); + ret = create_subvol(idmap, dir, dentry, inherit); - if (!error) + if (!ret) fsnotify_mkdir(dir, dentry); out_up_read: up_read(&fs_info->subvol_sem); @@ -945,12 +943,12 @@ out_dput: dput(dentry); out_unlock: btrfs_inode_unlock(BTRFS_I(dir), 0); - return error; + return ret; } -static noinline int btrfs_mksnapshot(const struct path *parent, +static noinline int btrfs_mksnapshot(struct dentry *parent, struct mnt_idmap *idmap, - const char *name, int namelen, + struct qstr *qname, struct btrfs_root *root, bool readonly, struct btrfs_qgroup_inherit *inherit) @@ -977,8 +975,8 @@ static noinline int btrfs_mksnapshot(const struct path *parent, btrfs_wait_ordered_extents(root, U64_MAX, NULL); - ret = btrfs_mksubvol(parent, idmap, name, namelen, - root, readonly, inherit); + ret = btrfs_mksubvol(parent, idmap, qname, root, readonly, inherit); + atomic_dec(&root->snapshot_force_cow); out: btrfs_drew_read_unlock(&root->snapshot_lock); @@ -1169,7 +1167,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, } /* equal, nothing need to do */ if (ret == 0 && new_size != old_size) - btrfs_info_in_rcu(fs_info, + btrfs_info(fs_info, "resize device %s (devid %llu) from %llu to %llu", btrfs_dev_name(device), device->devid, old_size, new_size); @@ -1184,12 +1182,12 @@ out_drop: static noinline int __btrfs_ioctl_snap_create(struct file *file, struct mnt_idmap *idmap, - const char *name, unsigned long fd, int subvol, + const char *name, unsigned long fd, bool subvol, bool readonly, struct btrfs_qgroup_inherit *inherit) { - int namelen; int ret = 0; + struct qstr qname = QSTR_INIT(name, strlen(name)); if (!S_ISDIR(file_inode(file)->i_mode)) return -ENOTDIR; @@ -1198,21 +1196,20 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file, if (ret) goto out; - namelen = strlen(name); if (strchr(name, '/')) { ret = -EINVAL; goto out_drop_write; } - if (name[0] == '.' && - (namelen == 1 || (name[1] == '.' && namelen == 2))) { + if (qname.name[0] == '.' && + (qname.len == 1 || (qname.name[1] == '.' && qname.len == 2))) { ret = -EEXIST; goto out_drop_write; } if (subvol) { - ret = btrfs_mksubvol(&file->f_path, idmap, name, - namelen, NULL, readonly, inherit); + ret = btrfs_mksubvol(file_dentry(file), idmap, &qname, NULL, + readonly, inherit); } else { CLASS(fd, src)(fd); struct inode *src_inode; @@ -1242,8 +1239,7 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file, */ ret = -EINVAL; } else { - ret = btrfs_mksnapshot(&file->f_path, idmap, - name, namelen, + ret = btrfs_mksnapshot(file_dentry(file), idmap, &qname, BTRFS_I(src_inode)->root, readonly, inherit); } @@ -1280,7 +1276,7 @@ out: } static noinline int btrfs_ioctl_snap_create_v2(struct file *file, - void __user *arg, int subvol) + void __user *arg, bool subvol) { struct btrfs_ioctl_vol_args_v2 *vol_args; int ret; @@ -2558,8 +2554,14 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) ret = -EOPNOTSUPP; goto out; } - /* compression requires us to start the IO */ - if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { + if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS) && + (range.flags & BTRFS_DEFRAG_RANGE_NOCOMPRESS)) { + ret = -EINVAL; + goto out; + } + /* Compression or no-compression require to start the IO. */ + if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS) || + (range.flags & BTRFS_DEFRAG_RANGE_NOCOMPRESS)) { range.flags |= BTRFS_DEFRAG_RANGE_START_IO; range.extent_thresh = (u32)-1; } @@ -2700,7 +2702,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) err_drop: mnt_drop_write_file(file); if (bdev_file) - fput(bdev_file); + bdev_fput(bdev_file); out: btrfs_put_dev_args_from_path(&args); kfree(vol_args); @@ -2751,7 +2753,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) mnt_drop_write_file(file); if (bdev_file) - fput(bdev_file); + bdev_fput(bdev_file); out: btrfs_put_dev_args_from_path(&args); out_free: @@ -2890,7 +2892,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) ret = PTR_ERR(new_root); goto out; } - if (!is_fstree(btrfs_root_id(new_root))) { + if (!btrfs_is_fstree(btrfs_root_id(new_root))) { ret = -ENOENT; goto out_free; } @@ -3357,7 +3359,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, int size; struct btrfs_ioctl_logical_ino_args *loi; struct btrfs_data_container *inodes = NULL; - struct btrfs_path *path = NULL; bool ignore_offset; if (!capable(CAP_SYS_ADMIN)) @@ -3391,14 +3392,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, goto out_loi; } - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - ret = iterate_inodes_from_logical(loi->logical, fs_info, path, - inodes, ignore_offset); - btrfs_free_path(path); + ret = iterate_inodes_from_logical(loi->logical, fs_info, inodes, ignore_offset); if (ret == -EINVAL) ret = -ENOENT; if (ret < 0) @@ -3715,22 +3709,6 @@ drop_write: return ret; } -/* - * Quick check for ioctl handlers if quotas are enabled. Proper locking must be - * done before any operations. - */ -static bool qgroup_enabled(struct btrfs_fs_info *fs_info) -{ - bool ret = true; - - mutex_lock(&fs_info->qgroup_ioctl_lock); - if (!fs_info->quota_root) - ret = false; - mutex_unlock(&fs_info->qgroup_ioctl_lock); - - return ret; -} - static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); @@ -3745,7 +3723,7 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!qgroup_enabled(root->fs_info)) + if (!btrfs_qgroup_enabled(fs_info)) return -ENOTCONN; ret = mnt_want_write_file(file); @@ -3815,7 +3793,7 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!qgroup_enabled(root->fs_info)) + if (!btrfs_qgroup_enabled(root->fs_info)) return -ENOTCONN; ret = mnt_want_write_file(file); @@ -3833,7 +3811,7 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) goto out; } - if (sa->create && is_fstree(sa->qgroupid)) { + if (sa->create && btrfs_is_fstree(sa->qgroupid)) { ret = -EINVAL; goto out; } @@ -3874,7 +3852,7 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!qgroup_enabled(root->fs_info)) + if (!btrfs_qgroup_enabled(root->fs_info)) return -ENOTCONN; ret = mnt_want_write_file(file); @@ -3922,7 +3900,7 @@ static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!qgroup_enabled(fs_info)) + if (!btrfs_qgroup_enabled(fs_info)) return -ENOTCONN; ret = mnt_want_write_file(file); @@ -4200,7 +4178,7 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) } spin_lock(&fs_info->super_lock); - strcpy(super_block->label, label); + strscpy(super_block->label, label); spin_unlock(&fs_info->super_lock); ret = btrfs_commit_transaction(trans); @@ -4659,7 +4637,7 @@ static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int iss struct btrfs_uring_priv *priv = bc->priv; struct btrfs_inode *inode = BTRFS_I(file_inode(priv->iocb.ki_filp)); struct extent_io_tree *io_tree = &inode->io_tree; - unsigned long index; + pgoff_t index; u64 cur; size_t page_offset; ssize_t ret; @@ -4829,7 +4807,8 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32, flags); #else - return -ENOTTY; + ret = -ENOTTY; + goto out_acct; #endif } else { copy_end = copy_end_kernel; diff --git a/fs/btrfs/messages.h b/fs/btrfs/messages.h index 6abf81bb00c2..022ebc89af85 100644 --- a/fs/btrfs/messages.h +++ b/fs/btrfs/messages.h @@ -37,106 +37,46 @@ void _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...); btrfs_no_printk(fs_info, fmt, ##args) #endif -#define btrfs_emerg(fs_info, fmt, args...) \ - btrfs_printk(fs_info, KERN_EMERG fmt, ##args) -#define btrfs_alert(fs_info, fmt, args...) \ - btrfs_printk(fs_info, KERN_ALERT fmt, ##args) -#define btrfs_crit(fs_info, fmt, args...) \ - btrfs_printk(fs_info, KERN_CRIT fmt, ##args) -#define btrfs_err(fs_info, fmt, args...) \ - btrfs_printk(fs_info, KERN_ERR fmt, ##args) -#define btrfs_warn(fs_info, fmt, args...) \ - btrfs_printk(fs_info, KERN_WARNING fmt, ##args) -#define btrfs_notice(fs_info, fmt, args...) \ - btrfs_printk(fs_info, KERN_NOTICE fmt, ##args) -#define btrfs_info(fs_info, fmt, args...) \ - btrfs_printk(fs_info, KERN_INFO fmt, ##args) - /* - * Wrappers that use printk_in_rcu + * Print a message with filesystem info, enclosed in RCU protection. */ -#define btrfs_emerg_in_rcu(fs_info, fmt, args...) \ - btrfs_printk_in_rcu(fs_info, KERN_EMERG fmt, ##args) -#define btrfs_alert_in_rcu(fs_info, fmt, args...) \ - btrfs_printk_in_rcu(fs_info, KERN_ALERT fmt, ##args) -#define btrfs_crit_in_rcu(fs_info, fmt, args...) \ +#define btrfs_crit(fs_info, fmt, args...) \ btrfs_printk_in_rcu(fs_info, KERN_CRIT fmt, ##args) -#define btrfs_err_in_rcu(fs_info, fmt, args...) \ +#define btrfs_err(fs_info, fmt, args...) \ btrfs_printk_in_rcu(fs_info, KERN_ERR fmt, ##args) -#define btrfs_warn_in_rcu(fs_info, fmt, args...) \ +#define btrfs_warn(fs_info, fmt, args...) \ btrfs_printk_in_rcu(fs_info, KERN_WARNING fmt, ##args) -#define btrfs_notice_in_rcu(fs_info, fmt, args...) \ - btrfs_printk_in_rcu(fs_info, KERN_NOTICE fmt, ##args) -#define btrfs_info_in_rcu(fs_info, fmt, args...) \ +#define btrfs_info(fs_info, fmt, args...) \ btrfs_printk_in_rcu(fs_info, KERN_INFO fmt, ##args) /* - * Wrappers that use a ratelimited printk_in_rcu - */ -#define btrfs_emerg_rl_in_rcu(fs_info, fmt, args...) \ - btrfs_printk_rl_in_rcu(fs_info, KERN_EMERG fmt, ##args) -#define btrfs_alert_rl_in_rcu(fs_info, fmt, args...) \ - btrfs_printk_rl_in_rcu(fs_info, KERN_ALERT fmt, ##args) -#define btrfs_crit_rl_in_rcu(fs_info, fmt, args...) \ - btrfs_printk_rl_in_rcu(fs_info, KERN_CRIT fmt, ##args) -#define btrfs_err_rl_in_rcu(fs_info, fmt, args...) \ - btrfs_printk_rl_in_rcu(fs_info, KERN_ERR fmt, ##args) -#define btrfs_warn_rl_in_rcu(fs_info, fmt, args...) \ - btrfs_printk_rl_in_rcu(fs_info, KERN_WARNING fmt, ##args) -#define btrfs_notice_rl_in_rcu(fs_info, fmt, args...) \ - btrfs_printk_rl_in_rcu(fs_info, KERN_NOTICE fmt, ##args) -#define btrfs_info_rl_in_rcu(fs_info, fmt, args...) \ - btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args) - -/* * Wrappers that use a ratelimited printk */ -#define btrfs_emerg_rl(fs_info, fmt, args...) \ - btrfs_printk_ratelimited(fs_info, KERN_EMERG fmt, ##args) -#define btrfs_alert_rl(fs_info, fmt, args...) \ - btrfs_printk_ratelimited(fs_info, KERN_ALERT fmt, ##args) #define btrfs_crit_rl(fs_info, fmt, args...) \ - btrfs_printk_ratelimited(fs_info, KERN_CRIT fmt, ##args) + btrfs_printk_rl_in_rcu(fs_info, KERN_CRIT fmt, ##args) #define btrfs_err_rl(fs_info, fmt, args...) \ - btrfs_printk_ratelimited(fs_info, KERN_ERR fmt, ##args) + btrfs_printk_rl_in_rcu(fs_info, KERN_ERR fmt, ##args) #define btrfs_warn_rl(fs_info, fmt, args...) \ - btrfs_printk_ratelimited(fs_info, KERN_WARNING fmt, ##args) -#define btrfs_notice_rl(fs_info, fmt, args...) \ - btrfs_printk_ratelimited(fs_info, KERN_NOTICE fmt, ##args) + btrfs_printk_rl_in_rcu(fs_info, KERN_WARNING fmt, ##args) #define btrfs_info_rl(fs_info, fmt, args...) \ - btrfs_printk_ratelimited(fs_info, KERN_INFO fmt, ##args) + btrfs_printk_rl_in_rcu(fs_info, KERN_INFO fmt, ##args) #if defined(CONFIG_DYNAMIC_DEBUG) #define btrfs_debug(fs_info, fmt, args...) \ - _dynamic_func_call_no_desc(fmt, btrfs_printk, \ - fs_info, KERN_DEBUG fmt, ##args) -#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ _dynamic_func_call_no_desc(fmt, btrfs_printk_in_rcu, \ fs_info, KERN_DEBUG fmt, ##args) -#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ - _dynamic_func_call_no_desc(fmt, btrfs_printk_rl_in_rcu, \ - fs_info, KERN_DEBUG fmt, ##args) #define btrfs_debug_rl(fs_info, fmt, args...) \ - _dynamic_func_call_no_desc(fmt, btrfs_printk_ratelimited, \ + _dynamic_func_call_no_desc(fmt, btrfs_printk_rl_in_rcu, \ fs_info, KERN_DEBUG fmt, ##args) #elif defined(DEBUG) #define btrfs_debug(fs_info, fmt, args...) \ - btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) -#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ btrfs_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) -#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ - btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args) #define btrfs_debug_rl(fs_info, fmt, args...) \ - btrfs_printk_ratelimited(fs_info, KERN_DEBUG fmt, ##args) + btrfs_printk_rl_in_rcu(fs_info, KERN_DEBUG fmt, ##args) #else -#define btrfs_debug(fs_info, fmt, args...) \ - btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) -#define btrfs_debug_in_rcu(fs_info, fmt, args...) \ - btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) -#define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \ - btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args) -#define btrfs_debug_rl(fs_info, fmt, args...) \ - btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args) +/* When printk() is no_printk(), expand to no-op. */ +#define btrfs_debug(fs_info, fmt, args...) do { (void)(fs_info); } while(0) +#define btrfs_debug_rl(fs_info, fmt, args...) do { (void)(fs_info); } while(0) #endif #define btrfs_printk_in_rcu(fs_info, fmt, args...) \ @@ -146,26 +86,15 @@ do { \ rcu_read_unlock(); \ } while (0) -#define btrfs_no_printk_in_rcu(fs_info, fmt, args...) \ -do { \ - rcu_read_lock(); \ - btrfs_no_printk(fs_info, fmt, ##args); \ - rcu_read_unlock(); \ -} while (0) - -#define btrfs_printk_ratelimited(fs_info, fmt, args...) \ +#define btrfs_printk_rl_in_rcu(fs_info, fmt, args...) \ do { \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ + \ + rcu_read_lock(); \ if (__ratelimit(&_rs)) \ btrfs_printk(fs_info, fmt, ##args); \ -} while (0) - -#define btrfs_printk_rl_in_rcu(fs_info, fmt, args...) \ -do { \ - rcu_read_lock(); \ - btrfs_printk_ratelimited(fs_info, fmt, ##args); \ rcu_read_unlock(); \ } while (0) diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h index 0d599fd847c9..ff5eac84d819 100644 --- a/fs/btrfs/misc.h +++ b/fs/btrfs/misc.h @@ -7,6 +7,8 @@ #include <linux/bitmap.h> #include <linux/sched.h> #include <linux/wait.h> +#include <linux/mm.h> +#include <linux/pagemap.h> #include <linux/math64.h> #include <linux/rbtree.h> @@ -119,28 +121,23 @@ static inline struct rb_node *rb_simple_search_first(const struct rb_root *root, return ret; } -static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr, - struct rb_node *node) +static int rb_simple_node_bytenr_cmp(struct rb_node *new, const struct rb_node *existing) { - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; - struct rb_simple_node *entry; + struct rb_simple_node *new_entry = rb_entry(new, struct rb_simple_node, rb_node); + struct rb_simple_node *existing_entry = rb_entry(existing, struct rb_simple_node, rb_node); - while (*p) { - parent = *p; - entry = rb_entry(parent, struct rb_simple_node, rb_node); + if (new_entry->bytenr < existing_entry->bytenr) + return -1; + else if (new_entry->bytenr > existing_entry->bytenr) + return 1; - if (bytenr < entry->bytenr) - p = &(*p)->rb_left; - else if (bytenr > entry->bytenr) - p = &(*p)->rb_right; - else - return parent; - } + return 0; +} - rb_link_node(node, parent, p); - rb_insert_color(node, root); - return NULL; +static inline struct rb_node *rb_simple_insert(struct rb_root *root, + struct rb_simple_node *simple_node) +{ + return rb_find_add(&simple_node->rb_node, root, rb_simple_node_bytenr_cmp); } static inline bool bitmap_test_range_all_set(const unsigned long *addr, @@ -163,4 +160,9 @@ static inline bool bitmap_test_range_all_zero(const unsigned long *addr, return (found_set == start + nbits); } +static inline u64 folio_end(struct folio *folio) +{ + return folio_pos(folio) + folio_size(folio); +} + #endif diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 9212ce110cde..2829f20d7bb5 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -359,7 +359,7 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered, if (folio) { ASSERT(folio->mapping); ASSERT(folio_pos(folio) <= file_offset); - ASSERT(file_offset + len <= folio_pos(folio) + folio_size(folio)); + ASSERT(file_offset + len <= folio_end(folio)); /* * Ordered flag indicates whether we still have diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index fc821aa446f0..74e38da9bd39 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -190,7 +190,7 @@ static void print_uuid_item(const struct extent_buffer *l, unsigned long offset, u32 item_size) { if (!IS_ALIGNED(item_size, sizeof(u64))) { - pr_warn("BTRFS: uuid item with illegal size %lu!\n", + btrfs_warn(l->fs_info, "uuid item with illegal size %lu", (unsigned long)item_size); return; } @@ -223,7 +223,7 @@ static void print_eb_refs_lock(const struct extent_buffer *eb) { #ifdef CONFIG_BTRFS_DEBUG btrfs_info(eb->fs_info, "refs %u lock_owner %u current %u", - atomic_read(&eb->refs), eb->lock_owner, current->pid); + refcount_read(&eb->refs), eb->lock_owner, current->pid); #endif } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b3176edbde82..1a5972178b3a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -160,23 +160,34 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, int init_flags); static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); +static int btrfs_qgroup_qgroupid_key_cmp(const void *key, const struct rb_node *node) +{ + const u64 *qgroupid = key; + const struct btrfs_qgroup *qgroup = rb_entry(node, struct btrfs_qgroup, node); + + if (qgroup->qgroupid < *qgroupid) + return -1; + else if (qgroup->qgroupid > *qgroupid) + return 1; + + return 0; +} + /* must be called with qgroup_ioctl_lock held */ static struct btrfs_qgroup *find_qgroup_rb(const struct btrfs_fs_info *fs_info, u64 qgroupid) { - struct rb_node *n = fs_info->qgroup_tree.rb_node; - struct btrfs_qgroup *qgroup; + struct rb_node *node; - while (n) { - qgroup = rb_entry(n, struct btrfs_qgroup, node); - if (qgroup->qgroupid < qgroupid) - n = n->rb_left; - else if (qgroup->qgroupid > qgroupid) - n = n->rb_right; - else - return qgroup; - } - return NULL; + node = rb_find(&qgroupid, &fs_info->qgroup_tree, btrfs_qgroup_qgroupid_key_cmp); + return rb_entry_safe(node, struct btrfs_qgroup, node); +} + +static int btrfs_qgroup_qgroupid_cmp(struct rb_node *new, const struct rb_node *existing) +{ + const struct btrfs_qgroup *new_qgroup = rb_entry(new, struct btrfs_qgroup, node); + + return btrfs_qgroup_qgroupid_key_cmp(&new_qgroup->qgroupid, existing); } /* @@ -191,39 +202,25 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *prealloc, u64 qgroupid) { - struct rb_node **p = &fs_info->qgroup_tree.rb_node; - struct rb_node *parent = NULL; - struct btrfs_qgroup *qgroup; + struct rb_node *node; /* Caller must have pre-allocated @prealloc. */ ASSERT(prealloc); - while (*p) { - parent = *p; - qgroup = rb_entry(parent, struct btrfs_qgroup, node); - - if (qgroup->qgroupid < qgroupid) { - p = &(*p)->rb_left; - } else if (qgroup->qgroupid > qgroupid) { - p = &(*p)->rb_right; - } else { - kfree(prealloc); - return qgroup; - } + prealloc->qgroupid = qgroupid; + node = rb_find_add(&prealloc->node, &fs_info->qgroup_tree, btrfs_qgroup_qgroupid_cmp); + if (node) { + kfree(prealloc); + return rb_entry(node, struct btrfs_qgroup, node); } - qgroup = prealloc; - qgroup->qgroupid = qgroupid; - INIT_LIST_HEAD(&qgroup->groups); - INIT_LIST_HEAD(&qgroup->members); - INIT_LIST_HEAD(&qgroup->dirty); - INIT_LIST_HEAD(&qgroup->iterator); - INIT_LIST_HEAD(&qgroup->nested_iterator); + INIT_LIST_HEAD(&prealloc->groups); + INIT_LIST_HEAD(&prealloc->members); + INIT_LIST_HEAD(&prealloc->dirty); + INIT_LIST_HEAD(&prealloc->iterator); + INIT_LIST_HEAD(&prealloc->nested_iterator); - rb_link_node(&qgroup->node, parent, p); - rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); - - return qgroup; + return prealloc; } static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) @@ -349,13 +346,27 @@ int btrfs_verify_qgroup_counts(const struct btrfs_fs_info *fs_info, u64 qgroupid } #endif -static void qgroup_mark_inconsistent(struct btrfs_fs_info *fs_info) +__printf(2, 3) +static void qgroup_mark_inconsistent(struct btrfs_fs_info *fs_info, const char *fmt, ...) { + const u64 old_flags = fs_info->qgroup_flags; + if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) return; fs_info->qgroup_flags |= (BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT | BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN | BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING); + if (!(old_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)) { + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_warn_rl(fs_info, "qgroup marked inconsistent, %pV", &vaf); + va_end(args); + } } static void qgroup_read_enable_gen(struct btrfs_fs_info *fs_info, @@ -386,12 +397,6 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) if (!fs_info->quota_root) return 0; - fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); - if (!fs_info->qgroup_ulist) { - ret = -ENOMEM; - goto out; - } - path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; @@ -434,13 +439,10 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) goto out; } fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, ptr); - if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE) { + if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE) qgroup_read_enable_gen(fs_info, l, slot, ptr); - } else if (btrfs_qgroup_status_generation(l, ptr) != fs_info->generation) { - qgroup_mark_inconsistent(fs_info); - btrfs_err(fs_info, - "qgroup generation mismatch, marked as inconsistent"); - } + else if (btrfs_qgroup_status_generation(l, ptr) != fs_info->generation) + qgroup_mark_inconsistent(fs_info, "qgroup generation mismatch"); rescan_progress = btrfs_qgroup_status_rescan(l, ptr); goto next1; } @@ -451,10 +453,8 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) qgroup = find_qgroup_rb(fs_info, found_key.offset); if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || - (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { - btrfs_err(fs_info, "inconsistent qgroup config"); - qgroup_mark_inconsistent(fs_info); - } + (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) + qgroup_mark_inconsistent(fs_info, "inconsistent qgroup config"); if (!qgroup) { struct btrfs_qgroup *prealloc; struct btrfs_root *tree_root = fs_info->tree_root; @@ -476,7 +476,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) * during mount before we start doing things like creating * subvolumes. */ - if (is_fstree(qgroup->qgroupid) && + if (btrfs_is_fstree(qgroup->qgroupid) && qgroup->qgroupid > tree_root->free_objectid) /* * Don't need to check against BTRFS_LAST_FREE_OBJECTID, @@ -581,8 +581,6 @@ out: if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ret = qgroup_rescan_init(fs_info, rescan_progress, 0); } else { - ulist_free(fs_info->qgroup_ulist); - fs_info->qgroup_ulist = NULL; fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; btrfs_sysfs_del_qgroups(fs_info); } @@ -630,29 +628,30 @@ bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info) /* * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), - * first two are in single-threaded paths.And for the third one, we have set - * quota_root to be null with qgroup_lock held before, so it is safe to clean - * up the in-memory structures without qgroup_lock held. + * first two are in single-threaded paths. */ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) { struct rb_node *n; struct btrfs_qgroup *qgroup; + /* + * btrfs_quota_disable() can be called concurrently with + * btrfs_qgroup_rescan() -> qgroup_rescan_zero_tracking(), so take the + * lock. + */ + spin_lock(&fs_info->qgroup_lock); while ((n = rb_first(&fs_info->qgroup_tree))) { qgroup = rb_entry(n, struct btrfs_qgroup, node); rb_erase(n, &fs_info->qgroup_tree); __del_qgroup_rb(qgroup); + spin_unlock(&fs_info->qgroup_lock); btrfs_sysfs_del_one_qgroup(fs_info, qgroup); kfree(qgroup); + spin_lock(&fs_info->qgroup_lock); } - /* - * We call btrfs_free_qgroup_config() when unmounting - * filesystem and disabling quota, so we set qgroup_ulist - * to be null here to avoid double free. - */ - ulist_free(fs_info->qgroup_ulist); - fs_info->qgroup_ulist = NULL; + spin_unlock(&fs_info->qgroup_lock); + btrfs_sysfs_del_qgroups(fs_info); } @@ -998,7 +997,6 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup = NULL; struct btrfs_qgroup *prealloc = NULL; struct btrfs_trans_handle *trans = NULL; - struct ulist *ulist = NULL; const bool simple = (quota_ctl_args->cmd == BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA); int ret = 0; int slot; @@ -1021,12 +1019,6 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info, if (fs_info->quota_root) goto out; - ulist = ulist_alloc(GFP_KERNEL); - if (!ulist) { - ret = -ENOMEM; - goto out; - } - ret = btrfs_sysfs_add_qgroups(fs_info); if (ret < 0) goto out; @@ -1066,9 +1058,6 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info, if (fs_info->quota_root) goto out; - fs_info->qgroup_ulist = ulist; - ulist = NULL; - /* * initially create the quota tree */ @@ -1155,11 +1144,6 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info, qgroup = add_qgroup_rb(fs_info, prealloc, found_key.offset); prealloc = NULL; - if (IS_ERR(qgroup)) { - ret = PTR_ERR(qgroup); - btrfs_abort_transaction(trans, ret); - goto out_free_path; - } ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); if (ret < 0) { btrfs_abort_transaction(trans, ret); @@ -1272,17 +1256,13 @@ out_free_root: if (ret) btrfs_put_root(quota_root); out: - if (ret) { - ulist_free(fs_info->qgroup_ulist); - fs_info->qgroup_ulist = NULL; + if (ret) btrfs_sysfs_del_qgroups(fs_info); - } mutex_unlock(&fs_info->qgroup_ioctl_lock); if (ret && trans) btrfs_end_transaction(trans); else if (trans) ret = btrfs_end_transaction(trans); - ulist_free(ulist); kfree(prealloc); return ret; } @@ -1354,11 +1334,14 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) /* * We have nothing held here and no trans handle, just return the error - * if there is one. + * if there is one and set back the quota enabled bit since we didn't + * actually disable quotas. */ ret = flush_reservations(fs_info); - if (ret) + if (ret) { + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); return ret; + } /* * 1 For the root item @@ -1679,9 +1662,6 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) struct btrfs_qgroup *prealloc = NULL; int ret = 0; - if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED) - return 0; - mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) { ret = -ENOTCONN; @@ -1844,13 +1824,12 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) if (qgroup->rfer || qgroup->excl || qgroup->rfer_cmpr || qgroup->excl_cmpr) { DEBUG_WARN(); - btrfs_warn_rl(fs_info, -"to be deleted qgroup %u/%llu has non-zero numbers, rfer %llu rfer_cmpr %llu excl %llu excl_cmpr %llu", - btrfs_qgroup_level(qgroup->qgroupid), - btrfs_qgroup_subvolid(qgroup->qgroupid), - qgroup->rfer, qgroup->rfer_cmpr, - qgroup->excl, qgroup->excl_cmpr); - qgroup_mark_inconsistent(fs_info); + qgroup_mark_inconsistent(fs_info, + "to be deleted qgroup %u/%llu has non-zero numbers, rfer %llu rfer_cmpr %llu excl %llu excl_cmpr %llu", + btrfs_qgroup_level(qgroup->qgroupid), + btrfs_qgroup_subvolid(qgroup->qgroupid), + qgroup->rfer, qgroup->rfer_cmpr, + qgroup->excl, qgroup->excl_cmpr); } } del_qgroup_rb(fs_info, qgroupid); @@ -1873,7 +1852,8 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su struct btrfs_trans_handle *trans; int ret; - if (!is_fstree(subvolid) || !btrfs_qgroup_enabled(fs_info) || !fs_info->quota_root) + if (!btrfs_is_fstree(subvolid) || !btrfs_qgroup_enabled(fs_info) || + !fs_info->quota_root) return 0; /* @@ -1968,11 +1948,8 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, spin_unlock(&fs_info->qgroup_lock); ret = update_qgroup_limit_item(trans, qgroup); - if (ret) { - qgroup_mark_inconsistent(fs_info); - btrfs_info(fs_info, "unable to update quota limit for %llu", - qgroupid); - } + if (ret) + qgroup_mark_inconsistent(fs_info, "qgroup item update error %d", ret); out: mutex_unlock(&fs_info->qgroup_ioctl_lock); @@ -2027,7 +2004,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, ret = __xa_store(&delayed_refs->dirty_extents, index, record, GFP_ATOMIC); xa_unlock(&delayed_refs->dirty_extents); if (xa_is_err(ret)) { - qgroup_mark_inconsistent(fs_info); + qgroup_mark_inconsistent(fs_info, "xarray insert error: %d", xa_err(ret)); return xa_err(ret); } @@ -2094,10 +2071,8 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, ret = btrfs_find_all_roots(&ctx, true); if (ret < 0) { - qgroup_mark_inconsistent(fs_info); - btrfs_warn(fs_info, -"error accounting new delayed refs extent (err code: %d), quota inconsistent", - ret); + qgroup_mark_inconsistent(fs_info, + "error accounting new delayed refs extent: %d", ret); return 0; } @@ -2341,7 +2316,7 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0); /* For src_path */ - atomic_inc(&src_eb->refs); + refcount_inc(&src_eb->refs); src_path->nodes[root_level] = src_eb; src_path->slots[root_level] = dst_path->slots[root_level]; src_path->locks[root_level] = 0; @@ -2574,7 +2549,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans, goto out; } /* For dst_path */ - atomic_inc(&dst_eb->refs); + refcount_inc(&dst_eb->refs); dst_path->nodes[level] = dst_eb; dst_path->slots[level] = 0; dst_path->locks[level] = 0; @@ -2589,7 +2564,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans, out: btrfs_free_path(dst_path); if (ret < 0) - qgroup_mark_inconsistent(fs_info); + qgroup_mark_inconsistent(fs_info, "%s error: %d", __func__, ret); return ret; } @@ -2633,7 +2608,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, * mark qgroup inconsistent. */ if (root_level >= drop_subptree_thres) { - qgroup_mark_inconsistent(fs_info); + qgroup_mark_inconsistent(fs_info, "subtree level reached threshold"); return 0; } @@ -2666,7 +2641,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, * walk back up the tree (adjusting slot pointers as we go) * and restart the search process. */ - atomic_inc(&root_eb->refs); /* For path */ + refcount_inc(&root_eb->refs); /* For path */ path->nodes[root_level] = root_eb; path->slots[root_level] = 0; path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ @@ -2932,7 +2907,7 @@ static int maybe_fs_roots(struct ulist *roots) * trees. * If it contains a non-fs tree, it won't be shared with fs/subvol trees. */ - return is_fstree(unode->val); + return btrfs_is_fstree(unode->val); } int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, @@ -3133,10 +3108,12 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans) spin_unlock(&fs_info->qgroup_lock); ret = update_qgroup_info_item(trans, qgroup); if (ret) - qgroup_mark_inconsistent(fs_info); + qgroup_mark_inconsistent(fs_info, + "qgroup info item update error %d", ret); ret = update_qgroup_limit_item(trans, qgroup); if (ret) - qgroup_mark_inconsistent(fs_info); + qgroup_mark_inconsistent(fs_info, + "qgroup limit item update error %d", ret); spin_lock(&fs_info->qgroup_lock); } if (btrfs_qgroup_enabled(fs_info)) @@ -3147,7 +3124,8 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans) ret = update_qgroup_status_item(trans); if (ret) - qgroup_mark_inconsistent(fs_info); + qgroup_mark_inconsistent(fs_info, + "qgroup status item update error %d", ret); return ret; } @@ -3329,6 +3307,9 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, u32 level_size = 0; u64 nums; + if (!btrfs_qgroup_enabled(fs_info)) + return 0; + prealloc = kzalloc(sizeof(*prealloc), GFP_NOFS); if (!prealloc) return -ENOMEM; @@ -3352,8 +3333,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, if (!committing) mutex_lock(&fs_info->qgroup_ioctl_lock); - if (!btrfs_qgroup_enabled(fs_info)) - goto out; quota_root = fs_info->quota_root; if (!quota_root) { @@ -3554,7 +3533,7 @@ out: if (!committing) mutex_unlock(&fs_info->qgroup_ioctl_lock); if (need_rescan) - qgroup_mark_inconsistent(fs_info); + qgroup_mark_inconsistent(fs_info, "qgroup inherit needs a rescan"); if (qlist_prealloc) { for (int i = 0; i < inherit->num_qgroups; i++) kfree(qlist_prealloc[i]); @@ -3588,7 +3567,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, int ret = 0; LIST_HEAD(qgroup_list); - if (!is_fstree(ref_root)) + if (!btrfs_is_fstree(ref_root)) return 0; if (num_bytes == 0) @@ -3648,7 +3627,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup; LIST_HEAD(qgroup_list); - if (!is_fstree(ref_root)) + if (!btrfs_is_fstree(ref_root)) return; if (num_bytes == 0) @@ -4036,12 +4015,21 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) qgroup_rescan_zero_tracking(fs_info); mutex_lock(&fs_info->qgroup_rescan_lock); - fs_info->qgroup_rescan_running = true; - btrfs_queue_work(fs_info->qgroup_rescan_workers, - &fs_info->qgroup_rescan_work); + /* + * The rescan worker is only for full accounting qgroups, check if it's + * enabled as it is pointless to queue it otherwise. A concurrent quota + * disable may also have just cleared BTRFS_FS_QUOTA_ENABLED. + */ + if (btrfs_qgroup_full_accounting(fs_info)) { + fs_info->qgroup_rescan_running = true; + btrfs_queue_work(fs_info->qgroup_rescan_workers, + &fs_info->qgroup_rescan_work); + } else { + ret = -ENOTCONN; + } mutex_unlock(&fs_info->qgroup_rescan_lock); - return 0; + return ret; } int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, @@ -4128,8 +4116,8 @@ static int qgroup_unreserve_range(struct btrfs_inode *inode, * Now the entry is in [start, start + len), revert the * EXTENT_QGROUP_RESERVED bit. */ - clear_ret = btrfs_clear_extent_bits(&inode->io_tree, entry_start, - entry_end, EXTENT_QGROUP_RESERVED); + clear_ret = btrfs_clear_extent_bit(&inode->io_tree, entry_start, entry_end, + EXTENT_QGROUP_RESERVED, NULL); if (!ret && clear_ret < 0) ret = clear_ret; @@ -4216,7 +4204,7 @@ static int qgroup_reserve_data(struct btrfs_inode *inode, int ret; if (btrfs_qgroup_mode(root->fs_info) == BTRFS_QGROUP_MODE_DISABLED || - !is_fstree(btrfs_root_id(root)) || len == 0) + !btrfs_is_fstree(btrfs_root_id(root)) || len == 0) return 0; /* @reserved parameter is mandatory for qgroup */ @@ -4469,7 +4457,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, int ret; if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED || - !is_fstree(btrfs_root_id(root)) || num_bytes == 0) + !btrfs_is_fstree(btrfs_root_id(root)) || num_bytes == 0) return 0; BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); @@ -4514,7 +4502,7 @@ void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root) struct btrfs_fs_info *fs_info = root->fs_info; if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED || - !is_fstree(btrfs_root_id(root))) + !btrfs_is_fstree(btrfs_root_id(root))) return; /* TODO: Update trace point to handle such free */ @@ -4530,7 +4518,7 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes, struct btrfs_fs_info *fs_info = root->fs_info; if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED || - !is_fstree(btrfs_root_id(root))) + !btrfs_is_fstree(btrfs_root_id(root))) return; /* @@ -4589,7 +4577,7 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes) struct btrfs_fs_info *fs_info = root->fs_info; if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED || - !is_fstree(btrfs_root_id(root))) + !btrfs_is_fstree(btrfs_root_id(root))) return; /* Same as btrfs_qgroup_free_meta_prealloc() */ num_bytes = sub_root_meta_rsv(root, num_bytes, @@ -4673,6 +4661,28 @@ out: spin_unlock(&swapped_blocks->lock); } +static int qgroup_swapped_block_bytenr_key_cmp(const void *key, const struct rb_node *node) +{ + const u64 *bytenr = key; + const struct btrfs_qgroup_swapped_block *block = rb_entry(node, + struct btrfs_qgroup_swapped_block, node); + + if (block->subvol_bytenr < *bytenr) + return -1; + else if (block->subvol_bytenr > *bytenr) + return 1; + + return 0; +} + +static int qgroup_swapped_block_bytenr_cmp(struct rb_node *new, const struct rb_node *existing) +{ + const struct btrfs_qgroup_swapped_block *new_block = rb_entry(new, + struct btrfs_qgroup_swapped_block, node); + + return qgroup_swapped_block_bytenr_key_cmp(&new_block->subvol_bytenr, existing); +} + /* * Add subtree roots record into @subvol_root. * @@ -4692,8 +4702,7 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_root *subvol_root, struct btrfs_fs_info *fs_info = subvol_root->fs_info; struct btrfs_qgroup_swapped_blocks *blocks = &subvol_root->swapped_blocks; struct btrfs_qgroup_swapped_block *block; - struct rb_node **cur; - struct rb_node *parent = NULL; + struct rb_node *node; int level = btrfs_header_level(subvol_parent) - 1; int ret = 0; @@ -4742,46 +4751,32 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_root *subvol_root, /* Insert @block into @blocks */ spin_lock(&blocks->lock); - cur = &blocks->blocks[level].rb_node; - while (*cur) { + node = rb_find_add(&block->node, &blocks->blocks[level], qgroup_swapped_block_bytenr_cmp); + if (node) { struct btrfs_qgroup_swapped_block *entry; - parent = *cur; - entry = rb_entry(parent, struct btrfs_qgroup_swapped_block, - node); + entry = rb_entry(node, struct btrfs_qgroup_swapped_block, node); - if (entry->subvol_bytenr < block->subvol_bytenr) { - cur = &(*cur)->rb_left; - } else if (entry->subvol_bytenr > block->subvol_bytenr) { - cur = &(*cur)->rb_right; - } else { - if (entry->subvol_generation != - block->subvol_generation || - entry->reloc_bytenr != block->reloc_bytenr || - entry->reloc_generation != - block->reloc_generation) { - /* - * Duplicated but mismatch entry found. - * Shouldn't happen. - * - * Marking qgroup inconsistent should be enough - * for end users. - */ - DEBUG_WARN("duplicated but mismatched entry found"); - ret = -EEXIST; - } - kfree(block); - goto out_unlock; + if (entry->subvol_generation != block->subvol_generation || + entry->reloc_bytenr != block->reloc_bytenr || + entry->reloc_generation != block->reloc_generation) { + /* + * Duplicated but mismatch entry found. Shouldn't happen. + * Marking qgroup inconsistent should be enough for end + * users. + */ + DEBUG_WARN("duplicated but mismatched entry found"); + ret = -EEXIST; } + kfree(block); + goto out_unlock; } - rb_link_node(&block->node, parent, cur); - rb_insert_color(&block->node, &blocks->blocks[level]); blocks->swapped = true; out_unlock: spin_unlock(&blocks->lock); out: if (ret < 0) - qgroup_mark_inconsistent(fs_info); + qgroup_mark_inconsistent(fs_info, "%s error: %d", __func__, ret); return ret; } @@ -4801,7 +4796,6 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, struct btrfs_qgroup_swapped_block *block; struct extent_buffer *reloc_eb = NULL; struct rb_node *node; - bool found = false; bool swapped = false; int level = btrfs_header_level(subvol_eb); int ret = 0; @@ -4809,7 +4803,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, if (!btrfs_qgroup_full_accounting(fs_info)) return 0; - if (!is_fstree(btrfs_root_id(root)) || !root->reloc_root) + if (!btrfs_is_fstree(btrfs_root_id(root)) || !root->reloc_root) return 0; spin_lock(&blocks->lock); @@ -4817,23 +4811,14 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, spin_unlock(&blocks->lock); return 0; } - node = blocks->blocks[level].rb_node; - - while (node) { - block = rb_entry(node, struct btrfs_qgroup_swapped_block, node); - if (block->subvol_bytenr < subvol_eb->start) { - node = node->rb_left; - } else if (block->subvol_bytenr > subvol_eb->start) { - node = node->rb_right; - } else { - found = true; - break; - } - } - if (!found) { + node = rb_find(&subvol_eb->start, &blocks->blocks[level], + qgroup_swapped_block_bytenr_key_cmp); + if (!node) { spin_unlock(&blocks->lock); goto out; } + block = rb_entry(node, struct btrfs_qgroup_swapped_block, node); + /* Found one, remove it from @blocks first and update blocks->swapped */ rb_erase(&block->node, &blocks->blocks[level]); for (i = 0; i < BTRFS_MAX_LEVEL; i++) { @@ -4869,10 +4854,9 @@ free_out: free_extent_buffer(reloc_eb); out: if (ret < 0) { - btrfs_err_rl(fs_info, - "failed to account subtree at bytenr %llu: %d", - subvol_eb->start, ret); - qgroup_mark_inconsistent(fs_info); + qgroup_mark_inconsistent(fs_info, + "failed to account subtree at bytenr %llu: %d", + subvol_eb->start, ret); } return ret; } @@ -4903,7 +4887,7 @@ int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE) return 0; - if (!is_fstree(root)) + if (!btrfs_is_fstree(root)) return 0; /* If the extent predates enabling quotas, don't count it. */ diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index 1834011ccc49..cab0b291088c 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -329,11 +329,14 @@ int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent, item_size); - if (ret == -EEXIST) + if (ret == -EEXIST) { ret = update_raid_extent_item(trans, &stripe_key, stripe_extent, item_size); - if (ret) + if (ret) + btrfs_abort_transaction(trans, ret); + } else if (ret) { btrfs_abort_transaction(trans, ret); + } kfree(stripe_extent); diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h deleted file mode 100644 index 1c2d7cb1fe6f..000000000000 --- a/fs/btrfs/rcu-string.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2012 Red Hat. All rights reserved. - */ - -#ifndef BTRFS_RCU_STRING_H -#define BTRFS_RCU_STRING_H - -#include <linux/types.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/rcupdate.h> -#include <linux/printk.h> - -struct rcu_string { - struct rcu_head rcu; - char str[]; -}; - -static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask) -{ - size_t len = strlen(src) + 1; - struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) + - (len * sizeof(char)), mask); - if (!ret) - return ret; - /* Warn if the source got unexpectedly truncated. */ - if (WARN_ON(strscpy(ret->str, src, len) < 0)) { - kfree(ret); - return NULL; - } - return ret; -} - -static inline void rcu_string_free(struct rcu_string *str) -{ - if (str) - kfree_rcu(str, rcu); -} - -#define printk_in_rcu(fmt, ...) do { \ - rcu_read_lock(); \ - printk(fmt, __VA_ARGS__); \ - rcu_read_unlock(); \ -} while (0) - -#define printk_ratelimited_in_rcu(fmt, ...) do { \ - rcu_read_lock(); \ - printk_ratelimited(fmt, __VA_ARGS__); \ - rcu_read_unlock(); \ -} while (0) - -#define rcu_str_deref(rcu_str) ({ \ - struct rcu_string *__str = rcu_dereference(rcu_str); \ - __str->str; \ -}) - -#endif diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 2928abf7eb82..3871c3a6c743 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -75,69 +75,70 @@ struct block_entry { struct list_head actions; }; +static int block_entry_bytenr_key_cmp(const void *key, const struct rb_node *node) +{ + const u64 *bytenr = key; + const struct block_entry *entry = rb_entry(node, struct block_entry, node); + + if (entry->bytenr < *bytenr) + return 1; + else if (entry->bytenr > *bytenr) + return -1; + + return 0; +} + +static int block_entry_bytenr_cmp(struct rb_node *new, const struct rb_node *existing) +{ + const struct block_entry *new_entry = rb_entry(new, struct block_entry, node); + + return block_entry_bytenr_key_cmp(&new_entry->bytenr, existing); +} + static struct block_entry *insert_block_entry(struct rb_root *root, struct block_entry *be) { - struct rb_node **p = &root->rb_node; - struct rb_node *parent_node = NULL; - struct block_entry *entry; - - while (*p) { - parent_node = *p; - entry = rb_entry(parent_node, struct block_entry, node); - if (entry->bytenr > be->bytenr) - p = &(*p)->rb_left; - else if (entry->bytenr < be->bytenr) - p = &(*p)->rb_right; - else - return entry; - } + struct rb_node *node; - rb_link_node(&be->node, parent_node, p); - rb_insert_color(&be->node, root); - return NULL; + node = rb_find_add(&be->node, root, block_entry_bytenr_cmp); + return rb_entry_safe(node, struct block_entry, node); } static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr) { - struct rb_node *n; - struct block_entry *entry = NULL; + struct rb_node *node; - n = root->rb_node; - while (n) { - entry = rb_entry(n, struct block_entry, node); - if (entry->bytenr < bytenr) - n = n->rb_right; - else if (entry->bytenr > bytenr) - n = n->rb_left; - else - return entry; - } - return NULL; + node = rb_find(&bytenr, root, block_entry_bytenr_key_cmp); + return rb_entry_safe(node, struct block_entry, node); +} + +static int root_entry_root_objectid_key_cmp(const void *key, const struct rb_node *node) +{ + const u64 *objectid = key; + const struct root_entry *entry = rb_entry(node, struct root_entry, node); + + if (entry->root_objectid < *objectid) + return 1; + else if (entry->root_objectid > *objectid) + return -1; + + return 0; +} + +static int root_entry_root_objectid_cmp(struct rb_node *new, const struct rb_node *existing) +{ + const struct root_entry *new_entry = rb_entry(new, struct root_entry, node); + + return root_entry_root_objectid_key_cmp(&new_entry->root_objectid, existing); } static struct root_entry *insert_root_entry(struct rb_root *root, struct root_entry *re) { - struct rb_node **p = &root->rb_node; - struct rb_node *parent_node = NULL; - struct root_entry *entry; - - while (*p) { - parent_node = *p; - entry = rb_entry(parent_node, struct root_entry, node); - if (entry->root_objectid > re->root_objectid) - p = &(*p)->rb_left; - else if (entry->root_objectid < re->root_objectid) - p = &(*p)->rb_right; - else - return entry; - } - - rb_link_node(&re->node, parent_node, p); - rb_insert_color(&re->node, root); - return NULL; + struct rb_node *node; + node = rb_find_add(&re->node, root, root_entry_root_objectid_cmp); + return rb_entry_safe(node, struct root_entry, node); } static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2) @@ -161,48 +162,29 @@ static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2) return 0; } +static int ref_entry_cmp(struct rb_node *new, const struct rb_node *existing) +{ + struct ref_entry *new_entry = rb_entry(new, struct ref_entry, node); + struct ref_entry *existing_entry = rb_entry(existing, struct ref_entry, node); + + return comp_refs(new_entry, existing_entry); +} + static struct ref_entry *insert_ref_entry(struct rb_root *root, struct ref_entry *ref) { - struct rb_node **p = &root->rb_node; - struct rb_node *parent_node = NULL; - struct ref_entry *entry; - int cmp; - - while (*p) { - parent_node = *p; - entry = rb_entry(parent_node, struct ref_entry, node); - cmp = comp_refs(entry, ref); - if (cmp > 0) - p = &(*p)->rb_left; - else if (cmp < 0) - p = &(*p)->rb_right; - else - return entry; - } - - rb_link_node(&ref->node, parent_node, p); - rb_insert_color(&ref->node, root); - return NULL; + struct rb_node *node; + node = rb_find_add(&ref->node, root, ref_entry_cmp); + return rb_entry_safe(node, struct ref_entry, node); } static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid) { - struct rb_node *n; - struct root_entry *entry = NULL; + struct rb_node *node; - n = root->rb_node; - while (n) { - entry = rb_entry(n, struct root_entry, node); - if (entry->root_objectid < objectid) - n = n->rb_right; - else if (entry->root_objectid > objectid) - n = n->rb_left; - else - return entry; - } - return NULL; + node = rb_find(&objectid, root, root_entry_root_objectid_key_cmp); + return rb_entry_safe(node, struct root_entry, node); } #ifdef CONFIG_STACKTRACE @@ -668,7 +650,7 @@ static void dump_block_entry(struct btrfs_fs_info *fs_info, * our sanity checks pass as they are no longer needed. */ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, - struct btrfs_ref *generic_ref) + const struct btrfs_ref *generic_ref) { struct ref_entry *ref = NULL, *exist; struct ref_action *ra = NULL; diff --git a/fs/btrfs/ref-verify.h b/fs/btrfs/ref-verify.h index 3511e1a5c96b..559bd25a2b7a 100644 --- a/fs/btrfs/ref-verify.h +++ b/fs/btrfs/ref-verify.h @@ -19,7 +19,7 @@ struct btrfs_ref; int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info); void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info); int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, - struct btrfs_ref *generic_ref); + const struct btrfs_ref *generic_ref); void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start, u64 len); @@ -39,7 +39,7 @@ static inline void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info) } static inline int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, - struct btrfs_ref *generic_ref) + const struct btrfs_ref *generic_ref) { return 0; } diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index 62161beca559..ce25ab7f0e99 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -46,11 +46,9 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, if (ret) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); - goto out; + return ret; } - ret = btrfs_end_transaction(trans); -out: - return ret; + return btrfs_end_transaction(trans); } static int copy_inline_to_page(struct btrfs_inode *inode, @@ -95,8 +93,8 @@ static int copy_inline_to_page(struct btrfs_inode *inode, if (ret < 0) goto out_unlock; - btrfs_clear_extent_bits(&inode->io_tree, file_offset, range_end, - EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG); + btrfs_clear_extent_bit(&inode->io_tree, file_offset, range_end, + EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, NULL); ret = btrfs_set_extent_delalloc(inode, file_offset, range_end, 0, NULL); if (ret) goto out_unlock; @@ -270,11 +268,15 @@ copy_inline_extent: drop_args.end = aligned_end; drop_args.drop_cache = true; ret = btrfs_drop_extents(trans, root, inode, &drop_args); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); goto out; + } ret = btrfs_insert_empty_item(trans, root, path, new_key, size); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); goto out; + } write_extent_buffer(path->nodes[0], inline_data, btrfs_item_ptr_offset(path->nodes[0], @@ -283,6 +285,8 @@ copy_inline_extent: btrfs_update_inode_bytes(inode, datal, drop_args.bytes_found); btrfs_set_inode_full_sync(inode); ret = btrfs_inode_set_file_extent_range(inode, 0, aligned_end); + if (ret) + btrfs_abort_transaction(trans, ret); out: if (!ret && !trans) { /* @@ -297,10 +301,8 @@ out: trans = NULL; } } - if (ret && trans) { - btrfs_abort_transaction(trans, ret); + if (ret && trans) btrfs_end_transaction(trans); - } if (!ret) *trans_out = trans; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 02086191630d..e58151933844 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -90,10 +90,15 @@ * map address of tree root to tree */ struct mapping_node { - struct { - struct rb_node rb_node; - u64 bytenr; - }; /* Use rb_simle_node for search/insert */ + union { + /* Use rb_simple_node for search/insert */ + struct { + struct rb_node rb_node; + u64 bytenr; + }; + + struct rb_simple_node simple_node; + }; void *data; }; @@ -106,10 +111,15 @@ struct mapping_tree { * present a tree block to process */ struct tree_block { - struct { - struct rb_node rb_node; - u64 bytenr; - }; /* Use rb_simple_node for search/insert */ + union { + /* Use rb_simple_node for search/insert */ + struct { + struct rb_node rb_node; + u64 bytenr; + }; + + struct rb_simple_node simple_node; + }; u64 owner; struct btrfs_key key; u8 level; @@ -480,8 +490,7 @@ static int __add_reloc_root(struct btrfs_root *root) node->data = root; spin_lock(&rc->reloc_root_tree.lock); - rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root, - node->bytenr, &node->rb_node); + rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root, &node->simple_node); spin_unlock(&rc->reloc_root_tree.lock); if (rb_node) { btrfs_err(fs_info, @@ -564,8 +573,7 @@ static int __update_reloc_root(struct btrfs_root *root) spin_lock(&rc->reloc_root_tree.lock); node->bytenr = root->node->start; - rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root, - node->bytenr, &node->rb_node); + rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root, &node->simple_node); spin_unlock(&rc->reloc_root_tree.lock); if (rb_node) btrfs_backref_panic(fs_info, node->bytenr, -EEXIST); @@ -1516,7 +1524,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { level = btrfs_root_level(root_item); - atomic_inc(&reloc_root->node->refs); + refcount_inc(&reloc_root->node->refs); path->nodes[level] = reloc_root->node; path->slots[level] = 0; } else { @@ -2617,7 +2625,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, * tree. */ if (block->owner && - (!is_fstree(block->owner) || + (!btrfs_is_fstree(block->owner) || block->owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) { ret = relocate_cowonly_block(trans, rc, block, path); if (ret) @@ -2658,66 +2666,24 @@ static noinline_for_stack int prealloc_file_extent_cluster(struct reloc_control u64 num_bytes; int nr; int ret = 0; - u64 i_size = i_size_read(&inode->vfs_inode); u64 prealloc_start = cluster->start - offset; u64 prealloc_end = cluster->end - offset; u64 cur_offset = prealloc_start; /* - * For subpage case, previous i_size may not be aligned to PAGE_SIZE. - * This means the range [i_size, PAGE_END + 1) is filled with zeros by - * btrfs_do_readpage() call of previously relocated file cluster. + * For blocksize < folio size case (either bs < page size or large folios), + * beyond i_size, all blocks are filled with zero. * - * If the current cluster starts in the above range, btrfs_do_readpage() + * If the current cluster covers the above range, btrfs_do_readpage() * will skip the read, and relocate_one_folio() will later writeback * the padding zeros as new data, causing data corruption. * - * Here we have to manually invalidate the range (i_size, PAGE_END + 1). + * Here we have to invalidate the cache covering our cluster. */ - if (!PAGE_ALIGNED(i_size)) { - struct address_space *mapping = inode->vfs_inode.i_mapping; - struct btrfs_fs_info *fs_info = inode->root->fs_info; - const u32 sectorsize = fs_info->sectorsize; - struct folio *folio; - - ASSERT(sectorsize < PAGE_SIZE); - ASSERT(IS_ALIGNED(i_size, sectorsize)); - - /* - * Subpage can't handle page with DIRTY but without UPTODATE - * bit as it can lead to the following deadlock: - * - * btrfs_read_folio() - * | Page already *locked* - * |- btrfs_lock_and_flush_ordered_range() - * |- btrfs_start_ordered_extent() - * |- extent_write_cache_pages() - * |- lock_page() - * We try to lock the page we already hold. - * - * Here we just writeback the whole data reloc inode, so that - * we will be ensured to have no dirty range in the page, and - * are safe to clear the uptodate bits. - * - * This shouldn't cause too much overhead, as we need to write - * the data back anyway. - */ - ret = filemap_write_and_wait(mapping); - if (ret < 0) - return ret; - - folio = filemap_lock_folio(mapping, i_size >> PAGE_SHIFT); - /* - * If page is freed we don't need to do anything then, as we - * will re-read the whole page anyway. - */ - if (!IS_ERR(folio)) { - btrfs_subpage_clear_uptodate(fs_info, folio, i_size, - round_up(i_size, PAGE_SIZE) - i_size); - folio_unlock(folio); - folio_put(folio); - } - } + ret = filemap_invalidate_inode(&inode->vfs_inode, true, prealloc_start, + prealloc_end); + if (ret < 0) + return ret; BUG_ON(cluster->start != cluster->boundary[0]); ret = btrfs_alloc_data_chunk_ondemand(inode, @@ -2806,13 +2772,15 @@ static u64 get_cluster_boundary_end(const struct file_extent_cluster *cluster, static int relocate_one_folio(struct reloc_control *rc, struct file_ra_state *ra, - int *cluster_nr, unsigned long index) + int *cluster_nr, u64 *file_offset_ret) { const struct file_extent_cluster *cluster = &rc->cluster; struct inode *inode = rc->data_inode; struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); + const u64 orig_file_offset = *file_offset_ret; u64 offset = BTRFS_I(inode)->reloc_block_group_start; - const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT; + const pgoff_t last_index = (cluster->end - offset) >> PAGE_SHIFT; + const pgoff_t index = orig_file_offset >> PAGE_SHIFT; gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); struct folio *folio; u64 folio_start; @@ -2845,8 +2813,6 @@ again: return PTR_ERR(folio); } - WARN_ON(folio_order(folio)); - if (folio_test_readahead(folio) && !use_rst) page_cache_async_readahead(inode->i_mapping, ra, NULL, folio, last_index + 1 - index); @@ -2875,7 +2841,7 @@ again: goto release_folio; folio_start = folio_pos(folio); - folio_end = folio_start + PAGE_SIZE - 1; + folio_end = folio_start + folio_size(folio) - 1; /* * Start from the cluster, as for subpage case, the cluster can start @@ -2923,7 +2889,8 @@ again: * EXTENT_BOUNDARY bit prevents current extent from being merged * with previous extent. */ - if (in_range(cluster->boundary[*cluster_nr] - offset, folio_start, PAGE_SIZE)) { + if (in_range(cluster->boundary[*cluster_nr] - offset, + folio_start, folio_size(folio))) { u64 boundary_start = cluster->boundary[*cluster_nr] - offset; u64 boundary_end = boundary_start + @@ -2953,6 +2920,7 @@ again: btrfs_throttle(fs_info); if (btrfs_should_cancel_balance(fs_info)) ret = -ECANCELED; + *file_offset_ret = folio_end + 1; return ret; release_folio: @@ -2966,8 +2934,7 @@ static int relocate_file_extent_cluster(struct reloc_control *rc) struct inode *inode = rc->data_inode; const struct file_extent_cluster *cluster = &rc->cluster; u64 offset = BTRFS_I(inode)->reloc_block_group_start; - unsigned long index; - unsigned long last_index; + u64 cur_file_offset = cluster->start - offset; struct file_ra_state *ra; int cluster_nr = 0; int ret = 0; @@ -2989,10 +2956,11 @@ static int relocate_file_extent_cluster(struct reloc_control *rc) if (ret) goto out; - last_index = (cluster->end - offset) >> PAGE_SHIFT; - for (index = (cluster->start - offset) >> PAGE_SHIFT; - index <= last_index && !ret; index++) - ret = relocate_one_folio(rc, ra, &cluster_nr, index); + while (cur_file_offset < cluster->end - offset) { + ret = relocate_one_folio(rc, ra, &cluster_nr, &cur_file_offset); + if (ret) + break; + } if (ret == 0) WARN_ON(cluster_nr != cluster->nr); out: @@ -3155,7 +3123,7 @@ static int add_tree_block(struct reloc_control *rc, block->key_ready = false; block->owner = owner; - rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node); + rb_node = rb_simple_insert(blocks, &block->simple_node); if (rb_node) btrfs_backref_panic(rc->extent_root->fs_info, block->bytenr, -EEXIST); @@ -3643,7 +3611,7 @@ restart: } btrfs_release_path(path); - btrfs_clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY); + btrfs_clear_extent_bit(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, NULL); if (trans) { btrfs_end_transaction_throttle(trans); @@ -3880,7 +3848,7 @@ static void free_reloc_control(struct reloc_control *rc) */ static void describe_relocation(struct btrfs_block_group *block_group) { - char buf[128] = {'\0'}; + char buf[128] = "NONE"; btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf)); @@ -3900,7 +3868,8 @@ static const char *stage_to_string(enum reloc_stage stage) /* * function to relocate all extents in a block group. */ -int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) +int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start, + bool verbose) { struct btrfs_block_group *bg; struct btrfs_root *extent_root = btrfs_extent_root(fs_info, group_start); @@ -3992,7 +3961,8 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) goto out; } - describe_relocation(rc->block_group); + if (verbose) + describe_relocation(rc->block_group); btrfs_wait_block_group_reservations(rc->block_group); btrfs_wait_nocow_writers(rc->block_group); @@ -4036,8 +4006,10 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) if (rc->extents_found == 0) break; - btrfs_info(fs_info, "found %llu extents, stage: %s", - rc->extents_found, stage_to_string(finishes_stage)); + if (verbose) + btrfs_info(fs_info, "found %llu extents, stage: %s", + rc->extents_found, + stage_to_string(finishes_stage)); } WARN_ON(rc->block_group->pinned > 0); @@ -4339,7 +4311,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, } btrfs_backref_drop_node_buffer(node); - atomic_inc(&cow->refs); + refcount_inc(&cow->refs); node->eb = cow; node->new_bytenr = cow->start; diff --git a/fs/btrfs/relocation.h b/fs/btrfs/relocation.h index 788c86d8633a..5c36b3f84b57 100644 --- a/fs/btrfs/relocation.h +++ b/fs/btrfs/relocation.h @@ -12,7 +12,8 @@ struct btrfs_trans_handle; struct btrfs_ordered_extent; struct btrfs_pending_snapshot; -int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start); +int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start, + bool verbose); int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 7cd5e76a783c..6776e6ab8d10 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -556,7 +556,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, * hold all of the paths here */ for (i = 0; i < ipath->fspath->elem_cnt; ++i) - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu length %u links %u (path: %s)", swarn->errstr, swarn->logical, btrfs_dev_name(swarn->dev), @@ -570,7 +570,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, return 0; err: - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu: path resolving failed with ret=%d", swarn->errstr, swarn->logical, btrfs_dev_name(swarn->dev), @@ -596,7 +596,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * /* Super block error, no need to search extent tree. */ if (is_super) { - btrfs_warn_in_rcu(fs_info, "scrub: %s on device %s, physical %llu", + btrfs_warn(fs_info, "scrub: %s on device %s, physical %llu", errstr, btrfs_dev_name(dev), physical); return; } @@ -637,7 +637,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * } if (ret > 0) break; - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "scrub: %s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", errstr, swarn.logical, btrfs_dev_name(dev), swarn.physical, (ref_level ? "node" : "leaf"), @@ -1045,12 +1045,12 @@ skip: */ if (repaired) { if (dev) { - btrfs_err_rl_in_rcu(fs_info, + btrfs_err_rl(fs_info, "scrub: fixed up error at logical %llu on dev %s physical %llu", stripe->logical, btrfs_dev_name(dev), physical); } else { - btrfs_err_rl_in_rcu(fs_info, + btrfs_err_rl(fs_info, "scrub: fixed up error at logical %llu on mirror %u", stripe->logical, stripe->mirror_num); } @@ -1059,12 +1059,12 @@ skip: /* The remaining are all for unrepaired. */ if (dev) { - btrfs_err_rl_in_rcu(fs_info, + btrfs_err_rl(fs_info, "scrub: unable to fixup (regular) error at logical %llu on dev %s physical %llu", stripe->logical, btrfs_dev_name(dev), physical); } else { - btrfs_err_rl_in_rcu(fs_info, + btrfs_err_rl(fs_info, "scrub: unable to fixup (regular) error at logical %llu on mirror %u", stripe->logical, stripe->mirror_num); } @@ -1806,7 +1806,7 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe) struct btrfs_io_context *bioc = NULL; const u64 logical = stripe->logical + (i << fs_info->sectorsize_bits); - int err; + int ret; io_stripe.rst_search_commit_root = true; stripe_len = (nr_sectors - i) << fs_info->sectorsize_bits; @@ -1814,11 +1814,11 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe) * For RST cases, we need to manually split the bbio to * follow the RST boundary. */ - err = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, + ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, &stripe_len, &bioc, &io_stripe, &mirror); btrfs_put_bioc(bioc); - if (err < 0) { - if (err != -ENODATA) { + if (ret < 0) { + if (ret != -ENODATA) { /* * Earlier btrfs_get_raid_extent_offset() * returned -ENODATA, which means there's @@ -3057,7 +3057,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, if (!is_dev_replace && !readonly && !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); - btrfs_err_in_rcu(fs_info, + btrfs_err(fs_info, "scrub: devid %llu: filesystem on %s is not writable", devid, btrfs_dev_name(dev)); ret = -EROFS; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2891ec4056c6..7664025a5af4 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4,6 +4,7 @@ */ #include <linux/bsearch.h> +#include <linux/falloc.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/sort.h> @@ -758,7 +759,7 @@ static int send_header(struct send_ctx *sctx) { struct btrfs_stream_header hdr; - strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); + strscpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); hdr.version = cpu_to_le32(sctx->proto); return write_buf(sctx->send_filp, &hdr, sizeof(hdr), &sctx->send_off); @@ -1804,7 +1805,7 @@ static int gen_unique_name(struct send_ctx *sctx, ino, gen, idx); ASSERT(len < sizeof(tmp)); tmp_name.name = tmp; - tmp_name.len = strlen(tmp); + tmp_name.len = len; di = btrfs_lookup_dir_item(NULL, sctx->send_root, path, BTRFS_FIRST_FREE_OBJECTID, @@ -1843,7 +1844,7 @@ static int gen_unique_name(struct send_ctx *sctx, break; } - ret = fs_path_add(dest, tmp, strlen(tmp)); + ret = fs_path_add(dest, tmp, len); out: btrfs_free_path(path); @@ -4628,7 +4629,6 @@ static int rbtree_ref_comp(const void *k, const struct rb_node *node) { const struct recorded_ref *data = k; const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node); - int result; if (data->dir > ref->dir) return 1; @@ -4642,12 +4642,7 @@ static int rbtree_ref_comp(const void *k, const struct rb_node *node) return 1; if (data->name_len < ref->name_len) return -1; - result = strcmp(data->name, ref->name); - if (result > 0) - return 1; - if (result < 0) - return -1; - return 0; + return strcmp(data->name, ref->name); } static bool rbtree_ref_less(struct rb_node *node, const struct rb_node *parent) @@ -5411,6 +5406,30 @@ tlv_put_failure: return ret; } +static int send_fallocate(struct send_ctx *sctx, u32 mode, u64 offset, u64 len) +{ + struct fs_path *path; + int ret; + + path = get_cur_inode_path(sctx); + if (IS_ERR(path)) + return PTR_ERR(path); + + ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE); + if (ret < 0) + return ret; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); + TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_MODE, mode); + TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); + TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); + + ret = send_cmd(sctx); + +tlv_put_failure: + return ret; +} + static int send_hole(struct send_ctx *sctx, u64 end) { struct fs_path *p = NULL; @@ -5419,6 +5438,14 @@ static int send_hole(struct send_ctx *sctx, u64 end) int ret = 0; /* + * Starting with send stream v2 we have fallocate and can use it to + * punch holes instead of sending writes full of zeroes. + */ + if (proto_cmd_ok(sctx, BTRFS_SEND_C_FALLOCATE)) + return send_fallocate(sctx, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + offset, end - offset); + + /* * A hole that starts at EOF or beyond it. Since we do not yet support * fallocate (for extent preallocation and hole punching), sending a * write of zeroes starting at EOF or beyond would later require issuing diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index d9087aa81b21..0481c693ac2e 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -615,7 +615,7 @@ static void __btrfs_dump_space_info(const struct btrfs_fs_info *fs_info, void btrfs_dump_space_info(struct btrfs_fs_info *fs_info, struct btrfs_space_info *info, u64 bytes, - int dump_block_groups) + bool dump_block_groups) { struct btrfs_block_group *cache; u64 total_avail = 0; @@ -1887,7 +1887,7 @@ int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info, space_info->flags, orig_bytes, 1); if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) - btrfs_dump_space_info(fs_info, space_info, orig_bytes, 0); + btrfs_dump_space_info(fs_info, space_info, orig_bytes, false); } return ret; } @@ -1918,7 +1918,7 @@ int btrfs_reserve_data_bytes(struct btrfs_space_info *space_info, u64 bytes, trace_btrfs_space_reservation(fs_info, "space_info:enospc", space_info->flags, bytes, 1); if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) - btrfs_dump_space_info(fs_info, space_info, bytes, 0); + btrfs_dump_space_info(fs_info, space_info, bytes, false); } return ret; } @@ -1973,13 +1973,13 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) static u64 calc_pct_ratio(u64 x, u64 y) { - int err; + int ret; if (!y) return 0; again: - err = check_mul_overflow(100, x, &x); - if (err) + ret = check_mul_overflow(100, x, &x); + if (ret) goto lose_precision; return div64_u64(x, y); lose_precision: @@ -2139,7 +2139,7 @@ void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool } } -bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info) +static bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info) { bool ret; diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index 92b7f5e2b850..679f22efb407 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -278,7 +278,7 @@ u64 __pure btrfs_space_info_used(const struct btrfs_space_info *s_info, void btrfs_clear_space_info_full(struct btrfs_fs_info *info); void btrfs_dump_space_info(struct btrfs_fs_info *fs_info, struct btrfs_space_info *info, u64 bytes, - int dump_block_groups); + bool dump_block_groups); int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 orig_bytes, @@ -306,7 +306,6 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes); void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready); -bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info); int btrfs_calc_reclaim_threshold(const struct btrfs_space_info *space_info); void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info); void btrfs_return_free_space(struct btrfs_space_info *space_info, u64 len); diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index d4f019233493..c9b3821957f7 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -49,7 +49,7 @@ * Implementation: * * - Common - * Both metadata and data will use a new structure, btrfs_subpage, to + * Both metadata and data will use a new structure, btrfs_folio_state, to * record the status of each sector inside a page. This provides the extra * granularity needed. * @@ -63,10 +63,10 @@ * This means a slightly higher tree locking latency. */ -int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, - struct folio *folio, enum btrfs_subpage_type type) +int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info, + struct folio *folio, enum btrfs_folio_type type) { - struct btrfs_subpage *subpage; + struct btrfs_folio_state *bfs; /* For metadata we don't support large folio yet. */ if (type == BTRFS_SUBPAGE_METADATA) @@ -87,18 +87,18 @@ int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio)) return 0; - subpage = btrfs_alloc_subpage(fs_info, folio_size(folio), type); - if (IS_ERR(subpage)) - return PTR_ERR(subpage); + bfs = btrfs_alloc_folio_state(fs_info, folio_size(folio), type); + if (IS_ERR(bfs)) + return PTR_ERR(bfs); - folio_attach_private(folio, subpage); + folio_attach_private(folio, bfs); return 0; } -void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio, - enum btrfs_subpage_type type) +void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio, + enum btrfs_folio_type type) { - struct btrfs_subpage *subpage; + struct btrfs_folio_state *bfs; /* Either not subpage, or the folio already has private attached. */ if (!folio_test_private(folio)) @@ -108,15 +108,15 @@ void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *fol if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio)) return; - subpage = folio_detach_private(folio); - ASSERT(subpage); - btrfs_free_subpage(subpage); + bfs = folio_detach_private(folio); + ASSERT(bfs); + btrfs_free_folio_state(bfs); } -struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, - size_t fsize, enum btrfs_subpage_type type) +struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info, + size_t fsize, enum btrfs_folio_type type) { - struct btrfs_subpage *ret; + struct btrfs_folio_state *ret; unsigned int real_size; ASSERT(fs_info->sectorsize < fsize); @@ -136,11 +136,6 @@ struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, return ret; } -void btrfs_free_subpage(struct btrfs_subpage *subpage) -{ - kfree(subpage); -} - /* * Increase the eb_refs of current subpage. * @@ -152,7 +147,7 @@ void btrfs_free_subpage(struct btrfs_subpage *subpage) */ void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio) { - struct btrfs_subpage *subpage; + struct btrfs_folio_state *bfs; if (!btrfs_meta_is_subpage(fs_info)) return; @@ -160,13 +155,13 @@ void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio * ASSERT(folio_test_private(folio) && folio->mapping); lockdep_assert_held(&folio->mapping->i_private_lock); - subpage = folio_get_private(folio); - atomic_inc(&subpage->eb_refs); + bfs = folio_get_private(folio); + atomic_inc(&bfs->eb_refs); } void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio) { - struct btrfs_subpage *subpage; + struct btrfs_folio_state *bfs; if (!btrfs_meta_is_subpage(fs_info)) return; @@ -174,9 +169,9 @@ void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio * ASSERT(folio_test_private(folio) && folio->mapping); lockdep_assert_held(&folio->mapping->i_private_lock); - subpage = folio_get_private(folio); - ASSERT(atomic_read(&subpage->eb_refs)); - atomic_dec(&subpage->eb_refs); + bfs = folio_get_private(folio); + ASSERT(atomic_read(&bfs->eb_refs)); + atomic_dec(&bfs->eb_refs); } static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, @@ -191,8 +186,9 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, * unmapped page like dummy extent buffer pages. */ if (folio->mapping) - ASSERT(folio_pos(folio) <= start && - start + len <= folio_pos(folio) + folio_size(folio)); + ASSERT(folio_pos(folio) <= start && start + len <= folio_end(folio), + "start=%llu len=%u folio_pos=%llu folio_size=%zu", + start, len, folio_pos(folio), folio_size(folio)); } #define subpage_calc_start_bit(fs_info, folio, name, start, len) \ @@ -221,14 +217,13 @@ static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len) if (folio_pos(folio) >= orig_start + orig_len) *len = 0; else - *len = min_t(u64, folio_pos(folio) + folio_size(folio), - orig_start + orig_len) - *start; + *len = min_t(u64, folio_end(folio), orig_start + orig_len) - *start; } static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); const int nbits = (len >> fs_info->sectorsize_bits); unsigned long flags; @@ -238,7 +233,7 @@ static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info, btrfs_subpage_assert(fs_info, folio, start, len); - spin_lock_irqsave(&subpage->lock, flags); + spin_lock_irqsave(&bfs->lock, flags); /* * We have call sites passing @lock_page into * extent_clear_unlock_delalloc() for compression path. @@ -246,18 +241,18 @@ static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info, * This @locked_page is locked by plain lock_page(), thus its * subpage::locked is 0. Handle them in a special way. */ - if (atomic_read(&subpage->nr_locked) == 0) { - spin_unlock_irqrestore(&subpage->lock, flags); + if (atomic_read(&bfs->nr_locked) == 0) { + spin_unlock_irqrestore(&bfs->lock, flags); return true; } - for_each_set_bit_from(bit, subpage->bitmaps, start_bit + nbits) { - clear_bit(bit, subpage->bitmaps); + for_each_set_bit_from(bit, bfs->bitmaps, start_bit + nbits) { + clear_bit(bit, bfs->bitmaps); cleared++; } - ASSERT(atomic_read(&subpage->nr_locked) >= cleared); - last = atomic_sub_and_test(cleared, &subpage->nr_locked); - spin_unlock_irqrestore(&subpage->lock, flags); + ASSERT(atomic_read(&bfs->nr_locked) >= cleared); + last = atomic_sub_and_test(cleared, &bfs->nr_locked); + spin_unlock_irqrestore(&bfs->lock, flags); return last; } @@ -280,7 +275,7 @@ static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info, void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); ASSERT(folio_test_locked(folio)); @@ -296,7 +291,7 @@ void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info, * Since we own the page lock, no one else could touch subpage::locked * and we are safe to do several atomic operations without spinlock. */ - if (atomic_read(&subpage->nr_locked) == 0) { + if (atomic_read(&bfs->nr_locked) == 0) { /* No subpage lock, locked by plain lock_page(). */ folio_unlock(folio); return; @@ -310,7 +305,7 @@ void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info, void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info, struct folio *folio, unsigned long bitmap) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); const int start_bit = blocks_per_folio * btrfs_bitmap_nr_locked; unsigned long flags; @@ -323,42 +318,42 @@ void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info, return; } - if (atomic_read(&subpage->nr_locked) == 0) { + if (atomic_read(&bfs->nr_locked) == 0) { /* No subpage lock, locked by plain lock_page(). */ folio_unlock(folio); return; } - spin_lock_irqsave(&subpage->lock, flags); + spin_lock_irqsave(&bfs->lock, flags); for_each_set_bit(bit, &bitmap, blocks_per_folio) { - if (test_and_clear_bit(bit + start_bit, subpage->bitmaps)) + if (test_and_clear_bit(bit + start_bit, bfs->bitmaps)) cleared++; } - ASSERT(atomic_read(&subpage->nr_locked) >= cleared); - last = atomic_sub_and_test(cleared, &subpage->nr_locked); - spin_unlock_irqrestore(&subpage->lock, flags); + ASSERT(atomic_read(&bfs->nr_locked) >= cleared); + last = atomic_sub_and_test(cleared, &bfs->nr_locked); + spin_unlock_irqrestore(&bfs->lock, flags); if (last) folio_unlock(folio); } #define subpage_test_bitmap_all_set(fs_info, folio, name) \ ({ \ - struct btrfs_subpage *subpage = folio_get_private(folio); \ + struct btrfs_folio_state *bfs = folio_get_private(folio); \ const unsigned int blocks_per_folio = \ btrfs_blocks_per_folio(fs_info, folio); \ \ - bitmap_test_range_all_set(subpage->bitmaps, \ + bitmap_test_range_all_set(bfs->bitmaps, \ blocks_per_folio * btrfs_bitmap_nr_##name, \ blocks_per_folio); \ }) #define subpage_test_bitmap_all_zero(fs_info, folio, name) \ ({ \ - struct btrfs_subpage *subpage = folio_get_private(folio); \ + struct btrfs_folio_state *bfs = folio_get_private(folio); \ const unsigned int blocks_per_folio = \ btrfs_blocks_per_folio(fs_info, folio); \ \ - bitmap_test_range_all_zero(subpage->bitmaps, \ + bitmap_test_range_all_zero(bfs->bitmaps, \ blocks_per_folio * btrfs_bitmap_nr_##name, \ blocks_per_folio); \ }) @@ -366,43 +361,43 @@ void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info, void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, uptodate, start, len); unsigned long flags; - spin_lock_irqsave(&subpage->lock, flags); - bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + spin_lock_irqsave(&bfs->lock, flags); + bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); if (subpage_test_bitmap_all_set(fs_info, folio, uptodate)) folio_mark_uptodate(folio); - spin_unlock_irqrestore(&subpage->lock, flags); + spin_unlock_irqrestore(&bfs->lock, flags); } void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, uptodate, start, len); unsigned long flags; - spin_lock_irqsave(&subpage->lock, flags); - bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + spin_lock_irqsave(&bfs->lock, flags); + bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); folio_clear_uptodate(folio); - spin_unlock_irqrestore(&subpage->lock, flags); + spin_unlock_irqrestore(&bfs->lock, flags); } void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len); unsigned long flags; - spin_lock_irqsave(&subpage->lock, flags); - bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); - spin_unlock_irqrestore(&subpage->lock, flags); + spin_lock_irqsave(&bfs->lock, flags); + bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + spin_unlock_irqrestore(&bfs->lock, flags); folio_mark_dirty(folio); } @@ -419,17 +414,17 @@ void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info, bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len); unsigned long flags; bool last = false; - spin_lock_irqsave(&subpage->lock, flags); - bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + spin_lock_irqsave(&bfs->lock, flags); + bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); if (subpage_test_bitmap_all_zero(fs_info, folio, dirty)) last = true; - spin_unlock_irqrestore(&subpage->lock, flags); + spin_unlock_irqrestore(&bfs->lock, flags); return last; } @@ -446,91 +441,91 @@ void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info, void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, writeback, start, len); unsigned long flags; - spin_lock_irqsave(&subpage->lock, flags); - bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + spin_lock_irqsave(&bfs->lock, flags); + bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); if (!folio_test_writeback(folio)) folio_start_writeback(folio); - spin_unlock_irqrestore(&subpage->lock, flags); + spin_unlock_irqrestore(&bfs->lock, flags); } void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, writeback, start, len); unsigned long flags; - spin_lock_irqsave(&subpage->lock, flags); - bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + spin_lock_irqsave(&bfs->lock, flags); + bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); if (subpage_test_bitmap_all_zero(fs_info, folio, writeback)) { ASSERT(folio_test_writeback(folio)); folio_end_writeback(folio); } - spin_unlock_irqrestore(&subpage->lock, flags); + spin_unlock_irqrestore(&bfs->lock, flags); } void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, ordered, start, len); unsigned long flags; - spin_lock_irqsave(&subpage->lock, flags); - bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + spin_lock_irqsave(&bfs->lock, flags); + bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); folio_set_ordered(folio); - spin_unlock_irqrestore(&subpage->lock, flags); + spin_unlock_irqrestore(&bfs->lock, flags); } void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, ordered, start, len); unsigned long flags; - spin_lock_irqsave(&subpage->lock, flags); - bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + spin_lock_irqsave(&bfs->lock, flags); + bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); if (subpage_test_bitmap_all_zero(fs_info, folio, ordered)) folio_clear_ordered(folio); - spin_unlock_irqrestore(&subpage->lock, flags); + spin_unlock_irqrestore(&bfs->lock, flags); } void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, checked, start, len); unsigned long flags; - spin_lock_irqsave(&subpage->lock, flags); - bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + spin_lock_irqsave(&bfs->lock, flags); + bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); if (subpage_test_bitmap_all_set(fs_info, folio, checked)) folio_set_checked(folio); - spin_unlock_irqrestore(&subpage->lock, flags); + spin_unlock_irqrestore(&bfs->lock, flags); } void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage = folio_get_private(folio); + struct btrfs_folio_state *bfs = folio_get_private(folio); unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, checked, start, len); unsigned long flags; - spin_lock_irqsave(&subpage->lock, flags); - bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + spin_lock_irqsave(&bfs->lock, flags); + bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); folio_clear_checked(folio); - spin_unlock_irqrestore(&subpage->lock, flags); + spin_unlock_irqrestore(&bfs->lock, flags); } /* @@ -541,16 +536,16 @@ void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info, bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \ struct folio *folio, u64 start, u32 len) \ { \ - struct btrfs_subpage *subpage = folio_get_private(folio); \ + struct btrfs_folio_state *bfs = folio_get_private(folio); \ unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, \ name, start, len); \ unsigned long flags; \ bool ret; \ \ - spin_lock_irqsave(&subpage->lock, flags); \ - ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit, \ + spin_lock_irqsave(&bfs->lock, flags); \ + ret = bitmap_test_range_all_set(bfs->bitmaps, start_bit, \ len >> fs_info->sectorsize_bits); \ - spin_unlock_irqrestore(&subpage->lock, flags); \ + spin_unlock_irqrestore(&bfs->lock, flags); \ return ret; \ } IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate); @@ -662,10 +657,10 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked, { \ const unsigned int blocks_per_folio = \ btrfs_blocks_per_folio(fs_info, folio); \ - const struct btrfs_subpage *subpage = folio_get_private(folio); \ + const struct btrfs_folio_state *bfs = folio_get_private(folio); \ \ ASSERT(blocks_per_folio <= BITS_PER_LONG); \ - *dst = bitmap_read(subpage->bitmaps, \ + *dst = bitmap_read(bfs->bitmaps, \ blocks_per_folio * btrfs_bitmap_nr_##name, \ blocks_per_folio); \ } @@ -690,7 +685,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked, void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage; + struct btrfs_folio_state *bfs; unsigned int start_bit; unsigned int nbits; unsigned long flags; @@ -705,15 +700,15 @@ void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len); nbits = len >> fs_info->sectorsize_bits; - subpage = folio_get_private(folio); - ASSERT(subpage); - spin_lock_irqsave(&subpage->lock, flags); - if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) { + bfs = folio_get_private(folio); + ASSERT(bfs); + spin_lock_irqsave(&bfs->lock, flags); + if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) { SUBPAGE_DUMP_BITMAP(fs_info, folio, dirty, start, len); - ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); + ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits)); } - ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); - spin_unlock_irqrestore(&subpage->lock, flags); + ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits)); + spin_unlock_irqrestore(&bfs->lock, flags); } /* @@ -726,7 +721,7 @@ void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage; + struct btrfs_folio_state *bfs; unsigned long flags; unsigned int start_bit; unsigned int nbits; @@ -736,19 +731,19 @@ void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info, if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) return; - subpage = folio_get_private(folio); + bfs = folio_get_private(folio); start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); nbits = len >> fs_info->sectorsize_bits; - spin_lock_irqsave(&subpage->lock, flags); + spin_lock_irqsave(&bfs->lock, flags); /* Target range should not yet be locked. */ - if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) { + if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) { SUBPAGE_DUMP_BITMAP(fs_info, folio, locked, start, len); - ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); + ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits)); } - bitmap_set(subpage->bitmaps, start_bit, nbits); - ret = atomic_add_return(nbits, &subpage->nr_locked); + bitmap_set(bfs->bitmaps, start_bit, nbits); + ret = atomic_add_return(nbits, &bfs->nr_locked); ASSERT(ret <= btrfs_blocks_per_folio(fs_info, folio)); - spin_unlock_irqrestore(&subpage->lock, flags); + spin_unlock_irqrestore(&bfs->lock, flags); } /* @@ -776,7 +771,7 @@ bool btrfs_meta_folio_clear_and_test_dirty(struct folio *folio, const struct ext void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len) { - struct btrfs_subpage *subpage; + struct btrfs_folio_state *bfs; const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); unsigned long uptodate_bitmap; unsigned long dirty_bitmap; @@ -788,18 +783,18 @@ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info, ASSERT(folio_test_private(folio) && folio_get_private(folio)); ASSERT(blocks_per_folio > 1); - subpage = folio_get_private(folio); + bfs = folio_get_private(folio); - spin_lock_irqsave(&subpage->lock, flags); + spin_lock_irqsave(&bfs->lock, flags); GET_SUBPAGE_BITMAP(fs_info, folio, uptodate, &uptodate_bitmap); GET_SUBPAGE_BITMAP(fs_info, folio, dirty, &dirty_bitmap); GET_SUBPAGE_BITMAP(fs_info, folio, writeback, &writeback_bitmap); GET_SUBPAGE_BITMAP(fs_info, folio, ordered, &ordered_bitmap); GET_SUBPAGE_BITMAP(fs_info, folio, checked, &checked_bitmap); GET_SUBPAGE_BITMAP(fs_info, folio, locked, &locked_bitmap); - spin_unlock_irqrestore(&subpage->lock, flags); + spin_unlock_irqrestore(&bfs->lock, flags); - dump_page(folio_page(folio, 0), "btrfs subpage dump"); + dump_page(folio_page(folio, 0), "btrfs folio state dump"); btrfs_warn(fs_info, "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl", start, len, folio_pos(folio), @@ -815,14 +810,14 @@ void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info, struct folio *folio, unsigned long *ret_bitmap) { - struct btrfs_subpage *subpage; + struct btrfs_folio_state *bfs; unsigned long flags; ASSERT(folio_test_private(folio) && folio_get_private(folio)); ASSERT(btrfs_blocks_per_folio(fs_info, folio) > 1); - subpage = folio_get_private(folio); + bfs = folio_get_private(folio); - spin_lock_irqsave(&subpage->lock, flags); + spin_lock_irqsave(&bfs->lock, flags); GET_SUBPAGE_BITMAP(fs_info, folio, dirty, ret_bitmap); - spin_unlock_irqrestore(&subpage->lock, flags); + spin_unlock_irqrestore(&bfs->lock, flags); } diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h index 3042c5ea840a..ee0710eb13fd 100644 --- a/fs/btrfs/subpage.h +++ b/fs/btrfs/subpage.h @@ -32,9 +32,31 @@ struct folio; enum { btrfs_bitmap_nr_uptodate = 0, btrfs_bitmap_nr_dirty, + + /* + * This can be changed to atomic eventually. But this change will rely + * on the async delalloc range rework for locked bitmap. As async + * delalloc can unlock its range and mark blocks writeback at random + * timing. + */ btrfs_bitmap_nr_writeback, + + /* + * The ordered and checked flags are for COW fixup, already marked + * deprecated, and will be removed eventually. + */ btrfs_bitmap_nr_ordered, btrfs_bitmap_nr_checked, + + /* + * The locked bit is for async delalloc range (compression), currently + * async extent is queued with the range locked, until the compression + * is done. + * So an async extent can unlock the range at any random timing. + * + * This will need a rework on the async extent lifespan (mark writeback + * and do compression) before deprecating this flag. + */ btrfs_bitmap_nr_locked, btrfs_bitmap_nr_max }; @@ -43,7 +65,7 @@ enum { * Structure to trace status of each sector inside a page, attached to * page::private for both data and metadata inodes. */ -struct btrfs_subpage { +struct btrfs_folio_state { /* Common members for both data and metadata pages */ spinlock_t lock; union { @@ -51,7 +73,7 @@ struct btrfs_subpage { * Structures only used by metadata * * @eb_refs should only be operated under private_lock, as it - * manages whether the subpage can be detached. + * manages whether the btrfs_folio_state can be detached. */ atomic_t eb_refs; @@ -65,12 +87,11 @@ struct btrfs_subpage { unsigned long bitmaps[]; }; -enum btrfs_subpage_type { +enum btrfs_folio_type { BTRFS_SUBPAGE_METADATA, BTRFS_SUBPAGE_DATA, }; -#if PAGE_SIZE > BTRFS_MIN_BLOCKSIZE /* * Subpage support for metadata is more complex, as we can have dummy extent * buffers, where folios have no mapping to determine the owning inode. @@ -91,29 +112,19 @@ static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, ASSERT(is_data_inode(BTRFS_I(folio->mapping->host))); return fs_info->sectorsize < folio_size(folio); } -#else -static inline bool btrfs_meta_is_subpage(const struct btrfs_fs_info *fs_info) -{ - return false; -} -static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, - struct folio *folio) -{ - if (folio->mapping && folio->mapping->host) - ASSERT(is_data_inode(BTRFS_I(folio->mapping->host))); - return false; -} -#endif -int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, - struct folio *folio, enum btrfs_subpage_type type); -void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio, - enum btrfs_subpage_type type); +int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info, + struct folio *folio, enum btrfs_folio_type type); +void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio, + enum btrfs_folio_type type); /* Allocate additional data where page represents more than one sector */ -struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, - size_t fsize, enum btrfs_subpage_type type); -void btrfs_free_subpage(struct btrfs_subpage *subpage); +struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info, + size_t fsize, enum btrfs_folio_type type); +static inline void btrfs_free_folio_state(struct btrfs_folio_state *bfs) +{ + kfree(bfs); +} void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio); void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a0c65adce1ab..68e35a3700ff 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -261,10 +261,65 @@ static const struct fs_parameter_spec btrfs_fs_parameters[] = { {} }; -/* No support for restricting writes to btrfs devices yet... */ -static inline blk_mode_t btrfs_open_mode(struct fs_context *fc) +static bool btrfs_match_compress_type(const char *string, const char *type, bool may_have_level) { - return sb_open_mode(fc->sb_flags) & ~BLK_OPEN_RESTRICT_WRITES; + const int len = strlen(type); + + return (strncmp(string, type, len) == 0) && + ((may_have_level && string[len] == ':') || string[len] == '\0'); +} + +static int btrfs_parse_compress(struct btrfs_fs_context *ctx, + const struct fs_parameter *param, int opt) +{ + const char *string = param->string; + + /* + * Provide the same semantics as older kernels that don't use fs + * context, specifying the "compress" option clears "force-compress" + * without the need to pass "compress-force=[no|none]" before + * specifying "compress". + */ + if (opt != Opt_compress_force && opt != Opt_compress_force_type) + btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS); + + if (opt == Opt_compress || opt == Opt_compress_force) { + ctx->compress_type = BTRFS_COMPRESS_ZLIB; + ctx->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL; + btrfs_set_opt(ctx->mount_opt, COMPRESS); + btrfs_clear_opt(ctx->mount_opt, NODATACOW); + btrfs_clear_opt(ctx->mount_opt, NODATASUM); + } else if (btrfs_match_compress_type(string, "zlib", true)) { + ctx->compress_type = BTRFS_COMPRESS_ZLIB; + ctx->compress_level = btrfs_compress_str2level(BTRFS_COMPRESS_ZLIB, + string + 4); + btrfs_set_opt(ctx->mount_opt, COMPRESS); + btrfs_clear_opt(ctx->mount_opt, NODATACOW); + btrfs_clear_opt(ctx->mount_opt, NODATASUM); + } else if (btrfs_match_compress_type(string, "lzo", false)) { + ctx->compress_type = BTRFS_COMPRESS_LZO; + ctx->compress_level = 0; + btrfs_set_opt(ctx->mount_opt, COMPRESS); + btrfs_clear_opt(ctx->mount_opt, NODATACOW); + btrfs_clear_opt(ctx->mount_opt, NODATASUM); + } else if (btrfs_match_compress_type(string, "zstd", true)) { + ctx->compress_type = BTRFS_COMPRESS_ZSTD; + ctx->compress_level = btrfs_compress_str2level(BTRFS_COMPRESS_ZSTD, + string + 4); + btrfs_set_opt(ctx->mount_opt, COMPRESS); + btrfs_clear_opt(ctx->mount_opt, NODATACOW); + btrfs_clear_opt(ctx->mount_opt, NODATASUM); + } else if (btrfs_match_compress_type(string, "no", false) || + btrfs_match_compress_type(string, "none", false)) { + ctx->compress_level = 0; + ctx->compress_type = 0; + btrfs_clear_opt(ctx->mount_opt, COMPRESS); + btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS); + } else { + btrfs_err(NULL, "unrecognized compression value %s", string); + return -EINVAL; + } + return 0; } static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param) @@ -303,10 +358,9 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param) break; case Opt_device: { struct btrfs_device *device; - blk_mode_t mode = btrfs_open_mode(fc); mutex_lock(&uuid_mutex); - device = btrfs_scan_one_device(param->string, mode, false); + device = btrfs_scan_one_device(param->string, false); mutex_unlock(&uuid_mutex); if (IS_ERR(device)) return PTR_ERR(device); @@ -336,53 +390,8 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param) fallthrough; case Opt_compress: case Opt_compress_type: - /* - * Provide the same semantics as older kernels that don't use fs - * context, specifying the "compress" option clears - * "force-compress" without the need to pass - * "compress-force=[no|none]" before specifying "compress". - */ - if (opt != Opt_compress_force && opt != Opt_compress_force_type) - btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS); - - if (opt == Opt_compress || opt == Opt_compress_force) { - ctx->compress_type = BTRFS_COMPRESS_ZLIB; - ctx->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL; - btrfs_set_opt(ctx->mount_opt, COMPRESS); - btrfs_clear_opt(ctx->mount_opt, NODATACOW); - btrfs_clear_opt(ctx->mount_opt, NODATASUM); - } else if (strncmp(param->string, "zlib", 4) == 0) { - ctx->compress_type = BTRFS_COMPRESS_ZLIB; - ctx->compress_level = - btrfs_compress_str2level(BTRFS_COMPRESS_ZLIB, - param->string + 4); - btrfs_set_opt(ctx->mount_opt, COMPRESS); - btrfs_clear_opt(ctx->mount_opt, NODATACOW); - btrfs_clear_opt(ctx->mount_opt, NODATASUM); - } else if (strncmp(param->string, "lzo", 3) == 0) { - ctx->compress_type = BTRFS_COMPRESS_LZO; - ctx->compress_level = 0; - btrfs_set_opt(ctx->mount_opt, COMPRESS); - btrfs_clear_opt(ctx->mount_opt, NODATACOW); - btrfs_clear_opt(ctx->mount_opt, NODATASUM); - } else if (strncmp(param->string, "zstd", 4) == 0) { - ctx->compress_type = BTRFS_COMPRESS_ZSTD; - ctx->compress_level = - btrfs_compress_str2level(BTRFS_COMPRESS_ZSTD, - param->string + 4); - btrfs_set_opt(ctx->mount_opt, COMPRESS); - btrfs_clear_opt(ctx->mount_opt, NODATACOW); - btrfs_clear_opt(ctx->mount_opt, NODATASUM); - } else if (strncmp(param->string, "no", 2) == 0) { - ctx->compress_level = 0; - ctx->compress_type = 0; - btrfs_clear_opt(ctx->mount_opt, COMPRESS); - btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS); - } else { - btrfs_err(NULL, "unrecognized compression value %s", - param->string); + if (btrfs_parse_compress(ctx, param, opt)) return -EINVAL; - } break; case Opt_ssd: if (result.negated) { @@ -945,12 +954,12 @@ static int btrfs_fill_super(struct super_block *sb, { struct btrfs_inode *inode; struct btrfs_fs_info *fs_info = btrfs_sb(sb); - int err; + int ret; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = BTRFS_SUPER_MAGIC; sb->s_op = &btrfs_super_ops; - sb->s_d_op = &btrfs_dentry_operations; + set_default_d_op(sb, &btrfs_dentry_operations); sb->s_export_op = &btrfs_export_ops; #ifdef CONFIG_FS_VERITY sb->s_vop = &btrfs_verityops; @@ -959,28 +968,28 @@ static int btrfs_fill_super(struct super_block *sb, sb->s_time_gran = 1; sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM; - err = super_setup_bdi(sb); - if (err) { + ret = super_setup_bdi(sb); + if (ret) { btrfs_err(fs_info, "super_setup_bdi failed"); - return err; + return ret; } - err = open_ctree(sb, fs_devices); - if (err) { - btrfs_err(fs_info, "open_ctree failed: %d", err); - return err; + ret = open_ctree(sb, fs_devices); + if (ret) { + btrfs_err(fs_info, "open_ctree failed: %d", ret); + return ret; } inode = btrfs_iget(BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root); if (IS_ERR(inode)) { - err = PTR_ERR(inode); - btrfs_handle_fs_error(fs_info, err, NULL); + ret = PTR_ERR(inode); + btrfs_handle_fs_error(fs_info, ret, NULL); goto fail_close; } sb->s_root = d_make_root(&inode->vfs_inode); if (!sb->s_root) { - err = -ENOMEM; + ret = -ENOMEM; goto fail_close; } @@ -989,7 +998,7 @@ static int btrfs_fill_super(struct super_block *sb, fail_close: close_ctree(fs_info); - return err; + return ret; } int btrfs_sync_fs(struct super_block *sb, int wait) @@ -1826,10 +1835,9 @@ static int btrfs_get_tree_super(struct fs_context *fc) struct btrfs_fs_info *fs_info = fc->s_fs_info; struct btrfs_fs_context *ctx = fc->fs_private; struct btrfs_fs_devices *fs_devices = NULL; - struct block_device *bdev; struct btrfs_device *device; struct super_block *sb; - blk_mode_t mode = btrfs_open_mode(fc); + blk_mode_t mode = sb_open_mode(fc->sb_flags); int ret; btrfs_ctx_to_info(fs_info, ctx); @@ -1839,47 +1847,60 @@ static int btrfs_get_tree_super(struct fs_context *fc) * With 'true' passed to btrfs_scan_one_device() (mount time) we expect * either a valid device or an error. */ - device = btrfs_scan_one_device(fc->source, mode, true); + device = btrfs_scan_one_device(fc->source, true); ASSERT(device != NULL); if (IS_ERR(device)) { mutex_unlock(&uuid_mutex); return PTR_ERR(device); } - fs_devices = device->fs_devices; + /* + * We cannot hold uuid_mutex calling sget_fc(), it will lead to a + * locking order reversal with s_umount. + * + * So here we increase the holding number of fs_devices, this will ensure + * the fs_devices itself won't be freed. + */ + btrfs_fs_devices_inc_holding(fs_devices); fs_info->fs_devices = fs_devices; - - ret = btrfs_open_devices(fs_devices, mode, &btrfs_fs_type); mutex_unlock(&uuid_mutex); - if (ret) - return ret; - - if (!(fc->sb_flags & SB_RDONLY) && fs_devices->rw_devices == 0) { - ret = -EACCES; - goto error; - } - bdev = fs_devices->latest_dev->bdev; - /* - * From now on the error handling is not straightforward. - * - * If successful, this will transfer the fs_info into the super block, - * and fc->s_fs_info will be NULL. However if there's an existing - * super, we'll still have fc->s_fs_info populated. If we error - * completely out it'll be cleaned up when we drop the fs_context, - * otherwise it's tied to the lifetime of the super_block. - */ sb = sget_fc(fc, btrfs_fc_test_super, set_anon_super_fc); if (IS_ERR(sb)) { - ret = PTR_ERR(sb); - goto error; + mutex_lock(&uuid_mutex); + btrfs_fs_devices_dec_holding(fs_devices); + /* + * Since the fs_devices is not opened, it can be freed at any + * time after unlocking uuid_mutex. We need to avoid double + * free through put_fs_context()->btrfs_free_fs_info(). + * So here we reset fs_info->fs_devices to NULL, and let the + * regular fs_devices reclaim path to handle it. + * + * This applies to all later branches where no fs_devices is + * opened. + */ + fs_info->fs_devices = NULL; + mutex_unlock(&uuid_mutex); + return PTR_ERR(sb); } set_device_specific_options(fs_info); if (sb->s_root) { - btrfs_close_devices(fs_devices); + /* + * Not the first mount of the fs thus got an existing super block. + * Will reuse the returned super block, fs_info and fs_devices. + * + * fc->s_fs_info is not touched and will be later freed by + * put_fs_context() through btrfs_free_fs_context(). + */ + ASSERT(fc->s_fs_info == fs_info); + + mutex_lock(&uuid_mutex); + btrfs_fs_devices_dec_holding(fs_devices); + fs_info->fs_devices = NULL; + mutex_unlock(&uuid_mutex); /* * At this stage we may have RO flag mismatch between * fc->sb_flags and sb->s_flags. Caller should detect such @@ -1887,9 +1908,32 @@ static int btrfs_get_tree_super(struct fs_context *fc) * needed. */ } else { + struct block_device *bdev; + + /* + * The first mount of the fs thus a new superblock, fc->s_fs_info + * must be NULL, and the ownership of our fs_info and fs_devices is + * transferred to the super block. + */ + ASSERT(fc->s_fs_info == NULL); + + mutex_lock(&uuid_mutex); + btrfs_fs_devices_dec_holding(fs_devices); + ret = btrfs_open_devices(fs_devices, mode, sb); + if (ret < 0) + fs_info->fs_devices = NULL; + mutex_unlock(&uuid_mutex); + if (ret < 0) { + deactivate_locked_super(sb); + return ret; + } + if (!(fc->sb_flags & SB_RDONLY) && fs_devices->rw_devices == 0) { + deactivate_locked_super(sb); + return -EACCES; + } + bdev = fs_devices->latest_dev->bdev; snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id); - btrfs_sb(sb)->bdev_holder = &btrfs_fs_type; ret = btrfs_fill_super(sb, fs_devices); if (ret) { deactivate_locked_super(sb); @@ -1901,10 +1945,6 @@ static int btrfs_get_tree_super(struct fs_context *fc) fc->root = dget(sb->s_root); return 0; - -error: - btrfs_close_devices(fs_devices); - return ret; } /* @@ -1980,17 +2020,13 @@ error: * btrfs or not, setting the whole super block RO. To make per-subvolume mounting * work with different options work we need to keep backward compatibility. */ -static int btrfs_reconfigure_for_mount(struct fs_context *fc, struct vfsmount *mnt) +static int btrfs_reconfigure_for_mount(struct fs_context *fc) { int ret = 0; - if (fc->sb_flags & SB_RDONLY) - return ret; - - down_write(&mnt->mnt_sb->s_umount); - if (!(fc->sb_flags & SB_RDONLY) && (mnt->mnt_sb->s_flags & SB_RDONLY)) + if (!(fc->sb_flags & SB_RDONLY) && (fc->root->d_sb->s_flags & SB_RDONLY)) ret = btrfs_reconfigure(fc); - up_write(&mnt->mnt_sb->s_umount); + return ret; } @@ -2035,25 +2071,18 @@ static int btrfs_get_tree_subvol(struct fs_context *fc) */ dup_fc->s_fs_info = fs_info; - /* - * We'll do the security settings in our btrfs_get_tree_super() mount - * loop, they were duplicated into dup_fc, we can drop the originals - * here. - */ - security_free_mnt_opts(&fc->security); - fc->security = NULL; + ret = btrfs_get_tree_super(dup_fc); + if (ret) + goto error; - mnt = fc_mount(dup_fc); - if (IS_ERR(mnt)) { - put_fs_context(dup_fc); - return PTR_ERR(mnt); - } - ret = btrfs_reconfigure_for_mount(dup_fc, mnt); + ret = btrfs_reconfigure_for_mount(dup_fc); + up_write(&dup_fc->root->d_sb->s_umount); + if (ret) + goto error; + mnt = vfs_create_mount(dup_fc); put_fs_context(dup_fc); - if (ret) { - mntput(mnt); - return ret; - } + if (IS_ERR(mnt)) + return PTR_ERR(mnt); /* * This free's ->subvol_name, because if it isn't set we have to @@ -2067,25 +2096,15 @@ static int btrfs_get_tree_subvol(struct fs_context *fc) fc->root = dentry; return 0; +error: + put_fs_context(dup_fc); + return ret; } static int btrfs_get_tree(struct fs_context *fc) { - /* - * Since we use mount_subtree to mount the default/specified subvol, we - * have to do mounts in two steps. - * - * First pass through we call btrfs_get_tree_subvol(), this is just a - * wrapper around fc_mount() to call back into here again, and this time - * we'll call btrfs_get_tree_super(). This will do the open_ctree() and - * everything to open the devices and file system. Then we return back - * with a fully constructed vfsmount in btrfs_get_tree_subvol(), and - * from there we can do our mount_subvol() call, which will lookup - * whichever subvol we're mounting and setup this fc with the - * appropriate dentry for the subvol. - */ - if (fc->s_fs_info) - return btrfs_get_tree_super(fc); + ASSERT(fc->s_fs_info == NULL); + return btrfs_get_tree_subvol(fc); } @@ -2217,7 +2236,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, * Scanning outside of mount can return NULL which would turn * into 0 error code. */ - device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ, false); + device = btrfs_scan_one_device(vol->name, false); ret = PTR_ERR_OR_ZERO(device); mutex_unlock(&uuid_mutex); break; @@ -2235,7 +2254,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, * Scanning outside of mount can return NULL which would turn * into 0 error code. */ - device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ, false); + device = btrfs_scan_one_device(vol->name, false); if (IS_ERR_OR_NULL(device)) { mutex_unlock(&uuid_mutex); if (IS_ERR(device)) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 5d93d9dd2c12..9d398f7a36ad 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -160,8 +160,7 @@ static int can_modify_feature(struct btrfs_feature_attr *fa) clear = BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR; break; default: - pr_warn("btrfs: sysfs: unknown feature set %d\n", - fa->feature_set); + btrfs_warn(NULL, "sysfs: unknown feature set %d", fa->feature_set); return 0; } @@ -1138,13 +1137,21 @@ static ssize_t btrfs_commit_stats_show(struct kobject *kobj, struct kobj_attribute *a, char *buf) { struct btrfs_fs_info *fs_info = to_fs_info(kobj); + u64 now = ktime_get_ns(); + u64 start_time = fs_info->commit_stats.critical_section_start_time; + u64 pending = 0; + + if (start_time) + pending = now - start_time; return sysfs_emit(buf, "commits %llu\n" + "cur_commit_ms %llu\n" "last_commit_ms %llu\n" "max_commit_ms %llu\n" "total_commit_ms %llu\n", fs_info->commit_stats.commit_count, + div_u64(pending, NSEC_PER_MSEC), div_u64(fs_info->commit_stats.last_commit_dur, NSEC_PER_MSEC), div_u64(fs_info->commit_stats.max_commit_dur, NSEC_PER_MSEC), div_u64(fs_info->commit_stats.total_commit_dur, NSEC_PER_MSEC)); @@ -1202,7 +1209,7 @@ static ssize_t quota_override_store(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); unsigned long knob; - int err; + int ret; if (!fs_info) return -EPERM; @@ -1210,9 +1217,9 @@ static ssize_t quota_override_store(struct kobject *kobj, if (!capable(CAP_SYS_RESOURCE)) return -EPERM; - err = kstrtoul(buf, 10, &knob); - if (err) - return err; + ret = kstrtoul(buf, 10, &knob); + if (ret) + return ret; if (knob > 1) return -EINVAL; @@ -2239,7 +2246,7 @@ void btrfs_kobject_uevent(struct block_device *bdev, enum kobject_action action) ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action); if (ret) - pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n", + btrfs_warn(NULL, "sending event %d to kobject: '%s' (%p): failed", action, kobject_name(&disk_to_dev(bdev->bd_disk)->kobj), &disk_to_dev(bdev->bd_disk)->kobj); } @@ -2282,15 +2289,15 @@ static struct kset *btrfs_kset; */ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs) { - int error; + int ret; init_completion(&fs_devs->kobj_unregister); fs_devs->fsid_kobj.kset = btrfs_kset; - error = kobject_init_and_add(&fs_devs->fsid_kobj, &btrfs_ktype, NULL, - "%pU", fs_devs->fsid); - if (error) { + ret = kobject_init_and_add(&fs_devs->fsid_kobj, &btrfs_ktype, NULL, + "%pU", fs_devs->fsid); + if (ret) { kobject_put(&fs_devs->fsid_kobj); - return error; + return ret; } fs_devs->devices_kobj = kobject_create_and_add("devices", @@ -2316,71 +2323,70 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs) int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info) { - int error; + int ret; struct btrfs_fs_devices *fs_devs = fs_info->fs_devices; struct kobject *fsid_kobj = &fs_devs->fsid_kobj; - error = btrfs_sysfs_add_fs_devices(fs_devs); - if (error) - return error; + ret = btrfs_sysfs_add_fs_devices(fs_devs); + if (ret) + return ret; - error = sysfs_create_files(fsid_kobj, btrfs_attrs); - if (error) { + ret = sysfs_create_files(fsid_kobj, btrfs_attrs); + if (ret) { btrfs_sysfs_remove_fs_devices(fs_devs); - return error; + return ret; } - error = sysfs_create_group(fsid_kobj, - &btrfs_feature_attr_group); - if (error) + ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group); + if (ret) goto failure; #ifdef CONFIG_BTRFS_DEBUG fs_info->debug_kobj = kobject_create_and_add("debug", fsid_kobj); if (!fs_info->debug_kobj) { - error = -ENOMEM; + ret = -ENOMEM; goto failure; } - error = sysfs_create_files(fs_info->debug_kobj, btrfs_debug_mount_attrs); - if (error) + ret = sysfs_create_files(fs_info->debug_kobj, btrfs_debug_mount_attrs); + if (ret) goto failure; #endif /* Discard directory */ fs_info->discard_kobj = kobject_create_and_add("discard", fsid_kobj); if (!fs_info->discard_kobj) { - error = -ENOMEM; + ret = -ENOMEM; goto failure; } - error = sysfs_create_files(fs_info->discard_kobj, discard_attrs); - if (error) + ret = sysfs_create_files(fs_info->discard_kobj, discard_attrs); + if (ret) goto failure; - error = addrm_unknown_feature_attrs(fs_info, true); - if (error) + ret = addrm_unknown_feature_attrs(fs_info, true); + if (ret) goto failure; - error = sysfs_create_link(fsid_kobj, &fs_info->sb->s_bdi->dev->kobj, "bdi"); - if (error) + ret = sysfs_create_link(fsid_kobj, &fs_info->sb->s_bdi->dev->kobj, "bdi"); + if (ret) goto failure; fs_info->space_info_kobj = kobject_create_and_add("allocation", fsid_kobj); if (!fs_info->space_info_kobj) { - error = -ENOMEM; + ret = -ENOMEM; goto failure; } - error = sysfs_create_files(fs_info->space_info_kobj, allocation_attrs); - if (error) + ret = sysfs_create_files(fs_info->space_info_kobj, allocation_attrs); + if (ret) goto failure; return 0; failure: btrfs_sysfs_remove_mounted(fs_info); - return error; + return ret; } static ssize_t qgroup_enabled_show(struct kobject *qgroups_kobj, diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 00da54f0164c..b19328d077d3 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -23,8 +23,8 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end, { int ret; struct folio_batch fbatch; - unsigned long index = start >> PAGE_SHIFT; - unsigned long end_index = end >> PAGE_SHIFT; + pgoff_t index = start >> PAGE_SHIFT; + pgoff_t end_index = end >> PAGE_SHIFT; int i; int count = 0; int loops = 0; @@ -75,7 +75,8 @@ static void extent_flag_to_str(const struct extent_state *state, char *dest) dest[0] = 0; PRINT_ONE_FLAG(state, dest, cur, DIRTY); PRINT_ONE_FLAG(state, dest, cur, LOCKED); - PRINT_ONE_FLAG(state, dest, cur, NEW); + PRINT_ONE_FLAG(state, dest, cur, DIRTY_LOG1); + PRINT_ONE_FLAG(state, dest, cur, DIRTY_LOG2); PRINT_ONE_FLAG(state, dest, cur, DELALLOC); PRINT_ONE_FLAG(state, dest, cur, DEFRAG); PRINT_ONE_FLAG(state, dest, cur, BOUNDARY); @@ -113,7 +114,6 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize) struct extent_io_tree *tmp; struct page *page; struct page *locked_page = NULL; - unsigned long index = 0; /* In this test we need at least 2 file extents at its maximum size */ u64 max_bytes = BTRFS_MAX_EXTENT_SIZE; u64 total_dirty = 2 * max_bytes; @@ -156,7 +156,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize) * everything to make sure our pages don't get evicted and screw up our * test. */ - for (index = 0; index < (total_dirty >> PAGE_SHIFT); index++) { + for (pgoff_t index = 0; index < (total_dirty >> PAGE_SHIFT); index++) { page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL); if (!page) { test_err("failed to allocate test page"); @@ -326,7 +326,7 @@ static int test_find_delalloc(u32 sectorsize, u32 nodesize) out_bits: if (ret) dump_extent_io_tree(tmp); - btrfs_clear_extent_bits(tmp, 0, total_dirty - 1, (unsigned)-1); + btrfs_clear_extent_bit(tmp, 0, total_dirty - 1, (unsigned)-1, NULL); out: if (locked_page) put_page(locked_page); @@ -343,11 +343,11 @@ static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb) unsigned long i; for (i = 0; i < eb->len * BITS_PER_BYTE; i++) { - int bit, bit1; + bool bit_set, bit1_set; - bit = !!test_bit(i, bitmap); - bit1 = !!extent_buffer_test_bit(eb, 0, i); - if (bit1 != bit) { + bit_set = test_bit(i, bitmap); + bit1_set = extent_buffer_test_bit(eb, 0, i); + if (bit1_set != bit_set) { u8 has; u8 expect; @@ -360,9 +360,9 @@ static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb) return -EINVAL; } - bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE, - i % BITS_PER_BYTE); - if (bit1 != bit) { + bit1_set = extent_buffer_test_bit(eb, i / BITS_PER_BYTE, + i % BITS_PER_BYTE); + if (bit1_set != bit_set) { u8 has; u8 expect; @@ -662,7 +662,7 @@ static int test_find_first_clear_extent_bit(void) out: if (ret) dump_extent_io_tree(&tree); - btrfs_clear_extent_bits(&tree, 0, (u64)-1, CHUNK_TRIMMED | CHUNK_ALLOCATED); + btrfs_clear_extent_bit(&tree, 0, (u64)-1, CHUNK_TRIMMED | CHUNK_ALLOCATED, NULL); return ret; } diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index b61972046feb..c8822edd32e2 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -32,7 +32,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans, unsigned int i; int ret; - info = search_free_space_info(trans, cache, path, 0); + info = btrfs_search_free_space_info(trans, cache, path, 0); if (IS_ERR(info)) { test_err("could not find free space info"); ret = PTR_ERR(info); @@ -57,7 +57,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans, goto invalid; offset = key.objectid; while (offset < key.objectid + key.offset) { - bit = free_space_test_bit(cache, path, offset); + bit = btrfs_free_space_test_bit(cache, path, offset); if (prev_bit == 0 && bit == 1) { extent_start = offset; } else if (prev_bit == 1 && bit == 0) { @@ -115,7 +115,7 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans, u32 flags; int ret; - info = search_free_space_info(trans, cache, path, 0); + info = btrfs_search_free_space_info(trans, cache, path, 0); if (IS_ERR(info)) { test_err("could not find free space info"); btrfs_release_path(path); @@ -131,13 +131,13 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans, /* Flip it to the other format and check that for good measure. */ if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) { - ret = convert_free_space_to_extents(trans, cache, path); + ret = btrfs_convert_free_space_to_extents(trans, cache, path); if (ret) { test_err("could not convert to extents"); return ret; } } else { - ret = convert_free_space_to_bitmaps(trans, cache, path); + ret = btrfs_convert_free_space_to_bitmaps(trans, cache, path); if (ret) { test_err("could not convert to bitmaps"); return ret; @@ -170,9 +170,8 @@ static int test_remove_all(struct btrfs_trans_handle *trans, const struct free_space_extent extents[] = {}; int ret; - ret = __remove_from_free_space_tree(trans, cache, path, - cache->start, - cache->length); + ret = __btrfs_remove_from_free_space_tree(trans, cache, path, + cache->start, cache->length); if (ret) { test_err("could not remove free space"); return ret; @@ -193,8 +192,8 @@ static int test_remove_beginning(struct btrfs_trans_handle *trans, }; int ret; - ret = __remove_from_free_space_tree(trans, cache, path, - cache->start, alignment); + ret = __btrfs_remove_from_free_space_tree(trans, cache, path, + cache->start, alignment); if (ret) { test_err("could not remove free space"); return ret; @@ -216,7 +215,7 @@ static int test_remove_end(struct btrfs_trans_handle *trans, }; int ret; - ret = __remove_from_free_space_tree(trans, cache, path, + ret = __btrfs_remove_from_free_space_tree(trans, cache, path, cache->start + cache->length - alignment, alignment); if (ret) { @@ -240,9 +239,9 @@ static int test_remove_middle(struct btrfs_trans_handle *trans, }; int ret; - ret = __remove_from_free_space_tree(trans, cache, path, - cache->start + alignment, - alignment); + ret = __btrfs_remove_from_free_space_tree(trans, cache, path, + cache->start + alignment, + alignment); if (ret) { test_err("could not remove free space"); return ret; @@ -263,23 +262,22 @@ static int test_merge_left(struct btrfs_trans_handle *trans, }; int ret; - ret = __remove_from_free_space_tree(trans, cache, path, - cache->start, cache->length); + ret = __btrfs_remove_from_free_space_tree(trans, cache, path, + cache->start, cache->length); if (ret) { test_err("could not remove free space"); return ret; } - ret = __add_to_free_space_tree(trans, cache, path, cache->start, - alignment); + ret = __btrfs_add_to_free_space_tree(trans, cache, path, cache->start, + alignment); if (ret) { test_err("could not add free space"); return ret; } - ret = __add_to_free_space_tree(trans, cache, path, - cache->start + alignment, - alignment); + ret = __btrfs_add_to_free_space_tree(trans, cache, path, + cache->start + alignment, alignment); if (ret) { test_err("could not add free space"); return ret; @@ -300,24 +298,23 @@ static int test_merge_right(struct btrfs_trans_handle *trans, }; int ret; - ret = __remove_from_free_space_tree(trans, cache, path, - cache->start, cache->length); + ret = __btrfs_remove_from_free_space_tree(trans, cache, path, + cache->start, cache->length); if (ret) { test_err("could not remove free space"); return ret; } - ret = __add_to_free_space_tree(trans, cache, path, - cache->start + 2 * alignment, - alignment); + ret = __btrfs_add_to_free_space_tree(trans, cache, path, + cache->start + 2 * alignment, + alignment); if (ret) { test_err("could not add free space"); return ret; } - ret = __add_to_free_space_tree(trans, cache, path, - cache->start + alignment, - alignment); + ret = __btrfs_add_to_free_space_tree(trans, cache, path, + cache->start + alignment, alignment); if (ret) { test_err("could not add free space"); return ret; @@ -338,29 +335,29 @@ static int test_merge_both(struct btrfs_trans_handle *trans, }; int ret; - ret = __remove_from_free_space_tree(trans, cache, path, - cache->start, cache->length); + ret = __btrfs_remove_from_free_space_tree(trans, cache, path, + cache->start, cache->length); if (ret) { test_err("could not remove free space"); return ret; } - ret = __add_to_free_space_tree(trans, cache, path, cache->start, - alignment); + ret = __btrfs_add_to_free_space_tree(trans, cache, path, cache->start, + alignment); if (ret) { test_err("could not add free space"); return ret; } - ret = __add_to_free_space_tree(trans, cache, path, - cache->start + 2 * alignment, alignment); + ret = __btrfs_add_to_free_space_tree(trans, cache, path, + cache->start + 2 * alignment, alignment); if (ret) { test_err("could not add free space"); return ret; } - ret = __add_to_free_space_tree(trans, cache, path, - cache->start + alignment, alignment); + ret = __btrfs_add_to_free_space_tree(trans, cache, path, + cache->start + alignment, alignment); if (ret) { test_err("could not add free space"); return ret; @@ -383,29 +380,29 @@ static int test_merge_none(struct btrfs_trans_handle *trans, }; int ret; - ret = __remove_from_free_space_tree(trans, cache, path, - cache->start, cache->length); + ret = __btrfs_remove_from_free_space_tree(trans, cache, path, + cache->start, cache->length); if (ret) { test_err("could not remove free space"); return ret; } - ret = __add_to_free_space_tree(trans, cache, path, cache->start, - alignment); + ret = __btrfs_add_to_free_space_tree(trans, cache, path, cache->start, + alignment); if (ret) { test_err("could not add free space"); return ret; } - ret = __add_to_free_space_tree(trans, cache, path, - cache->start + 4 * alignment, alignment); + ret = __btrfs_add_to_free_space_tree(trans, cache, path, + cache->start + 4 * alignment, alignment); if (ret) { test_err("could not add free space"); return ret; } - ret = __add_to_free_space_tree(trans, cache, path, - cache->start + 2 * alignment, alignment); + ret = __btrfs_add_to_free_space_tree(trans, cache, path, + cache->start + 2 * alignment, alignment); if (ret) { test_err("could not add free space"); return ret; @@ -483,14 +480,14 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, goto out; } - ret = add_block_group_free_space(&trans, cache); + ret = btrfs_add_block_group_free_space(&trans, cache); if (ret) { test_err("could not add block group free space"); goto out; } if (bitmaps) { - ret = convert_free_space_to_bitmaps(&trans, cache, path); + ret = btrfs_convert_free_space_to_bitmaps(&trans, cache, path); if (ret) { test_err("could not convert block group to bitmaps"); goto out; @@ -501,7 +498,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, if (ret) goto out; - ret = remove_block_group_free_space(&trans, cache); + ret = btrfs_remove_block_group_free_space(&trans, cache); if (ret) { test_err("could not remove block group free space"); goto out; diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index a29d2c02c2c8..a4c2b7748b95 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -950,10 +950,10 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) } /* [BTRFS_MAX_EXTENT_SIZE/2][sectorsize HOLE][the rest] */ - ret = btrfs_clear_extent_bits(&BTRFS_I(inode)->io_tree, - BTRFS_MAX_EXTENT_SIZE >> 1, - (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, - EXTENT_DELALLOC | EXTENT_DELALLOC_NEW); + ret = btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, + BTRFS_MAX_EXTENT_SIZE >> 1, + (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW, NULL); if (ret) { test_err("clear_extent_bit returned %d", ret); goto out; @@ -1017,10 +1017,10 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) } /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */ - ret = btrfs_clear_extent_bits(&BTRFS_I(inode)->io_tree, - BTRFS_MAX_EXTENT_SIZE + sectorsize, - BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, - EXTENT_DELALLOC | EXTENT_DELALLOC_NEW); + ret = btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW, NULL); if (ret) { test_err("clear_extent_bit returned %d", ret); goto out; @@ -1051,8 +1051,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) } /* Empty */ - ret = btrfs_clear_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, - EXTENT_DELALLOC | EXTENT_DELALLOC_NEW); + ret = btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW, NULL); if (ret) { test_err("clear_extent_bit returned %d", ret); goto out; @@ -1066,8 +1066,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) ret = 0; out: if (ret) - btrfs_clear_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, - EXTENT_DELALLOC | EXTENT_DELALLOC_NEW); + btrfs_clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW, NULL); iput(inode); btrfs_free_dummy_root(root); btrfs_free_dummy_fs_info(fs_info); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b96195d6480f..c5c0d9cf1a80 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1211,15 +1211,15 @@ static int btrfs_wait_extents(struct btrfs_fs_info *fs_info, struct extent_io_tree *dirty_pages) { bool errors = false; - int err; + int ret; - err = __btrfs_wait_marked_extents(fs_info, dirty_pages); + ret = __btrfs_wait_marked_extents(fs_info, dirty_pages); if (test_and_clear_bit(BTRFS_FS_BTREE_ERR, &fs_info->flags)) errors = true; - if (errors && !err) - err = -EIO; - return err; + if (errors && !ret) + ret = -EIO; + return ret; } int btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark) @@ -1227,22 +1227,22 @@ int btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark) struct btrfs_fs_info *fs_info = log_root->fs_info; struct extent_io_tree *dirty_pages = &log_root->dirty_log_pages; bool errors = false; - int err; + int ret; ASSERT(btrfs_root_id(log_root) == BTRFS_TREE_LOG_OBJECTID); - err = __btrfs_wait_marked_extents(fs_info, dirty_pages); - if ((mark & EXTENT_DIRTY) && + ret = __btrfs_wait_marked_extents(fs_info, dirty_pages); + if ((mark & EXTENT_DIRTY_LOG1) && test_and_clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags)) errors = true; - if ((mark & EXTENT_NEW) && + if ((mark & EXTENT_DIRTY_LOG2) && test_and_clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags)) errors = true; - if (errors && !err) - err = -EIO; - return err; + if (errors && !ret) + ret = -EIO; + return ret; } /* @@ -1735,8 +1735,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_create_qgroup(trans, objectid); if (ret && ret != -EEXIST) { - btrfs_abort_transaction(trans, ret); - goto fail; + if (ret != -ENOTCONN || btrfs_qgroup_enabled(fs_info)) { + btrfs_abort_transaction(trans, ret); + goto fail; + } } /* @@ -2163,13 +2165,19 @@ static void add_pending_snapshot(struct btrfs_trans_handle *trans) list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots); } -static void update_commit_stats(struct btrfs_fs_info *fs_info, ktime_t interval) +static void update_commit_stats(struct btrfs_fs_info *fs_info) { + ktime_t now = ktime_get_ns(); + ktime_t interval = now - fs_info->commit_stats.critical_section_start_time; + + ASSERT(fs_info->commit_stats.critical_section_start_time); + fs_info->commit_stats.commit_count++; fs_info->commit_stats.last_commit_dur = interval; fs_info->commit_stats.max_commit_dur = max_t(u64, fs_info->commit_stats.max_commit_dur, interval); fs_info->commit_stats.total_commit_dur += interval; + fs_info->commit_stats.critical_section_start_time = 0; } int btrfs_commit_transaction(struct btrfs_trans_handle *trans) @@ -2178,8 +2186,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_transaction *prev_trans = NULL; int ret; - ktime_t start_time; - ktime_t interval; ASSERT(refcount_read(&trans->use_count) == 1); btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP); @@ -2312,8 +2318,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * Get the time spent on the work done by the commit thread and not * the time spent waiting on a previous commit */ - start_time = ktime_get_ns(); - + fs_info->commit_stats.critical_section_start_time = ktime_get_ns(); extwriter_counter_dec(cur_trans, trans->type); ret = btrfs_start_delalloc_flush(fs_info); @@ -2545,6 +2550,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (ret) goto scrub_continue; + update_commit_stats(fs_info); /* * We needn't acquire the lock here because there is no other task * which can change it. @@ -2581,8 +2587,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) trace_btrfs_transaction_commit(fs_info); - interval = ktime_get_ns() - start_time; - btrfs_scrub_continue(fs_info); if (current->journal_info == trans) @@ -2590,8 +2594,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) kmem_cache_free(btrfs_trans_handle_cachep, trans); - update_commit_stats(fs_info, interval); - return ret; unlock_reloc: diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 8f4703b488b7..0f556f4de3f9 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -191,7 +191,7 @@ static bool check_prev_ino(struct extent_buffer *leaf, * Only subvolume trees along with their reloc trees need this check. * Things like log tree doesn't follow this ino requirement. */ - if (!is_fstree(btrfs_header_owner(leaf))) + if (!btrfs_is_fstree(btrfs_header_owner(leaf))) return true; if (key->objectid == prev_key->objectid) @@ -475,7 +475,7 @@ static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key, * to be COWed to be relocated. */ if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID && - !is_fstree(key->offset))) { + !btrfs_is_fstree(key->offset))) { generic_err(leaf, slot, "invalid reloc tree for root %lld, root id is not a subvolume tree", key->offset); @@ -493,7 +493,7 @@ static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key, } /* DIR_ITEM/INDEX/INODE_REF is not allowed to point to non-fs trees */ - if (unlikely(!is_fstree(key->objectid) && !is_root_item)) { + if (unlikely(!btrfs_is_fstree(key->objectid) && !is_root_item)) { dir_item_err(leaf, slot, "invalid location key objectid, have %llu expect [%llu, %llu]", key->objectid, BTRFS_FIRST_FREE_OBJECTID, @@ -1311,7 +1311,7 @@ static bool is_valid_dref_root(u64 rootid) * - tree root * For v1 space cache */ - return is_fstree(rootid) || rootid == BTRFS_DATA_RELOC_TREE_OBJECTID || + return btrfs_is_fstree(rootid) || rootid == BTRFS_DATA_RELOC_TREE_OBJECTID || rootid == BTRFS_ROOT_TREE_OBJECTID; } @@ -2167,7 +2167,7 @@ ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO); int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner) { - const bool is_subvol = is_fstree(root_owner); + const bool is_subvol = btrfs_is_fstree(root_owner); const u64 eb_owner = btrfs_header_owner(eb); /* @@ -2209,7 +2209,7 @@ int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner) * For subvolume trees, owners can mismatch, but they should all belong * to subvolume trees. */ - if (unlikely(is_subvol != is_fstree(eb_owner))) { + if (unlikely(is_subvol != btrfs_is_fstree(eb_owner))) { btrfs_crit(eb->fs_info, "corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect [%llu, %llu]", btrfs_header_level(eb) == 0 ? "leaf" : "node", diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cea8a7e9d6d3..9f05d454b9df 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -112,7 +112,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_root *log, struct btrfs_path *path, - u64 dirid, int del_all); + u64 dirid, bool del_all); static void wait_log_commit(struct btrfs_root *root, int transid); /* @@ -144,7 +144,7 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r struct btrfs_inode *inode; /* Only meant to be called for subvolume roots and not for log roots. */ - ASSERT(is_fstree(btrfs_root_id(root))); + ASSERT(btrfs_is_fstree(btrfs_root_id(root))); /* * We're holding a transaction handle whether we are logging or @@ -1041,6 +1041,126 @@ out: return ret; } +static int unlink_refs_not_in_log(struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct btrfs_root *log_root, + struct btrfs_key *search_key, + struct btrfs_inode *dir, + struct btrfs_inode *inode, + u64 parent_objectid) +{ + struct extent_buffer *leaf = path->nodes[0]; + unsigned long ptr; + unsigned long ptr_end; + + /* + * Check all the names in this back reference to see if they are in the + * log. If so, we allow them to stay otherwise they must be unlinked as + * a conflict. + */ + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]); + while (ptr < ptr_end) { + struct fscrypt_str victim_name; + struct btrfs_inode_ref *victim_ref; + int ret; + + victim_ref = (struct btrfs_inode_ref *)ptr; + ret = read_alloc_one_name(leaf, (victim_ref + 1), + btrfs_inode_ref_name_len(leaf, victim_ref), + &victim_name); + if (ret) + return ret; + + ret = backref_in_log(log_root, search_key, parent_objectid, &victim_name); + if (ret) { + kfree(victim_name.name); + if (ret < 0) + return ret; + ptr = (unsigned long)(victim_ref + 1) + victim_name.len; + continue; + } + + inc_nlink(&inode->vfs_inode); + btrfs_release_path(path); + + ret = unlink_inode_for_log_replay(trans, dir, inode, &victim_name); + kfree(victim_name.name); + if (ret) + return ret; + return -EAGAIN; + } + + return 0; +} + +static int unlink_extrefs_not_in_log(struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct btrfs_root *root, + struct btrfs_root *log_root, + struct btrfs_key *search_key, + struct btrfs_inode *inode, + u64 inode_objectid, + u64 parent_objectid) +{ + struct extent_buffer *leaf = path->nodes[0]; + const unsigned long base = btrfs_item_ptr_offset(leaf, path->slots[0]); + const u32 item_size = btrfs_item_size(leaf, path->slots[0]); + u32 cur_offset = 0; + + while (cur_offset < item_size) { + struct btrfs_inode_extref *extref; + struct btrfs_inode *victim_parent; + struct fscrypt_str victim_name; + int ret; + + extref = (struct btrfs_inode_extref *)(base + cur_offset); + victim_name.len = btrfs_inode_extref_name_len(leaf, extref); + + if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) + goto next; + + ret = read_alloc_one_name(leaf, &extref->name, victim_name.len, + &victim_name); + if (ret) + return ret; + + search_key->objectid = inode_objectid; + search_key->type = BTRFS_INODE_EXTREF_KEY; + search_key->offset = btrfs_extref_hash(parent_objectid, + victim_name.name, + victim_name.len); + ret = backref_in_log(log_root, search_key, parent_objectid, &victim_name); + if (ret) { + kfree(victim_name.name); + if (ret < 0) + return ret; +next: + cur_offset += victim_name.len + sizeof(*extref); + continue; + } + + victim_parent = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(victim_parent)) { + kfree(victim_name.name); + return PTR_ERR(victim_parent); + } + + inc_nlink(&inode->vfs_inode); + btrfs_release_path(path); + + ret = unlink_inode_for_log_replay(trans, victim_parent, inode, + &victim_name); + iput(&victim_parent->vfs_inode); + kfree(victim_name.name); + if (ret) + return ret; + return -EAGAIN; + } + + return 0; +} + static inline int __add_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -1051,7 +1171,6 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, u64 ref_index, struct fscrypt_str *name) { int ret; - struct extent_buffer *leaf; struct btrfs_dir_item *di; struct btrfs_key search_key; struct btrfs_inode_extref *extref; @@ -1065,120 +1184,34 @@ again: if (ret < 0) { return ret; } else if (ret == 0) { - struct btrfs_inode_ref *victim_ref; - unsigned long ptr; - unsigned long ptr_end; - - leaf = path->nodes[0]; - - /* are we trying to overwrite a back ref for the root directory - * if so, just jump out, we're done + /* + * Are we trying to overwrite a back ref for the root directory? + * If so, we're done. */ if (search_key.objectid == search_key.offset) return 1; - /* check all the names in this back reference to see - * if they are in the log. if so, we allow them to stay - * otherwise they must be unlinked as a conflict - */ - ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); - ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]); - while (ptr < ptr_end) { - struct fscrypt_str victim_name; - - victim_ref = (struct btrfs_inode_ref *)ptr; - ret = read_alloc_one_name(leaf, (victim_ref + 1), - btrfs_inode_ref_name_len(leaf, victim_ref), - &victim_name); - if (ret) - return ret; - - ret = backref_in_log(log_root, &search_key, - parent_objectid, &victim_name); - if (ret < 0) { - kfree(victim_name.name); - return ret; - } else if (!ret) { - inc_nlink(&inode->vfs_inode); - btrfs_release_path(path); - - ret = unlink_inode_for_log_replay(trans, dir, inode, - &victim_name); - kfree(victim_name.name); - if (ret) - return ret; - goto again; - } - kfree(victim_name.name); - - ptr = (unsigned long)(victim_ref + 1) + victim_name.len; - } + ret = unlink_refs_not_in_log(trans, path, log_root, &search_key, + dir, inode, parent_objectid); + if (ret == -EAGAIN) + goto again; + else if (ret) + return ret; } btrfs_release_path(path); /* Same search but for extended refs */ - extref = btrfs_lookup_inode_extref(NULL, root, path, name, - inode_objectid, parent_objectid, 0, - 0); + extref = btrfs_lookup_inode_extref(root, path, name, inode_objectid, parent_objectid); if (IS_ERR(extref)) { return PTR_ERR(extref); } else if (extref) { - u32 item_size; - u32 cur_offset = 0; - unsigned long base; - struct btrfs_inode *victim_parent; - - leaf = path->nodes[0]; - - item_size = btrfs_item_size(leaf, path->slots[0]); - base = btrfs_item_ptr_offset(leaf, path->slots[0]); - - while (cur_offset < item_size) { - struct fscrypt_str victim_name; - - extref = (struct btrfs_inode_extref *)(base + cur_offset); - victim_name.len = btrfs_inode_extref_name_len(leaf, extref); - - if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) - goto next; - - ret = read_alloc_one_name(leaf, &extref->name, - victim_name.len, &victim_name); - if (ret) - return ret; - - search_key.objectid = inode_objectid; - search_key.type = BTRFS_INODE_EXTREF_KEY; - search_key.offset = btrfs_extref_hash(parent_objectid, - victim_name.name, - victim_name.len); - ret = backref_in_log(log_root, &search_key, - parent_objectid, &victim_name); - if (ret < 0) { - kfree(victim_name.name); - return ret; - } else if (!ret) { - victim_parent = btrfs_iget_logging(parent_objectid, root); - if (IS_ERR(victim_parent)) { - ret = PTR_ERR(victim_parent); - } else { - inc_nlink(&inode->vfs_inode); - btrfs_release_path(path); - - ret = unlink_inode_for_log_replay(trans, - victim_parent, - inode, &victim_name); - iput(&victim_parent->vfs_inode); - } - kfree(victim_name.name); - if (ret) - return ret; - goto again; - } - kfree(victim_name.name); -next: - cur_offset += victim_name.len + sizeof(*extref); - } + ret = unlink_extrefs_not_in_log(trans, path, root, log_root, + &search_key, inode, + inode_objectid, parent_objectid); + if (ret == -EAGAIN) + goto again; + else if (ret) + return ret; } btrfs_release_path(path); @@ -1352,7 +1385,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, unsigned long ref_end; struct fscrypt_str name = { 0 }; int ret; - int log_ref_ver = 0; + const bool is_extref_item = (key->type == BTRFS_INODE_EXTREF_KEY); u64 parent_objectid; u64 inode_objectid; u64 ref_index = 0; @@ -1361,11 +1394,10 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, ref_ptr = btrfs_item_ptr_offset(eb, slot); ref_end = ref_ptr + btrfs_item_size(eb, slot); - if (key->type == BTRFS_INODE_EXTREF_KEY) { + if (is_extref_item) { struct btrfs_inode_extref *r; ref_struct_size = sizeof(struct btrfs_inode_extref); - log_ref_ver = 1; r = (struct btrfs_inode_extref *)ref_ptr; parent_objectid = btrfs_inode_extref_parent(eb, r); } else { @@ -1383,6 +1415,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, dir = btrfs_iget_logging(parent_objectid, root); if (IS_ERR(dir)) { ret = PTR_ERR(dir); + if (ret == -ENOENT) + ret = 0; dir = NULL; goto out; } @@ -1395,9 +1429,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, } while (ref_ptr < ref_end) { - if (log_ref_ver) { + if (is_extref_item) { ret = extref_get_fields(eb, ref_ptr, &name, &ref_index, &parent_objectid); + if (ret) + goto out; /* * parent object can change from one array * item to another. @@ -1407,14 +1443,30 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, if (IS_ERR(dir)) { ret = PTR_ERR(dir); dir = NULL; + /* + * A new parent dir may have not been + * logged and not exist in the subvolume + * tree, see the comment above before + * the loop when getting the first + * parent dir. + */ + if (ret == -ENOENT) { + /* + * The next extref may refer to + * another parent dir that + * exists, so continue. + */ + ret = 0; + goto next; + } goto out; } } } else { ret = ref_get_fields(eb, ref_ptr, &name, &ref_index); + if (ret) + goto out; } - if (ret) - goto out; ret = inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), ref_index, &name); @@ -1448,10 +1500,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, } /* Else, ret == 1, we already have a perfect match, we're done. */ +next: ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + name.len; kfree(name.name); name.name = NULL; - if (log_ref_ver) { + if (is_extref_item && dir) { iput(&dir->vfs_inode); dir = NULL; } @@ -1628,8 +1681,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, if (inode->vfs_inode.i_nlink == 0) { if (S_ISDIR(inode->vfs_inode.i_mode)) { - ret = replay_dir_deletes(trans, root, NULL, path, - ino, 1); + ret = replay_dir_deletes(trans, root, NULL, path, ino, true); if (ret) goto out; } @@ -2281,7 +2333,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_root *log, struct btrfs_path *path, - u64 dirid, int del_all) + u64 dirid, bool del_all) { u64 range_start; u64 range_end; @@ -2443,8 +2495,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, break; mode = btrfs_inode_mode(eb, inode_item); if (S_ISDIR(mode)) { - ret = replay_dir_deletes(wc->trans, - root, log, path, key.objectid, 0); + ret = replay_dir_deletes(wc->trans, root, log, path, + key.objectid, false); if (ret) break; } @@ -2519,9 +2571,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, key.type == BTRFS_INODE_EXTREF_KEY) { ret = add_inode_ref(wc->trans, root, log, path, eb, i, &key); - if (ret && ret != -ENOENT) + if (ret) break; - ret = 0; } else if (key.type == BTRFS_EXTENT_DATA_KEY) { ret = replay_one_extent(wc->trans, root, path, eb, i, &key); @@ -2720,7 +2771,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, level = btrfs_header_level(log->node); orig_level = level; path->nodes[level] = log->node; - atomic_inc(&log->node->refs); + refcount_inc(&log->node->refs); path->slots[level] = 0; while (1) { @@ -2961,9 +3012,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } if (log_transid % 2 == 0) - mark = EXTENT_DIRTY; + mark = EXTENT_DIRTY_LOG1; else - mark = EXTENT_NEW; + mark = EXTENT_DIRTY_LOG2; /* we start IO on all the marked extents here, but we don't actually * wait for them until later. @@ -3094,7 +3145,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_write_marked_extents(fs_info, &log_root_tree->dirty_log_pages, - EXTENT_DIRTY | EXTENT_NEW); + EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2); blk_finish_plug(&plug); /* * As described above, -EAGAIN indicates a hole in the extents. We @@ -3114,7 +3165,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_wait_tree_log_extents(log, mark); if (!ret) ret = btrfs_wait_tree_log_extents(log_root_tree, - EXTENT_NEW | EXTENT_DIRTY); + EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2); if (ret) { btrfs_set_log_full_commit(trans); mutex_unlock(&log_root_tree->log_mutex); @@ -3240,9 +3291,9 @@ static void free_log_tree(struct btrfs_trans_handle *trans, */ btrfs_write_marked_extents(log->fs_info, &log->dirty_log_pages, - EXTENT_DIRTY | EXTENT_NEW); + EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2); btrfs_wait_tree_log_extents(log, - EXTENT_DIRTY | EXTENT_NEW); + EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2); if (trans) btrfs_abort_transaction(trans, ret); @@ -3432,7 +3483,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans, * inode item because on log replay we update the field to reflect * all existing entries in the directory (see overwrite_item()). */ - return btrfs_delete_one_dir_name(trans, log, path, di); + return btrfs_del_item(trans, log, path); } /* @@ -3472,26 +3523,27 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, return; } - ret = join_running_log_trans(root); - if (ret) - return; - - mutex_lock(&dir->log_mutex); - path = btrfs_alloc_path(); if (!path) { - ret = -ENOMEM; - goto out_unlock; + btrfs_set_log_full_commit(trans); + return; } + ret = join_running_log_trans(root); + ASSERT(ret == 0, "join_running_log_trans() ret=%d", ret); + if (WARN_ON(ret)) + goto out; + + mutex_lock(&dir->log_mutex); + ret = del_logged_dentry(trans, root->log_root, path, btrfs_ino(dir), name, index); - btrfs_free_path(path); -out_unlock: mutex_unlock(&dir->log_mutex); if (ret < 0) btrfs_set_log_full_commit(trans); btrfs_end_log_trans(root); +out: + btrfs_free_path(path); } /* see comments for btrfs_del_dir_entries_in_log */ @@ -3501,7 +3553,6 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, u64 dirid) { struct btrfs_root *log; - u64 index; int ret; ret = inode_logged(trans, inode, NULL); @@ -3513,13 +3564,13 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, } ret = join_running_log_trans(root); - if (ret) + ASSERT(ret == 0, "join_running_log_trans() ret=%d", ret); + if (WARN_ON(ret)) return; log = root->log_root; mutex_lock(&inode->log_mutex); - ret = btrfs_del_inode_ref(trans, log, name, btrfs_ino(inode), - dirid, &index); + ret = btrfs_del_inode_ref(trans, log, name, btrfs_ino(inode), dirid, NULL); mutex_unlock(&inode->log_mutex); if (ret < 0 && ret != -ENOENT) btrfs_set_log_full_commit(trans); @@ -3684,7 +3735,7 @@ static int clone_leaf(struct btrfs_path *path, struct btrfs_log_ctx *ctx) * Add extra ref to scratch eb so that it is not freed when callers * release the path, so we can reuse it later if needed. */ - atomic_inc(&ctx->scratch_eb->refs); + refcount_inc(&ctx->scratch_eb->refs); return 0; } @@ -4172,44 +4223,37 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct inode *inode, int log_inode_only, u64 logged_isize) { - struct btrfs_map_token token; u64 flags; - btrfs_init_map_token(&token, leaf); - if (log_inode_only) { /* set the generation to zero so the recover code * can tell the difference between an logging * just to say 'this inode exists' and a logging * to say 'update this inode with these values' */ - btrfs_set_token_inode_generation(&token, item, 0); - btrfs_set_token_inode_size(&token, item, logged_isize); + btrfs_set_inode_generation(leaf, item, 0); + btrfs_set_inode_size(leaf, item, logged_isize); } else { - btrfs_set_token_inode_generation(&token, item, - BTRFS_I(inode)->generation); - btrfs_set_token_inode_size(&token, item, inode->i_size); + btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); + btrfs_set_inode_size(leaf, item, inode->i_size); } - btrfs_set_token_inode_uid(&token, item, i_uid_read(inode)); - btrfs_set_token_inode_gid(&token, item, i_gid_read(inode)); - btrfs_set_token_inode_mode(&token, item, inode->i_mode); - btrfs_set_token_inode_nlink(&token, item, inode->i_nlink); + btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); + btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); + btrfs_set_inode_mode(leaf, item, inode->i_mode); + btrfs_set_inode_nlink(leaf, item, inode->i_nlink); + + btrfs_set_timespec_sec(leaf, &item->atime, inode_get_atime_sec(inode)); + btrfs_set_timespec_nsec(leaf, &item->atime, inode_get_atime_nsec(inode)); - btrfs_set_token_timespec_sec(&token, &item->atime, - inode_get_atime_sec(inode)); - btrfs_set_token_timespec_nsec(&token, &item->atime, - inode_get_atime_nsec(inode)); + btrfs_set_timespec_sec(leaf, &item->mtime, inode_get_mtime_sec(inode)); + btrfs_set_timespec_nsec(leaf, &item->mtime, inode_get_mtime_nsec(inode)); - btrfs_set_token_timespec_sec(&token, &item->mtime, - inode_get_mtime_sec(inode)); - btrfs_set_token_timespec_nsec(&token, &item->mtime, - inode_get_mtime_nsec(inode)); + btrfs_set_timespec_sec(leaf, &item->ctime, inode_get_ctime_sec(inode)); + btrfs_set_timespec_nsec(leaf, &item->ctime, inode_get_ctime_nsec(inode)); - btrfs_set_token_timespec_sec(&token, &item->ctime, - inode_get_ctime_sec(inode)); - btrfs_set_token_timespec_nsec(&token, &item->ctime, - inode_get_ctime_nsec(inode)); + btrfs_set_timespec_sec(leaf, &item->otime, BTRFS_I(inode)->i_otime_sec); + btrfs_set_timespec_nsec(leaf, &item->otime, BTRFS_I(inode)->i_otime_nsec); /* * We do not need to set the nbytes field, in fact during a fast fsync @@ -4220,13 +4264,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, * inode item in subvolume tree as needed (see overwrite_item()). */ - btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode)); - btrfs_set_token_inode_transid(&token, item, trans->transid); - btrfs_set_token_inode_rdev(&token, item, inode->i_rdev); + btrfs_set_inode_sequence(leaf, item, inode_peek_iversion(inode)); + btrfs_set_inode_transid(leaf, item, trans->transid); + btrfs_set_inode_rdev(leaf, item, inode->i_rdev); flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags, BTRFS_I(inode)->ro_flags); - btrfs_set_token_inode_flags(&token, item, flags); - btrfs_set_token_inode_block_group(&token, item, 0); + btrfs_set_inode_flags(leaf, item, flags); + btrfs_set_inode_block_group(leaf, item, 0); } static int log_inode_item(struct btrfs_trans_handle *trans, @@ -7192,8 +7236,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) struct btrfs_path *path; struct btrfs_trans_handle *trans; struct btrfs_key key; - struct btrfs_key found_key; - struct btrfs_root *log; struct btrfs_fs_info *fs_info = log_root_tree->fs_info; struct walk_control wc = { .process_func = process_one_buffer, @@ -7227,6 +7269,9 @@ again: key.offset = (u64)-1; while (1) { + struct btrfs_root *log; + struct btrfs_key found_key; + ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); if (ret < 0) { @@ -7255,6 +7300,12 @@ again: true); if (IS_ERR(wc.replay_dest)) { ret = PTR_ERR(wc.replay_dest); + wc.replay_dest = NULL; + if (ret != -ENOENT) { + btrfs_put_root(log); + btrfs_abort_transaction(trans, ret); + goto error; + } /* * We didn't find the subvol, likely because it was @@ -7267,36 +7318,36 @@ again: * block from being modified, and we'll just bail for * each subsequent pass. */ - if (ret == -ENOENT) - ret = btrfs_pin_extent_for_log_replay(trans, log->node); - btrfs_put_root(log); - - if (!ret) - goto next; - btrfs_abort_transaction(trans, ret); - goto error; + ret = btrfs_pin_extent_for_log_replay(trans, log->node); + if (ret) { + btrfs_put_root(log); + btrfs_abort_transaction(trans, ret); + goto error; + } + goto next; } wc.replay_dest->log_root = log; ret = btrfs_record_root_in_trans(trans, wc.replay_dest); - if (ret) - /* The loop needs to continue due to the root refs */ + if (ret) { btrfs_abort_transaction(trans, ret); - else - ret = walk_log_tree(trans, log, &wc); + goto next; + } - if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { - ret = fixup_inode_link_counts(trans, wc.replay_dest, - path); - if (ret) - btrfs_abort_transaction(trans, ret); + ret = walk_log_tree(trans, log, &wc); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto next; } - if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { + if (wc.stage == LOG_WALK_REPLAY_ALL) { struct btrfs_root *root = wc.replay_dest; - btrfs_release_path(path); - + ret = fixup_inode_link_counts(trans, wc.replay_dest, path); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto next; + } /* * We have just replayed everything, and the highest * objectid of fs roots probably has changed in case @@ -7306,17 +7357,20 @@ again: * could only happen during mount. */ ret = btrfs_init_root_free_objectid(root); - if (ret) + if (ret) { btrfs_abort_transaction(trans, ret); + goto next; + } + } +next: + if (wc.replay_dest) { + wc.replay_dest->log_root = NULL; + btrfs_put_root(wc.replay_dest); } - - wc.replay_dest->log_root = NULL; - btrfs_put_root(wc.replay_dest); btrfs_put_root(log); if (ret) goto error; -next: if (found_key.offset == 0) break; key.offset = found_key.offset - 1; @@ -7485,6 +7539,9 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, bool log_pinned = false; int ret; + btrfs_init_log_ctx(&ctx, inode); + ctx.logging_new_name = true; + /* * this will force the logging code to walk the dentry chain * up for the file @@ -7516,6 +7573,13 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, ret = 0; /* + * Now that we know we need to update the log, allocate the scratch eb + * for the context before joining a log transaction below, as this can + * take time and therefore we could delay log commits from other tasks. + */ + btrfs_init_log_ctx_scratch_eb(&ctx); + + /* * If we are doing a rename (old_dir is not NULL) from a directory that * was previously logged, make sure that on log replay we get the old * dir entry deleted. This is needed because we will also log the new @@ -7533,6 +7597,14 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, &old_dentry->d_name, 0, &fname); if (ret) goto out; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + fscrypt_free_filename(&fname); + goto out; + } + /* * We have two inodes to update in the log, the old directory and * the inode that got renamed, so we must pin the log to prevent @@ -7546,19 +7618,13 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, * mark the log for a full commit. */ if (WARN_ON_ONCE(ret < 0)) { + btrfs_free_path(path); fscrypt_free_filename(&fname); goto out; } log_pinned = true; - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - fscrypt_free_filename(&fname); - goto out; - } - /* * Other concurrent task might be logging the old directory, * as it can be triggered when logging other inode that had or @@ -7590,9 +7656,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, goto out; } - btrfs_init_log_ctx(&ctx, inode); - ctx.logging_new_name = true; - btrfs_init_log_ctx_scratch_eb(&ctx); /* * We don't care about the return value. If we fail to log the new name * then we know the next attempt to sync the log will fallback to a full @@ -7601,7 +7664,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, * inconsistent state after a rename operation. */ btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx); - free_extent_buffer(ctx.scratch_eb); ASSERT(list_empty(&ctx.conflict_inodes)); out: /* @@ -7614,5 +7676,6 @@ out: btrfs_set_log_full_commit(trans); if (log_pinned) btrfs_end_log_trans(root); + free_extent_buffer(ctx.scratch_eb); } diff --git a/fs/btrfs/tree-mod-log.c b/fs/btrfs/tree-mod-log.c index 1ac2678fc4ca..9e8cb3b7c064 100644 --- a/fs/btrfs/tree-mod-log.c +++ b/fs/btrfs/tree-mod-log.c @@ -27,18 +27,29 @@ struct tree_mod_elem { /* This is used for BTRFS_MOD_LOG_KEY* and BTRFS_MOD_LOG_ROOT_REPLACE. */ u64 generation; - /* Those are used for op == BTRFS_MOD_LOG_KEY_{REPLACE,REMOVE}. */ - struct btrfs_disk_key key; - u64 blockptr; - - /* This is used for op == BTRFS_MOD_LOG_MOVE_KEYS. */ - struct { - int dst_slot; - int nr_items; - } move; - - /* This is used for op == BTRFS_MOD_LOG_ROOT_REPLACE. */ - struct tree_mod_root old_root; + union { + /* + * This is used for the following op types: + * + * BTRFS_MOD_LOG_KEY_REMOVE_WHILE_FREEING + * BTRFS_MOD_LOG_KEY_REMOVE_WHILE_MOVING + * BTRFS_MOD_LOG_KEY_REMOVE + * BTRFS_MOD_LOG_KEY_REPLACE + */ + struct { + struct btrfs_disk_key key; + u64 blockptr; + } slot_change; + + /* This is used for op == BTRFS_MOD_LOG_MOVE_KEYS. */ + struct { + int dst_slot; + int nr_items; + } move; + + /* This is used for op == BTRFS_MOD_LOG_ROOT_REPLACE. */ + struct tree_mod_root old_root; + }; }; /* @@ -164,6 +175,30 @@ static noinline int tree_mod_log_insert(struct btrfs_fs_info *fs_info, return 0; } +static inline bool skip_eb_logging(const struct extent_buffer *eb) +{ + const u64 owner = btrfs_header_owner(eb); + + if (btrfs_header_level(eb) == 0) + return true; + + /* + * Tree mod logging exists so that there's a consistent view of the + * extents and backrefs of inodes even if while a task is iterating over + * them other tasks are modifying subvolume trees and the extent tree + * (including running delayed refs). So we only need to log extent + * buffers from the extent tree and subvolume trees. + */ + + if (owner == BTRFS_EXTENT_TREE_OBJECTID) + return false; + + if (btrfs_is_fstree(owner)) + return false; + + return true; +} + /* * Determines if logging can be omitted. Returns true if it can. Otherwise, it * returns false with the tree_mod_log_lock acquired. The caller must hold @@ -174,7 +209,7 @@ static bool tree_mod_dont_log(struct btrfs_fs_info *fs_info, const struct extent { if (!test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags)) return true; - if (eb && btrfs_header_level(eb) == 0) + if (eb && skip_eb_logging(eb)) return true; write_lock(&fs_info->tree_mod_log_lock); @@ -192,7 +227,7 @@ static bool tree_mod_need_log(const struct btrfs_fs_info *fs_info, { if (!test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags)) return false; - if (eb && btrfs_header_level(eb) == 0) + if (eb && skip_eb_logging(eb)) return false; return true; @@ -204,15 +239,17 @@ static struct tree_mod_elem *alloc_tree_mod_elem(const struct extent_buffer *eb, { struct tree_mod_elem *tm; + /* Can't be one of these types, due to union in struct tree_mod_elem. */ + ASSERT(op != BTRFS_MOD_LOG_MOVE_KEYS); + ASSERT(op != BTRFS_MOD_LOG_ROOT_REPLACE); + tm = kzalloc(sizeof(*tm), GFP_NOFS); if (!tm) return NULL; tm->logical = eb->start; - if (op != BTRFS_MOD_LOG_KEY_ADD) { - btrfs_node_key(eb, &tm->key, slot); - tm->blockptr = btrfs_node_blockptr(eb, slot); - } + btrfs_node_key(eb, &tm->slot_change.key, slot); + tm->slot_change.blockptr = btrfs_node_blockptr(eb, slot); tm->op = op; tm->slot = slot; tm->generation = btrfs_node_ptr_generation(eb, slot); @@ -830,8 +867,8 @@ static void tree_mod_log_rewind(struct btrfs_fs_info *fs_info, fallthrough; case BTRFS_MOD_LOG_KEY_REMOVE_WHILE_MOVING: case BTRFS_MOD_LOG_KEY_REMOVE: - btrfs_set_node_key(eb, &tm->key, tm->slot); - btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); + btrfs_set_node_key(eb, &tm->slot_change.key, tm->slot); + btrfs_set_node_blockptr(eb, tm->slot, tm->slot_change.blockptr); btrfs_set_node_ptr_generation(eb, tm->slot, tm->generation); n++; @@ -840,8 +877,8 @@ static void tree_mod_log_rewind(struct btrfs_fs_info *fs_info, break; case BTRFS_MOD_LOG_KEY_REPLACE: BUG_ON(tm->slot >= n); - btrfs_set_node_key(eb, &tm->key, tm->slot); - btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); + btrfs_set_node_key(eb, &tm->slot_change.key, tm->slot); + btrfs_set_node_blockptr(eb, tm->slot, tm->slot_change.blockptr); btrfs_set_node_ptr_generation(eb, tm->slot, tm->generation); break; diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index fc59b57257d6..7e16a253fb35 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c @@ -129,21 +129,25 @@ void ulist_free(struct ulist *ulist) kfree(ulist); } +static int ulist_node_val_key_cmp(const void *key, const struct rb_node *node) +{ + const u64 *val = key; + const struct ulist_node *unode = rb_entry(node, struct ulist_node, rb_node); + + if (unode->val < *val) + return 1; + else if (unode->val > *val) + return -1; + + return 0; +} + static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val) { - struct rb_node *n = ulist->root.rb_node; - struct ulist_node *u = NULL; - - while (n) { - u = rb_entry(n, struct ulist_node, rb_node); - if (u->val < val) - n = n->rb_right; - else if (u->val > val) - n = n->rb_left; - else - return u; - } - return NULL; + struct rb_node *node; + + node = rb_find(&val, &ulist->root, ulist_node_val_key_cmp); + return rb_entry_safe(node, struct ulist_node, rb_node); } static void ulist_rbtree_erase(struct ulist *ulist, struct ulist_node *node) @@ -155,25 +159,20 @@ static void ulist_rbtree_erase(struct ulist *ulist, struct ulist_node *node) ulist->nnodes--; } +static int ulist_node_val_cmp(struct rb_node *new, const struct rb_node *existing) +{ + const struct ulist_node *unode = rb_entry(new, struct ulist_node, rb_node); + + return ulist_node_val_key_cmp(&unode->val, existing); +} + static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins) { - struct rb_node **p = &ulist->root.rb_node; - struct rb_node *parent = NULL; - struct ulist_node *cur = NULL; - - while (*p) { - parent = *p; - cur = rb_entry(parent, struct ulist_node, rb_node); - - if (cur->val < ins->val) - p = &(*p)->rb_right; - else if (cur->val > ins->val) - p = &(*p)->rb_left; - else - return -EEXIST; - } - rb_link_node(&ins->rb_node, parent, p); - rb_insert_color(&ins->rb_node, &ulist->root); + struct rb_node *node; + + node = rb_find_add(&ins->rb_node, &ulist->root, ulist_node_val_cmp); + if (node) + return -EEXIST; return 0; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f475b4b7c457..fa7a929a0461 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -18,7 +18,6 @@ #include "transaction.h" #include "volumes.h" #include "raid56.h" -#include "rcu-string.h" #include "dev-replace.h" #include "sysfs.h" #include "tree-checker.h" @@ -214,10 +213,8 @@ void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf) u64 flags = bg_flags; u32 size_bp = size_buf; - if (!flags) { - strcpy(bp, "NONE"); + if (!flags) return; - } #define DESCRIBE_FLAG(flag, desc) \ do { \ @@ -403,7 +400,11 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) static void btrfs_free_device(struct btrfs_device *device) { WARN_ON(!list_empty(&device->post_commit_list)); - rcu_string_free(device->name); + /* + * No need to call kfree_rcu() nor do RCU lock/unlock, nothing is + * reading the device name. + */ + kfree(rcu_dereference_raw(device->name)); btrfs_extent_io_tree_release(&device->alloc_state); btrfs_destroy_dev_zone_info(device); kfree(device); @@ -414,6 +415,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices) struct btrfs_device *device; WARN_ON(fs_devices->opened); + WARN_ON(fs_devices->holding); while (!list_empty(&fs_devices->devices)) { device = list_first_entry(&fs_devices->devices, struct btrfs_device, dev_list); @@ -473,7 +475,7 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder, struct block_device *bdev; int ret; - *bdev_file = bdev_file_open_by_path(device_path, flags, holder, NULL); + *bdev_file = bdev_file_open_by_path(device_path, flags, holder, &fs_holder_ops); if (IS_ERR(*bdev_file)) { ret = PTR_ERR(*bdev_file); @@ -488,7 +490,7 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder, if (holder) { ret = set_blocksize(*bdev_file, BTRFS_BDEV_BLOCKSIZE); if (ret) { - fput(*bdev_file); + bdev_fput(*bdev_file); goto error; } } @@ -496,7 +498,7 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder, *disk_super = btrfs_read_disk_super(bdev, 0, false); if (IS_ERR(*disk_super)) { ret = PTR_ERR(*disk_super); - fput(*bdev_file); + bdev_fput(*bdev_file); goto error; } @@ -541,7 +543,7 @@ static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device continue; if (devt && devt != device->devt) continue; - if (fs_devices->opened) { + if (fs_devices->opened || fs_devices->holding) { if (devt) ret = -EBUSY; break; @@ -657,7 +659,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, if (!device->name) return -EINVAL; - ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1, + ret = btrfs_get_bdev_and_sb(rcu_dereference_raw(device->name), flags, holder, 1, &bdev_file, &disk_super); if (ret) return ret; @@ -674,8 +676,8 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { if (btrfs_super_incompat_flags(disk_super) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID) { - pr_err( - "BTRFS: Invalid seeding and uuid-changed device detected\n"); + btrfs_err(NULL, + "invalid seeding and uuid-changed device detected"); goto error_free_page; } @@ -701,7 +703,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, if (device->devt != device->bdev->bd_dev) { btrfs_warn(NULL, "device %s maj:min changed from %d:%d to %d:%d", - device->name->str, MAJOR(device->devt), + rcu_dereference_raw(device->name), MAJOR(device->devt), MINOR(device->devt), MAJOR(device->bdev->bd_dev), MINOR(device->bdev->bd_dev)); @@ -720,7 +722,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, error_free_page: btrfs_release_disk_super(disk_super); - fput(bdev_file); + bdev_fput(bdev_file); return -EINVAL; } @@ -749,7 +751,7 @@ static bool is_same_device(struct btrfs_device *device, const char *new_path) goto out; rcu_read_lock(); - ret = strscpy(old_path, rcu_str_deref(device->name), PATH_MAX); + ret = strscpy(old_path, rcu_dereference(device->name), PATH_MAX); rcu_read_unlock(); if (ret < 0) goto out; @@ -782,11 +784,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, { struct btrfs_device *device; struct btrfs_fs_devices *fs_devices = NULL; - struct rcu_string *name; + const char *name; u64 found_transid = btrfs_super_generation(disk_super); u64 devid = btrfs_stack_device_id(&disk_super->dev_item); dev_t path_devt; - int error; + int ret; bool same_fsid_diff_dev = false; bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID); @@ -798,11 +800,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, return ERR_PTR(-EAGAIN); } - error = lookup_bdev(path, &path_devt); - if (error) { + ret = lookup_bdev(path, &path_devt); + if (ret) { btrfs_err(NULL, "failed to lookup block device for path %s: %d", - path, error); - return ERR_PTR(error); + path, ret); + return ERR_PTR(ret); } fs_devices = find_fsid_by_device(disk_super, path_devt, &same_fsid_diff_dev); @@ -819,7 +821,7 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (same_fsid_diff_dev) { generate_random_uuid(fs_devices->fsid); fs_devices->temp_fsid = true; - pr_info("BTRFS: device %s (%d:%d) using temp-fsid %pU\n", + btrfs_info(NULL, "device %s (%d:%d) using temp-fsid %pU", path, MAJOR(path_devt), MINOR(path_devt), fs_devices->fsid); } @@ -890,6 +892,8 @@ static noinline struct btrfs_device *device_list_add(const char *path, current->comm, task_pid_nr(current)); } else if (!device->name || !is_same_device(device, path)) { + const char *old_name; + /* * When FS is already mounted. * 1. If you are here and if the device->name is NULL that @@ -943,27 +947,31 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (device->bdev) { if (device->devt != path_devt) { mutex_unlock(&fs_devices->device_list_mutex); - btrfs_warn_in_rcu(NULL, + btrfs_warn(NULL, "duplicate device %s devid %llu generation %llu scanned by %s (%d)", path, devid, found_transid, current->comm, task_pid_nr(current)); return ERR_PTR(-EEXIST); } - btrfs_info_in_rcu(NULL, + btrfs_info(NULL, "devid %llu device path %s changed to %s scanned by %s (%d)", devid, btrfs_dev_name(device), path, current->comm, task_pid_nr(current)); } - name = rcu_string_strdup(path, GFP_NOFS); + name = kstrdup(path, GFP_NOFS); if (!name) { mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-ENOMEM); } - rcu_string_free(device->name); + rcu_read_lock(); + old_name = rcu_dereference(device->name); + rcu_read_unlock(); rcu_assign_pointer(device->name, name); + kfree_rcu_mightsleep(old_name); + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) { fs_devices->missing_devices--; clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state); @@ -1012,7 +1020,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) * uuid mutex so nothing we touch in here is going to disappear. */ if (orig_dev->name) - dev_path = orig_dev->name->str; + dev_path = rcu_dereference_raw(orig_dev->name); device = btrfs_alloc_device(NULL, &orig_dev->devid, orig_dev->uuid, dev_path); @@ -1070,7 +1078,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, continue; if (device->bdev_file) { - fput(device->bdev_file); + bdev_fput(device->bdev_file); device->bdev = NULL; device->bdev_file = NULL; fs_devices->open_devices--; @@ -1117,7 +1125,7 @@ static void btrfs_close_bdev(struct btrfs_device *device) invalidate_bdev(device->bdev); } - fput(device->bdev_file); + bdev_fput(device->bdev_file); } static void btrfs_close_one_device(struct btrfs_device *device) @@ -1197,7 +1205,7 @@ void btrfs_close_devices(struct btrfs_fs_devices *fs_devices) mutex_lock(&uuid_mutex); close_fs_devices(fs_devices); - if (!fs_devices->opened) { + if (!fs_devices->opened && !fs_devices->holding) { list_splice_init(&fs_devices->seed_list, &list); /* @@ -1414,7 +1422,7 @@ static bool btrfs_skip_registration(struct btrfs_super_block *disk_super, list_for_each_entry(device, &fs_devices->devices, dev_list) { if (device->bdev && (device->bdev->bd_dev == devt) && - strcmp(device->name->str, path) != 0) { + strcmp(rcu_dereference_raw(device->name), path) != 0) { mutex_unlock(&fs_devices->device_list_mutex); /* Do not skip registration. */ @@ -1440,7 +1448,7 @@ static bool btrfs_skip_registration(struct btrfs_super_block *disk_super, * the device or return an error. Multi-device and seeding devices are registered * in both cases. */ -struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, +struct btrfs_device *btrfs_scan_one_device(const char *path, bool mount_arg_dev) { struct btrfs_super_block *disk_super; @@ -1461,7 +1469,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, * values temporarily, as the device paths of the fsid are the only * required information for assembling the volume. */ - bdev_file = bdev_file_open_by_path(path, flags, NULL, NULL); + bdev_file = bdev_file_open_by_path(path, BLK_OPEN_READ, NULL, NULL); if (IS_ERR(bdev_file)) return ERR_CAST(bdev_file); @@ -1473,7 +1481,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, devt = file_bdev(bdev_file)->bd_dev; if (btrfs_skip_registration(disk_super, path, devt, mount_arg_dev)) { - pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n", + btrfs_debug(NULL, "skip registering single non-seed device %s (%d:%d)", path, MAJOR(devt), MINOR(devt)); btrfs_free_stale_devices(devt, NULL); @@ -1490,7 +1498,7 @@ free_disk_super: btrfs_release_disk_super(disk_super); error_bdev_put: - fput(bdev_file); + bdev_fput(bdev_file); return device; } @@ -2164,7 +2172,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_devic btrfs_kobject_uevent(bdev, KOBJ_CHANGE); /* Update ctime/mtime for device path for libblkid */ - update_dev_time(device->name->str); + update_dev_time(rcu_dereference_raw(device->name)); } int btrfs_rm_device(struct btrfs_fs_info *fs_info, @@ -2204,7 +2212,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, } if (btrfs_pinned_by_swapfile(fs_info, device)) { - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "cannot remove device %s (devid %llu) due to active swapfile", btrfs_dev_name(device), device->devid); return -ETXTBSY; @@ -2294,7 +2302,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, * free the device. * * We cannot call btrfs_close_bdev() here because we're holding the sb - * write lock, and fput() on the block device will pull in the + * write lock, and bdev_fput() on the block device will pull in the * ->open_mutex on the block device and it's dependencies. Instead * just flush the device and let the caller do the final bdev_release. */ @@ -2473,7 +2481,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, else memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE); btrfs_release_disk_super(disk_super); - fput(bdev_file); + bdev_fput(bdev_file); return 0; } @@ -2705,7 +2713,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path return -EROFS; bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE, - fs_info->bdev_holder, NULL); + fs_info->sb, &fs_holder_ops); if (IS_ERR(bdev_file)) return PTR_ERR(bdev_file); @@ -2921,7 +2929,7 @@ error_free_zone: error_free_device: btrfs_free_device(device); error: - fput(bdev_file); + bdev_fput(bdev_file); if (locked) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); @@ -3404,7 +3412,8 @@ out: return ret; } -int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) +int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset, + bool verbose) { struct btrfs_root *root = fs_info->chunk_root; struct btrfs_trans_handle *trans; @@ -3434,7 +3443,7 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) /* step one, relocate all the extents inside this chunk */ btrfs_scrub_pause(fs_info); - ret = btrfs_relocate_block_group(fs_info, chunk_offset); + ret = btrfs_relocate_block_group(fs_info, chunk_offset, true); btrfs_scrub_continue(fs_info); if (ret) { /* @@ -3544,7 +3553,8 @@ again: btrfs_release_path(path); if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { - ret = btrfs_relocate_chunk(fs_info, found_key.offset); + ret = btrfs_relocate_chunk(fs_info, found_key.offset, + true); if (ret == -ENOSPC) failed++; else @@ -4209,7 +4219,7 @@ again: } } - ret = btrfs_relocate_chunk(fs_info, found_key.offset); + ret = btrfs_relocate_chunk(fs_info, found_key.offset, true); mutex_unlock(&fs_info->reclaim_bgs_lock); if (ret == -ENOSPC) { enospc_errors++; @@ -4977,7 +4987,7 @@ again: goto done; } - ret = btrfs_relocate_chunk(fs_info, chunk_offset); + ret = btrfs_relocate_chunk(fs_info, chunk_offset, true); mutex_unlock(&fs_info->reclaim_bgs_lock); if (ret == -ENOSPC) { failed++; @@ -5009,8 +5019,8 @@ again: mutex_lock(&fs_info->chunk_mutex); /* Clear all state bits beyond the shrunk device size */ - btrfs_clear_extent_bits(&device->alloc_state, new_size, (u64)-1, - CHUNK_STATE_MASK); + btrfs_clear_extent_bit(&device->alloc_state, new_size, (u64)-1, + CHUNK_STATE_MASK, NULL); btrfs_device_set_disk_total_bytes(device, new_size); if (list_empty(&device->post_commit_list)) @@ -5437,9 +5447,9 @@ static void chunk_map_device_clear_bits(struct btrfs_chunk_map *map, unsigned in struct btrfs_io_stripe *stripe = &map->stripes[i]; struct btrfs_device *device = stripe->dev; - btrfs_clear_extent_bits(&device->alloc_state, stripe->physical, - stripe->physical + map->stripe_size - 1, - bits | EXTENT_NOWAIT); + btrfs_clear_extent_bit(&device->alloc_state, stripe->physical, + stripe->physical + map->stripe_size - 1, + bits | EXTENT_NOWAIT, NULL); } } @@ -6923,9 +6933,9 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, generate_random_uuid(dev->uuid); if (path) { - struct rcu_string *name; + const char *name; - name = rcu_string_strdup(path, GFP_KERNEL); + name = kstrdup(path, GFP_KERNEL); if (!name) { btrfs_free_device(dev); return ERR_PTR(-ENOMEM); @@ -7174,7 +7184,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, if (IS_ERR(fs_devices)) return fs_devices; - ret = open_fs_devices(fs_devices, BLK_OPEN_READ, fs_info->bdev_holder); + ret = open_fs_devices(fs_devices, BLK_OPEN_READ, fs_info->sb); if (ret) { free_fs_devices(fs_devices); return ERR_PTR(ret); @@ -7706,7 +7716,7 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, return -ENOMEM; ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "error %d while searching for dev_stats item for device %s", ret, btrfs_dev_name(device)); goto out; @@ -7717,7 +7727,7 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, /* need to delete old one and insert a new one */ ret = btrfs_del_item(trans, dev_root, path); if (ret != 0) { - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "delete too small dev_stats item for device %s failed %d", btrfs_dev_name(device), ret); goto out; @@ -7731,7 +7741,7 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, dev_root, path, &key, sizeof(*ptr)); if (ret < 0) { - btrfs_warn_in_rcu(fs_info, + btrfs_warn(fs_info, "insert dev_stats item for device %s failed %d", btrfs_dev_name(device), ret); goto out; @@ -7794,7 +7804,7 @@ void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index) if (!dev->dev_stats_valid) return; - btrfs_err_rl_in_rcu(dev->fs_info, + btrfs_err_rl(dev->fs_info, "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u", btrfs_dev_name(dev), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), @@ -7814,7 +7824,7 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) if (i == BTRFS_DEV_STAT_VALUES_MAX) return; /* all values == 0, suppress message */ - btrfs_info_in_rcu(dev->fs_info, + btrfs_info(dev->fs_info, "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u", btrfs_dev_name(dev), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), @@ -7938,7 +7948,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, } /* - * Very old mkfs.btrfs (before v4.1) will not respect the reserved + * Very old mkfs.btrfs (before v4.15) will not respect the reserved * space. Although kernel can handle it without problem, better to warn * the users. */ @@ -8190,7 +8200,7 @@ static int relocating_repair_kthread(void *data) btrfs_info(fs_info, "zoned: relocating block group %llu to repair IO failure", target); - ret = btrfs_relocate_chunk(fs_info, target); + ret = btrfs_relocate_chunk(fs_info, target, true); out: if (cache) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 137cc232f58e..a56e873a3029 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -21,7 +21,6 @@ #include <uapi/linux/btrfs.h> #include <uapi/linux/btrfs_tree.h> #include "messages.h" -#include "rcu-string.h" #include "extent-io-tree.h" struct block_device; @@ -114,7 +113,8 @@ struct btrfs_device { struct btrfs_fs_devices *fs_devices; struct btrfs_fs_info *fs_info; - struct rcu_string __rcu *name; + /* Device path or NULL if missing. */ + const char __rcu *name; u64 generation; @@ -422,6 +422,16 @@ struct btrfs_fs_devices { /* Count fs-devices opened. */ int opened; + /* + * Counter of the processes that are holding this fs_devices but not + * yet opened. + * This is for mounting handling, as we can only open the fs_devices + * after a super block is created. But we cannot take uuid_mutex + * during sget_fc(), thus we have to hold the fs_devices (meaning it + * cannot be released) until a super block is returned. + */ + int holding; + /* Set when we find or add a device that doesn't have the nonrot flag set. */ bool rotating; /* Devices support TRIM/discard commands. */ @@ -667,7 +677,7 @@ enum btrfs_map_op { BTRFS_MAP_GET_READ_MIRRORS, }; -static inline enum btrfs_map_op btrfs_op(struct bio *bio) +static inline enum btrfs_map_op btrfs_op(const struct bio *bio) { switch (bio_op(bio)) { case REQ_OP_WRITE: @@ -719,8 +729,7 @@ struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, void btrfs_mapping_tree_free(struct btrfs_fs_info *fs_info); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, blk_mode_t flags, void *holder); -struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, - bool mount_arg_dev); +struct btrfs_device *btrfs_scan_one_device(const char *path, bool mount_arg_dev); int btrfs_forget_devices(dev_t devt); void btrfs_close_devices(struct btrfs_fs_devices *fs_devices); void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices); @@ -754,7 +763,8 @@ void btrfs_describe_block_groups(u64 flags, char *buf, u32 size_buf); int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); int btrfs_recover_balance(struct btrfs_fs_info *fs_info); int btrfs_pause_balance(struct btrfs_fs_info *fs_info); -int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset); +int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset, + bool verbose); int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset); void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); @@ -846,7 +856,7 @@ static inline const char *btrfs_dev_name(const struct btrfs_device *device) if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) return "<missing disk>"; else - return rcu_str_deref(device->name); + return rcu_dereference(device->name); } static inline void btrfs_warn_unknown_chunk_allocation(enum btrfs_chunk_allocation_policy pol) @@ -854,6 +864,20 @@ static inline void btrfs_warn_unknown_chunk_allocation(enum btrfs_chunk_allocati WARN_ONCE(1, "unknown allocation policy %d, fallback to regular", pol); } +static inline void btrfs_fs_devices_inc_holding(struct btrfs_fs_devices *fs_devices) +{ + lockdep_assert_held(&uuid_mutex); + ASSERT(fs_devices->holding >= 0); + fs_devices->holding++; +} + +static inline void btrfs_fs_devices_dec_holding(struct btrfs_fs_devices *fs_devices) +{ + lockdep_assert_held(&uuid_mutex); + ASSERT(fs_devices->holding > 0); + fs_devices->holding--; +} + void btrfs_commit_device_sizes(struct btrfs_transaction *trans); struct list_head * __attribute_const__ btrfs_get_fs_uuids(void); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 3e0edbcf73e1..79fb1614bd0c 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -510,14 +510,15 @@ static int btrfs_initxattrs(struct inode *inode, */ nofs_flag = memalloc_nofs_save(); for (xattr = xattr_array; xattr->name != NULL; xattr++) { - name = kmalloc(XATTR_SECURITY_PREFIX_LEN + - strlen(xattr->name) + 1, GFP_KERNEL); + const size_t name_len = XATTR_SECURITY_PREFIX_LEN + + strlen(xattr->name) + 1; + + name = kmalloc(name_len, GFP_KERNEL); if (!name) { ret = -ENOMEM; break; } - strcpy(name, XATTR_SECURITY_PREFIX); - strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); + scnprintf(name, name_len, "%s%s", XATTR_SECURITY_PREFIX, xattr->name); if (strcmp(name, XATTR_NAME_CAPS) == 0) clear_bit(BTRFS_INODE_NO_CAP_XATTR, &BTRFS_I(inode)->runtime_flags); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 9430b34d3cbb..245e813ecd78 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -9,7 +9,6 @@ #include "ctree.h" #include "volumes.h" #include "zoned.h" -#include "rcu-string.h" #include "disk-io.h" #include "block-group.h" #include "dev-replace.h" @@ -17,6 +16,7 @@ #include "fs.h" #include "accessors.h" #include "bio.h" +#include "transaction.h" /* Maximum number of zones to report per blkdev_report_zones() call */ #define BTRFS_REPORT_NR_ZONES 4096 @@ -263,9 +263,9 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos, ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones, copy_zone_info_cb, zones); if (ret < 0) { - btrfs_err_in_rcu(device->fs_info, + btrfs_err(device->fs_info, "zoned: failed to read zone %llu on %s (devid %llu)", - pos, rcu_str_deref(device->name), + pos, rcu_dereference(device->name), device->devid); return ret; } @@ -395,16 +395,16 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) /* We reject devices with a zone size larger than 8GB */ if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) { - btrfs_err_in_rcu(fs_info, + btrfs_err(fs_info, "zoned: %s: zone size %llu larger than supported maximum %llu", - rcu_str_deref(device->name), + rcu_dereference(device->name), zone_info->zone_size, BTRFS_MAX_ZONE_SIZE); ret = -EINVAL; goto out; } else if (zone_info->zone_size < BTRFS_MIN_ZONE_SIZE) { - btrfs_err_in_rcu(fs_info, + btrfs_err(fs_info, "zoned: %s: zone size %llu smaller than supported minimum %u", - rcu_str_deref(device->name), + rcu_dereference(device->name), zone_info->zone_size, BTRFS_MIN_ZONE_SIZE); ret = -EINVAL; goto out; @@ -418,9 +418,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) max_active_zones = bdev_max_active_zones(bdev); if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) { - btrfs_err_in_rcu(fs_info, + btrfs_err(fs_info, "zoned: %s: max active zones %u is too small, need at least %u active zones", - rcu_str_deref(device->name), max_active_zones, + rcu_dereference(device->name), max_active_zones, BTRFS_MIN_ACTIVE_ZONES); ret = -EINVAL; goto out; @@ -460,9 +460,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) zone_info->zone_cache = vcalloc(zone_info->nr_zones, sizeof(struct blk_zone)); if (!zone_info->zone_cache) { - btrfs_err_in_rcu(device->fs_info, + btrfs_err(device->fs_info, "zoned: failed to allocate zone cache for %s", - rcu_str_deref(device->name)); + rcu_dereference(device->name)); ret = -ENOMEM; goto out; } @@ -497,9 +497,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) } if (nreported != zone_info->nr_zones) { - btrfs_err_in_rcu(device->fs_info, + btrfs_err(device->fs_info, "inconsistent number of zones on %s (%u/%u)", - rcu_str_deref(device->name), nreported, + rcu_dereference(device->name), nreported, zone_info->nr_zones); ret = -EIO; goto out; @@ -507,9 +507,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) if (max_active_zones) { if (nactive > max_active_zones) { - btrfs_err_in_rcu(device->fs_info, + btrfs_err(device->fs_info, "zoned: %u active zones on %s exceeds max_active_zones %u", - nactive, rcu_str_deref(device->name), + nactive, rcu_dereference(device->name), max_active_zones); ret = -EIO; goto out; @@ -538,7 +538,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) goto out; if (nr_zones != BTRFS_NR_SB_LOG_ZONES) { - btrfs_err_in_rcu(device->fs_info, + btrfs_err(device->fs_info, "zoned: failed to read super block log zone info at devid %llu zone %u", device->devid, sb_zone); ret = -EUCLEAN; @@ -556,7 +556,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) ret = sb_write_pointer(device->bdev, &zone_info->sb_zones[sb_pos], &sb_wp); if (ret != -ENOENT && ret) { - btrfs_err_in_rcu(device->fs_info, + btrfs_err(device->fs_info, "zoned: super block log zone corrupted devid %llu zone %u", device->devid, sb_zone); ret = -EUCLEAN; @@ -575,9 +575,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) emulated = "emulated "; } - btrfs_info_in_rcu(fs_info, + btrfs_info(fs_info, "%s block device %s, %u %szones of %llu bytes", - model, rcu_str_deref(device->name), zone_info->nr_zones, + model, rcu_dereference(device->name), zone_info->nr_zones, emulated, zone_info->zone_size); return 0; @@ -1182,10 +1182,10 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size) continue; /* Free regions should be empty */ - btrfs_warn_in_rcu( + btrfs_warn( device->fs_info, "zoned: resetting device %s (devid %llu) zone %llu for allocation", - rcu_str_deref(device->name), device->devid, pos >> shift); + rcu_dereference(device->name), device->devid, pos >> shift); WARN_ON_ONCE(1); ret = btrfs_reset_device_zone(device, pos, zinfo->zone_size, @@ -1345,9 +1345,9 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, } if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) { - btrfs_err_in_rcu(fs_info, + btrfs_err(fs_info, "zoned: unexpected conventional zone %llu on device %s (devid %llu)", - zone.start << SECTOR_SHIFT, rcu_str_deref(device->name), + zone.start << SECTOR_SHIFT, rcu_dereference(device->name), device->devid); up_read(&dev_replace->rwsem); return -EIO; @@ -1358,10 +1358,10 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, switch (zone.cond) { case BLK_ZONE_COND_OFFLINE: case BLK_ZONE_COND_READONLY: - btrfs_err_in_rcu(fs_info, + btrfs_err(fs_info, "zoned: offline/readonly zone %llu on device %s (devid %llu)", (info->physical >> device->zone_info->zone_size_shift), - rcu_str_deref(device->name), device->devid); + rcu_dereference(device->name), device->devid); info->alloc_offset = WP_MISSING_DEV; break; case BLK_ZONE_COND_EMPTY: @@ -2485,7 +2485,7 @@ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg, /* For the work */ btrfs_get_block_group(bg); - atomic_inc(&eb->refs); + refcount_inc(&eb->refs); bg->last_eb = eb; INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn); queue_work(system_unbound_wq, &bg->zone_finish_work); @@ -2501,6 +2501,66 @@ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) spin_unlock(&fs_info->relocation_bg_lock); } +void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info) +{ + struct btrfs_space_info *data_sinfo = fs_info->data_sinfo; + struct btrfs_space_info *space_info = data_sinfo->sub_group[0]; + struct btrfs_trans_handle *trans; + struct btrfs_block_group *bg; + struct list_head *bg_list; + u64 alloc_flags; + bool initial = false; + bool did_chunk_alloc = false; + int index; + int ret; + + if (!btrfs_is_zoned(fs_info)) + return; + + if (fs_info->data_reloc_bg) + return; + + if (sb_rdonly(fs_info->sb)) + return; + + ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC); + alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags); + index = btrfs_bg_flags_to_raid_index(alloc_flags); + + bg_list = &data_sinfo->block_groups[index]; +again: + list_for_each_entry(bg, bg_list, list) { + if (bg->used > 0) + continue; + + if (!initial) { + initial = true; + continue; + } + + fs_info->data_reloc_bg = bg->start; + set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags); + btrfs_zone_activate(bg); + + return; + } + + if (did_chunk_alloc) + return; + + trans = btrfs_join_transaction(fs_info->tree_root); + if (IS_ERR(trans)) + return; + + ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE); + btrfs_end_transaction(trans); + if (ret == 1) { + did_chunk_alloc = true; + bg_list = &space_info->block_groups[index]; + goto again; + } +} + void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; @@ -2523,8 +2583,8 @@ bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; + u64 total = btrfs_super_total_bytes(fs_info->super_copy); u64 used = 0; - u64 total = 0; u64 factor; ASSERT(btrfs_is_zoned(fs_info)); @@ -2537,7 +2597,6 @@ bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info) if (!device->bdev) continue; - total += device->disk_total_bytes; used += device->bytes_used; } mutex_unlock(&fs_devices->device_list_mutex); diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 9672bf4c3335..6e11533b8e14 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -88,6 +88,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg, struct extent_buffer *eb); void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg); +void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info); void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info); bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info); void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical, @@ -241,6 +242,8 @@ static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg, static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { } +static inline void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info) { } + static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { } static inline bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 4a796a049b5a..ff0292615e1f 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -200,8 +200,7 @@ void zstd_init_workspace_manager(void) ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL); if (IS_ERR(ws)) { - pr_warn( - "BTRFS: cannot preallocate zstd compression workspace\n"); + btrfs_warn(NULL, "cannot preallocate zstd compression workspace"); } else { set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map); list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]); diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index c08e4a66ac07..3e0576d9db1d 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -347,8 +347,6 @@ int __cachefiles_write(struct cachefiles_object *object, default: ki->was_async = false; cachefiles_write_complete(&ki->iocb, ret); - if (ret > 0) - ret = 0; break; } diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index d9bc67176128..a7ed86fa98bb 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -83,10 +83,8 @@ static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb, trace_cachefiles_ondemand_fd_write(object, file_inode(file), pos, len); ret = __cachefiles_write(object, file, pos, iter, NULL, NULL); - if (!ret) { - ret = len; + if (ret > 0) kiocb->ki_pos += ret; - } out: fput(file); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a8d8b56cf9d2..b1a8ff612c41 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4957,24 +4957,20 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, cl = ceph_inode_to_client(dir); spin_lock(&dentry->d_lock); if (ret && di->lease_session && di->lease_session->s_mds == mds) { + int len = dentry->d_name.len; doutc(cl, "%p mds%d seq %d\n", dentry, mds, (int)di->lease_seq); rel->dname_seq = cpu_to_le32(di->lease_seq); __ceph_mdsc_drop_dentry_lease(dentry); + memcpy(*p, dentry->d_name.name, len); spin_unlock(&dentry->d_lock); if (IS_ENCRYPTED(dir) && fscrypt_has_encryption_key(dir)) { - int ret2 = ceph_encode_encrypted_fname(dir, dentry, *p); - - if (ret2 < 0) - return ret2; - - rel->dname_len = cpu_to_le32(ret2); - *p += ret2; - } else { - rel->dname_len = cpu_to_le32(dentry->d_name.len); - memcpy(*p, dentry->d_name.name, dentry->d_name.len); - *p += dentry->d_name.len; + len = ceph_encode_encrypted_dname(dir, *p, len); + if (len < 0) + return len; } + rel->dname_len = cpu_to_le32(len); + *p += len; } else { spin_unlock(&dentry->d_lock); } diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index 3b3c4d8d401e..e312f52f48e4 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c @@ -215,35 +215,31 @@ static struct inode *parse_longname(const struct inode *parent, struct ceph_client *cl = ceph_inode_to_client(parent); struct inode *dir = NULL; struct ceph_vino vino = { .snap = CEPH_NOSNAP }; - char *inode_number; - char *name_end; - int orig_len = *name_len; + char *name_end, *inode_number; int ret = -EIO; - + /* NUL-terminate */ + char *str __free(kfree) = kmemdup_nul(name, *name_len, GFP_KERNEL); + if (!str) + return ERR_PTR(-ENOMEM); /* Skip initial '_' */ - name++; - name_end = strrchr(name, '_'); + str++; + name_end = strrchr(str, '_'); if (!name_end) { - doutc(cl, "failed to parse long snapshot name: %s\n", name); + doutc(cl, "failed to parse long snapshot name: %s\n", str); return ERR_PTR(-EIO); } - *name_len = (name_end - name); + *name_len = (name_end - str); if (*name_len <= 0) { pr_err_client(cl, "failed to parse long snapshot name\n"); return ERR_PTR(-EIO); } /* Get the inode number */ - inode_number = kmemdup_nul(name_end + 1, - orig_len - *name_len - 2, - GFP_KERNEL); - if (!inode_number) - return ERR_PTR(-ENOMEM); + inode_number = name_end + 1; ret = kstrtou64(inode_number, 10, &vino.ino); if (ret) { - doutc(cl, "failed to parse inode number: %s\n", name); - dir = ERR_PTR(ret); - goto out; + doutc(cl, "failed to parse inode number: %s\n", str); + return ERR_PTR(ret); } /* And finally the inode */ @@ -254,42 +250,29 @@ static struct inode *parse_longname(const struct inode *parent, if (IS_ERR(dir)) doutc(cl, "can't find inode %s (%s)\n", inode_number, name); } - -out: - kfree(inode_number); return dir; } -int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name, - char *buf) +int ceph_encode_encrypted_dname(struct inode *parent, char *buf, int elen) { struct ceph_client *cl = ceph_inode_to_client(parent); struct inode *dir = parent; - struct qstr iname; + char *p = buf; u32 len; - int name_len; - int elen; + int name_len = elen; int ret; u8 *cryptbuf = NULL; - iname.name = d_name->name; - name_len = d_name->len; - /* Handle the special case of snapshot names that start with '_' */ - if ((ceph_snap(dir) == CEPH_SNAPDIR) && (name_len > 0) && - (iname.name[0] == '_')) { - dir = parse_longname(parent, iname.name, &name_len); + if (ceph_snap(dir) == CEPH_SNAPDIR && *p == '_') { + dir = parse_longname(parent, p, &name_len); if (IS_ERR(dir)) return PTR_ERR(dir); - iname.name++; /* skip initial '_' */ + p++; /* skip initial '_' */ } - iname.len = name_len; - if (!fscrypt_has_encryption_key(dir)) { - memcpy(buf, d_name->name, d_name->len); - elen = d_name->len; + if (!fscrypt_has_encryption_key(dir)) goto out; - } /* * Convert cleartext d_name to ciphertext. If result is longer than @@ -297,7 +280,7 @@ int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name, * * See: fscrypt_setup_filename */ - if (!fscrypt_fname_encrypted_size(dir, iname.len, NAME_MAX, &len)) { + if (!fscrypt_fname_encrypted_size(dir, name_len, NAME_MAX, &len)) { elen = -ENAMETOOLONG; goto out; } @@ -310,7 +293,9 @@ int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name, goto out; } - ret = fscrypt_fname_encrypt(dir, &iname, cryptbuf, len); + ret = fscrypt_fname_encrypt(dir, + &(struct qstr)QSTR_INIT(p, name_len), + cryptbuf, len); if (ret) { elen = ret; goto out; @@ -331,18 +316,13 @@ int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name, } /* base64 encode the encrypted name */ - elen = ceph_base64_encode(cryptbuf, len, buf); - doutc(cl, "base64-encoded ciphertext name = %.*s\n", elen, buf); + elen = ceph_base64_encode(cryptbuf, len, p); + doutc(cl, "base64-encoded ciphertext name = %.*s\n", elen, p); /* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */ WARN_ON(elen > 240); - if ((elen > 0) && (dir != parent)) { - char tmp_buf[NAME_MAX]; - - elen = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%ld", - elen, buf, dir->i_ino); - memcpy(buf, tmp_buf, elen); - } + if (dir != parent) // leading _ is already there; append _<inum> + elen += 1 + sprintf(p + elen, "_%ld", dir->i_ino); out: kfree(cryptbuf); @@ -355,14 +335,6 @@ out: return elen; } -int ceph_encode_encrypted_fname(struct inode *parent, struct dentry *dentry, - char *buf) -{ - WARN_ON_ONCE(!fscrypt_has_encryption_key(parent)); - - return ceph_encode_encrypted_dname(parent, &dentry->d_name, buf); -} - /** * ceph_fname_to_usr - convert a filename for userland presentation * @fname: ceph_fname to be converted diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h index d0768239a1c9..f752bbb2eb06 100644 --- a/fs/ceph/crypto.h +++ b/fs/ceph/crypto.h @@ -102,10 +102,7 @@ int ceph_fscrypt_prepare_context(struct inode *dir, struct inode *inode, struct ceph_acl_sec_ctx *as); void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req, struct ceph_acl_sec_ctx *as); -int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name, - char *buf); -int ceph_encode_encrypted_fname(struct inode *parent, struct dentry *dentry, - char *buf); +int ceph_encode_encrypted_dname(struct inode *parent, char *buf, int len); static inline int ceph_fname_alloc_buffer(struct inode *parent, struct fscrypt_str *fname) @@ -194,17 +191,10 @@ static inline void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req, { } -static inline int ceph_encode_encrypted_dname(struct inode *parent, - struct qstr *d_name, char *buf) +static inline int ceph_encode_encrypted_dname(struct inode *parent, char *buf, + int len) { - memcpy(buf, d_name->name, d_name->len); - return d_name->len; -} - -static inline int ceph_encode_encrypted_fname(struct inode *parent, - struct dentry *dentry, char *buf) -{ - return -EOPNOTSUPP; + return len; } static inline int ceph_fname_alloc_buffer(struct inode *parent, diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index a321aa6d0ed2..8478e7e75df6 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -423,17 +423,16 @@ more: req->r_inode_drop = CEPH_CAP_FILE_EXCL; } if (dfi->last_name) { - struct qstr d_name = { .name = dfi->last_name, - .len = strlen(dfi->last_name) }; + int len = strlen(dfi->last_name); req->r_path2 = kzalloc(NAME_MAX + 1, GFP_KERNEL); if (!req->r_path2) { ceph_mdsc_put_request(req); return -ENOMEM; } + memcpy(req->r_path2, dfi->last_name, len); - err = ceph_encode_encrypted_dname(inode, &d_name, - req->r_path2); + err = ceph_encode_encrypted_dname(inode, req->r_path2, len); if (err < 0) { ceph_mdsc_put_request(req); return err; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 230e0c3f341f..0f497c39ff82 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2766,8 +2766,8 @@ retry: } if (fscrypt_has_encryption_key(d_inode(parent))) { - len = ceph_encode_encrypted_fname(d_inode(parent), - cur, buf); + len = ceph_encode_encrypted_dname(d_inode(parent), + buf, len); if (len < 0) { dput(parent); dput(cur); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 2b8438d8a324..c3eb651862c5 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1219,7 +1219,7 @@ static int ceph_set_super(struct super_block *s, struct fs_context *fc) fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ s->s_op = &ceph_super_ops; - s->s_d_op = &ceph_dentry_ops; + set_default_d_op(s, &ceph_dentry_ops); s->s_export_op = &ceph_export_ops; s->s_time_gran = 1; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 6896fce122e1..08450d006016 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -230,7 +230,7 @@ static int coda_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_blocksize_bits = 12; sb->s_magic = CODA_SUPER_MAGIC; sb->s_op = &coda_super_operations; - sb->s_d_op = &coda_dentry_operations; + set_default_d_op(sb, &coda_dentry_operations); sb->s_time_gran = 1; sb->s_time_min = S64_MIN; sb->s_time_max = S64_MAX; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index ebf32822e29b..f327fbb9a0ca 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -67,7 +67,6 @@ static void configfs_d_iput(struct dentry * dentry, const struct dentry_operations configfs_dentry_ops = { .d_iput = configfs_d_iput, - .d_delete = always_delete_dentry, }; #ifdef CONFIG_LOCKDEP diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index c2d820063ec4..740f18b60c9d 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -92,7 +92,8 @@ static int configfs_fill_super(struct super_block *sb, struct fs_context *fc) configfs_root_group.cg_item.ci_dentry = root; root->d_fsdata = &configfs_root; sb->s_root = root; - sb->s_d_op = &configfs_dentry_ops; /* the rest get that */ + set_default_d_op(sb, &configfs_dentry_ops); /* the rest get that */ + sb->s_d_flags |= DCACHE_DONTCACHE; return 0; } diff --git a/fs/dcache.c b/fs/dcache.c index 03d58b2d4fa3..2adac023ba23 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1436,7 +1436,7 @@ int d_set_mounted(struct dentry *dentry) { struct dentry *p; int ret = -ENOENT; - write_seqlock(&rename_lock); + read_seqlock_excl(&rename_lock); for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) { /* Need exclusion wrt. d_invalidate() */ spin_lock(&p->d_lock); @@ -1456,7 +1456,7 @@ int d_set_mounted(struct dentry *dentry) } spin_unlock(&dentry->d_lock); out: - write_sequnlock(&rename_lock); + read_sequnlock_excl(&rename_lock); return ret; } @@ -1731,14 +1731,14 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) dentry->d_inode = NULL; dentry->d_parent = dentry; dentry->d_sb = sb; - dentry->d_op = NULL; + dentry->d_op = sb->__s_d_op; + dentry->d_flags = sb->s_d_flags; dentry->d_fsdata = NULL; INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_HLIST_HEAD(&dentry->d_children); INIT_HLIST_NODE(&dentry->d_u.d_alias); INIT_HLIST_NODE(&dentry->d_sib); - d_set_d_op(dentry, dentry->d_sb->s_d_op); if (dentry->d_op && dentry->d_op->d_init) { err = dentry->d_op->d_init(dentry); @@ -1821,8 +1821,9 @@ struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) struct dentry *dentry = __d_alloc(sb, name); if (likely(dentry)) { dentry->d_flags |= DCACHE_NORCU; - if (!sb->s_d_op) - d_set_d_op(dentry, &anon_ops); + /* d_op_flags(&anon_ops) is 0 */ + if (!dentry->d_op) + dentry->d_op = &anon_ops; } return dentry; } @@ -1837,35 +1838,50 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); -void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) +#define DCACHE_OP_FLAGS \ + (DCACHE_OP_HASH | DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | \ + DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_DELETE | DCACHE_OP_PRUNE | \ + DCACHE_OP_REAL) + +static unsigned int d_op_flags(const struct dentry_operations *op) { + unsigned int flags = 0; + if (op) { + if (op->d_hash) + flags |= DCACHE_OP_HASH; + if (op->d_compare) + flags |= DCACHE_OP_COMPARE; + if (op->d_revalidate) + flags |= DCACHE_OP_REVALIDATE; + if (op->d_weak_revalidate) + flags |= DCACHE_OP_WEAK_REVALIDATE; + if (op->d_delete) + flags |= DCACHE_OP_DELETE; + if (op->d_prune) + flags |= DCACHE_OP_PRUNE; + if (op->d_real) + flags |= DCACHE_OP_REAL; + } + return flags; +} + +static void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) +{ + unsigned int flags = d_op_flags(op); WARN_ON_ONCE(dentry->d_op); - WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH | - DCACHE_OP_COMPARE | - DCACHE_OP_REVALIDATE | - DCACHE_OP_WEAK_REVALIDATE | - DCACHE_OP_DELETE | - DCACHE_OP_REAL)); + WARN_ON_ONCE(dentry->d_flags & DCACHE_OP_FLAGS); dentry->d_op = op; - if (!op) - return; - if (op->d_hash) - dentry->d_flags |= DCACHE_OP_HASH; - if (op->d_compare) - dentry->d_flags |= DCACHE_OP_COMPARE; - if (op->d_revalidate) - dentry->d_flags |= DCACHE_OP_REVALIDATE; - if (op->d_weak_revalidate) - dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE; - if (op->d_delete) - dentry->d_flags |= DCACHE_OP_DELETE; - if (op->d_prune) - dentry->d_flags |= DCACHE_OP_PRUNE; - if (op->d_real) - dentry->d_flags |= DCACHE_OP_REAL; - -} -EXPORT_SYMBOL(d_set_d_op); + if (flags) + dentry->d_flags |= flags; +} + +void set_default_d_op(struct super_block *s, const struct dentry_operations *ops) +{ + unsigned int flags = d_op_flags(ops); + s->__s_d_op = ops; + s->s_d_flags = (s->s_d_flags & ~DCACHE_OP_FLAGS) | flags; +} +EXPORT_SYMBOL(set_default_d_op); static unsigned d_flags_for_inode(struct inode *inode) { @@ -2530,13 +2546,19 @@ struct dentry *d_alloc_parallel(struct dentry *parent, unsigned int hash = name->hash; struct hlist_bl_head *b = in_lookup_hash(parent, hash); struct hlist_bl_node *node; - struct dentry *new = d_alloc(parent, name); + struct dentry *new = __d_alloc(parent->d_sb, name); struct dentry *dentry; unsigned seq, r_seq, d_seq; if (unlikely(!new)) return ERR_PTR(-ENOMEM); + new->d_flags |= DCACHE_PAR_LOOKUP; + spin_lock(&parent->d_lock); + new->d_parent = dget_dlock(parent); + hlist_add_head(&new->d_sib, &parent->d_children); + spin_unlock(&parent->d_lock); + retry: rcu_read_lock(); seq = smp_load_acquire(&parent->d_inode->i_dir_seq); @@ -2620,8 +2642,6 @@ retry: return dentry; } rcu_read_unlock(); - /* we can't take ->d_lock here; it's OK, though. */ - new->d_flags |= DCACHE_PAR_LOOKUP; new->d_wait = wq; hlist_bl_add_head(&new->d_u.d_in_lookup_hash, b); hlist_bl_unlock(b); @@ -2667,7 +2687,8 @@ EXPORT_SYMBOL(__d_lookup_unhash_wake); /* inode->i_lock held if inode is non-NULL */ -static inline void __d_add(struct dentry *dentry, struct inode *inode) +static inline void __d_add(struct dentry *dentry, struct inode *inode, + const struct dentry_operations *ops) { wait_queue_head_t *d_wait; struct inode *dir = NULL; @@ -2678,6 +2699,8 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode) n = start_dir_add(dir); d_wait = __d_lookup_unhash(dentry); } + if (unlikely(ops)) + d_set_d_op(dentry, ops); if (inode) { unsigned add_flags = d_flags_for_inode(inode); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); @@ -2709,7 +2732,7 @@ void d_add(struct dentry *entry, struct inode *inode) security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); } - __d_add(entry, inode); + __d_add(entry, inode, NULL); } EXPORT_SYMBOL(d_add); @@ -2961,30 +2984,8 @@ out_err: return ret; } -/** - * d_splice_alias - splice a disconnected dentry into the tree if one exists - * @inode: the inode which may have a disconnected dentry - * @dentry: a negative dentry which we want to point to the inode. - * - * If inode is a directory and has an IS_ROOT alias, then d_move that in - * place of the given dentry and return it, else simply d_add the inode - * to the dentry and return NULL. - * - * If a non-IS_ROOT directory is found, the filesystem is corrupt, and - * we should error out: directories can't have multiple aliases. - * - * This is needed in the lookup routine of any filesystem that is exportable - * (via knfsd) so that we can build dcache paths to directories effectively. - * - * If a dentry was found and moved, then it is returned. Otherwise NULL - * is returned. This matches the expected return value of ->lookup. - * - * Cluster filesystems may call this function with a negative, hashed dentry. - * In that case, we know that the inode will be a regular file, and also this - * will only occur during atomic_open. So we need to check for the dentry - * being already hashed only in the final case. - */ -struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) +struct dentry *d_splice_alias_ops(struct inode *inode, struct dentry *dentry, + const struct dentry_operations *ops) { if (IS_ERR(inode)) return ERR_CAST(inode); @@ -3030,9 +3031,37 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) } } out: - __d_add(dentry, inode); + __d_add(dentry, inode, ops); return NULL; } + +/** + * d_splice_alias - splice a disconnected dentry into the tree if one exists + * @inode: the inode which may have a disconnected dentry + * @dentry: a negative dentry which we want to point to the inode. + * + * If inode is a directory and has an IS_ROOT alias, then d_move that in + * place of the given dentry and return it, else simply d_add the inode + * to the dentry and return NULL. + * + * If a non-IS_ROOT directory is found, the filesystem is corrupt, and + * we should error out: directories can't have multiple aliases. + * + * This is needed in the lookup routine of any filesystem that is exportable + * (via knfsd) so that we can build dcache paths to directories effectively. + * + * If a dentry was found and moved, then it is returned. Otherwise NULL + * is returned. This matches the expected return value of ->lookup. + * + * Cluster filesystems may call this function with a negative, hashed dentry. + * In that case, we know that the inode will be a regular file, and also this + * will only occur during atomic_open. So we need to check for the dentry + * being already hashed only in the final case. + */ +struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) +{ + return d_splice_alias_ops(inode, dentry, NULL); +} EXPORT_SYMBOL(d_splice_alias); /* diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 30c4944e1862..6677991c7e4b 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -258,7 +258,6 @@ static struct vfsmount *debugfs_automount(struct path *path) } static const struct dentry_operations debugfs_dops = { - .d_delete = always_delete_dentry, .d_release = debugfs_release_dentry, .d_automount = debugfs_automount, }; @@ -273,7 +272,8 @@ static int debugfs_fill_super(struct super_block *sb, struct fs_context *fc) return err; sb->s_op = &debugfs_super_operations; - sb->s_d_op = &debugfs_dops; + set_default_d_op(sb, &debugfs_dops); + sb->s_d_flags |= DCACHE_DONTCACHE; debugfs_apply_options(sb); @@ -384,27 +384,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) if (!parent) parent = debugfs_mount->mnt_root; - inode_lock(d_inode(parent)); - if (unlikely(IS_DEADDIR(d_inode(parent)))) - dentry = ERR_PTR(-ENOENT); - else - dentry = lookup_noperm(&QSTR(name), parent); - if (!IS_ERR(dentry) && d_really_is_positive(dentry)) { - if (d_is_dir(dentry)) - pr_err("Directory '%s' with parent '%s' already present!\n", - name, parent->d_name.name); - else - pr_err("File '%s' in directory '%s' already present!\n", - name, parent->d_name.name); - dput(dentry); - dentry = ERR_PTR(-EEXIST); - } - + dentry = simple_start_creating(parent, name); if (IS_ERR(dentry)) { - inode_unlock(d_inode(parent)); + if (dentry == ERR_PTR(-EEXIST)) + pr_err("'%s' already exists in '%pd'\n", name, parent); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } - return dentry; } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 9c20d78e41f6..fdf22264a8e9 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -381,7 +381,7 @@ static int devpts_fill_super(struct super_block *s, struct fs_context *fc) s->s_blocksize_bits = 10; s->s_magic = DEVPTS_SUPER_MAGIC; s->s_op = &devpts_sops; - s->s_d_op = &simple_dentry_operations; + s->s_d_flags = DCACHE_DONTCACHE; s->s_time_gran = 1; fsi->sb = s; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 8dd1d7189c3b..45f9ca4465da 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -471,7 +471,7 @@ static int ecryptfs_get_tree(struct fs_context *fc) sbi = NULL; s->s_op = &ecryptfs_sops; s->s_xattr = ecryptfs_xattr_handlers; - s->s_d_op = &ecryptfs_dops; + set_default_d_op(s, &ecryptfs_dops); err = "Reading sb failed"; rc = kern_path(fc->source, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index c900d98bf494..c4a139911356 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -183,7 +183,6 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) static const struct dentry_operations efivarfs_d_ops = { .d_compare = efivarfs_d_compare, .d_hash = efivarfs_d_hash, - .d_delete = always_delete_dentry, }; static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) @@ -350,7 +349,8 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = EFIVARFS_MAGIC; sb->s_op = &efivarfs_ops; - sb->s_d_op = &efivarfs_d_ops; + set_default_d_op(sb, &efivarfs_d_ops); + sb->s_d_flags |= DCACHE_DONTCACHE; sb->s_time_gran = 1; if (!efivar_supports_writes()) @@ -390,10 +390,16 @@ static int efivarfs_reconfigure(struct fs_context *fc) return 0; } +static void efivarfs_free(struct fs_context *fc) +{ + kfree(fc->s_fs_info); +} + static const struct fs_context_operations efivarfs_context_ops = { .get_tree = efivarfs_get_tree, .parse_param = efivarfs_parse_param, .reconfigure = efivarfs_reconfigure, + .free = efivarfs_free, }; static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor, diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 6beeb7063871..7b26efc271ee 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -147,6 +147,8 @@ config EROFS_FS_ZIP_ZSTD config EROFS_FS_ZIP_ACCEL bool "EROFS hardware decompression support" depends on EROFS_FS_ZIP + select CRYPTO + select CRYPTO_DEFLATE help Saying Y here includes hardware accelerator support for reading EROFS file systems containing compressed data. It gives better diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 6a329c329f43..f46c47335b9c 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -49,11 +49,18 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap) return buf->base + (offset & ~PAGE_MASK); } -void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb) +int erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb, + bool in_metabox) { struct erofs_sb_info *sbi = EROFS_SB(sb); buf->file = NULL; + if (in_metabox) { + if (unlikely(!sbi->metabox_inode)) + return -EFSCORRUPTED; + buf->mapping = sbi->metabox_inode->i_mapping; + return 0; + } buf->off = sbi->dif0.fsoff; if (erofs_is_fileio_mode(sbi)) { buf->file = sbi->dif0.file; /* some fs like FUSE needs it */ @@ -62,13 +69,18 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb) buf->mapping = sbi->dif0.fscache->inode->i_mapping; else buf->mapping = sb->s_bdev->bd_mapping; + return 0; } void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, - erofs_off_t offset, bool need_kmap) + erofs_off_t offset, bool in_metabox) { - erofs_init_metabuf(buf, sb); - return erofs_bread(buf, offset, need_kmap); + int err; + + err = erofs_init_metabuf(buf, sb, in_metabox); + if (err) + return ERR_PTR(err); + return erofs_bread(buf, offset, true); } int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) @@ -118,7 +130,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, unit) + unit * chunknr; - idx = erofs_read_metabuf(&buf, sb, pos, true); + idx = erofs_read_metabuf(&buf, sb, pos, erofs_inode_in_metabox(inode)); if (IS_ERR(idx)) { err = PTR_ERR(idx); goto out; @@ -214,9 +226,11 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) /* * bit 30: I/O error occurred on this folio + * bit 29: CPU has dirty data in D-cache (needs aliasing handling); * bit 0 - 29: remaining parts to complete this folio */ -#define EROFS_ONLINEFOLIO_EIO (1 << 30) +#define EROFS_ONLINEFOLIO_EIO 30 +#define EROFS_ONLINEFOLIO_DIRTY 29 void erofs_onlinefolio_init(struct folio *folio) { @@ -233,19 +247,23 @@ void erofs_onlinefolio_split(struct folio *folio) atomic_inc((atomic_t *)&folio->private); } -void erofs_onlinefolio_end(struct folio *folio, int err) +void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty) { int orig, v; do { orig = atomic_read((atomic_t *)&folio->private); - v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0); + DBG_BUGON(orig <= 0); + v = dirty << EROFS_ONLINEFOLIO_DIRTY; + v |= (orig - 1) | (!!err << EROFS_ONLINEFOLIO_EIO); } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig); - if (v & ~EROFS_ONLINEFOLIO_EIO) + if (v & (BIT(EROFS_ONLINEFOLIO_DIRTY) - 1)) return; folio->private = 0; - folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO)); + if (v & BIT(EROFS_ONLINEFOLIO_DIRTY)) + flush_dcache_folio(folio); + folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO))); } static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, @@ -258,51 +276,51 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, map.m_la = offset; map.m_llen = length; - ret = erofs_map_blocks(inode, &map); if (ret < 0) return ret; - mdev = (struct erofs_map_dev) { - .m_deviceid = map.m_deviceid, - .m_pa = map.m_pa, - }; - ret = erofs_map_dev(sb, &mdev); - if (ret) - return ret; - iomap->offset = map.m_la; - if (flags & IOMAP_DAX) - iomap->dax_dev = mdev.m_dif->dax_dev; - else - iomap->bdev = mdev.m_bdev; iomap->length = map.m_llen; iomap->flags = 0; iomap->private = NULL; - + iomap->addr = IOMAP_NULL_ADDR; if (!(map.m_flags & EROFS_MAP_MAPPED)) { iomap->type = IOMAP_HOLE; - iomap->addr = IOMAP_NULL_ADDR; - if (!iomap->length) - iomap->length = length; return 0; } + if (!(map.m_flags & EROFS_MAP_META) || !erofs_inode_in_metabox(inode)) { + mdev = (struct erofs_map_dev) { + .m_deviceid = map.m_deviceid, + .m_pa = map.m_pa, + }; + ret = erofs_map_dev(sb, &mdev); + if (ret) + return ret; + + if (flags & IOMAP_DAX) + iomap->dax_dev = mdev.m_dif->dax_dev; + else + iomap->bdev = mdev.m_bdev; + iomap->addr = mdev.m_dif->fsoff + mdev.m_pa; + if (flags & IOMAP_DAX) + iomap->addr += mdev.m_dif->dax_part_off; + } + if (map.m_flags & EROFS_MAP_META) { void *ptr; struct erofs_buf buf = __EROFS_BUF_INITIALIZER; iomap->type = IOMAP_INLINE; - ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, true); + ptr = erofs_read_metabuf(&buf, sb, map.m_pa, + erofs_inode_in_metabox(inode)); if (IS_ERR(ptr)) return PTR_ERR(ptr); iomap->inline_data = ptr; iomap->private = buf.base; } else { iomap->type = IOMAP_MAPPED; - iomap->addr = mdev.m_dif->fsoff + mdev.m_pa; - if (flags & IOMAP_DAX) - iomap->addr += mdev.m_dif->dax_part_off; } return 0; } @@ -351,11 +369,16 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, */ static int erofs_read_folio(struct file *file, struct folio *folio) { + trace_erofs_read_folio(folio, true); + return iomap_read_folio(folio, &erofs_iomap_ops); } static void erofs_readahead(struct readahead_control *rac) { + trace_erofs_readahead(rac->mapping->host, readahead_index(rac), + readahead_count(rac), true); + return iomap_readahead(rac, &erofs_iomap_ops); } diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index bf62e2836b60..354762c9723f 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -301,13 +301,11 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, cur = min(cur, rq->outputsize); if (cur && rq->out[0]) { kin = kmap_local_page(rq->in[nrpages_in - 1]); - if (rq->out[0] == rq->in[nrpages_in - 1]) { + if (rq->out[0] == rq->in[nrpages_in - 1]) memmove(kin + rq->pageofs_out, kin + pi, cur); - flush_dcache_page(rq->out[0]); - } else { + else memcpy_to_page(rq->out[0], rq->pageofs_out, kin + pi, cur); - } kunmap_local(kin); } rq->outputsize -= cur; @@ -325,14 +323,12 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, po = (rq->pageofs_out + cur + pi) & ~PAGE_MASK; DBG_BUGON(no >= nrpages_out); cnt = min(insz - pi, PAGE_SIZE - po); - if (rq->out[no] == rq->in[ni]) { + if (rq->out[no] == rq->in[ni]) memmove(kin + po, kin + rq->pageofs_in + pi, cnt); - flush_dcache_page(rq->out[no]); - } else if (rq->out[no]) { + else if (rq->out[no]) memcpy_to_page(rq->out[no], po, kin + rq->pageofs_in + pi, cnt); - } pi += cnt; } while (pi < insz); kunmap_local(kin); @@ -471,7 +467,7 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) return -EOPNOTSUPP; } - erofs_init_metabuf(&buf, sb); + (void)erofs_init_metabuf(&buf, sb, false); offset = EROFS_SUPER_OFFSET + sbi->sb_size; alg = 0; for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 2fae209d0274..debf469ad6bd 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -34,7 +34,8 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, } if (!dir_emit(ctx, de_name, de_namelen, - le64_to_cpu(de->nid), d_type)) + erofs_nid_to_ino64(EROFS_SB(dir->i_sb), + le64_to_cpu(de->nid)), d_type)) return 1; ++de; ctx->pos += sizeof(struct erofs_dirent); @@ -47,8 +48,12 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct inode *dir = file_inode(f); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct super_block *sb = dir->i_sb; + struct file_ra_state *ra = &f->f_ra; unsigned long bsz = sb->s_blocksize; unsigned int ofs = erofs_blkoff(sb, ctx->pos); + pgoff_t ra_pages = DIV_ROUND_UP_POW2( + EROFS_I_SB(dir)->dir_ra_bytes, PAGE_SIZE); + pgoff_t nr_pages = DIV_ROUND_UP_POW2(dir->i_size, PAGE_SIZE); int err = 0; bool initial = true; @@ -58,6 +63,21 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct erofs_dirent *de; unsigned int nameoff, maxsize; + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + + /* readahead blocks to enhance performance for large directories */ + if (ra_pages) { + pgoff_t idx = DIV_ROUND_UP_POW2(ctx->pos, PAGE_SIZE); + pgoff_t pages = min(nr_pages - idx, ra_pages); + + if (pages > 1 && !ra_has_index(ra, idx)) + page_cache_sync_readahead(dir->i_mapping, ra, + f, idx, pages); + } + de = erofs_bread(&buf, dbstart, true); if (IS_ERR(de)) { erofs_err(sb, "failed to readdir of logical block %llu of nid %llu", @@ -88,6 +108,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) break; ctx->pos = dbstart + maxsize; ofs = 0; + cond_resched(); } erofs_put_metabuf(&buf); if (EROFS_I(dir)->dot_omitted && ctx->pos == dir->i_size) { diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index 767fb4acdc93..377ee12b8b96 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -15,6 +15,7 @@ #define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 #define EROFS_FEATURE_COMPAT_MTIME 0x00000002 #define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004 +#define EROFS_FEATURE_COMPAT_SHARED_EA_IN_METABOX 0x00000008 /* * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should @@ -31,8 +32,9 @@ #define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020 #define EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES 0x00000040 #define EROFS_FEATURE_INCOMPAT_48BIT 0x00000080 +#define EROFS_FEATURE_INCOMPAT_METABOX 0x00000100 #define EROFS_ALL_FEATURE_INCOMPAT \ - ((EROFS_FEATURE_INCOMPAT_48BIT << 1) - 1) + ((EROFS_FEATURE_INCOMPAT_METABOX << 1) - 1) #define EROFS_SB_EXTSLOT_SIZE 16 @@ -46,7 +48,7 @@ struct erofs_deviceslot { }; #define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot) -/* erofs on-disk super block (currently 128 bytes) */ +/* erofs on-disk super block (currently 144 bytes at maximum) */ struct erofs_super_block { __le32 magic; /* file system magic number */ __le32 checksum; /* crc32c to avoid unexpected on-disk overlap */ @@ -82,7 +84,9 @@ struct erofs_super_block { __u8 reserved[3]; __le32 build_time; /* seconds added to epoch for mkfs time */ __le64 rootnid_8b; /* (48BIT on) nid of root directory */ - __u8 reserved2[8]; + __le64 reserved2; + __le64 metabox_nid; /* (METABOX on) nid of the metabox inode */ + __le64 reserved3; /* [align to extslot 1] */ }; /* @@ -267,6 +271,9 @@ struct erofs_inode_chunk_index { __le32 startblk_lo; /* starting block number of this chunk */ }; +#define EROFS_DIRENT_NID_METABOX_BIT 63 +#define EROFS_DIRENT_NID_MASK (BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT) - 1) + /* dirent sorts in alphabet order, thus we can do binary search */ struct erofs_dirent { __le64 nid; /* node number */ @@ -434,7 +441,7 @@ static inline void erofs_check_ondisk_layout_definitions(void) .h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT }; - BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128); + BUILD_BUG_ON(sizeof(struct erofs_super_block) != 144); BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32); BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64); BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12); diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index df5cc63f2c01..b7b3432a9882 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -38,7 +38,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) } else { bio_for_each_folio_all(fi, &rq->bio) { DBG_BUGON(folio_test_uptodate(fi.folio)); - erofs_onlinefolio_end(fi.folio, ret); + erofs_onlinefolio_end(fi.folio, ret, false); } } bio_uninit(&rq->bio); @@ -96,8 +96,6 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) struct erofs_map_blocks *map = &io->map; unsigned int cur = 0, end = folio_size(folio), len, attached = 0; loff_t pos = folio_pos(folio), ofs; - struct iov_iter iter; - struct bio_vec bv; int err = 0; erofs_onlinefolio_init(folio); @@ -117,18 +115,12 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) void *src; src = erofs_read_metabuf(&buf, inode->i_sb, - map->m_pa + ofs, true); + map->m_pa + ofs, erofs_inode_in_metabox(inode)); if (IS_ERR(src)) { err = PTR_ERR(src); break; } - bvec_set_folio(&bv, folio, len, cur); - iov_iter_bvec(&iter, ITER_DEST, &bv, 1, len); - if (copy_to_iter(src, len, &iter) != len) { - erofs_put_metabuf(&buf); - err = -EIO; - break; - } + memcpy_to_folio(folio, cur, src, len); erofs_put_metabuf(&buf); } else if (!(map->m_flags & EROFS_MAP_MAPPED)) { folio_zero_segment(folio, cur, cur + len); @@ -162,7 +154,7 @@ io_retry: } cur += len; } - erofs_onlinefolio_end(folio, err); + erofs_onlinefolio_end(folio, err, false); return err; } diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 34517ca9df91..362acf828279 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -274,7 +274,8 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req) size_t size = map.m_llen; void *src; - src = erofs_read_metabuf(&buf, sb, map.m_pa, true); + src = erofs_read_metabuf(&buf, sb, map.m_pa, + erofs_inode_in_metabox(inode)); if (IS_ERR(src)) return PTR_ERR(src); diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index a0ae0b4f7b01..9a2f59721522 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -29,6 +29,7 @@ static int erofs_read_inode(struct inode *inode) struct super_block *sb = inode->i_sb; erofs_blk_t blkaddr = erofs_blknr(sb, erofs_iloc(inode)); unsigned int ofs = erofs_blkoff(sb, erofs_iloc(inode)); + bool in_mbox = erofs_inode_in_metabox(inode); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_sb_info *sbi = EROFS_SB(sb); erofs_blk_t addrmask = BIT_ULL(48) - 1; @@ -39,10 +40,10 @@ static int erofs_read_inode(struct inode *inode) void *ptr; int err = 0; - ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), true); + ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), in_mbox); if (IS_ERR(ptr)) { err = PTR_ERR(ptr); - erofs_err(sb, "failed to get inode (nid: %llu) page, err %d", + erofs_err(sb, "failed to read inode meta block (nid: %llu): %d", vi->nid, err); goto err_out; } @@ -78,10 +79,10 @@ static int erofs_read_inode(struct inode *inode) memcpy(&copied, dic, gotten); ptr = erofs_read_metabuf(&buf, sb, - erofs_pos(sb, blkaddr + 1), true); + erofs_pos(sb, blkaddr + 1), in_mbox); if (IS_ERR(ptr)) { err = PTR_ERR(ptr); - erofs_err(sb, "failed to get inode payload block (nid: %llu), err %d", + erofs_err(sb, "failed to read inode payload block (nid: %llu): %d", vi->nid, err); goto err_out; } @@ -264,13 +265,13 @@ static int erofs_fill_inode(struct inode *inode) * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down * so that it will fit. */ -static ino_t erofs_squash_ino(erofs_nid_t nid) +static ino_t erofs_squash_ino(struct super_block *sb, erofs_nid_t nid) { - ino_t ino = (ino_t)nid; + u64 ino64 = erofs_nid_to_ino64(EROFS_SB(sb), nid); if (sizeof(ino_t) < sizeof(erofs_nid_t)) - ino ^= nid >> (sizeof(erofs_nid_t) - sizeof(ino_t)) * 8; - return ino; + ino64 ^= ino64 >> (sizeof(erofs_nid_t) - sizeof(ino_t)) * 8; + return (ino_t)ino64; } static int erofs_iget5_eq(struct inode *inode, void *opaque) @@ -282,7 +283,7 @@ static int erofs_iget5_set(struct inode *inode, void *opaque) { const erofs_nid_t nid = *(erofs_nid_t *)opaque; - inode->i_ino = erofs_squash_ino(nid); + inode->i_ino = erofs_squash_ino(inode->i_sb, nid); EROFS_I(inode)->nid = nid; return 0; } @@ -291,7 +292,7 @@ struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid) { struct inode *inode; - inode = iget5_locked(sb, erofs_squash_ino(nid), erofs_iget5_eq, + inode = iget5_locked(sb, erofs_squash_ino(sb, nid), erofs_iget5_eq, erofs_iget5_set, &nid); if (!inode) return ERR_PTR(-ENOMEM); diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index a32c03a80c70..4ccc5f0ee8df 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -125,6 +125,7 @@ struct erofs_sb_info { struct erofs_sb_lz4_info lz4; #endif /* CONFIG_EROFS_FS_ZIP */ struct inode *packed_inode; + struct inode *metabox_inode; struct erofs_dev_context *devs; u64 total_blocks; @@ -148,6 +149,7 @@ struct erofs_sb_info { /* what we really care is nid, rather than ino.. */ erofs_nid_t root_nid; erofs_nid_t packed_nid; + erofs_nid_t metabox_nid; /* used for statfs, f_files - f_favail */ u64 inos; @@ -157,6 +159,7 @@ struct erofs_sb_info { /* sysfs support */ struct kobject s_kobj; /* /sys/fs/erofs/<devname> */ struct completion s_kobj_unregister; + erofs_off_t dir_ra_bytes; /* fscache support */ struct fscache_volume *volume; @@ -227,8 +230,27 @@ EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS) EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE) EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES) EROFS_FEATURE_FUNCS(48bit, incompat, INCOMPAT_48BIT) +EROFS_FEATURE_FUNCS(metabox, incompat, INCOMPAT_METABOX) EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER) +EROFS_FEATURE_FUNCS(shared_ea_in_metabox, compat, COMPAT_SHARED_EA_IN_METABOX) + +static inline u64 erofs_nid_to_ino64(struct erofs_sb_info *sbi, erofs_nid_t nid) +{ + if (!erofs_sb_has_metabox(sbi)) + return nid; + + /* + * When metadata compression is enabled, avoid generating excessively + * large inode numbers for metadata-compressed inodes. Shift NIDs in + * the 31-62 bit range left by one and move the metabox flag to bit 31. + * + * Note: on-disk NIDs remain unchanged as they are primarily used for + * compatibility with non-LFS 32-bit applications. + */ + return ((nid << 1) & GENMASK_ULL(63, 32)) | (nid & GENMASK(30, 0)) | + ((nid >> EROFS_DIRENT_NID_METABOX_BIT) << 31); +} /* atomic flag definitions */ #define EROFS_I_EA_INITED_BIT 0 @@ -238,6 +260,9 @@ EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER) #define EROFS_I_BL_XATTR_BIT (BITS_PER_LONG - 1) #define EROFS_I_BL_Z_BIT (BITS_PER_LONG - 2) +/* default readahead size of directories */ +#define EROFS_DIR_RA_BYTES 16384 + struct erofs_inode { erofs_nid_t nid; @@ -279,12 +304,20 @@ struct erofs_inode { #define EROFS_I(ptr) container_of(ptr, struct erofs_inode, vfs_inode) +static inline bool erofs_inode_in_metabox(struct inode *inode) +{ + return EROFS_I(inode)->nid & BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT); +} + static inline erofs_off_t erofs_iloc(struct inode *inode) { struct erofs_sb_info *sbi = EROFS_I_SB(inode); + erofs_nid_t nid_lo = EROFS_I(inode)->nid & EROFS_DIRENT_NID_MASK; + if (erofs_inode_in_metabox(inode)) + return nid_lo << sbi->islotbits; return erofs_pos(inode->i_sb, sbi->meta_blkaddr) + - (EROFS_I(inode)->nid << sbi->islotbits); + (nid_lo << sbi->islotbits); } static inline unsigned int erofs_inode_version(unsigned int ifmt) @@ -315,10 +348,12 @@ static inline struct folio *erofs_grab_folio_nowait(struct address_space *as, /* The length of extent is full */ #define EROFS_MAP_FULL_MAPPED 0x0008 /* Located in the special packed inode */ -#define EROFS_MAP_FRAGMENT 0x0010 +#define __EROFS_MAP_FRAGMENT 0x0010 /* The extent refers to partial decompressed data */ #define EROFS_MAP_PARTIAL_REF 0x0020 +#define EROFS_MAP_FRAGMENT (EROFS_MAP_MAPPED | __EROFS_MAP_FRAGMENT) + struct erofs_map_blocks { struct erofs_buf buf; @@ -381,16 +416,17 @@ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, void erofs_unmap_metabuf(struct erofs_buf *buf); void erofs_put_metabuf(struct erofs_buf *buf); void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap); -void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb); +int erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb, + bool in_metabox); void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, - erofs_off_t offset, bool need_kmap); + erofs_off_t offset, bool in_metabox); int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map); void erofs_onlinefolio_init(struct folio *folio); void erofs_onlinefolio_split(struct folio *folio); -void erofs_onlinefolio_end(struct folio *folio, int err); +void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty); struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid); int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, diff --git a/fs/erofs/super.c b/fs/erofs/super.c index e1e9f06e8342..e1020aa60771 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -141,7 +141,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, struct erofs_deviceslot *dis; struct file *file; - dis = erofs_read_metabuf(buf, sb, *pos, true); + dis = erofs_read_metabuf(buf, sb, *pos, false); if (IS_ERR(dis)) return PTR_ERR(dis); @@ -258,7 +258,7 @@ static int erofs_read_superblock(struct super_block *sb) void *data; int ret; - data = erofs_read_metabuf(&buf, sb, 0, true); + data = erofs_read_metabuf(&buf, sb, 0, false); if (IS_ERR(data)) { erofs_err(sb, "cannot read erofs superblock"); return PTR_ERR(data); @@ -319,6 +319,14 @@ static int erofs_read_superblock(struct super_block *sb) sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b); } sbi->packed_nid = le64_to_cpu(dsb->packed_nid); + if (erofs_sb_has_metabox(sbi)) { + if (sbi->sb_size <= offsetof(struct erofs_super_block, + metabox_nid)) + return -EFSCORRUPTED; + sbi->metabox_nid = le64_to_cpu(dsb->metabox_nid); + if (sbi->metabox_nid & BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT)) + return -EFSCORRUPTED; /* self-loop detection */ + } sbi->inos = le64_to_cpu(dsb->inos); sbi->epoch = (s64)le64_to_cpu(dsb->epoch); @@ -335,6 +343,8 @@ static int erofs_read_superblock(struct super_block *sb) if (erofs_sb_has_48bit(sbi)) erofs_info(sb, "EXPERIMENTAL 48-bit layout support in use. Use at your own risk!"); + if (erofs_sb_has_metabox(sbi)) + erofs_info(sb, "EXPERIMENTAL metadata compression support in use. Use at your own risk!"); if (erofs_is_fscache_mode(sb)) erofs_info(sb, "[deprecated] fscache-based on-demand read feature in use. Use at your own risk!"); out: @@ -690,6 +700,12 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) return PTR_ERR(inode); sbi->packed_inode = inode; } + if (erofs_sb_has_metabox(sbi)) { + inode = erofs_iget(sb, sbi->metabox_nid); + if (IS_ERR(inode)) + return PTR_ERR(inode); + sbi->metabox_inode = inode; + } inode = erofs_iget(sb, sbi->root_nid); if (IS_ERR(inode)) @@ -715,6 +731,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; + sbi->dir_ra_bytes = EROFS_DIR_RA_BYTES; erofs_info(sb, "mounted with root inode @ nid %llu.", sbi->root_nid); return 0; } @@ -845,6 +862,8 @@ static void erofs_drop_internal_inodes(struct erofs_sb_info *sbi) { iput(sbi->packed_inode); sbi->packed_inode = NULL; + iput(sbi->metabox_inode); + sbi->metabox_inode = NULL; #ifdef CONFIG_EROFS_FS_ZIP iput(sbi->managed_cache); sbi->managed_cache = NULL; diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index eed8797a193f..1e0658a1d95b 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -65,12 +65,14 @@ EROFS_ATTR_FUNC(drop_caches, 0200); #ifdef CONFIG_EROFS_FS_ZIP_ACCEL EROFS_ATTR_FUNC(accel, 0644); #endif +EROFS_ATTR_RW_UI(dir_ra_bytes, erofs_sb_info); static struct attribute *erofs_sb_attrs[] = { #ifdef CONFIG_EROFS_FS_ZIP ATTR_LIST(sync_decompress), ATTR_LIST(drop_caches), #endif + ATTR_LIST(dir_ra_bytes), NULL, }; ATTRIBUTE_GROUPS(erofs_sb); @@ -95,6 +97,7 @@ EROFS_ATTR_FEATURE(ztailpacking); EROFS_ATTR_FEATURE(fragments); EROFS_ATTR_FEATURE(dedupe); EROFS_ATTR_FEATURE(48bit); +EROFS_ATTR_FEATURE(metabox); static struct attribute *erofs_feat_attrs[] = { ATTR_LIST(zero_padding), @@ -108,6 +111,7 @@ static struct attribute *erofs_feat_attrs[] = { ATTR_LIST(fragments), ATTR_LIST(dedupe), ATTR_LIST(48bit), + ATTR_LIST(metabox), NULL, }; ATTRIBUTE_GROUPS(erofs_feat); diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index 9cf84717a92e..eaa9efd766ee 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -72,12 +72,14 @@ static int erofs_init_inode_xattrs(struct inode *inode) ret = -EFSCORRUPTED; goto out_unlock; /* xattr ondisk layout error */ } - ret = -ENOATTR; + ret = -ENODATA; goto out_unlock; } it.buf = __EROFS_BUF_INITIALIZER; - erofs_init_metabuf(&it.buf, sb); + ret = erofs_init_metabuf(&it.buf, sb, erofs_inode_in_metabox(inode)); + if (ret) + goto out_unlock; it.pos = erofs_iloc(inode) + vi->inode_isize; /* read in shared xattr array (non-atomic, see kmalloc below) */ @@ -266,20 +268,20 @@ static int erofs_getxattr_foreach(struct erofs_xattr_iter *it) (entry.e_name_index & EROFS_XATTR_LONG_PREFIX_MASK); if (pf >= sbi->xattr_prefixes + sbi->xattr_prefix_count) - return -ENOATTR; + return -ENODATA; if (it->index != pf->prefix->base_index || it->name.len != entry.e_name_len + pf->infix_len) - return -ENOATTR; + return -ENODATA; if (memcmp(it->name.name, pf->prefix->infix, pf->infix_len)) - return -ENOATTR; + return -ENODATA; it->infix_len = pf->infix_len; } else { if (it->index != entry.e_name_index || it->name.len != entry.e_name_len) - return -ENOATTR; + return -ENODATA; it->infix_len = 0; } @@ -295,7 +297,7 @@ static int erofs_getxattr_foreach(struct erofs_xattr_iter *it) entry.e_name_len - processed); if (memcmp(it->name.name + it->infix_len + processed, it->kaddr, slice)) - return -ENOATTR; + return -ENODATA; it->pos += slice; } @@ -323,9 +325,12 @@ static int erofs_xattr_iter_inline(struct erofs_xattr_iter *it, sizeof(u32) * vi->xattr_shared_count; if (xattr_header_sz >= vi->xattr_isize) { DBG_BUGON(xattr_header_sz > vi->xattr_isize); - return -ENOATTR; + return -ENODATA; } + ret = erofs_init_metabuf(&it->buf, it->sb, erofs_inode_in_metabox(inode)); + if (ret) + return ret; remaining = vi->xattr_isize - xattr_header_sz; it->pos = erofs_iloc(inode) + vi->inode_isize + xattr_header_sz; @@ -347,7 +352,7 @@ static int erofs_xattr_iter_inline(struct erofs_xattr_iter *it, ret = erofs_getxattr_foreach(it); else ret = erofs_listxattr_foreach(it); - if ((getxattr && ret != -ENOATTR) || (!getxattr && ret)) + if ((getxattr && ret != -ENODATA) || (!getxattr && ret)) break; it->pos = next_pos; @@ -361,12 +366,17 @@ static int erofs_xattr_iter_shared(struct erofs_xattr_iter *it, struct erofs_inode *const vi = EROFS_I(inode); struct super_block *const sb = it->sb; struct erofs_sb_info *sbi = EROFS_SB(sb); - unsigned int i; - int ret = -ENOATTR; + unsigned int i = 0; + int ret; - for (i = 0; i < vi->xattr_shared_count; ++i) { + ret = erofs_init_metabuf(&it->buf, sb, + erofs_sb_has_shared_ea_in_metabox(sbi)); + if (ret) + return ret; + + while (i < vi->xattr_shared_count) { it->pos = erofs_pos(sb, sbi->xattr_blkaddr) + - vi->xattr_shared_xattrs[i] * sizeof(__le32); + vi->xattr_shared_xattrs[i++] * sizeof(__le32); it->kaddr = erofs_bread(&it->buf, it->pos, true); if (IS_ERR(it->kaddr)) return PTR_ERR(it->kaddr); @@ -375,10 +385,10 @@ static int erofs_xattr_iter_shared(struct erofs_xattr_iter *it, ret = erofs_getxattr_foreach(it); else ret = erofs_listxattr_foreach(it); - if ((getxattr && ret != -ENOATTR) || (!getxattr && ret)) + if ((getxattr && ret != -ENODATA) || (!getxattr && ret)) break; } - return ret; + return i ? ret : -ENODATA; } int erofs_getxattr(struct inode *inode, int index, const char *name, @@ -403,7 +413,7 @@ int erofs_getxattr(struct inode *inode, int index, const char *name, EROFS_XATTR_FILTER_SEED + index); hashbit &= EROFS_XATTR_FILTER_BITS - 1; if (vi->xattr_name_filter & (1U << hashbit)) - return -ENOATTR; + return -ENODATA; } it.index = index; @@ -413,13 +423,12 @@ int erofs_getxattr(struct inode *inode, int index, const char *name, it.sb = inode->i_sb; it.buf = __EROFS_BUF_INITIALIZER; - erofs_init_metabuf(&it.buf, it.sb); it.buffer = buffer; it.buffer_size = buffer_size; it.buffer_ofs = 0; ret = erofs_xattr_iter_inline(&it, inode, true); - if (ret == -ENOATTR) + if (ret == -ENODATA) ret = erofs_xattr_iter_shared(&it, inode, true); erofs_put_metabuf(&it.buf); return ret ? ret : it.buffer_ofs; @@ -432,23 +441,22 @@ ssize_t erofs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) struct inode *inode = d_inode(dentry); ret = erofs_init_inode_xattrs(inode); - if (ret == -ENOATTR) + if (ret == -ENODATA) return 0; if (ret) return ret; it.sb = dentry->d_sb; it.buf = __EROFS_BUF_INITIALIZER; - erofs_init_metabuf(&it.buf, it.sb); it.dentry = dentry; it.buffer = buffer; it.buffer_size = buffer_size; it.buffer_ofs = 0; ret = erofs_xattr_iter_inline(&it, inode, false); - if (!ret || ret == -ENOATTR) + if (!ret || ret == -ENODATA) ret = erofs_xattr_iter_shared(&it, inode, false); - if (ret == -ENOATTR) + if (ret == -ENODATA) ret = 0; erofs_put_metabuf(&it.buf); return ret ? ret : it.buffer_ofs; @@ -485,7 +493,7 @@ int erofs_xattr_prefixes_init(struct super_block *sb) if (sbi->packed_inode) buf.mapping = sbi->packed_inode->i_mapping; else - erofs_init_metabuf(&buf, sb); + (void)erofs_init_metabuf(&buf, sb, false); for (i = 0; i < sbi->xattr_prefix_count; i++) { void *ptr = erofs_read_metadata(sb, &buf, &pos, &len); @@ -539,7 +547,7 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu) rc = erofs_getxattr(inode, prefix, "", value, rc); } - if (rc == -ENOATTR) + if (rc == -ENODATA) acl = NULL; else if (rc < 0) acl = ERR_PTR(rc); diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h index b246cd0e135e..6317caa8413e 100644 --- a/fs/erofs/xattr.h +++ b/fs/erofs/xattr.h @@ -10,9 +10,6 @@ #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> -/* Attribute not found */ -#define ENOATTR ENODATA - #ifdef CONFIG_EROFS_FS_XATTR extern const struct xattr_handler erofs_xattr_user_handler; extern const struct xattr_handler erofs_xattr_trusted_handler; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index fe8071844724..792f20888a8f 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -805,6 +805,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe) struct erofs_map_blocks *map = &fe->map; struct super_block *sb = fe->inode->i_sb; struct z_erofs_pcluster *pcl = NULL; + void *ptr; int ret; DBG_BUGON(fe->pcl); @@ -854,15 +855,17 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe) /* bind cache first when cached decompression is preferred */ z_erofs_bind_cache(fe); } else { - void *mptr; - - mptr = erofs_read_metabuf(&map->buf, sb, map->m_pa, false); - if (IS_ERR(mptr)) { - ret = PTR_ERR(mptr); - erofs_err(sb, "failed to get inline data %d", ret); + ret = erofs_init_metabuf(&map->buf, sb, + erofs_inode_in_metabox(fe->inode)); + if (ret) + return ret; + ptr = erofs_bread(&map->buf, map->m_pa, false); + if (IS_ERR(ptr)) { + ret = PTR_ERR(ptr); + erofs_err(sb, "failed to get inline folio %d", ret); return ret; } - get_page(map->buf.page); + folio_get(page_folio(map->buf.page)); WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page); fe->pcl->pageofs_in = map->m_pa & ~PAGE_MASK; fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; @@ -1034,7 +1037,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, if (!(map->m_flags & EROFS_MAP_MAPPED)) { folio_zero_segment(folio, cur, end); tight = false; - } else if (map->m_flags & EROFS_MAP_FRAGMENT) { + } else if (map->m_flags & __EROFS_MAP_FRAGMENT) { erofs_off_t fpos = offset + cur - map->m_la; err = z_erofs_read_fragment(inode->i_sb, folio, cur, @@ -1091,7 +1094,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, tight = (bs == PAGE_SIZE); } } while ((end = cur) > 0); - erofs_onlinefolio_end(folio, err); + erofs_onlinefolio_end(folio, err, false); return err; } @@ -1196,7 +1199,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err) cur += len; } kunmap_local(dst); - erofs_onlinefolio_end(page_folio(bvi->bvec.page), err); + erofs_onlinefolio_end(page_folio(bvi->bvec.page), err, true); list_del(p); kfree(bvi); } @@ -1325,9 +1328,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) /* must handle all compressed pages before actual file pages */ if (pcl->from_meta) { - page = pcl->compressed_bvecs[0].page; + folio_put(page_folio(pcl->compressed_bvecs[0].page)); WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL); - put_page(page); } else { /* managed folios are still left in compressed_bvecs[] */ for (i = 0; i < pclusterpages; ++i) { @@ -1355,7 +1357,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) DBG_BUGON(z_erofs_page_is_invalidated(page)); if (!z_erofs_is_shortlived_page(page)) { - erofs_onlinefolio_end(page_folio(page), err); + erofs_onlinefolio_end(page_folio(page), err, true); continue; } if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) { diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 0bebc6e3a4d7..a93efd95c555 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -17,7 +17,7 @@ struct z_erofs_maprecorder { u16 delta[2]; erofs_blk_t pblk, compressedblks; erofs_off_t nextpackoff; - bool partialref; + bool partialref, in_mbox; }; static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, @@ -31,7 +31,7 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, struct z_erofs_lcluster_index *di; unsigned int advise; - di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, true); + di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox); if (IS_ERR(di)) return PTR_ERR(di); m->lcn = lcn; @@ -146,7 +146,7 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, else return -EOPNOTSUPP; - in = erofs_read_metabuf(&m->map->buf, m->inode->i_sb, pos, true); + in = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox); if (IS_ERR(in)) return PTR_ERR(in); @@ -240,6 +240,13 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m, unsigned int lcn, bool lookahead) { + if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) { + erofs_err(m->inode->i_sb, "unknown type %u @ lcn %u of nid %llu", + m->type, lcn, EROFS_I(m->inode)->nid); + DBG_BUGON(1); + return -EOPNOTSUPP; + } + switch (EROFS_I(m->inode)->datalayout) { case EROFS_INODE_COMPRESSED_FULL: return z_erofs_load_full_lcluster(m, lcn); @@ -265,12 +272,7 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, if (err) return err; - if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) { - erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu", - m->type, lcn, vi->nid); - DBG_BUGON(1); - return -EOPNOTSUPP; - } else if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { + if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { lookback_distance = m->delta[0]; if (!lookback_distance) break; @@ -325,25 +327,18 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, DBG_BUGON(lcn == initial_lcn && m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD); - if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { - if (m->delta[0] != 1) { - erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } - if (m->compressedblks) - goto out; - } else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) { - /* - * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type - * rather than CBLKCNT, it's a 1 block-sized pcluster. - */ - m->compressedblks = 1; - goto out; + if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD && m->delta[0] != 1) { + erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; } - erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; + + /* + * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type rather + * than CBLKCNT, it's a 1 block-sized pcluster. + */ + if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD || !m->compressedblks) + m->compressedblks = 1; out: m->map->m_plen = erofs_pos(sb, m->compressedblks); return 0; @@ -379,11 +374,6 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) if (lcn != headlcn) break; /* ends at the next HEAD lcluster */ m->delta[1] = 1; - } else { - erofs_err(inode->i_sb, "unknown type %u @ lcn %llu of nid %llu", - m->type, lcn, vi->nid); - DBG_BUGON(1); - return -EOPNOTSUPP; } lcn += m->delta[1]; } @@ -402,6 +392,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode, struct z_erofs_maprecorder m = { .inode = inode, .map = map, + .in_mbox = erofs_inode_in_metabox(inode), }; int err = 0; unsigned int endoff, afmt; @@ -413,8 +404,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode, !vi->z_tailextent_headlcn) { map->m_la = 0; map->m_llen = inode->i_size; - map->m_flags = EROFS_MAP_MAPPED | - EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; return 0; } initial_lcn = ofs >> lclusterbits; @@ -429,44 +419,33 @@ static int z_erofs_map_blocks_fo(struct inode *inode, map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED; end = (m.lcn + 1ULL) << lclusterbits; - switch (m.type) { - case Z_EROFS_LCLUSTER_TYPE_PLAIN: - case Z_EROFS_LCLUSTER_TYPE_HEAD1: - case Z_EROFS_LCLUSTER_TYPE_HEAD2: - if (endoff >= m.clusterofs) { - m.headtype = m.type; - map->m_la = (m.lcn << lclusterbits) | m.clusterofs; - /* - * For ztailpacking files, in order to inline data more - * effectively, special EOF lclusters are now supported - * which can have three parts at most. - */ - if (ztailpacking && end > inode->i_size) - end = inode->i_size; - break; - } - /* m.lcn should be >= 1 if endoff < m.clusterofs */ - if (!m.lcn) { - erofs_err(sb, "invalid logical cluster 0 at nid %llu", - vi->nid); - err = -EFSCORRUPTED; - goto unmap_out; + if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD && endoff >= m.clusterofs) { + m.headtype = m.type; + map->m_la = (m.lcn << lclusterbits) | m.clusterofs; + /* + * For ztailpacking files, in order to inline data more + * effectively, special EOF lclusters are now supported + * which can have three parts at most. + */ + if (ztailpacking && end > inode->i_size) + end = inode->i_size; + } else { + if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) { + /* m.lcn should be >= 1 if endoff < m.clusterofs */ + if (!m.lcn) { + erofs_err(sb, "invalid logical cluster 0 at nid %llu", + vi->nid); + err = -EFSCORRUPTED; + goto unmap_out; + } + end = (m.lcn << lclusterbits) | m.clusterofs; + map->m_flags |= EROFS_MAP_FULL_MAPPED; + m.delta[0] = 1; } - end = (m.lcn << lclusterbits) | m.clusterofs; - map->m_flags |= EROFS_MAP_FULL_MAPPED; - m.delta[0] = 1; - fallthrough; - case Z_EROFS_LCLUSTER_TYPE_NONHEAD: /* get the corresponding first chunk */ err = z_erofs_extent_lookback(&m, m.delta[0]); if (err) goto unmap_out; - break; - default: - erofs_err(sb, "unknown type %u @ offset %llu of nid %llu", - m.type, ofs, vi->nid); - err = -EOPNOTSUPP; - goto unmap_out; } if (m.partialref) map->m_flags |= EROFS_MAP_PARTIAL_REF; @@ -489,7 +468,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode, goto unmap_out; } } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { - map->m_flags |= EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; } else { map->m_pa = erofs_pos(sb, m.pblk); err = z_erofs_get_extent_compressedlen(&m, initial_lcn); @@ -543,6 +522,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, unsigned int recsz = z_erofs_extent_recsize(vi->z_advise); erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize), recsz); + bool in_mbox = erofs_inode_in_metabox(inode); erofs_off_t lend = inode->i_size; erofs_off_t l, r, mid, pa, la, lstart; struct z_erofs_extent *ext; @@ -552,7 +532,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, map->m_flags = 0; if (recsz <= offsetof(struct z_erofs_extent, pstart_hi)) { if (recsz <= offsetof(struct z_erofs_extent, pstart_lo)) { - ext = erofs_read_metabuf(&map->buf, sb, pos, true); + ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox); if (IS_ERR(ext)) return PTR_ERR(ext); pa = le64_to_cpu(*(__le64 *)ext); @@ -565,7 +545,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, } for (; lstart <= map->m_la; lstart += 1 << vi->z_lclusterbits) { - ext = erofs_read_metabuf(&map->buf, sb, pos, true); + ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox); if (IS_ERR(ext)) return PTR_ERR(ext); map->m_plen = le32_to_cpu(ext->plen); @@ -585,7 +565,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, for (l = 0, r = vi->z_extents; l < r; ) { mid = l + (r - l) / 2; ext = erofs_read_metabuf(&map->buf, sb, - pos + mid * recsz, true); + pos + mid * recsz, in_mbox); if (IS_ERR(ext)) return PTR_ERR(ext); @@ -617,7 +597,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, if (lstart < lend) { map->m_la = lstart; if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { - map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; vi->z_fragmentoff = map->m_plen; if (recsz > offsetof(struct z_erofs_extent, pstart_lo)) vi->z_fragmentoff |= map->m_pa << 32; @@ -642,13 +622,12 @@ static int z_erofs_map_blocks_ext(struct inode *inode, return 0; } -static int z_erofs_fill_inode_lazy(struct inode *inode) +static int z_erofs_fill_inode(struct inode *inode, struct erofs_map_blocks *map) { struct erofs_inode *const vi = EROFS_I(inode); struct super_block *const sb = inode->i_sb; int err, headnr; erofs_off_t pos; - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct z_erofs_map_header *h; if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) { @@ -668,7 +647,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) goto out_unlock; pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); - h = erofs_read_metabuf(&buf, sb, pos, true); + h = erofs_read_metabuf(&map->buf, sb, pos, erofs_inode_in_metabox(inode)); if (IS_ERR(h)) { err = PTR_ERR(h); goto out_unlock; @@ -706,7 +685,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel", headnr + 1, vi->z_algorithmtype[headnr], vi->nid); err = -EOPNOTSUPP; - goto out_put_metabuf; + goto out_unlock; } if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && @@ -715,7 +694,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu", vi->nid); err = -EFSCORRUPTED; - goto out_put_metabuf; + goto out_unlock; } if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ @@ -723,27 +702,25 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu", vi->nid); err = -EFSCORRUPTED; - goto out_put_metabuf; + goto out_unlock; } if (vi->z_idata_size || (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { - struct erofs_map_blocks map = { + struct erofs_map_blocks tm = { .buf = __EROFS_BUF_INITIALIZER }; - err = z_erofs_map_blocks_fo(inode, &map, + err = z_erofs_map_blocks_fo(inode, &tm, EROFS_GET_BLOCKS_FINDTAIL); - erofs_put_metabuf(&map.buf); + erofs_put_metabuf(&tm.buf); if (err < 0) - goto out_put_metabuf; + goto out_unlock; } done: /* paired with smp_mb() at the beginning of the function */ smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); -out_put_metabuf: - erofs_put_metabuf(&buf); out_unlock: clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags); return err; @@ -761,7 +738,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, map->m_la = inode->i_size; map->m_flags = 0; } else { - err = z_erofs_fill_inode_lazy(inode); + err = z_erofs_fill_inode(inode, map); if (!err) { if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL && (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) @@ -797,7 +774,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, iomap->length = map.m_llen; if (map.m_flags & EROFS_MAP_MAPPED) { iomap->type = IOMAP_MAPPED; - iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ? + iomap->addr = map.m_flags & __EROFS_MAP_FRAGMENT ? IOMAP_NULL_ADDR : map.m_pa; } else { iomap->type = IOMAP_HOLE; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index a97a771a459c..0fbf5dfedb24 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -137,7 +137,13 @@ struct epitem { }; /* List header used to link this structure to the eventpoll ready list */ - struct llist_node rdllink; + struct list_head rdllink; + + /* + * Works together "struct eventpoll"->ovflist in keeping the + * single linked chain of items. + */ + struct epitem *next; /* The file descriptor information this item refers to */ struct epoll_filefd ffd; @@ -185,15 +191,22 @@ struct eventpoll { /* Wait queue used by file->poll() */ wait_queue_head_t poll_wait; - /* - * List of ready file descriptors. Adding to this list is lockless. Items can be removed - * only with eventpoll::mtx - */ - struct llist_head rdllist; + /* List of ready file descriptors */ + struct list_head rdllist; + + /* Lock which protects rdllist and ovflist */ + rwlock_t lock; /* RB tree root used to store monitored fd structs */ struct rb_root_cached rbr; + /* + * This is a single linked list that chains all the "struct epitem" that + * happened while transferring ready events to userspace w/out + * holding ->lock. + */ + struct epitem *ovflist; + /* wakeup_source used when ep_send_events or __ep_eventpoll_poll is running */ struct wakeup_source *ws; @@ -348,14 +361,10 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1, (p1->file < p2->file ? -1 : p1->fd - p2->fd)); } -/* - * Add the item to its container eventpoll's rdllist; do nothing if the item is already on rdllist. - */ -static void epitem_ready(struct epitem *epi) +/* Tells us if the item is currently linked */ +static inline int ep_is_linked(struct epitem *epi) { - if (&epi->rdllink == cmpxchg(&epi->rdllink.next, &epi->rdllink, NULL)) - llist_add(&epi->rdllink, &epi->ep->rdllist); - + return !list_empty(&epi->rdllink); } static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p) @@ -374,26 +383,13 @@ static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p) * * @ep: Pointer to the eventpoll context. * - * Return: true if ready events might be available, false otherwise. + * Return: a value different than %zero if ready events are available, + * or %zero otherwise. */ -static inline bool ep_events_available(struct eventpoll *ep) +static inline int ep_events_available(struct eventpoll *ep) { - bool available; - int locked; - - locked = mutex_trylock(&ep->mtx); - if (!locked) { - /* - * The lock held and someone might have removed all items while inspecting it. The - * llist_empty() check in this case is futile. Assume that something is enqueued and - * let ep_try_send_events() figure it out. - */ - return true; - } - - available = !llist_empty(&ep->rdllist); - mutex_unlock(&ep->mtx); - return available; + return !list_empty_careful(&ep->rdllist) || + READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR; } #ifdef CONFIG_NET_RX_BUSY_POLL @@ -728,6 +724,77 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi) rcu_read_unlock(); } + +/* + * ep->mutex needs to be held because we could be hit by + * eventpoll_release_file() and epoll_ctl(). + */ +static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist) +{ + /* + * Steal the ready list, and re-init the original one to the + * empty list. Also, set ep->ovflist to NULL so that events + * happening while looping w/out locks, are not lost. We cannot + * have the poll callback to queue directly on ep->rdllist, + * because we want the "sproc" callback to be able to do it + * in a lockless way. + */ + lockdep_assert_irqs_enabled(); + write_lock_irq(&ep->lock); + list_splice_init(&ep->rdllist, txlist); + WRITE_ONCE(ep->ovflist, NULL); + write_unlock_irq(&ep->lock); +} + +static void ep_done_scan(struct eventpoll *ep, + struct list_head *txlist) +{ + struct epitem *epi, *nepi; + + write_lock_irq(&ep->lock); + /* + * During the time we spent inside the "sproc" callback, some + * other events might have been queued by the poll callback. + * We re-insert them inside the main ready-list here. + */ + for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL; + nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { + /* + * We need to check if the item is already in the list. + * During the "sproc" callback execution time, items are + * queued into ->ovflist but the "txlist" might already + * contain them, and the list_splice() below takes care of them. + */ + if (!ep_is_linked(epi)) { + /* + * ->ovflist is LIFO, so we have to reverse it in order + * to keep in FIFO. + */ + list_add(&epi->rdllink, &ep->rdllist); + ep_pm_stay_awake(epi); + } + } + /* + * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after + * releasing the lock, events will be queued in the normal way inside + * ep->rdllist. + */ + WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR); + + /* + * Quickly re-inject items left on "txlist". + */ + list_splice(txlist, &ep->rdllist); + __pm_relax(ep->ws); + + if (!list_empty(&ep->rdllist)) { + if (waitqueue_active(&ep->wq)) + wake_up(&ep->wq); + } + + write_unlock_irq(&ep->lock); +} + static void ep_get(struct eventpoll *ep) { refcount_inc(&ep->refcount); @@ -765,12 +832,10 @@ static void ep_free(struct eventpoll *ep) static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) { struct file *file = epi->ffd.file; - struct llist_node *put_back_last; struct epitems_head *to_free; struct hlist_head *head; - LLIST_HEAD(put_back); - lockdep_assert_held(&ep->mtx); + lockdep_assert_irqs_enabled(); /* * Removes poll wait queue hooks. @@ -802,20 +867,10 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) rb_erase_cached(&epi->rbn, &ep->rbr); - if (llist_on_list(&epi->rdllink)) { - put_back_last = NULL; - while (true) { - struct llist_node *n = llist_del_first(&ep->rdllist); - - if (&epi->rdllink == n || WARN_ON(!n)) - break; - if (!put_back_last) - put_back_last = n; - __llist_add(n, &put_back); - } - if (put_back_last) - llist_add_batch(put_back.first, put_back_last, &ep->rdllist); - } + write_lock_irq(&ep->lock); + if (ep_is_linked(epi)) + list_del_init(&epi->rdllink); + write_unlock_irq(&ep->lock); wakeup_source_unregister(ep_wakeup_source(epi)); /* @@ -828,7 +883,7 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) kfree_rcu(epi, rcu); percpu_counter_dec(&ep->user->epoll_watches); - return ep_refcount_dec_and_test(ep); + return true; } /* @@ -836,14 +891,14 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) */ static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi) { - WARN_ON_ONCE(__ep_remove(ep, epi, false)); + if (__ep_remove(ep, epi, false)) + WARN_ON_ONCE(ep_refcount_dec_and_test(ep)); } static void ep_clear_and_put(struct eventpoll *ep) { struct rb_node *rbp, *next; struct epitem *epi; - bool dispose; /* We need to release all tasks waiting for these file */ if (waitqueue_active(&ep->poll_wait)) @@ -876,10 +931,8 @@ static void ep_clear_and_put(struct eventpoll *ep) cond_resched(); } - dispose = ep_refcount_dec_and_test(ep); mutex_unlock(&ep->mtx); - - if (dispose) + if (ep_refcount_dec_and_test(ep)) ep_free(ep); } @@ -919,9 +972,8 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int depth) { struct eventpoll *ep = file->private_data; - struct wakeup_source *ws; - struct llist_node *n; - struct epitem *epi; + LIST_HEAD(txlist); + struct epitem *epi, *tmp; poll_table pt; __poll_t res = 0; @@ -935,39 +987,22 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep * the ready list. */ mutex_lock_nested(&ep->mtx, depth); - while (true) { - n = llist_del_first_init(&ep->rdllist); - if (!n) - break; - - epi = llist_entry(n, struct epitem, rdllink); - + ep_start_scan(ep, &txlist); + list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { if (ep_item_poll(epi, &pt, depth + 1)) { res = EPOLLIN | EPOLLRDNORM; - epitem_ready(epi); break; } else { /* - * We need to activate ep before deactivating epi, to prevent autosuspend - * just in case epi becomes active after ep_item_poll() above. - * - * This is similar to ep_send_events(). + * Item has been dropped into the ready list by the poll + * callback, but it's not actually ready, as far as + * caller requested events goes. We can remove it here. */ - ws = ep_wakeup_source(epi); - if (ws) { - if (ws->active) - __pm_stay_awake(ep->ws); - __pm_relax(ws); - } __pm_relax(ep_wakeup_source(epi)); - - /* Just in case epi becomes active right before __pm_relax() */ - if (unlikely(ep_item_poll(epi, &pt, depth + 1))) - ep_pm_stay_awake(epi); - - __pm_relax(ep->ws); + list_del_init(&epi->rdllink); } } + ep_done_scan(ep, &txlist); mutex_unlock(&ep->mtx); return res; } @@ -1100,7 +1135,7 @@ again: dispose = __ep_remove(ep, epi, true); mutex_unlock(&ep->mtx); - if (dispose) + if (dispose && ep_refcount_dec_and_test(ep)) ep_free(ep); goto again; } @@ -1116,10 +1151,12 @@ static int ep_alloc(struct eventpoll **pep) return -ENOMEM; mutex_init(&ep->mtx); + rwlock_init(&ep->lock); init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); - init_llist_head(&ep->rdllist); + INIT_LIST_HEAD(&ep->rdllist); ep->rbr = RB_ROOT_CACHED; + ep->ovflist = EP_UNACTIVE_PTR; ep->user = get_current_user(); refcount_set(&ep->refcount, 1); @@ -1202,10 +1239,93 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, #endif /* CONFIG_KCMP */ /* + * Adds a new entry to the tail of the list in a lockless way, i.e. + * multiple CPUs are allowed to call this function concurrently. + * + * Beware: it is necessary to prevent any other modifications of the + * existing list until all changes are completed, in other words + * concurrent list_add_tail_lockless() calls should be protected + * with a read lock, where write lock acts as a barrier which + * makes sure all list_add_tail_lockless() calls are fully + * completed. + * + * Also an element can be locklessly added to the list only in one + * direction i.e. either to the tail or to the head, otherwise + * concurrent access will corrupt the list. + * + * Return: %false if element has been already added to the list, %true + * otherwise. + */ +static inline bool list_add_tail_lockless(struct list_head *new, + struct list_head *head) +{ + struct list_head *prev; + + /* + * This is simple 'new->next = head' operation, but cmpxchg() + * is used in order to detect that same element has been just + * added to the list from another CPU: the winner observes + * new->next == new. + */ + if (!try_cmpxchg(&new->next, &new, head)) + return false; + + /* + * Initially ->next of a new element must be updated with the head + * (we are inserting to the tail) and only then pointers are atomically + * exchanged. XCHG guarantees memory ordering, thus ->next should be + * updated before pointers are actually swapped and pointers are + * swapped before prev->next is updated. + */ + + prev = xchg(&head->prev, new); + + /* + * It is safe to modify prev->next and new->prev, because a new element + * is added only to the tail and new->next is updated before XCHG. + */ + + prev->next = new; + new->prev = prev; + + return true; +} + +/* + * Chains a new epi entry to the tail of the ep->ovflist in a lockless way, + * i.e. multiple CPUs are allowed to call this function concurrently. + * + * Return: %false if epi element has been already chained, %true otherwise. + */ +static inline bool chain_epi_lockless(struct epitem *epi) +{ + struct eventpoll *ep = epi->ep; + + /* Fast preliminary check */ + if (epi->next != EP_UNACTIVE_PTR) + return false; + + /* Check that the same epi has not been just chained from another CPU */ + if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR) + return false; + + /* Atomically exchange tail */ + epi->next = xchg(&ep->ovflist, epi); + + return true; +} + +/* * This is the callback that is passed to the wait queue wakeup * mechanism. It is called by the stored file descriptors when they * have events to report. * + * This callback takes a read lock in order not to contend with concurrent + * events from another file descriptor, thus all modifications to ->rdllist + * or ->ovflist are lockless. Read lock is paired with the write lock from + * ep_start/done_scan(), which stops all list modifications and guarantees + * that lists state is seen correctly. + * * Another thing worth to mention is that ep_poll_callback() can be called * concurrently for the same @epi from different CPUs if poll table was inited * with several wait queues entries. Plural wakeup from different CPUs of a @@ -1215,11 +1335,15 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, */ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { + int pwake = 0; struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; __poll_t pollflags = key_to_poll(key); + unsigned long flags; int ewake = 0; + read_lock_irqsave(&ep->lock, flags); + ep_set_busy_poll_napi_id(epi); /* @@ -1229,7 +1353,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * until the next EPOLL_CTL_MOD will be issued. */ if (!(epi->event.events & ~EP_PRIVATE_BITS)) - goto out; + goto out_unlock; /* * Check the events coming with the callback. At this stage, not @@ -1238,10 +1362,22 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * test for "key" != NULL before the event match test. */ if (pollflags && !(pollflags & epi->event.events)) - goto out; + goto out_unlock; - ep_pm_stay_awake_rcu(epi); - epitem_ready(epi); + /* + * If we are transferring events to userspace, we can hold no locks + * (because we're accessing user memory, and because of linux f_op->poll() + * semantics). All the events that happen during that period of time are + * chained in ep->ovflist and requeued later on. + */ + if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { + if (chain_epi_lockless(epi)) + ep_pm_stay_awake_rcu(epi); + } else if (!ep_is_linked(epi)) { + /* In the usual case, add event to ready list. */ + if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) + ep_pm_stay_awake_rcu(epi); + } /* * Wake up ( if active ) both the eventpoll wait list and the ->poll() @@ -1270,9 +1406,15 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v wake_up(&ep->wq); } if (waitqueue_active(&ep->poll_wait)) + pwake++; + +out_unlock: + read_unlock_irqrestore(&ep->lock, flags); + + /* We have to call this outside the lock */ + if (pwake) ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE); -out: if (!(epi->event.events & EPOLLEXCLUSIVE)) ewake = 1; @@ -1517,6 +1659,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, if (is_file_epoll(tfile)) tep = tfile->private_data; + lockdep_assert_irqs_enabled(); + if (unlikely(percpu_counter_compare(&ep->user->epoll_watches, max_user_watches) >= 0)) return -ENOSPC; @@ -1528,10 +1672,11 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, } /* Item initialization follow here ... */ - init_llist_node(&epi->rdllink); + INIT_LIST_HEAD(&epi->rdllink); epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; + epi->next = EP_UNACTIVE_PTR; if (tep) mutex_lock_nested(&tep->mtx, 1); @@ -1598,13 +1743,16 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, return -ENOMEM; } + /* We have to drop the new item inside our item list to keep track of it */ + write_lock_irq(&ep->lock); + /* record NAPI ID of new item if present */ ep_set_busy_poll_napi_id(epi); /* If the file is already "ready" we drop it inside the ready list */ - if (revents) { + if (revents && !ep_is_linked(epi)) { + list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); - epitem_ready(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1613,6 +1761,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, pwake++; } + write_unlock_irq(&ep->lock); + /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(ep, NULL, 0); @@ -1627,8 +1777,11 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, static int ep_modify(struct eventpoll *ep, struct epitem *epi, const struct epoll_event *event) { + int pwake = 0; poll_table pt; + lockdep_assert_irqs_enabled(); + init_poll_funcptr(&pt, NULL); /* @@ -1672,16 +1825,24 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, * list, push it inside. */ if (ep_item_poll(epi, &pt, 1)) { - ep_pm_stay_awake(epi); - epitem_ready(epi); + write_lock_irq(&ep->lock); + if (!ep_is_linked(epi)) { + list_add_tail(&epi->rdllink, &ep->rdllist); + ep_pm_stay_awake(epi); - /* Notify waiting tasks that events are available */ - if (waitqueue_active(&ep->wq)) - wake_up(&ep->wq); - if (waitqueue_active(&ep->poll_wait)) - ep_poll_safewake(ep, NULL, 0); + /* Notify waiting tasks that events are available */ + if (waitqueue_active(&ep->wq)) + wake_up(&ep->wq); + if (waitqueue_active(&ep->poll_wait)) + pwake++; + } + write_unlock_irq(&ep->lock); } + /* We have to call this outside the lock */ + if (pwake) + ep_poll_safewake(ep, NULL, 0); + return 0; } @@ -1689,7 +1850,7 @@ static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, int maxevents) { struct epitem *epi, *tmp; - LLIST_HEAD(txlist); + LIST_HEAD(txlist); poll_table pt; int res = 0; @@ -1704,18 +1865,19 @@ static int ep_send_events(struct eventpoll *ep, init_poll_funcptr(&pt, NULL); mutex_lock(&ep->mtx); + ep_start_scan(ep, &txlist); - while (res < maxevents) { + /* + * We can loop without lock because we are passed a task private list. + * Items cannot vanish during the loop we are holding ep->mtx. + */ + list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { struct wakeup_source *ws; - struct llist_node *n; __poll_t revents; - n = llist_del_first(&ep->rdllist); - if (!n) + if (res >= maxevents) break; - epi = llist_entry(n, struct epitem, rdllink); - /* * Activate ep->ws before deactivating epi->ws to prevent * triggering auto-suspend here (in case we reactive epi->ws @@ -1732,30 +1894,21 @@ static int ep_send_events(struct eventpoll *ep, __pm_relax(ws); } + list_del_init(&epi->rdllink); + /* * If the event mask intersect the caller-requested one, * deliver the event to userspace. Again, we are holding ep->mtx, * so no operations coming from userspace can change the item. */ revents = ep_item_poll(epi, &pt, 1); - if (!revents) { - init_llist_node(n); - - /* - * Just in case epi becomes ready after ep_item_poll() above, but before - * init_llist_node(). Make sure to add it to the ready list, otherwise an - * event may be lost. - */ - if (unlikely(ep_item_poll(epi, &pt, 1))) { - ep_pm_stay_awake(epi); - epitem_ready(epi); - } + if (!revents) continue; - } events = epoll_put_uevent(revents, epi->event.data, events); if (!events) { - llist_add(&epi->rdllink, &ep->rdllist); + list_add(&epi->rdllink, &txlist); + ep_pm_stay_awake(epi); if (!res) res = -EFAULT; break; @@ -1763,31 +1916,25 @@ static int ep_send_events(struct eventpoll *ep, res++; if (epi->event.events & EPOLLONESHOT) epi->event.events &= EP_PRIVATE_BITS; - __llist_add(n, &txlist); - } - - llist_for_each_entry_safe(epi, tmp, txlist.first, rdllink) { - init_llist_node(&epi->rdllink); - - if (!(epi->event.events & EPOLLET)) { + else if (!(epi->event.events & EPOLLET)) { /* - * If this file has been added with Level Trigger mode, we need to insert - * back inside the ready list, so that the next call to epoll_wait() will - * check again the events availability. + * If this file has been added with Level + * Trigger mode, we need to insert back inside + * the ready list, so that the next call to + * epoll_wait() will check again the events + * availability. At this point, no one can insert + * into ep->rdllist besides us. The epoll_ctl() + * callers are locked out by + * ep_send_events() holding "mtx" and the + * poll callback will queue them in ep->ovflist. */ + list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); - epitem_ready(epi); } } - - __pm_relax(ep->ws); + ep_done_scan(ep, &txlist); mutex_unlock(&ep->mtx); - if (!llist_empty(&ep->rdllist)) { - if (waitqueue_active(&ep->wq)) - wake_up(&ep->wq); - } - return res; } @@ -1880,6 +2027,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, wait_queue_entry_t wait; ktime_t expires, *to = NULL; + lockdep_assert_irqs_enabled(); + if (timeout && (timeout->tv_sec | timeout->tv_nsec)) { slack = select_estimate_accuracy(timeout); to = &expires; @@ -1939,15 +2088,54 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, init_wait(&wait); wait.func = ep_autoremove_wake_function; - prepare_to_wait_exclusive(&ep->wq, &wait, TASK_INTERRUPTIBLE); + write_lock_irq(&ep->lock); + /* + * Barrierless variant, waitqueue_active() is called under + * the same lock on wakeup ep_poll_callback() side, so it + * is safe to avoid an explicit barrier. + */ + __set_current_state(TASK_INTERRUPTIBLE); + + /* + * Do the final check under the lock. ep_start/done_scan() + * plays with two lists (->rdllist and ->ovflist) and there + * is always a race when both lists are empty for short + * period of time although events are pending, so lock is + * important. + */ + eavail = ep_events_available(ep); + if (!eavail) + __add_wait_queue_exclusive(&ep->wq, &wait); + + write_unlock_irq(&ep->lock); - if (!ep_events_available(ep)) + if (!eavail) timed_out = !ep_schedule_timeout(to) || !schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS); + __set_current_state(TASK_RUNNING); - finish_wait(&ep->wq, &wait); - eavail = ep_events_available(ep); + /* + * We were woken up, thus go and try to harvest some events. + * If timed out and still on the wait queue, recheck eavail + * carefully under lock, below. + */ + eavail = 1; + + if (!list_empty_careful(&wait.entry)) { + write_lock_irq(&ep->lock); + /* + * If the thread timed out and is not on the wait queue, + * it means that the thread was woken up after its + * timeout expired before it could reacquire the lock. + * Thus, when wait.entry is empty, it needs to harvest + * events. + */ + if (timed_out) + eavail = list_empty(&wait.entry); + __remove_wait_queue(&ep->wq, &wait); + write_unlock_irq(&ep->lock); + } } } diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 7ed858937d45..ea5c1334a214 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -667,9 +667,9 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) } if (sbi->options.utf8) - sb->s_d_op = &exfat_utf8_dentry_ops; + set_default_d_op(sb, &exfat_utf8_dentry_ops); else - sb->s_d_op = &exfat_dentry_ops; + set_default_d_op(sb, &exfat_dentry_ops); root_inode = new_inode(sb); if (!root_inode) { diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 23e9b9371ec3..0b920ee40a7f 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -646,7 +646,7 @@ static const struct inode_operations msdos_dir_inode_operations = { static void setup(struct super_block *sb) { MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations; - sb->s_d_op = &msdos_dentry_operations; + set_default_d_op(sb, &msdos_dentry_operations); sb->s_flags |= SB_NOATIME; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index dd910edd2404..5dbc4cbb8fce 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1187,9 +1187,9 @@ static void setup(struct super_block *sb) { MSDOS_SB(sb)->dir_ops = &vfat_dir_inode_operations; if (MSDOS_SB(sb)->options.name_check != 's') - sb->s_d_op = &vfat_ci_dentry_ops; + set_default_d_op(sb, &vfat_ci_dentry_ops); else - sb->s_d_op = &vfat_dentry_ops; + set_default_d_op(sb, &vfat_dentry_ops); } static int vfat_fill_super(struct super_block *sb, struct fs_context *fc) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 2a730d88cc3b..bb407705603c 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/fs_context.h> +#include <linux/namei.h> #define FUSE_CTL_SUPER_MAGIC 0x65735543 @@ -212,7 +213,6 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, struct dentry *dentry; struct inode *inode; - BUG_ON(fc->ctl_ndents >= FUSE_CTL_NUM_DENTRIES); dentry = d_alloc_name(parent, name); if (!dentry) return NULL; @@ -236,8 +236,6 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, inode->i_private = fc; d_add(dentry, inode); - fc->ctl_dentry[fc->ctl_ndents++] = dentry; - return dentry; } @@ -280,27 +278,29 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) return -ENOMEM; } +static void remove_one(struct dentry *dentry) +{ + d_inode(dentry)->i_private = NULL; +} + /* * Remove a connection from the control filesystem (if it exists). * Caller must hold fuse_mutex */ void fuse_ctl_remove_conn(struct fuse_conn *fc) { - int i; + struct dentry *dentry; + char name[32]; if (!fuse_control_sb || fc->no_control) return; - for (i = fc->ctl_ndents - 1; i >= 0; i--) { - struct dentry *dentry = fc->ctl_dentry[i]; - d_inode(dentry)->i_private = NULL; - if (!i) { - /* Get rid of submounts: */ - d_invalidate(dentry); - } - dput(dentry); + sprintf(name, "%u", fc->dev); + dentry = lookup_noperm_positive_unlocked(&QSTR(name), fuse_control_sb->s_root); + if (!IS_ERR(dentry)) { + simple_recursive_removal(dentry, remove_one); + dput(dentry); // paired with lookup_noperm_positive_unlocked() } - drop_nlink(d_inode(fuse_control_sb->s_root)); } static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fsc) @@ -346,12 +346,8 @@ static int fuse_ctl_init_fs_context(struct fs_context *fsc) static void fuse_ctl_kill_sb(struct super_block *sb) { - struct fuse_conn *fc; - mutex_lock(&fuse_mutex); fuse_control_sb = NULL; - list_for_each_entry(fc, &fuse_conn_list, entry) - fc->ctl_ndents = 0; mutex_unlock(&fuse_mutex); kill_litter_super(sb); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 45b4c3cc1396..2d817d7cab26 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -338,13 +338,6 @@ const struct dentry_operations fuse_dentry_operations = { .d_automount = fuse_dentry_automount, }; -const struct dentry_operations fuse_root_dentry_operations = { -#if BITS_PER_LONG < 64 - .d_init = fuse_dentry_init, - .d_release = fuse_dentry_release, -#endif -}; - int fuse_valid_type(int m) { return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b54f4f57789f..989f37abe138 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -913,12 +913,6 @@ struct fuse_conn { /** Device ID from the root super block */ dev_t dev; - /** Dentries in the control filesystem */ - struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES]; - - /** number of dentries used in the above array */ - int ctl_ndents; - /** Key for lock owner ID scrambling */ u32 scramble_key[4]; @@ -1109,7 +1103,6 @@ static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket) extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; -extern const struct dentry_operations fuse_root_dentry_operations; /** * Get a filled in inode diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9572bdef49ee..ecb869e895ab 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1719,7 +1719,7 @@ static int fuse_fill_super_submount(struct super_block *sb, fi = get_fuse_inode(root); fi->nlookup--; - sb->s_d_op = &fuse_dentry_operations; + set_default_d_op(sb, &fuse_dentry_operations); sb->s_root = d_make_root(root); if (!sb->s_root) return -ENOMEM; @@ -1854,12 +1854,10 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) err = -ENOMEM; root = fuse_get_root_inode(sb, ctx->rootmode); - sb->s_d_op = &fuse_root_dentry_operations; + set_default_d_op(sb, &fuse_dentry_operations); root_dentry = d_make_root(root); if (!root_dentry) goto err_dev_free; - /* Root dentry doesn't have .d_revalidate */ - sb->s_d_op = &fuse_dentry_operations; mutex_lock(&fuse_mutex); err = -EINVAL; diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index dbf1aede744c..509e2f0d97e7 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -60,6 +60,7 @@ #include <linux/crc32.h> #include <linux/vmalloc.h> #include <linux/bio.h> +#include <linux/log2.h> #include "gfs2.h" #include "incore.h" @@ -912,7 +913,6 @@ static int dir_make_exhash(struct inode *inode) struct qstr args; struct buffer_head *bh, *dibh; struct gfs2_leaf *leaf; - int y; u32 x; __be64 *lp; u64 bn; @@ -979,9 +979,7 @@ static int dir_make_exhash(struct inode *inode) i_size_write(inode, sdp->sd_sb.sb_bsize / 2); gfs2_add_inode_blocks(&dip->i_inode, 1); dip->i_diskflags |= GFS2_DIF_EXHASH; - - for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; - dip->i_depth = y; + dip->i_depth = ilog2(sdp->sd_hash_ptrs); gfs2_dinode_out(dip, dibh->b_data); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ba25b884169e..b6fd1cb17de7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -590,35 +590,31 @@ static void gfs2_demote_wake(struct gfs2_glock *gl) static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) { const struct gfs2_glock_operations *glops = gl->gl_ops; - struct gfs2_holder *gh; - unsigned state = ret & LM_OUT_ST_MASK; - trace_gfs2_glock_state_change(gl, state); - state_change(gl, state); - gh = find_first_waiter(gl); + if (!(ret & ~LM_OUT_ST_MASK)) { + unsigned state = ret & LM_OUT_ST_MASK; + + trace_gfs2_glock_state_change(gl, state); + state_change(gl, state); + } + /* Demote to UN request arrived during demote to SH or DF */ if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && - state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED) + gl->gl_state != LM_ST_UNLOCKED && + gl->gl_demote_state == LM_ST_UNLOCKED) gl->gl_target = LM_ST_UNLOCKED; /* Check for state != intended state */ - if (unlikely(state != gl->gl_target)) { - if (gh && (ret & LM_OUT_CANCELED)) - gfs2_holder_wake(gh); + if (unlikely(gl->gl_state != gl->gl_target)) { + struct gfs2_holder *gh = find_first_waiter(gl); + if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { if (ret & LM_OUT_CANCELED) { list_del_init(&gh->gh_list); trace_gfs2_glock_queue(gh, 0); + gfs2_holder_wake(gh); gl->gl_target = gl->gl_state; - gh = find_first_waiter(gl); - if (gh) { - gl->gl_target = gh->gh_state; - if (do_promote(gl)) - goto out; - do_xmote(gl, gh, gl->gl_target); - return; - } goto out; } /* Some error or failed "try lock" - report it */ @@ -629,7 +625,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) goto out; } } - switch(state) { + switch(gl->gl_state) { /* Unlocked due to conversion deadlock, try again */ case LM_ST_UNLOCKED: do_xmote(gl, gh, gl->gl_target); @@ -640,8 +636,10 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) do_xmote(gl, gh, LM_ST_UNLOCKED); break; default: /* Everything else */ - fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n", - gl->gl_target, state); + fs_err(gl->gl_name.ln_sbd, + "glock %u:%llu requested=%u ret=%u\n", + gl->gl_name.ln_type, gl->gl_name.ln_number, + gl->gl_req, ret); GLOCK_BUG_ON(gl, 1); } return; @@ -650,7 +648,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) /* Fast path - we got what we asked for */ if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) gfs2_demote_wake(gl); - if (state != LM_ST_UNLOCKED) { + if (gl->gl_state != LM_ST_UNLOCKED) { if (glops->go_xmote_bh) { int rv; @@ -802,7 +800,8 @@ skip_inval: * We skip telling dlm to do the locking, so we won't get a * reply that would otherwise clear GLF_LOCK. So we clear it here. */ - clear_bit(GLF_LOCK, &gl->gl_flags); + if (!test_bit(GLF_CANCELING, &gl->gl_flags)) + clear_bit(GLF_LOCK, &gl->gl_flags); clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); return; diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index c171f745650f..9339a3bff6ee 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -92,12 +92,22 @@ enum { * LM_OUT_ST_MASK * Masks the lower two bits of lock state in the returned value. * + * LM_OUT_TRY_AGAIN + * The trylock request failed. + * + * LM_OUT_DEADLOCK + * The lock request failed because it would deadlock. + * * LM_OUT_CANCELED * The lock request was canceled. * + * LM_OUT_ERROR + * The lock request timed out or failed. */ #define LM_OUT_ST_MASK 0x00000003 +#define LM_OUT_TRY_AGAIN 0x00000020 +#define LM_OUT_DEADLOCK 0x00000010 #define LM_OUT_CANCELED 0x00000008 #define LM_OUT_ERROR 0x00000004 diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index cebd66b22694..fe0faad4892f 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -11,6 +11,7 @@ #include <linux/bio.h> #include <linux/posix_acl.h> #include <linux/security.h> +#include <linux/log2.h> #include "gfs2.h" #include "incore.h" @@ -450,6 +451,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) gfs2_consist_inode(ip); return -EIO; } + if ((ip->i_diskflags & GFS2_DIF_EXHASH) && + depth < ilog2(sdp->sd_hash_ptrs)) { + gfs2_consist_inode(ip); + return -EIO; + } ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 0a41c4e76b32..d4ad82f47eee 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -375,7 +375,6 @@ struct gfs2_glock { enum { GIF_QD_LOCKED = 1, - GIF_ALLOC_FAILED = 2, GIF_SW_PAGED = 3, GIF_FREE_VFS_INODE = 5, GIF_GLOP_PENDING = 6, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 187d789a8f1e..8760e7e20c9d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -444,11 +444,9 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip) struct inode *inode = &ip->i_inode; struct gfs2_glock *gl = ip->i_gl; - if (unlikely(!gl)) { - /* This can only happen during incomplete inode creation. */ - BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + /* This can only happen during incomplete inode creation. */ + if (unlikely(!gl)) return; - } truncate_inode_pages(gfs2_glock2aspace(gl), 0); truncate_inode_pages(&inode->i_data, 0); @@ -902,7 +900,6 @@ fail_gunlock3: fail_gunlock2: gfs2_glock_put(io_gl); fail_dealloc_inode: - set_bit(GIF_ALLOC_FAILED, &ip->i_flags); dealloc_error = 0; if (ip->i_eattr) dealloc_error = gfs2_ea_dealloc(ip, xattr_initialized); diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index eafe123617e6..811a0bd3792c 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -44,17 +44,17 @@ static inline int gfs2_is_dir(const struct gfs2_inode *ip) static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks) { - inode->i_blocks = blocks << (inode->i_blkbits - 9); + inode->i_blocks = blocks << (inode->i_blkbits - SECTOR_SHIFT); } static inline u64 gfs2_get_inode_blocks(const struct inode *inode) { - return inode->i_blocks >> (inode->i_blkbits - 9); + return inode->i_blocks >> (inode->i_blkbits - SECTOR_SHIFT); } static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change) { - change <<= inode->i_blkbits - 9; + change <<= inode->i_blkbits - SECTOR_SHIFT; gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks >= -change)); inode->i_blocks += change; } diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 7cb9d216d8bb..cee5d199d2d8 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -119,7 +119,7 @@ static inline void gfs2_update_request_times(struct gfs2_glock *gl) static void gdlm_ast(void *arg) { struct gfs2_glock *gl = arg; - unsigned ret = gl->gl_state; + unsigned ret; /* If the glock is dead, we only react to a dlm_unlock() reply. */ if (__lockref_is_dead(&gl->gl_lockref) && @@ -139,13 +139,16 @@ static void gdlm_ast(void *arg) gfs2_glock_free(gl); return; case -DLM_ECANCEL: /* Cancel while getting lock */ - ret |= LM_OUT_CANCELED; + ret = LM_OUT_CANCELED; goto out; case -EAGAIN: /* Try lock fails */ + ret = LM_OUT_TRY_AGAIN; + goto out; case -EDEADLK: /* Deadlock detected */ + ret = LM_OUT_DEADLOCK; goto out; case -ETIMEDOUT: /* Canceled due to timeout */ - ret |= LM_OUT_ERROR; + ret = LM_OUT_ERROR; goto out; case 0: /* Success */ break; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 9dc8885c95d0..7fb11ff71b5a 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -103,6 +103,7 @@ const struct address_space_operations gfs2_meta_aops = { .invalidate_folio = block_invalidate_folio, .writepages = gfs2_aspace_writepages, .release_folio = gfs2_release_folio, + .migrate_folio = buffer_migrate_folio_norefs, }; const struct address_space_operations gfs2_rgrp_aops = { @@ -110,6 +111,7 @@ const struct address_space_operations gfs2_rgrp_aops = { .invalidate_folio = block_invalidate_folio, .writepages = gfs2_aspace_writepages, .release_folio = gfs2_release_folio, + .migrate_folio = buffer_migrate_folio_norefs, }; /** @@ -228,7 +230,7 @@ static void gfs2_submit_bhs(blk_opf_t opf, struct buffer_head *bhs[], int num) struct bio *bio; bio = bio_alloc(bh->b_bdev, num, opf, GFP_NOIO); - bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> SECTOR_SHIFT); while (num > 0) { bh = *bhs; if (!bio_add_folio(bio, bh->b_folio, bh->b_size, bh_offset(bh))) { @@ -443,11 +445,9 @@ void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) struct buffer_head *bh; int ty; - if (!ip->i_gl) { - /* This can only happen during incomplete inode creation. */ - BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + /* This can only happen during incomplete inode creation. */ + if (!ip->i_gl) return; - } gfs2_ail1_wipe(sdp, bstart, blen); while (blen) { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 85c491fcf1a3..efe99b732551 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -163,7 +163,7 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) return -EINVAL; } - if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE || + if (sb->sb_bsize < SECTOR_SIZE || sb->sb_bsize > PAGE_SIZE || (sb->sb_bsize & (sb->sb_bsize - 1))) { pr_warn("Invalid block size\n"); return -EINVAL; @@ -224,8 +224,8 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) if (unlikely(!sb)) return -ENOMEM; err = bdev_rw_virt(sdp->sd_vfs->s_bdev, - sector * (sdp->sd_vfs->s_blocksize >> 9), sb, PAGE_SIZE, - REQ_OP_READ | REQ_META); + sector << (sdp->sd_vfs->s_blocksize_bits - SECTOR_SHIFT), + sb, PAGE_SIZE, REQ_OP_READ | REQ_META); if (err) { pr_warn("error %d reading superblock\n", err); kfree(sb); @@ -257,7 +257,7 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) return error; } - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 9; + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - SECTOR_SHIFT; sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift); sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) / sizeof(u64); @@ -1145,7 +1145,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; - sb->s_d_op = &gfs2_dops; + set_default_d_op(sb, &gfs2_dops); sb->s_export_op = &gfs2_export_ops; sb->s_qcop = &gfs2_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; @@ -1155,12 +1155,12 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) /* Set up the buffer cache and fill in some fake block size values to allow us to read-in the on-disk superblock. */ - sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, 512); + sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, SECTOR_SIZE); error = -EINVAL; if (!sdp->sd_sb.sb_bsize) goto fail_free; sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits; - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 9; + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - SECTOR_SHIFT; sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift); sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 7c518c4ff638..b42e2110084b 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -487,11 +487,9 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) int need_endtrans = 0; int ret; - if (unlikely(!ip->i_gl)) { - /* This can only happen during incomplete inode creation. */ - BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + /* This can only happen during incomplete inode creation. */ + if (unlikely(!ip->i_gl)) return; - } if (gfs2_withdrawing_or_withdrawn(sdp)) return; diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index d5a1e63fa257..24864a66074b 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -232,32 +232,23 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) */ ret = gfs2_glock_nq(&sdp->sd_live_gh); + gfs2_glock_put(live_gl); /* drop extra reference we acquired */ + clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + /* * If we actually got the "live" lock in EX mode, there are no other - * nodes available to replay our journal. So we try to replay it - * ourselves. We hold the "live" glock to prevent other mounters - * during recovery, then just dequeue it and reacquire it in our - * normal SH mode. Just in case the problem that caused us to - * withdraw prevents us from recovering our journal (e.g. io errors - * and such) we still check if the journal is clean before proceeding - * but we may wait forever until another mounter does the recovery. + * nodes available to replay our journal. */ if (ret == 0) { - fs_warn(sdp, "No other mounters found. Trying to recover our " - "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid); - if (gfs2_recover_journal(sdp->sd_jdesc, 1)) - fs_warn(sdp, "Unable to recover our journal jid %d.\n", - sdp->sd_lockstruct.ls_jid); - gfs2_glock_dq_wait(&sdp->sd_live_gh); - gfs2_holder_reinit(LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT | GL_NOPID, - &sdp->sd_live_gh); - gfs2_glock_nq(&sdp->sd_live_gh); + fs_warn(sdp, "No other mounters found.\n"); + /* + * We are about to release the lockspace. By keeping live_gl + * locked here, we ensure that the next mounter coming along + * will be a "first" mounter which will perform recovery. + */ + goto skip_recovery; } - gfs2_glock_put(live_gl); /* drop extra reference we acquired */ - clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); - /* * At this point our journal is evicted, so we need to get a new inode * for it. Once done, we need to call gfs2_find_jhead which diff --git a/fs/hfs/super.c b/fs/hfs/super.c index fe09c2093a93..388a318297ec 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -365,7 +365,7 @@ static int hfs_fill_super(struct super_block *sb, struct fs_context *fc) if (!root_inode) goto bail_no_root; - sb->s_d_op = &hfs_dentry_operations; + set_default_d_op(sb, &hfs_dentry_operations); res = -ENOMEM; sb->s_root = d_make_root(root_inode); if (!sb->s_root) diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 948b8aaee33e..0caf7aa1c249 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -508,7 +508,7 @@ static int hfsplus_fill_super(struct super_block *sb, struct fs_context *fc) goto out_put_alloc_file; } - sb->s_d_op = &hfsplus_dentry_operations; + set_default_d_op(sb, &hfsplus_dentry_operations); sb->s_root = d_make_root(root); if (!sb->s_root) { err = -ENOMEM; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 702c41317589..1f512f8ea757 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -933,7 +933,7 @@ static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_blocksize_bits = 10; sb->s_magic = HOSTFS_SUPER_MAGIC; sb->s_op = &hostfs_sbops; - sb->s_d_op = &simple_dentry_operations; + sb->s_d_flags = DCACHE_DONTCACHE; sb->s_maxbytes = MAX_LFS_FILESIZE; err = super_setup_bdi(sb); if (err) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 27567920abe4..42b779b4d87f 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -554,7 +554,7 @@ static int hpfs_fill_super(struct super_block *s, struct fs_context *fc) /* Fill superblock stuff */ s->s_magic = HPFS_SUPER_MAGIC; s->s_op = &hpfs_sops; - s->s_d_op = &hpfs_dentry_operations; + set_default_d_op(s, &hpfs_dentry_operations); s->s_time_min = local_to_gmt(s, 0); s->s_time_max = local_to_gmt(s, U32_MAX); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e4de5425838d..6e0ade365a33 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1433,6 +1433,7 @@ hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_blocksize_bits = huge_page_shift(ctx->hstate); sb->s_magic = HUGETLBFS_MAGIC; sb->s_op = &hugetlbfs_ops; + sb->s_d_flags = DCACHE_DONTCACHE; sb->s_time_gran = 1; /* diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 3729391a18f3..fb4519158f3a 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -71,6 +71,9 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off, unsigned long flags; bool uptodate = true; + if (folio_test_uptodate(folio)) + return; + if (ifs) { spin_lock_irqsave(&ifs->state_lock, flags); uptodate = ifs_set_range_uptodate(folio, ifs, off, len); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d5da9817df9b..6f0e6b19383c 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -939,7 +939,7 @@ root_found: sbi->s_check = opt->check; if (table) - s->s_d_op = &isofs_dentry_ops[table - 1]; + set_default_d_op(s, &isofs_dentry_ops[table - 1]); /* get the root dentry */ s->s_root = d_make_root(inode); @@ -1440,9 +1440,16 @@ static int isofs_read_inode(struct inode *inode, int relocated) inode->i_op = &page_symlink_inode_operations; inode_nohighmem(inode); inode->i_data.a_ops = &isofs_symlink_aops; - } else + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { /* XXX - parse_rock_ridge_inode() had already set i_rdev. */ init_special_inode(inode, inode->i_mode, inode->i_rdev); + } else { + printk(KERN_DEBUG "ISOFS: Invalid file type 0%04o for inode %lu.\n", + inode->i_mode, inode->i_ino); + ret = -EIO; + goto fail; + } ret = 0; out: diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 10368c188c5e..3cfb86c5a36e 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -542,7 +542,7 @@ static int jfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_magic = JFS_SUPER_MAGIC; if (sbi->mntflag & JFS_OS2) - sb->s_d_op = &jfs_ci_dentry_operations; + set_default_d_op(sb, &jfs_ci_dentry_operations); inode = jfs_iget(sb, ROOT_I); if (IS_ERR(inode)) { diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index c1719b5778a1..e384a69fbece 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -318,7 +318,7 @@ static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *k return -ENOMEM; } sb->s_root = root; - sb->s_d_op = &kernfs_dops; + set_default_d_op(sb, &kernfs_dops); return 0; } diff --git a/fs/libfs.c b/fs/libfs.c index 6f487fc6be34..89e47af931f1 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -62,11 +62,6 @@ int always_delete_dentry(const struct dentry *dentry) } EXPORT_SYMBOL(always_delete_dentry); -const struct dentry_operations simple_dentry_operations = { - .d_delete = always_delete_dentry, -}; -EXPORT_SYMBOL(simple_dentry_operations); - /* * Lookup the data. This is trivial - if the dentry didn't already * exist, we know it is negative. Set d_op to delete negative dentries. @@ -75,9 +70,11 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned { if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - if (!dentry->d_sb->s_d_op) - d_set_d_op(dentry, &simple_dentry_operations); - + if (!dentry->d_op && !(dentry->d_flags & DCACHE_DONTCACHE)) { + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_DONTCACHE; + spin_unlock(&dentry->d_lock); + } if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) return NULL; @@ -605,15 +602,16 @@ struct dentry *find_next_child(struct dentry *parent, struct dentry *prev) } EXPORT_SYMBOL(find_next_child); -void simple_recursive_removal(struct dentry *dentry, - void (*callback)(struct dentry *)) +static void __simple_recursive_removal(struct dentry *dentry, + void (*callback)(struct dentry *), + bool locked) { struct dentry *this = dget(dentry); while (true) { struct dentry *victim = NULL, *child; struct inode *inode = this->d_inode; - inode_lock(inode); + inode_lock_nested(inode, I_MUTEX_CHILD); if (d_is_dir(this)) inode->i_flags |= S_DEAD; while ((child = find_next_child(this, victim)) == NULL) { @@ -625,15 +623,13 @@ void simple_recursive_removal(struct dentry *dentry, victim = this; this = this->d_parent; inode = this->d_inode; - inode_lock(inode); + if (!locked || victim != dentry) + inode_lock_nested(inode, I_MUTEX_CHILD); if (simple_positive(victim)) { d_invalidate(victim); // avoid lost mounts - if (d_is_dir(victim)) - fsnotify_rmdir(inode, victim); - else - fsnotify_unlink(inode, victim); if (callback) callback(victim); + fsnotify_delete(inode, d_inode(victim), victim); dput(victim); // unpin it } if (victim == dentry) { @@ -641,7 +637,8 @@ void simple_recursive_removal(struct dentry *dentry, inode_set_ctime_current(inode)); if (d_is_dir(dentry)) drop_nlink(inode); - inode_unlock(inode); + if (!locked) + inode_unlock(inode); dput(dentry); return; } @@ -650,8 +647,22 @@ void simple_recursive_removal(struct dentry *dentry, this = child; } } + +void simple_recursive_removal(struct dentry *dentry, + void (*callback)(struct dentry *)) +{ + return __simple_recursive_removal(dentry, callback, false); +} EXPORT_SYMBOL(simple_recursive_removal); +/* caller holds parent directory with I_MUTEX_PARENT */ +void locked_recursive_removal(struct dentry *dentry, + void (*callback)(struct dentry *)) +{ + return __simple_recursive_removal(dentry, callback, true); +} +EXPORT_SYMBOL(locked_recursive_removal); + static const struct super_operations simple_super_operations = { .statfs = simple_statfs, }; @@ -684,7 +695,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc) s->s_root = d_make_root(root); if (!s->s_root) return -ENOMEM; - s->s_d_op = ctx->dops; + set_default_d_op(s, ctx->dops); return 0; } @@ -1948,22 +1959,22 @@ static const struct dentry_operations generic_encrypted_dentry_ops = { * @sb: superblock to be configured * * Filesystems supporting casefolding and/or fscrypt can call this - * helper at mount-time to configure sb->s_d_op to best set of dentry - * operations required for the enabled features. The helper must be - * called after these have been configured, but before the root dentry - * is created. + * helper at mount-time to configure default dentry_operations to the + * best set of dentry operations required for the enabled features. + * The helper must be called after these have been configured, but + * before the root dentry is created. */ void generic_set_sb_d_ops(struct super_block *sb) { #if IS_ENABLED(CONFIG_UNICODE) if (sb->s_encoding) { - sb->s_d_op = &generic_ci_dentry_ops; + set_default_d_op(sb, &generic_ci_dentry_ops); return; } #endif #ifdef CONFIG_FS_ENCRYPTION if (sb->s_cop) { - sb->s_d_op = &generic_encrypted_dentry_ops; + set_default_d_op(sb, &generic_encrypted_dentry_ops); return; } #endif @@ -2261,3 +2272,28 @@ void stashed_dentry_prune(struct dentry *dentry) */ cmpxchg(stashed, dentry, NULL); } + +/* parent must be held exclusive */ +struct dentry *simple_start_creating(struct dentry *parent, const char *name) +{ + struct dentry *dentry; + struct inode *dir = d_inode(parent); + + inode_lock(dir); + if (unlikely(IS_DEADDIR(dir))) { + inode_unlock(dir); + return ERR_PTR(-ENOENT); + } + dentry = lookup_noperm(&QSTR(name), parent); + if (IS_ERR(dentry)) { + inode_unlock(dir); + return dentry; + } + if (dentry->d_inode) { + dput(dentry); + inode_unlock(dir); + return ERR_PTR(-EEXIST); + } + return dentry; +} +EXPORT_SYMBOL(simple_start_creating); diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c index 5bbe906a551d..8097bc069c1d 100644 --- a/fs/netfs/read_pgpriv2.c +++ b/fs/netfs/read_pgpriv2.c @@ -110,6 +110,8 @@ static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache( if (!creq->io_streams[1].avail) goto cancel_put; + __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &creq->flags); + trace_netfs_copy2cache(rreq, creq); trace_netfs_write(creq, netfs_write_trace_copy_to_cache); netfs_stat(&netfs_n_wh_copy_to_cache); rreq->copy_to_cache = creq; @@ -154,6 +156,9 @@ void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq) netfs_issue_write(creq, &creq->io_streams[1]); smp_wmb(); /* Write lists before ALL_QUEUED. */ set_bit(NETFS_RREQ_ALL_QUEUED, &creq->flags); + trace_netfs_rreq(rreq, netfs_rreq_trace_end_copy_to_cache); + if (list_empty_careful(&creq->io_streams[1].subrequests)) + netfs_wake_collector(creq); netfs_put_request(creq, netfs_rreq_trace_put_return); creq->copy_to_cache = NULL; diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index d8d50a88de04..d526f5ba7887 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c @@ -141,24 +141,18 @@ static const struct rpc_pipe_ops bl_upcall_ops = { .destroy_msg = bl_pipe_destroy_msg, }; -static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, +static int nfs4blocklayout_register_sb(struct super_block *sb, struct rpc_pipe *pipe) { - struct dentry *dir, *dentry; + struct dentry *dir; + int err; dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); if (dir == NULL) - return ERR_PTR(-ENOENT); - dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); + return -ENOENT; + err = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); dput(dir); - return dentry; -} - -static void nfs4blocklayout_unregister_sb(struct super_block *sb, - struct rpc_pipe *pipe) -{ - if (pipe->dentry) - rpc_unlink(pipe->dentry); + return err; } static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, @@ -167,7 +161,6 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, struct super_block *sb = ptr; struct net *net = sb->s_fs_info; struct nfs_net *nn = net_generic(net, nfs_net_id); - struct dentry *dentry; int ret = 0; if (!try_module_get(THIS_MODULE)) @@ -180,16 +173,10 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, switch (event) { case RPC_PIPEFS_MOUNT: - dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); - if (IS_ERR(dentry)) { - ret = PTR_ERR(dentry); - break; - } - nn->bl_device_pipe->dentry = dentry; + ret = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); break; case RPC_PIPEFS_UMOUNT: - if (nn->bl_device_pipe->dentry) - nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); + rpc_unlink(nn->bl_device_pipe); break; default: ret = -ENOTSUPP; @@ -203,18 +190,17 @@ static struct notifier_block nfs4blocklayout_block = { .notifier_call = rpc_pipefs_event, }; -static struct dentry *nfs4blocklayout_register_net(struct net *net, - struct rpc_pipe *pipe) +static int nfs4blocklayout_register_net(struct net *net, struct rpc_pipe *pipe) { struct super_block *pipefs_sb; - struct dentry *dentry; + int ret; pipefs_sb = rpc_get_sb_net(net); if (!pipefs_sb) - return NULL; - dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); + return 0; + ret = nfs4blocklayout_register_sb(pipefs_sb, pipe); rpc_put_sb_net(net); - return dentry; + return ret; } static void nfs4blocklayout_unregister_net(struct net *net, @@ -224,7 +210,7 @@ static void nfs4blocklayout_unregister_net(struct net *net, pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { - nfs4blocklayout_unregister_sb(pipefs_sb, pipe); + rpc_unlink(pipe); rpc_put_sb_net(net); } } @@ -232,20 +218,17 @@ static void nfs4blocklayout_unregister_net(struct net *net, static int nfs4blocklayout_net_init(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); - struct dentry *dentry; + int err; mutex_init(&nn->bl_mutex); init_waitqueue_head(&nn->bl_wq); nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); if (IS_ERR(nn->bl_device_pipe)) return PTR_ERR(nn->bl_device_pipe); - dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); - if (IS_ERR(dentry)) { + err = nfs4blocklayout_register_net(net, nn->bl_device_pipe); + if (unlikely(err)) rpc_destroy_pipe_data(nn->bl_device_pipe); - return PTR_ERR(dentry); - } - nn->bl_device_pipe->dentry = dentry; - return 0; + return err; } static void nfs4blocklayout_net_exit(struct net *net) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 25a7c771cfd8..00932500fce4 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -424,26 +424,16 @@ static void nfs_idmap_pipe_destroy(struct dentry *dir, struct rpc_pipe_dir_object *pdo) { struct idmap *idmap = pdo->pdo_data; - struct rpc_pipe *pipe = idmap->idmap_pipe; - if (pipe->dentry) { - rpc_unlink(pipe->dentry); - pipe->dentry = NULL; - } + rpc_unlink(idmap->idmap_pipe); } static int nfs_idmap_pipe_create(struct dentry *dir, struct rpc_pipe_dir_object *pdo) { struct idmap *idmap = pdo->pdo_data; - struct rpc_pipe *pipe = idmap->idmap_pipe; - struct dentry *dentry; - dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - pipe->dentry = dentry; - return 0; + return rpc_mkpipe_dentry(dir, "idmap", idmap, idmap->idmap_pipe); } static const struct rpc_pipe_dir_object_ops nfs_idmap_pipe_dir_object_ops = { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 91b5503b6f74..72dee6f3050e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1183,7 +1183,7 @@ static int nfs_set_super(struct super_block *s, struct fs_context *fc) struct nfs_server *server = fc->s_fs_info; int ret; - s->s_d_op = server->nfs_client->rpc_ops->dentry_ops; + set_default_d_op(s, server->nfs_client->rpc_ops->dentry_ops); ret = set_anon_super(s, server); if (ret == 0) server->s_dev = s->s_dev; diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 08a20e5bcf7f..19078a043e85 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -178,11 +178,13 @@ nfsd4_block_proc_layoutcommit(struct inode *inode, { struct iomap *iomaps; int nr_iomaps; + __be32 nfserr; - nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, - lcp->lc_up_len, &iomaps, i_blocksize(inode)); - if (nr_iomaps < 0) - return nfserrno(nr_iomaps); + nfserr = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, + lcp->lc_up_len, &iomaps, &nr_iomaps, + i_blocksize(inode)); + if (nfserr != nfs_ok) + return nfserr; return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); } @@ -316,11 +318,13 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode, { struct iomap *iomaps; int nr_iomaps; + __be32 nfserr; - nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout, - lcp->lc_up_len, &iomaps, i_blocksize(inode)); - if (nr_iomaps < 0) - return nfserrno(nr_iomaps); + nfserr = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout, + lcp->lc_up_len, &iomaps, &nr_iomaps, + i_blocksize(inode)); + if (nfserr != nfs_ok) + return nfserr; return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); } diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index ce78f74715ee..bcf21fde9120 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -112,35 +112,46 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr, return 0; } -int +/** + * nfsd4_block_decode_layoutupdate - decode the block layout extent array + * @p: pointer to the xdr data + * @len: number of bytes to decode + * @iomapp: pointer to store the decoded extent array + * @nr_iomapsp: pointer to store the number of extents + * @block_size: alignment of extent offset and length + * + * This function decodes the opaque field of the layoutupdate4 structure + * in a layoutcommit request for the block layout driver. The field is + * actually an array of extents sent by the client. It also checks that + * the file offset, storage offset and length of each extent are aligned + * by @block_size. + * + * Return values: + * %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid + * %nfserr_bad_xdr: The encoded array in @p is invalid + * %nfserr_inval: An unaligned extent found + * %nfserr_delay: Failed to allocate memory for @iomapp + */ +__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, - u32 block_size) + int *nr_iomapsp, u32 block_size) { struct iomap *iomaps; u32 nr_iomaps, i; - if (len < sizeof(u32)) { - dprintk("%s: extent array too small: %u\n", __func__, len); - return -EINVAL; - } + if (len < sizeof(u32)) + return nfserr_bad_xdr; len -= sizeof(u32); - if (len % PNFS_BLOCK_EXTENT_SIZE) { - dprintk("%s: extent array invalid: %u\n", __func__, len); - return -EINVAL; - } + if (len % PNFS_BLOCK_EXTENT_SIZE) + return nfserr_bad_xdr; nr_iomaps = be32_to_cpup(p++); - if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) { - dprintk("%s: extent array size mismatch: %u/%u\n", - __func__, len, nr_iomaps); - return -EINVAL; - } + if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) + return nfserr_bad_xdr; iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); - if (!iomaps) { - dprintk("%s: failed to allocate extent array\n", __func__); - return -ENOMEM; - } + if (!iomaps) + return nfserr_delay; for (i = 0; i < nr_iomaps; i++) { struct pnfs_block_extent bex; @@ -150,26 +161,18 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, p = xdr_decode_hyper(p, &bex.foff); if (bex.foff & (block_size - 1)) { - dprintk("%s: unaligned offset 0x%llx\n", - __func__, bex.foff); goto fail; } p = xdr_decode_hyper(p, &bex.len); if (bex.len & (block_size - 1)) { - dprintk("%s: unaligned length 0x%llx\n", - __func__, bex.foff); goto fail; } p = xdr_decode_hyper(p, &bex.soff); if (bex.soff & (block_size - 1)) { - dprintk("%s: unaligned disk offset 0x%llx\n", - __func__, bex.soff); goto fail; } bex.es = be32_to_cpup(p++); if (bex.es != PNFS_BLOCK_READWRITE_DATA) { - dprintk("%s: incorrect extent state %d\n", - __func__, bex.es); goto fail; } @@ -178,59 +181,71 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, } *iomapp = iomaps; - return nr_iomaps; + *nr_iomapsp = nr_iomaps; + return nfs_ok; fail: kfree(iomaps); - return -EINVAL; + return nfserr_inval; } -int +/** + * nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array + * @p: pointer to the xdr data + * @len: number of bytes to decode + * @iomapp: pointer to store the decoded extent array + * @nr_iomapsp: pointer to store the number of extents + * @block_size: alignment of extent offset and length + * + * This function decodes the opaque field of the layoutupdate4 structure + * in a layoutcommit request for the scsi layout driver. The field is + * actually an array of extents sent by the client. It also checks that + * the offset and length of each extent are aligned by @block_size. + * + * Return values: + * %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid + * %nfserr_bad_xdr: The encoded array in @p is invalid + * %nfserr_inval: An unaligned extent found + * %nfserr_delay: Failed to allocate memory for @iomapp + */ +__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, - u32 block_size) + int *nr_iomapsp, u32 block_size) { struct iomap *iomaps; u32 nr_iomaps, expected, i; - if (len < sizeof(u32)) { - dprintk("%s: extent array too small: %u\n", __func__, len); - return -EINVAL; - } + if (len < sizeof(u32)) + return nfserr_bad_xdr; nr_iomaps = be32_to_cpup(p++); expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE; - if (len != expected) { - dprintk("%s: extent array size mismatch: %u/%u\n", - __func__, len, expected); - return -EINVAL; - } + if (len != expected) + return nfserr_bad_xdr; iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); - if (!iomaps) { - dprintk("%s: failed to allocate extent array\n", __func__); - return -ENOMEM; - } + if (!iomaps) + return nfserr_delay; for (i = 0; i < nr_iomaps; i++) { u64 val; p = xdr_decode_hyper(p, &val); if (val & (block_size - 1)) { - dprintk("%s: unaligned offset 0x%llx\n", __func__, val); goto fail; } iomaps[i].offset = val; p = xdr_decode_hyper(p, &val); if (val & (block_size - 1)) { - dprintk("%s: unaligned length 0x%llx\n", __func__, val); goto fail; } iomaps[i].length = val; } *iomapp = iomaps; - return nr_iomaps; + *nr_iomapsp = nr_iomaps; + return nfs_ok; fail: kfree(iomaps); - return -EINVAL; + return nfserr_inval; } diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h index 4e28ac8f1127..15b3569f3d9a 100644 --- a/fs/nfsd/blocklayoutxdr.h +++ b/fs/nfsd/blocklayoutxdr.h @@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr, const struct nfsd4_getdeviceinfo *gdp); __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, const struct nfsd4_layoutget *lgp); -int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, - u32 block_size); -int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, - u32 block_size); +__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, + struct iomap **iomapp, int *nr_iomapsp, u32 block_size); +__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, + struct iomap **iomapp, int *nr_iomapsp, u32 block_size); #endif /* _NFSD_BLOCKLAYOUTXDR_H */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 88ae410b4113..cadfc2bae60e 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -82,8 +82,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) int len; struct auth_domain *dom = NULL; int err; - int fsidtype; - char *ep; + u8 fsidtype; struct svc_expkey key; struct svc_expkey *ek = NULL; @@ -109,10 +108,9 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) err = -EINVAL; if (qword_get(&mesg, buf, PAGE_SIZE) <= 0) goto out; - fsidtype = simple_strtoul(buf, &ep, 10); - if (*ep) + if (kstrtou8(buf, 10, &fsidtype)) goto out; - dprintk("found fsidtype %d\n", fsidtype); + dprintk("found fsidtype %u\n", fsidtype); if (key_len(fsidtype)==0) /* invalid type */ goto out; if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index 4d92b99c1ffd..b9c0adb3ce09 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -88,7 +88,7 @@ struct svc_expkey { struct cache_head h; struct auth_domain * ek_client; - int ek_fsidtype; + u8 ek_fsidtype; u32 ek_fsid[6]; struct path ek_path; diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index e108b6c705b4..732abf6b92a5 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -372,7 +372,7 @@ nfsd_file_put(struct nfsd_file *nf) /** * nfsd_file_put_local - put nfsd_file reference and arm nfsd_net_put in caller - * @nf: nfsd_file of which to put the reference + * @pnf: nfsd_file of which to put the reference * * First save the associated net to return to caller, then put * the reference of the nfsd_file. diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c index 80d9ff6608a7..4f6468eb2adf 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -32,7 +32,7 @@ * @rpc_clnt: rpc_clnt that the client established * @cred: cred that the client established * @nfs_fh: filehandle to lookup - * @nfp: place to find the nfsd_file, or store it if it was non-NULL + * @pnf: place to find the nfsd_file, or store it if it was non-NULL * @fmode: fmode_t to use for open * * This function maps a local fh to a path on a local filesystem. diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index a817d8485d21..b6d03e1ef5f7 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -561,7 +561,7 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, buf->pages = rqstp->rq_next_page; rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; - xdr_init_encode_pages(xdr, buf, buf->pages, NULL); + xdr_init_encode_pages(xdr, buf); } /* diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 290271ac4245..aea905fcaf87 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -65,7 +65,7 @@ nfsd4_alloc_devid_map(const struct svc_fh *fhp) return; map->fsid_type = fh->fh_fsid_type; - memcpy(&map->fsid, fh->fh_fsid, fsid_len); + memcpy(&map->fsid, fh_fsid(fh), fsid_len); spin_lock(&nfsd_devid_lock); if (fhp->fh_export->ex_devid_map) @@ -75,7 +75,7 @@ nfsd4_alloc_devid_map(const struct svc_fh *fhp) list_for_each_entry(old, &nfsd_devid_hash[i], hash) { if (old->fsid_type != fh->fh_fsid_type) continue; - if (memcmp(old->fsid, fh->fh_fsid, + if (memcmp(old->fsid, fh_fsid(fh), key_len(old->fsid_type))) continue; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f13abbb13b38..71b428efcbb5 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1917,13 +1917,6 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd42_write_res *result; __be32 status; - /* - * Currently, async COPY is not reliable. Force all COPY - * requests to be synchronous to avoid client application - * hangs waiting for COPY completion. - */ - nfsd4_copy_set_sync(copy, true); - result = ©->cp_res; nfsd_copy_write_verifier((__be32 *)&result->wr_verifier.data, nn); @@ -2842,20 +2835,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) rqstp->rq_lease_breaker = (void **)&cstate->clp; - trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt); + trace_nfsd_compound(rqstp, args->tag, args->taglen, args->opcnt); while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; - if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) { - /* If there are still more operations to process, - * stop here and report NFS4ERR_RESOURCE. */ - if (cstate->minorversion == 0 && - args->client_opcnt > resp->opcnt) { - op->status = nfserr_resource; - goto encode_op; - } - } - /* * The XDR decode routines may have pre-set op->status; * for example, if there is a miscellaneous XDR error @@ -2932,7 +2915,7 @@ encode_op: status = op->status; } - trace_nfsd_compound_status(args->client_opcnt, resp->opcnt, + trace_nfsd_compound_status(args->opcnt, resp->opcnt, status, nfsd4_op_name(op->opnum)); nfsd4_cstate_clear_replay(cstate); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 82785db730d9..2231192ec33f 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -950,38 +950,32 @@ static const struct rpc_pipe_ops cld_upcall_ops = { .destroy_msg = cld_pipe_destroy_msg, }; -static struct dentry * +static int nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe) { - struct dentry *dir, *dentry; + struct dentry *dir; + int err; dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR); if (dir == NULL) - return ERR_PTR(-ENOENT); - dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe); + return -ENOENT; + err = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe); dput(dir); - return dentry; + return err; } -static void -nfsd4_cld_unregister_sb(struct rpc_pipe *pipe) -{ - if (pipe->dentry) - rpc_unlink(pipe->dentry); -} - -static struct dentry * +static int nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe) { struct super_block *sb; - struct dentry *dentry; + int err; sb = rpc_get_sb_net(net); if (!sb) - return NULL; - dentry = nfsd4_cld_register_sb(sb, pipe); + return 0; + err = nfsd4_cld_register_sb(sb, pipe); rpc_put_sb_net(net); - return dentry; + return err; } static void @@ -991,7 +985,7 @@ nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe) sb = rpc_get_sb_net(net); if (sb) { - nfsd4_cld_unregister_sb(pipe); + rpc_unlink(pipe); rpc_put_sb_net(net); } } @@ -1001,7 +995,6 @@ static int __nfsd4_init_cld_pipe(struct net *net) { int ret; - struct dentry *dentry; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct cld_net *cn; @@ -1022,13 +1015,10 @@ __nfsd4_init_cld_pipe(struct net *net) spin_lock_init(&cn->cn_lock); INIT_LIST_HEAD(&cn->cn_list); - dentry = nfsd4_cld_register_net(net, cn->cn_pipe); - if (IS_ERR(dentry)) { - ret = PTR_ERR(dentry); + ret = nfsd4_cld_register_net(net, cn->cn_pipe); + if (unlikely(ret)) goto err_destroy_data; - } - cn->cn_pipe->dentry = dentry; #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING cn->cn_has_legacy = false; #endif @@ -2121,7 +2111,6 @@ rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr) struct net *net = sb->s_fs_info; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct cld_net *cn = nn->cld_net; - struct dentry *dentry; int ret = 0; if (!try_module_get(THIS_MODULE)) @@ -2134,16 +2123,10 @@ rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr) switch (event) { case RPC_PIPEFS_MOUNT: - dentry = nfsd4_cld_register_sb(sb, cn->cn_pipe); - if (IS_ERR(dentry)) { - ret = PTR_ERR(dentry); - break; - } - cn->cn_pipe->dentry = dentry; + ret = nfsd4_cld_register_sb(sb, cn->cn_pipe); break; case RPC_PIPEFS_UMOUNT: - if (cn->cn_pipe->dentry) - nfsd4_cld_unregister_sb(cn->cn_pipe); + rpc_unlink(cn->cn_pipe); break; default: ret = -ENOTSUPP; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d5694987f86f..88c347957da5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -633,18 +633,6 @@ find_readable_file(struct nfs4_file *f) return ret; } -static struct nfsd_file * -find_rw_file(struct nfs4_file *f) -{ - struct nfsd_file *ret; - - spin_lock(&f->fi_lock); - ret = nfsd_file_get(f->fi_fds[O_RDWR]); - spin_unlock(&f->fi_lock); - - return ret; -} - struct nfsd_file * find_any_file(struct nfs4_file *f) { @@ -1218,15 +1206,20 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid) static void put_deleg_file(struct nfs4_file *fp) { + struct nfsd_file *rnf = NULL; struct nfsd_file *nf = NULL; spin_lock(&fp->fi_lock); - if (--fp->fi_delegees == 0) + if (--fp->fi_delegees == 0) { swap(nf, fp->fi_deleg_file); + swap(rnf, fp->fi_rdeleg_file); + } spin_unlock(&fp->fi_lock); if (nf) nfsd_file_put(nf); + if (rnf) + nfs4_file_put_access(fp, NFS4_SHARE_ACCESS_READ); } static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp) @@ -3872,7 +3865,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs ca->headerpadsz = 0; ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc); ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc); - ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND); ca->maxresp_cached = min_t(u32, ca->maxresp_cached, NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ); ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION); @@ -4697,10 +4689,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } status = nfs_ok; if (conf) { - old = unconf; - unhash_client_locked(old); - nfsd4_change_callback(conf, &unconf->cl_cb_conn); - } else { + if (get_client_locked(conf) == nfs_ok) { + old = unconf; + unhash_client_locked(old); + nfsd4_change_callback(conf, &unconf->cl_cb_conn); + } else { + conf = NULL; + } + } + + if (!conf) { old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { status = nfserr_clid_inuse; @@ -4717,10 +4715,14 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } trace_nfsd_clid_replaced(&old->cl_clientid); } + status = get_client_locked(unconf); + if (status != nfs_ok) { + old = NULL; + goto out; + } move_to_confirmed(unconf); conf = unconf; } - get_client_locked(conf); spin_unlock(&nn->client_lock); if (conf == unconf) fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY); @@ -4750,6 +4752,7 @@ static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp) INIT_LIST_HEAD(&fp->fi_clnt_odstate); fh_copy_shallow(&fp->fi_fhandle, &fh->fh_handle); fp->fi_deleg_file = NULL; + fp->fi_rdeleg_file = NULL; fp->fi_had_conflict = false; fp->fi_share_deny = 0; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); @@ -6000,14 +6003,19 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, * "An OPEN_DELEGATE_WRITE delegation allows the client to handle, * on its own, all opens." * - * Furthermore the client can use a write delegation for most READ - * operations as well, so we require a O_RDWR file here. + * Furthermore, section 9.1.2 says: + * + * "In the case of READ, the server may perform the corresponding + * check on the access mode, or it may choose to allow READ for + * OPEN4_SHARE_ACCESS_WRITE, to accommodate clients whose WRITE + * implementation may unavoidably do reads (e.g., due to buffer + * cache constraints)." * - * Offer a write delegation in the case of a BOTH open, and ensure - * we get the O_RDWR descriptor. + * We choose to offer a write delegation for OPEN with the + * OPEN4_SHARE_ACCESS_WRITE access mode to accommodate such clients. */ - if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) { - nf = find_rw_file(fp); + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) { + nf = find_writeable_file(fp); dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE; } @@ -6138,7 +6146,7 @@ static bool nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh, struct kstat *stat) { - struct nfsd_file *nf = find_rw_file(dp->dl_stid.sc_file); + struct nfsd_file *nf = find_writeable_file(dp->dl_stid.sc_file); struct path path; int rc; @@ -6157,6 +6165,34 @@ nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh, } /* + * Add NFS4_SHARE_ACCESS_READ to the write delegation granted on OPEN + * with NFS4_SHARE_ACCESS_WRITE by allocating separate nfsd_file and + * struct file to be used for read with delegation stateid. + * + */ +static bool +nfsd4_add_rdaccess_to_wrdeleg(struct svc_rqst *rqstp, struct nfsd4_open *open, + struct svc_fh *fh, struct nfs4_ol_stateid *stp) +{ + struct nfs4_file *fp; + struct nfsd_file *nf = NULL; + + if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == + NFS4_SHARE_ACCESS_WRITE) { + if (nfsd_file_acquire_opened(rqstp, fh, NFSD_MAY_READ, NULL, &nf)) + return (false); + fp = stp->st_stid.sc_file; + spin_lock(&fp->fi_lock); + __nfs4_file_get_access(fp, NFS4_SHARE_ACCESS_READ); + fp = stp->st_stid.sc_file; + fp->fi_fds[O_RDONLY] = nf; + fp->fi_rdeleg_file = nf; + spin_unlock(&fp->fi_lock); + } + return true; +} + +/* * The Linux NFS server does not offer write delegations to NFSv4.0 * clients in order to avoid conflicts between write delegations and * GETATTRs requesting CHANGE or SIZE attributes. @@ -6181,8 +6217,9 @@ nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh, * open or lock state. */ static void -nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, - struct svc_fh *currentfh) +nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open, + struct nfs4_ol_stateid *stp, struct svc_fh *currentfh, + struct svc_fh *fh) { struct nfs4_openowner *oo = openowner(stp->st_stateowner); bool deleg_ts = nfsd4_want_deleg_timestamps(open); @@ -6227,7 +6264,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) { - if (!nfs4_delegation_stat(dp, currentfh, &stat)) { + if (!nfsd4_add_rdaccess_to_wrdeleg(rqstp, open, fh, stp) || + !nfs4_delegation_stat(dp, currentfh, &stat)) { nfs4_put_stid(&dp->dl_stid); destroy_delegation(dp); goto out_no_deleg; @@ -6322,6 +6360,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; + if (dp && nfsd4_is_deleg_cur(open) && + (dp->dl_stid.sc_file != fp)) { + /* + * RFC8881 section 8.2.4 mandates the server to return + * NFS4ERR_BAD_STATEID if the selected table entry does + * not match the current filehandle. However returning + * NFS4ERR_BAD_STATEID in the OPEN can cause the client + * to repeatedly retry the operation with the same + * stateid, since the stateid itself is valid. To avoid + * this situation NFSD returns NFS4ERR_INVAL instead. + */ + status = nfserr_inval; + goto out; + } stp = nfsd4_find_and_lock_existing_open(fp, open); } else { open->op_file = NULL; @@ -6383,7 +6435,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. */ - nfs4_open_delegation(open, stp, &resp->cstate.current_fh); + nfs4_open_delegation(rqstp, open, stp, + &resp->cstate.current_fh, current_fh); /* * If there is an existing open stateid, it must be updated and @@ -7076,7 +7129,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, return_revoked = true; if (typemask & SC_TYPE_DELEG) /* Always allow REVOKED for DELEG so we can - * retturn the appropriate error. + * return the appropriate error. */ statusmask |= SC_STATUS_REVOKED; @@ -7119,10 +7172,6 @@ nfs4_find_file(struct nfs4_stid *s, int flags) switch (s->sc_type) { case SC_TYPE_DELEG: - spin_lock(&s->sc_file->fi_lock); - ret = nfsd_file_get(s->sc_file->fi_deleg_file); - spin_unlock(&s->sc_file->fi_lock); - break; case SC_TYPE_OPEN: case SC_TYPE_LOCK: if (flags & RD_STATE) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3afcdbed6e14..ea91bad4eee2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2500,10 +2500,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) return false; - if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0) + if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0) return false; - argp->opcnt = min_t(u32, argp->client_opcnt, - NFSD_MAX_OPS_PER_COMPOUND); if (argp->opcnt > ARRAY_SIZE(argp->iops)) { argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops)); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6a42cc7a845a..2909d70de559 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1436,7 +1436,7 @@ unsigned int nfsd_net_id; static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, struct netlink_callback *cb, - struct nfsd_genl_rqstp *rqstp) + struct nfsd_genl_rqstp *genl_rqstp) { void *hdr; u32 i; @@ -1446,22 +1446,22 @@ static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, if (!hdr) return -ENOBUFS; - if (nla_put_be32(skb, NFSD_A_RPC_STATUS_XID, rqstp->rq_xid) || - nla_put_u32(skb, NFSD_A_RPC_STATUS_FLAGS, rqstp->rq_flags) || - nla_put_u32(skb, NFSD_A_RPC_STATUS_PROG, rqstp->rq_prog) || - nla_put_u32(skb, NFSD_A_RPC_STATUS_PROC, rqstp->rq_proc) || - nla_put_u8(skb, NFSD_A_RPC_STATUS_VERSION, rqstp->rq_vers) || + if (nla_put_be32(skb, NFSD_A_RPC_STATUS_XID, genl_rqstp->rq_xid) || + nla_put_u32(skb, NFSD_A_RPC_STATUS_FLAGS, genl_rqstp->rq_flags) || + nla_put_u32(skb, NFSD_A_RPC_STATUS_PROG, genl_rqstp->rq_prog) || + nla_put_u32(skb, NFSD_A_RPC_STATUS_PROC, genl_rqstp->rq_proc) || + nla_put_u8(skb, NFSD_A_RPC_STATUS_VERSION, genl_rqstp->rq_vers) || nla_put_s64(skb, NFSD_A_RPC_STATUS_SERVICE_TIME, - ktime_to_us(rqstp->rq_stime), + ktime_to_us(genl_rqstp->rq_stime), NFSD_A_RPC_STATUS_PAD)) return -ENOBUFS; - switch (rqstp->rq_saddr.sa_family) { + switch (genl_rqstp->rq_saddr.sa_family) { case AF_INET: { const struct sockaddr_in *s_in, *d_in; - s_in = (const struct sockaddr_in *)&rqstp->rq_saddr; - d_in = (const struct sockaddr_in *)&rqstp->rq_daddr; + s_in = (const struct sockaddr_in *)&genl_rqstp->rq_saddr; + d_in = (const struct sockaddr_in *)&genl_rqstp->rq_daddr; if (nla_put_in_addr(skb, NFSD_A_RPC_STATUS_SADDR4, s_in->sin_addr.s_addr) || nla_put_in_addr(skb, NFSD_A_RPC_STATUS_DADDR4, @@ -1476,8 +1476,8 @@ static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, case AF_INET6: { const struct sockaddr_in6 *s_in, *d_in; - s_in = (const struct sockaddr_in6 *)&rqstp->rq_saddr; - d_in = (const struct sockaddr_in6 *)&rqstp->rq_daddr; + s_in = (const struct sockaddr_in6 *)&genl_rqstp->rq_saddr; + d_in = (const struct sockaddr_in6 *)&genl_rqstp->rq_daddr; if (nla_put_in6_addr(skb, NFSD_A_RPC_STATUS_SADDR6, &s_in->sin6_addr) || nla_put_in6_addr(skb, NFSD_A_RPC_STATUS_DADDR6, @@ -1491,9 +1491,9 @@ static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, } } - for (i = 0; i < rqstp->rq_opcnt; i++) + for (i = 0; i < genl_rqstp->rq_opcnt; i++) if (nla_put_u32(skb, NFSD_A_RPC_STATUS_COMPOUND_OPS, - rqstp->rq_opnum[i])) + genl_rqstp->rq_opnum[i])) return -ENOBUFS; genlmsg_end(skb, hdr); @@ -1569,7 +1569,8 @@ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, int j; args = rqstp->rq_argp; - genl_rqstp.rq_opcnt = args->opcnt; + genl_rqstp.rq_opcnt = min_t(u32, args->opcnt, + ARRAY_SIZE(genl_rqstp.rq_opnum)); for (j = 0; j < genl_rqstp.rq_opcnt; j++) genl_rqstp.rq_opnum[j] = args->ops[j].opnum; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 1bfd0b4e9af7..1cd0bed57bc2 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -57,9 +57,6 @@ struct readdir_cd { __be32 err; /* 0, nfserr, or nfserr_eof */ }; -/* Maximum number of operations per session compound */ -#define NFSD_MAX_OPS_PER_COMPOUND 50 - struct nfsd_genl_rqstp { struct sockaddr rq_daddr; struct sockaddr rq_saddr; @@ -72,7 +69,7 @@ struct nfsd_genl_rqstp { /* NFSv4 compound */ u32 rq_opcnt; - u32 rq_opnum[NFSD_MAX_OPS_PER_COMPOUND]; + u32 rq_opnum[16]; }; extern struct svc_program nfsd_programs[]; @@ -283,6 +280,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN) #define nfserr_locked cpu_to_be32(NFSERR_LOCKED) #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC) +#define nfserr_delay cpu_to_be32(NFS4ERR_DELAY) #define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE) #define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT) #define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index aef474f1b84b..74cf1f4de174 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -172,6 +172,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net, if (len == 0) return error; if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { + u32 *fsid = fh_fsid(fh); + /* deprecated, convert to type 3 */ len = key_len(FSID_ENCODE_DEV)/4; fh->fh_fsid_type = FSID_ENCODE_DEV; @@ -181,17 +183,17 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net, * confuses sparse, so we must use __force here to * keep it from complaining. */ - fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), - ntohl((__force __be32)fh->fh_fsid[1]))); - fh->fh_fsid[1] = fh->fh_fsid[2]; + fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fsid[0]), + ntohl((__force __be32)fsid[1]))); + fsid[1] = fsid[2]; } data_left -= len; if (data_left < 0) return error; exp = rqst_exp_find(rqstp ? &rqstp->rq_chandle : NULL, net, client, gssclient, - fh->fh_fsid_type, fh->fh_fsid); - fid = (struct fid *)(fh->fh_fsid + len); + fh->fh_fsid_type, fh_fsid(fh)); + fid = (struct fid *)(fh_fsid(fh) + len); error = nfserr_stale; if (IS_ERR(exp)) { @@ -463,7 +465,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, { if (dentry != exp->ex_path.dentry) { struct fid *fid = (struct fid *) - (fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1); + (fh_fsid(&fhp->fh_handle) + fhp->fh_handle.fh_size/4 - 1); int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; int fh_flags = (exp->ex_flags & NFSEXP_NOSUBTREECHECK) ? 0 : EXPORT_FH_CONNECTABLE; @@ -614,7 +616,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fhp->fh_handle.fh_auth_type = 0; mk_fsid(fhp->fh_handle.fh_fsid_type, - fhp->fh_handle.fh_fsid, + fh_fsid(&fhp->fh_handle), ex_dev, d_inode(exp->ex_path.dentry)->i_ino, exp->ex_fsid, exp->ex_uuid); diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 5103c2f4d225..1cf979722521 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -49,18 +49,19 @@ struct knfsd_fh { * Points to the current size while * building a new file handle. */ - union { - char fh_raw[NFS4_FHSIZE]; - struct { - u8 fh_version; /* == 1 */ - u8 fh_auth_type; /* deprecated */ - u8 fh_fsid_type; - u8 fh_fileid_type; - u32 fh_fsid[]; /* flexible-array member */ - }; - }; + u8 fh_raw[NFS4_FHSIZE]; }; +#define fh_version fh_raw[0] +#define fh_auth_type fh_raw[1] +#define fh_fsid_type fh_raw[2] +#define fh_fileid_type fh_raw[3] + +static inline u32 *fh_fsid(const struct knfsd_fh *fh) +{ + return (u32 *)&fh->fh_raw[4]; +} + static inline __u32 ino_t_to_u32(ino_t ino) { return (__u32) ino; @@ -260,9 +261,12 @@ static inline bool fh_match(const struct knfsd_fh *fh1, static inline bool fh_fsid_match(const struct knfsd_fh *fh1, const struct knfsd_fh *fh2) { + u32 *fsid1 = fh_fsid(fh1); + u32 *fsid2 = fh_fsid(fh2); + if (fh1->fh_fsid_type != fh2->fh_fsid_type) return false; - if (memcmp(fh1->fh_fsid, fh2->fh_fsid, key_len(fh1->fh_fsid_type)) != 0) + if (memcmp(fsid1, fsid2, key_len(fh1->fh_fsid_type)) != 0) return false; return true; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index c10fa8128a8a..8f71f5748c75 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -575,7 +575,7 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, buf->pages = rqstp->rq_next_page; rqstp->rq_next_page++; - xdr_init_encode_pages(xdr, buf, buf->pages, NULL); + xdr_init_encode_pages(xdr, buf); } /* diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 1995bca158b8..8adc2550129e 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -665,6 +665,7 @@ struct nfs4_file { atomic_t fi_access[2]; u32 fi_share_deny; struct nfsd_file *fi_deleg_file; + struct nfsd_file *fi_rdeleg_file; int fi_delegees; struct knfsd_fh fi_fhandle; bool fi_had_conflict; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 3c5505ef5e3a..a664fdf1161e 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -344,7 +344,7 @@ TRACE_EVENT(nfsd_exp_find_key, int status), TP_ARGS(key, status), TP_STRUCT__entry( - __field(int, fsidtype) + __field(u8, fsidtype) __array(u32, fsid, 6) __string(auth_domain, key->ek_client->name) __field(int, status) @@ -367,7 +367,7 @@ TRACE_EVENT(nfsd_expkey_update, TP_PROTO(const struct svc_expkey *key, const char *exp_path), TP_ARGS(key, exp_path), TP_STRUCT__entry( - __field(int, fsidtype) + __field(u8, fsidtype) __array(u32, fsid, 6) __string(auth_domain, key->ek_client->name) __string(path, exp_path) @@ -1108,7 +1108,6 @@ DEFINE_NFSD_FILE_EVENT(nfsd_file_free); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); DEFINE_NFSD_FILE_EVENT(nfsd_file_closing); -DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue); TRACE_EVENT(nfsd_file_alloc, TP_PROTO( @@ -1344,9 +1343,7 @@ DEFINE_EVENT(nfsd_file_gc_class, name, \ TP_ARGS(nf)) DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); @@ -1380,7 +1377,6 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ TP_ARGS(removed, remaining)) DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); -DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_recent); DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); TRACE_EVENT(nfsd_file_close, @@ -2103,25 +2099,6 @@ TRACE_EVENT(nfsd_ctl_maxblksize, ) ); -TRACE_EVENT(nfsd_ctl_maxconn, - TP_PROTO( - const struct net *net, - int maxconn - ), - TP_ARGS(net, maxconn), - TP_STRUCT__entry( - __field(unsigned int, netns_ino) - __field(int, maxconn) - ), - TP_fast_assign( - __entry->netns_ino = net->ns.inum; - __entry->maxconn = maxconn; - ), - TP_printk("maxconn=%d", - __entry->maxconn - ) -); - TRACE_EVENT(nfsd_ctl_time, TP_PROTO( const struct net *net, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index cd689df2ca5d..ee78b6fb1709 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1086,10 +1086,13 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, { unsigned long v, total; struct iov_iter iter; - loff_t ppos = offset; + struct kiocb kiocb; ssize_t host_err; size_t len; + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = offset; + v = 0; total = *count; while (total) { @@ -1104,7 +1107,7 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, trace_nfsd_read_vector(rqstp, fhp, offset, *count); iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count); - host_err = vfs_iter_read(file, &iter, &ppos, 0); + host_err = vfs_iocb_iter_read(file, &kiocb, &iter); return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } @@ -1170,15 +1173,14 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file *file = nf->nf_file; struct super_block *sb = file_inode(file)->i_sb; + struct kiocb kiocb; struct svc_export *exp; struct iov_iter iter; errseq_t since; __be32 nfserr; int host_err; - loff_t pos = offset; unsigned long exp_op_flags = 0; unsigned int pflags = current->flags; - rwf_t flags = 0; bool restore_flags = false; unsigned int nvecs; @@ -1204,16 +1206,17 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!EX_ISSYNC(exp)) stable = NFS_UNSTABLE; - + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = offset; if (stable && !fhp->fh_use_wgather) - flags |= RWF_SYNC; + kiocb.ki_flags |= IOCB_DSYNC; nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload); iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt); since = READ_ONCE(file->f_wb_err); if (verf) nfsd_copy_write_verifier(verf, nn); - host_err = vfs_iter_write(file, &iter, &pos, flags); + host_err = vfs_iocb_iter_write(file, &kiocb, &iter); if (host_err < 0) { commit_reset_write_verifier(nn, rqstp, host_err); goto out_nfserr; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index aa2a356da784..a23bc56051ca 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -870,7 +870,6 @@ struct nfsd4_compoundargs { char * tag; u32 taglen; u32 minorversion; - u32 client_opcnt; u32 opcnt; bool splice_ok; struct nfsd4_op *ops; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 6613b8fcceb0..5cf7328d5360 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -472,11 +472,18 @@ static int __nilfs_read_inode(struct super_block *sb, inode->i_op = &nilfs_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &nilfs_aops; - } else { + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { inode->i_op = &nilfs_special_inode_operations; init_special_inode( inode, inode->i_mode, huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); + } else { + nilfs_error(sb, + "invalid file type bits in mode 0%o for inode %lu", + inode->i_mode, ino); + err = -EIO; + goto failed_unmap; } nilfs_ifile_unmap_inode(raw_inode); brelse(bh); diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index c4cdaf5fa7ed..9fb73bafd41d 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -308,6 +308,10 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) goto out_err; } + error = file_f_owner_allocate(filp); + if (error) + goto out_err; + /* new fsnotify mark, we expect most fcntl calls to add a new mark */ new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL); if (!new_dn_mark) { @@ -315,10 +319,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) goto out_err; } - error = file_f_owner_allocate(filp); - if (error) - goto out_err; - /* set up the new_fsn_mark and new_dn_mark */ new_fsn_mark = &new_dn_mark->fsn_mark; fsnotify_init_mark(new_fsn_mark, dnotify_group); diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index b6da80c69ca6..1b5c865a0339 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -304,6 +304,9 @@ static inline bool ntfs_dir_emit(struct ntfs_sb_info *sbi, if (sbi->options->nohidden && (fname->dup.fa & FILE_ATTRIBUTE_HIDDEN)) return true; + if (fname->name_len + sizeof(struct NTFS_DE) > le16_to_cpu(e->size)) + return true; + name_len = ntfs_utf16_to_nls(sbi, fname->name, fname->name_len, name, PATH_MAX); if (name_len <= 0) { @@ -329,8 +332,7 @@ static inline bool ntfs_dir_emit(struct ntfs_sb_info *sbi, * It does additional locks/reads just to get the type of name. * Should we use additional mount option to enable branch below? */ - if (((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) || - fname->dup.ea_size) && + if (fname->dup.extend_data && ino != ni->mi.rno) { struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL); if (!IS_ERR_OR_NULL(inode)) { diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 1e99a35691cd..2e321b84a1ed 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -57,6 +57,10 @@ long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg) struct inode *inode = file_inode(filp); struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ntfs_i(inode)))) + return -EINVAL; + switch (cmd) { case FITRIM: return ntfs_ioctl_fitrim(sbi, arg); @@ -81,6 +85,10 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct inode *inode = d_inode(path->dentry); struct ntfs_inode *ni = ntfs_i(inode); + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + stat->result_mask |= STATX_BTIME; stat->btime = ni->i_crtime; stat->blksize = ni->mi.sbi->cluster_size; /* 512, 1K, ..., 2M */ @@ -271,6 +279,10 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) bool rw = vma->vm_flags & VM_WRITE; int err; + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; @@ -310,7 +322,10 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) } if (ni->i_valid < to) { - inode_lock(inode); + if (!inode_trylock(inode)) { + err = -EAGAIN; + goto out; + } err = ntfs_extend_initialized_size(file, ni, ni->i_valid, to); inode_unlock(inode); @@ -735,6 +750,10 @@ int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode = inode->i_mode; int err; + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; @@ -795,6 +814,10 @@ static int check_read_restriction(struct inode *inode) { struct ntfs_inode *ni = ntfs_i(inode); + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; @@ -1130,6 +1153,10 @@ static int check_write_restriction(struct inode *inode) { struct ntfs_inode *ni = ntfs_i(inode); + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; @@ -1212,6 +1239,10 @@ int ntfs_file_open(struct inode *inode, struct file *file) { struct ntfs_inode *ni = ntfs_i(inode); + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; @@ -1281,6 +1312,10 @@ int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int err; struct ntfs_inode *ni = ntfs_i(inode); + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + err = fiemap_prep(inode, fieinfo, start, &len, ~FIEMAP_FLAG_XATTR); if (err) return err; diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 756e1306fe6c..8f9fe1d7a690 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3003,8 +3003,7 @@ int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni, * ni_rename - Remove one name and insert new name. */ int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni, - struct ntfs_inode *ni, struct NTFS_DE *de, struct NTFS_DE *new_de, - bool *is_bad) + struct ntfs_inode *ni, struct NTFS_DE *de, struct NTFS_DE *new_de) { int err; struct NTFS_DE *de2 = NULL; @@ -3027,8 +3026,8 @@ int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni, err = ni_add_name(new_dir_ni, ni, new_de); if (!err) { err = ni_remove_name(dir_ni, ni, de, &de2, &undo); - if (err && ni_remove_name(new_dir_ni, ni, new_de, &de2, &undo)) - *is_bad = true; + WARN_ON(err && ni_remove_name(new_dir_ni, ni, new_de, &de2, + &undo)); } /* @@ -3119,11 +3118,21 @@ static bool ni_update_parent(struct ntfs_inode *ni, struct NTFS_DUP_INFO *dup, } } - /* TODO: Fill reparse info. */ - dup->reparse = 0; - dup->ea_size = 0; + dup->extend_data = 0; - if (ni->ni_flags & NI_FLAG_EA) { + if (dup->fa & FILE_ATTRIBUTE_REPARSE_POINT) { + attr = ni_find_attr(ni, NULL, NULL, ATTR_REPARSE, NULL, 0, NULL, + NULL); + + if (attr) { + const struct REPARSE_POINT *rp; + + rp = resident_data_ex(attr, sizeof(struct REPARSE_POINT)); + /* If ATTR_REPARSE exists 'rp' can't be NULL. */ + if (rp) + dup->extend_data = rp->ReparseTag; + } + } else if (ni->ni_flags & NI_FLAG_EA) { attr = ni_find_attr(ni, attr, &le, ATTR_EA_INFO, NULL, 0, NULL, NULL); if (attr) { @@ -3132,7 +3141,7 @@ static bool ni_update_parent(struct ntfs_inode *ni, struct NTFS_DUP_INFO *dup, info = resident_data_ex(attr, sizeof(struct EA_INFO)); /* If ATTR_EA_INFO exists 'info' can't be NULL. */ if (info) - dup->ea_size = info->size_pack; + dup->extend_data = info->size; } } @@ -3199,6 +3208,10 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) if (is_bad_inode(inode) || sb_rdonly(sb)) return 0; + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(sb))) return -EIO; diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index df81f1f7330c..c7a2f191254d 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -905,9 +905,13 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) void ntfs_bad_inode(struct inode *inode, const char *hint) { struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; + struct ntfs_inode *ni = ntfs_i(inode); ntfs_inode_err(inode, "%s", hint); - make_bad_inode(inode); + + /* Do not call make_bad_inode()! */ + ni->ni_bad = true; + /* Avoid recursion if bad inode is $Volume. */ if (inode->i_ino != MFT_REC_VOL && !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING)) { diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 0f0d27d4644a..4f9020df8912 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -878,6 +878,10 @@ static int ntfs_resident_writepage(struct folio *folio, struct ntfs_inode *ni = ntfs_i(inode); int ret; + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; @@ -896,6 +900,10 @@ static int ntfs_writepages(struct address_space *mapping, { struct inode *inode = mapping->host; + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ntfs_i(inode)))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; @@ -919,6 +927,10 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; @@ -1062,10 +1074,10 @@ int inode_read_data(struct inode *inode, void *data, size_t bytes) * Number of bytes for REPARSE_DATA_BUFFER(IO_REPARSE_TAG_SYMLINK) * for unicode string of @uni_len length. */ -static inline u32 ntfs_reparse_bytes(u32 uni_len) +static inline u32 ntfs_reparse_bytes(u32 uni_len, bool is_absolute) { /* Header + unicode string + decorated unicode string. */ - return sizeof(short) * (2 * uni_len + 4) + + return sizeof(short) * (2 * uni_len + (is_absolute ? 4 : 0)) + offsetof(struct REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer); } @@ -1078,8 +1090,11 @@ ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname, struct REPARSE_DATA_BUFFER *rp; __le16 *rp_name; typeof(rp->SymbolicLinkReparseBuffer) *rs; + bool is_absolute; - rp = kzalloc(ntfs_reparse_bytes(2 * size + 2), GFP_NOFS); + is_absolute = (strlen(symname) > 1 && symname[1] == ':'); + + rp = kzalloc(ntfs_reparse_bytes(2 * size + 2, is_absolute), GFP_NOFS); if (!rp) return ERR_PTR(-ENOMEM); @@ -1094,7 +1109,7 @@ ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname, goto out; /* err = the length of unicode name of symlink. */ - *nsize = ntfs_reparse_bytes(err); + *nsize = ntfs_reparse_bytes(err, is_absolute); if (*nsize > sbi->reparse.max_size) { err = -EFBIG; @@ -1114,7 +1129,7 @@ ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname, /* PrintName + SubstituteName. */ rs->SubstituteNameOffset = cpu_to_le16(sizeof(short) * err); - rs->SubstituteNameLength = cpu_to_le16(sizeof(short) * err + 8); + rs->SubstituteNameLength = cpu_to_le16(sizeof(short) * err + (is_absolute ? 8 : 0)); rs->PrintNameLength = rs->SubstituteNameOffset; /* @@ -1122,16 +1137,18 @@ ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname, * parse this path. * 0-absolute path 1- relative path (SYMLINK_FLAG_RELATIVE). */ - rs->Flags = 0; + rs->Flags = cpu_to_le32(is_absolute ? 0 : SYMLINK_FLAG_RELATIVE); - memmove(rp_name + err + 4, rp_name, sizeof(short) * err); + memmove(rp_name + err + (is_absolute ? 4 : 0), rp_name, sizeof(short) * err); - /* Decorate SubstituteName. */ - rp_name += err; - rp_name[0] = cpu_to_le16('\\'); - rp_name[1] = cpu_to_le16('?'); - rp_name[2] = cpu_to_le16('?'); - rp_name[3] = cpu_to_le16('\\'); + if (is_absolute) { + /* Decorate SubstituteName. */ + rp_name += err; + rp_name[0] = cpu_to_le16('\\'); + rp_name[1] = cpu_to_le16('?'); + rp_name[2] = cpu_to_le16('?'); + rp_name[3] = cpu_to_le16('\\'); + } return rp; out: @@ -1260,6 +1277,12 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, goto out1; } + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(dir_ni))) { + err = -EINVAL; + goto out2; + } + if (unlikely(ntfs3_forced_shutdown(sb))) { err = -EIO; goto out2; @@ -1350,7 +1373,7 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, fname->dup.a_time = std5->cr_time; fname->dup.alloc_size = fname->dup.data_size = 0; fname->dup.fa = std5->fa; - fname->dup.ea_size = fname->dup.reparse = 0; + fname->dup.extend_data = S_ISLNK(mode) ? IO_REPARSE_TAG_SYMLINK : 0; dsize = le16_to_cpu(new_de->key_size); asize = ALIGN(SIZEOF_RESIDENT + dsize, 8); @@ -1590,27 +1613,29 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, inode->i_flags |= S_NOSEC; } - /* - * ntfs_init_acl and ntfs_save_wsl_perm update extended attribute. - * The packed size of extended attribute is stored in direntry too. - * 'fname' here points to inside new_de. - */ - err = ntfs_save_wsl_perm(inode, &fname->dup.ea_size); - if (err) - goto out6; + if (!S_ISLNK(mode)) { + /* + * ntfs_init_acl and ntfs_save_wsl_perm update extended attribute. + * The packed size of extended attribute is stored in direntry too. + * 'fname' here points to inside new_de. + */ + err = ntfs_save_wsl_perm(inode, &fname->dup.extend_data); + if (err) + goto out6; - /* - * update ea_size in file_name attribute too. - * Use ni_find_attr cause layout of MFT record may be changed - * in ntfs_init_acl and ntfs_save_wsl_perm. - */ - attr = ni_find_attr(ni, NULL, NULL, ATTR_NAME, NULL, 0, NULL, NULL); - if (attr) { - struct ATTR_FILE_NAME *fn; + /* + * update ea_size in file_name attribute too. + * Use ni_find_attr cause layout of MFT record may be changed + * in ntfs_init_acl and ntfs_save_wsl_perm. + */ + attr = ni_find_attr(ni, NULL, NULL, ATTR_NAME, NULL, 0, NULL, NULL); + if (attr) { + struct ATTR_FILE_NAME *fn; - fn = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME); - if (fn) - fn->dup.ea_size = fname->dup.ea_size; + fn = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME); + if (fn) + fn->dup.extend_data = fname->dup.extend_data; + } } /* We do not need to update parent directory later */ diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index b807744fc6a9..82c8ae56beee 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -171,6 +171,10 @@ static int ntfs_unlink(struct inode *dir, struct dentry *dentry) struct ntfs_inode *ni = ntfs_i(dir); int err; + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(dir->i_sb))) return -EIO; @@ -191,6 +195,10 @@ static int ntfs_symlink(struct mnt_idmap *idmap, struct inode *dir, { u32 size = strlen(symname); + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ntfs_i(dir)))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(dir->i_sb))) return -EIO; @@ -216,6 +224,10 @@ static int ntfs_rmdir(struct inode *dir, struct dentry *dentry) struct ntfs_inode *ni = ntfs_i(dir); int err; + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(dir->i_sb))) return -EIO; @@ -244,7 +256,7 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir, struct ntfs_inode *ni = ntfs_i(inode); struct inode *new_inode = d_inode(new_dentry); struct NTFS_DE *de, *new_de; - bool is_same, is_bad; + bool is_same; /* * de - memory of PATH_MAX bytes: * [0-1024) - original name (dentry->d_name) @@ -256,6 +268,10 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir, 1024); static_assert(PATH_MAX >= 4 * 1024); + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(sb))) return -EIO; @@ -313,12 +329,8 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir, if (dir_ni != new_dir_ni) ni_lock_dir2(new_dir_ni); - is_bad = false; - err = ni_rename(dir_ni, new_dir_ni, ni, de, new_de, &is_bad); - if (is_bad) { - /* Restore after failed rename failed too. */ - _ntfs_bad_inode(inode); - } else if (!err) { + err = ni_rename(dir_ni, new_dir_ni, ni, de, new_de); + if (!err) { simple_rename_timestamp(dir, dentry, new_dir, new_dentry); mark_inode_dirty(inode); mark_inode_dirty(dir); diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 1ff13b6f9613..552b97905813 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -561,8 +561,7 @@ struct NTFS_DUP_INFO { __le64 alloc_size; // 0x20: Data attribute allocated size, multiple of cluster size. __le64 data_size; // 0x28: Data attribute size <= Dataalloc_size. enum FILE_ATTRIBUTE fa; // 0x30: Standard DOS attributes & more. - __le16 ea_size; // 0x34: Packed EAs. - __le16 reparse; // 0x36: Used by Reparse. + __le32 extend_data; // 0x34: Extended data. }; // 0x38 diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 36b8052660d5..9a25fec25f01 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -377,6 +377,13 @@ struct ntfs_inode { */ u8 mi_loaded; + /* + * Use this field to avoid any write(s). + * If inode is bad during initialization - use make_bad_inode + * If inode is bad during operations - use this field + */ + u8 ni_bad; + union { struct ntfs_index dir; struct { @@ -577,8 +584,7 @@ int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni, struct NTFS_DE *de); int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni, - struct ntfs_inode *ni, struct NTFS_DE *de, struct NTFS_DE *new_de, - bool *is_bad); + struct ntfs_inode *ni, struct NTFS_DE *de, struct NTFS_DE *new_de); bool ni_is_dirty(struct inode *inode); @@ -874,7 +880,7 @@ int ntfs_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry); ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size); extern const struct xattr_handler *const ntfs_xattr_handlers[]; -int ntfs_save_wsl_perm(struct inode *inode, __le16 *ea_size); +int ntfs_save_wsl_perm(struct inode *inode, __le32 *ea_size); void ntfs_get_wsl_perm(struct inode *inode); /* globals from lznt.c */ @@ -1025,6 +1031,11 @@ static inline bool is_compressed(const struct ntfs_inode *ni) (ni->ni_flags & NI_FLAG_COMPRESSED_MASK); } +static inline bool is_bad_ni(const struct ntfs_inode *ni) +{ + return ni->ni_bad; +} + static inline int ni_ext_compress_bits(const struct ntfs_inode *ni) { return 0xb + (ni->ni_flags & NI_FLAG_COMPRESSED_MASK); diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 920a1ab47b63..ddff94c091b8 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -1223,7 +1223,8 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_export_op = &ntfs_export_ops; sb->s_time_gran = NTFS_TIME_GRAN; // 100 nsec sb->s_xattr = ntfs_xattr_handlers; - sb->s_d_op = options->nocase ? &ntfs_dentry_ops : NULL; + if (options->nocase) + set_default_d_op(sb, &ntfs_dentry_ops); options->nls = ntfs_load_nls(options->nls_name); if (IS_ERR(options->nls)) { diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index e0055dcf8fe3..e519e21596a7 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -313,7 +313,7 @@ out: static noinline int ntfs_set_ea(struct inode *inode, const char *name, size_t name_len, const void *value, size_t val_size, int flags, bool locked, - __le16 *ea_size) + __le32 *ea_size) { struct ntfs_inode *ni = ntfs_i(inode); struct ntfs_sb_info *sbi = ni->mi.sbi; @@ -522,7 +522,7 @@ update_ea: if (ea_info.size_pack != size_pack) ni->ni_flags |= NI_FLAG_UPDATE_PARENT; if (ea_size) - *ea_size = ea_info.size_pack; + *ea_size = ea_info.size; mark_inode_dirty(&ni->vfs_inode); out: @@ -552,6 +552,10 @@ struct posix_acl *ntfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int err; void *buf; + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return ERR_PTR(-EINVAL); + /* Allocate PATH_MAX bytes. */ buf = __getname(); if (!buf) @@ -600,6 +604,10 @@ static noinline int ntfs_set_acl_ex(struct mnt_idmap *idmap, int flags; umode_t mode; + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ntfs_i(inode)))) + return -EINVAL; + if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; @@ -730,6 +738,10 @@ ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size) struct ntfs_inode *ni = ntfs_i(inode); ssize_t ret; + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (!(ni->ni_flags & NI_FLAG_EA)) { /* no xattr in file */ return 0; @@ -751,6 +763,10 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de, int err; struct ntfs_inode *ni = ntfs_i(inode); + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ni))) + return -EINVAL; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) return -EIO; @@ -950,7 +966,7 @@ out: * * save uid/gid/mode in xattr */ -int ntfs_save_wsl_perm(struct inode *inode, __le16 *ea_size) +int ntfs_save_wsl_perm(struct inode *inode, __le32 *ea_size) { int err; __le32 value; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 3d2533950bae..53daa4482406 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1962,7 +1962,7 @@ static int ocfs2_initialize_super(struct super_block *sb, sb->s_fs_info = osb; sb->s_op = &ocfs2_sops; - sb->s_d_op = &ocfs2_dentry_ops; + set_default_d_op(sb, &ocfs2_dentry_ops); sb->s_export_op = &ocfs2_export_ops; sb->s_qcop = &dquot_quotactl_sysfile_ops; sb->dq_op = &ocfs2_quota_operations; diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 64ca9498f550..f3da840758e7 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -416,7 +416,7 @@ static int orangefs_fill_sb(struct super_block *sb, sb->s_xattr = orangefs_xattr_handlers; sb->s_magic = ORANGEFS_SUPER_MAGIC; sb->s_op = &orangefs_s_ops; - sb->s_d_op = &orangefs_dentry_operations; + set_default_d_op(sb, &orangefs_dentry_operations); sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index e19940d649ca..efbf0b291551 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1322,7 +1322,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) if (WARN_ON(fc->user_ns != current_user_ns())) goto out_err; - sb->s_d_op = &ovl_dentry_operations; + set_default_d_op(sb, &ovl_dentry_operations); err = -ENOMEM; if (!ofs->creator_cred) diff --git a/fs/pidfs.c b/fs/pidfs.c index 69919be1c9d8..4625e097e3a0 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -319,7 +319,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) if (!c) return -ESRCH; - if (!(kinfo.mask & PIDFD_INFO_COREDUMP)) { + if ((kinfo.mask & PIDFD_INFO_COREDUMP) && !(kinfo.coredump_mask)) { task_lock(task); if (task->mm) kinfo.coredump_mask = pidfs_coredump_mask(task->mm->flags); diff --git a/fs/proc/base.c b/fs/proc/base.c index c667702dc69b..e93149a01341 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2704,8 +2704,7 @@ static struct dentry *proc_pident_instantiate(struct dentry *dentry, inode->i_fop = p->fop; ei->op = p->op; pid_update_inode(task, inode); - d_set_d_op(dentry, &pid_dentry_operations); - return d_splice_alias(inode, dentry); + return d_splice_alias_ops(inode, dentry, &pid_dentry_operations); } static struct dentry *proc_pident_lookup(struct inode *dir, @@ -3501,8 +3500,7 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry, set_nlink(inode, nlink_tgid); pid_update_inode(task, inode); - d_set_d_op(dentry, &pid_dentry_operations); - return d_splice_alias(inode, dentry); + return d_splice_alias_ops(inode, dentry, &pid_dentry_operations); } struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags) @@ -3804,8 +3802,7 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry, set_nlink(inode, nlink_tid); pid_update_inode(task, inode); - d_set_d_op(dentry, &pid_dentry_operations); - return d_splice_alias(inode, dentry); + return d_splice_alias_ops(inode, dentry, &pid_dentry_operations); } static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index a3e22803cddf..5635453cd476 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -254,8 +254,11 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, inode = proc_get_inode(dir->i_sb, de); if (!inode) return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, de->proc_dops); - return d_splice_alias(inode, dentry); + if (de->flags & PROC_ENTRY_FORCE_LOOKUP) + return d_splice_alias_ops(inode, dentry, + &proc_net_dentry_ops); + return d_splice_alias_ops(inode, dentry, + &proc_misc_dentry_ops); } read_unlock(&proc_subdir_lock); return ERR_PTR(-ENOENT); @@ -448,9 +451,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, INIT_LIST_HEAD(&ent->pde_openers); proc_set_user(ent, (*parent)->uid, (*parent)->gid); - ent->proc_dops = &proc_misc_dentry_ops; /* Revalidate everything under /proc/${pid}/net */ - if ((*parent)->proc_dops == &proc_net_dentry_ops) + if ((*parent)->flags & PROC_ENTRY_FORCE_LOOKUP) pde_force_lookup(ent); out: diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 96122e91c645..520c4742101d 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -44,7 +44,6 @@ struct proc_dir_entry { const struct proc_ops *proc_ops; const struct file_operations *proc_dir_ops; }; - const struct dentry_operations *proc_dops; union { const struct seq_operations *seq_ops; int (*single_show)(struct seq_file *, void *); @@ -403,7 +402,7 @@ extern const struct dentry_operations proc_net_dentry_ops; static inline void pde_force_lookup(struct proc_dir_entry *pde) { /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */ - pde->proc_dops = &proc_net_dentry_ops; + pde->flags |= PROC_ENTRY_FORCE_LOOKUP; } /* @@ -414,7 +413,6 @@ static inline void pde_force_lookup(struct proc_dir_entry *pde) static inline struct dentry *proc_splice_unmountable(struct inode *inode, struct dentry *dentry, const struct dentry_operations *d_ops) { - d_set_d_op(dentry, d_ops); dont_mount(dentry); - return d_splice_alias(inode, dentry); + return d_splice_alias_ops(inode, dentry, d_ops); } diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index c610224faf10..4403a2e20c16 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -111,8 +111,7 @@ static struct dentry *proc_ns_instantiate(struct dentry *dentry, ei->ns_ops = ns_ops; pid_update_inode(task, inode); - d_set_d_op(dentry, &pid_dentry_operations); - return d_splice_alias(inode, dentry); + return d_splice_alias_ops(inode, dentry, &pid_dentry_operations); } static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 08b78150cdde..49ab74e0bfde 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -540,9 +540,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, goto out; } - d_set_d_op(dentry, &proc_sys_dentry_operations); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); - err = d_splice_alias(inode, dentry); + err = d_splice_alias_ops(inode, dentry, &proc_sys_dentry_operations); out: if (h) @@ -699,9 +698,9 @@ static bool proc_sys_fill_cache(struct file *file, return false; if (d_in_lookup(child)) { struct dentry *res; - d_set_d_op(child, &proc_sys_dentry_operations); inode = proc_sys_make_inode(dir->d_sb, head, table); - res = d_splice_alias(inode, child); + res = d_splice_alias_ops(inode, child, + &proc_sys_dentry_operations); d_lookup_done(child); if (unlikely(res)) { dput(child); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4be91eb6ea5c..751479eb128f 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -36,9 +36,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) unsigned long text, lib, swap, anon, file, shmem; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; - anon = get_mm_counter(mm, MM_ANONPAGES); - file = get_mm_counter(mm, MM_FILEPAGES); - shmem = get_mm_counter(mm, MM_SHMEMPAGES); + anon = get_mm_counter_sum(mm, MM_ANONPAGES); + file = get_mm_counter_sum(mm, MM_FILEPAGES); + shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES); /* * Note: to minimize their overhead, mm maintains hiwater_vm and @@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) text = min(text, mm->exec_vm << PAGE_SHIFT); lib = (mm->exec_vm << PAGE_SHIFT) - text; - swap = get_mm_counter(mm, MM_SWAPENTS); + swap = get_mm_counter_sum(mm, MM_SWAPENTS); SEQ_PUT_DEC("VmPeak:\t", hiwater_vm); SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm); SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm); @@ -92,12 +92,12 @@ unsigned long task_statm(struct mm_struct *mm, unsigned long *shared, unsigned long *text, unsigned long *data, unsigned long *resident) { - *shared = get_mm_counter(mm, MM_FILEPAGES) + - get_mm_counter(mm, MM_SHMEMPAGES); + *shared = get_mm_counter_sum(mm, MM_FILEPAGES) + + get_mm_counter_sum(mm, MM_SHMEMPAGES); *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT; *data = mm->data_vm + mm->stack_vm; - *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); + *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES); return mm->total_vm; } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index bb3b769edc71..1a2e1185426c 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -300,7 +300,7 @@ static struct dentry *psinfo_lock_root(void) return NULL; root = pstore_sb->s_root; - inode_lock(d_inode(root)); + inode_lock_nested(d_inode(root), I_MUTEX_PARENT); return root; } @@ -318,8 +318,7 @@ int pstore_put_backend_records(struct pstore_info *psi) list_for_each_entry_safe(pos, tmp, &records_list, list) { if (pos->record->psi == psi) { list_del_init(&pos->list); - d_invalidate(pos->dentry); - simple_unlink(d_inode(root), pos->dentry); + locked_recursive_removal(pos->dentry, NULL); pos->dentry = NULL; } } diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 775fa905fda0..f8874c3b8c1e 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -269,6 +269,7 @@ static int ramfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = RAMFS_MAGIC; sb->s_op = &ramfs_ops; + sb->s_d_flags = DCACHE_DONTCACHE; sb->s_time_gran = 1; inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 0a5266ecfd15..d4ec73359922 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -260,9 +260,9 @@ cifs_read_super(struct super_block *sb) } if (tcon->nocase) - sb->s_d_op = &cifs_ci_dentry_ops; + set_default_d_op(sb, &cifs_ci_dentry_ops); else - sb->s_d_op = &cifs_dentry_ops; + set_default_d_op(sb, &cifs_dentry_ops); sb->s_root = d_make_root(inode); if (!sb->s_root) { diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index 1c6e5389c51f..5223edf6d11a 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -190,6 +190,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int disposition; struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + struct cached_fid *parent_cfid = NULL; int rdwr_for_fscache = 0; __le32 lease_flags = 0; @@ -313,10 +314,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned if (!tcon->unix_ext && (mode & S_IWUGO) == 0) create_options |= CREATE_OPTION_READONLY; + retry_open: if (tcon->cfids && direntry->d_parent && server->dialect >= SMB30_PROT_ID) { - struct cached_fid *parent_cfid; - + parent_cfid = NULL; spin_lock(&tcon->cfids->cfid_list_lock); list_for_each_entry(parent_cfid, &tcon->cfids->entries, entry) { if (parent_cfid->dentry == direntry->d_parent) { @@ -327,6 +328,7 @@ retry_open: memcpy(fid->parent_lease_key, parent_cfid->fid.lease_key, SMB2_LEASE_KEY_SIZE); + parent_cfid->dirents.is_valid = false; } break; } diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index e9212da32f01..1421bde045c2 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -3088,7 +3088,8 @@ void cifs_oplock_break(struct work_struct *work) struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, oplock_break); struct inode *inode = d_inode(cfile->dentry); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct super_block *sb = inode->i_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifs_tcon *tcon; struct TCP_Server_Info *server; @@ -3098,6 +3099,12 @@ void cifs_oplock_break(struct work_struct *work) __u64 persistent_fid, volatile_fid; __u16 net_fid; + /* + * Hold a reference to the superblock to prevent it and its inodes from + * being freed while we are accessing cinode. Otherwise, _cifsFileInfo_put() + * may release the last reference to the sb and trigger inode eviction. + */ + cifs_sb_active(sb); wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, TASK_UNINTERRUPTIBLE); @@ -3170,6 +3177,7 @@ oplock_break_ack: cifs_put_tlink(tlink); out: cifs_done_oplock_break(cinode); + cifs_sb_deactive(sb); } static int cifs_swap_activate(struct swap_info_struct *sis, diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 2a3e46b8e15a..a11a2a693c51 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1346,7 +1346,8 @@ struct inode *smb2_get_reparse_inode(struct cifs_open_info_data *data, * empty object on the server. */ if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS)) - return ERR_PTR(-EOPNOTSUPP); + if (!tcon->posix_extensions) + return ERR_PTR(-EOPNOTSUPP); oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, SYNCHRONIZE | DELETE | diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 1468c16ea9b8..938a8a7c5d21 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4316,6 +4316,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, u8 key[SMB3_ENC_DEC_KEY_SIZE]; struct aead_request *req; u8 *iv; + DECLARE_CRYPTO_WAIT(wait); unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize); void *creq; size_t sensitive_size; @@ -4366,7 +4367,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, aead_request_set_crypt(req, sg, sg, crypt_len, iv); aead_request_set_ad(req, assoc_data_len); - rc = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req); + aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + + rc = crypto_wait_req(enc ? crypto_aead_encrypt(req) + : crypto_aead_decrypt(req), &wait); if (!rc && enc) memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE); @@ -5255,7 +5260,8 @@ static int smb2_make_node(unsigned int xid, struct inode *inode, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { rc = cifs_sfu_make_node(xid, inode, dentry, tcon, full_path, mode, dev); - } else if (le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) { + } else if ((le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) + || (tcon->posix_extensions)) { rc = smb2_mknod_reparse(xid, inode, dentry, tcon, full_path, mode, dev); } diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index fafa86273f12..63d17cea2e95 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -8573,11 +8573,6 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) goto err_out; } - opinfo->op_state = OPLOCK_STATE_NONE; - wake_up_interruptible_all(&opinfo->oplock_q); - opinfo_put(opinfo); - ksmbd_fd_put(work, fp); - rsp->StructureSize = cpu_to_le16(24); rsp->OplockLevel = rsp_oplevel; rsp->Reserved = 0; @@ -8585,16 +8580,15 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) rsp->VolatileFid = volatile_id; rsp->PersistentFid = persistent_id; ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_oplock_break)); - if (!ret) - return; - + if (ret) { err_out: + smb2_set_err_rsp(work); + } + opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); - opinfo_put(opinfo); ksmbd_fd_put(work, fp); - smb2_set_err_rsp(work); } static int check_lease_state(struct lease *lease, __le32 req_state) @@ -8724,11 +8718,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) } lease_state = lease->state; - opinfo->op_state = OPLOCK_STATE_NONE; - wake_up_interruptible_all(&opinfo->oplock_q); - atomic_dec(&opinfo->breaking_cnt); - wake_up_interruptible_all(&opinfo->oplock_brk); - opinfo_put(opinfo); rsp->StructureSize = cpu_to_le16(36); rsp->Reserved = 0; @@ -8737,16 +8726,16 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) rsp->LeaseState = lease_state; rsp->LeaseDuration = 0; ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_lease_ack)); - if (!ret) - return; - + if (ret) { err_out: + smb2_set_err_rsp(work); + } + + opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); atomic_dec(&opinfo->breaking_cnt); wake_up_interruptible_all(&opinfo->oplock_brk); - opinfo_put(opinfo); - smb2_set_err_rsp(work); } /** diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 64a428a06ace..c6cbe0d56e32 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -433,7 +433,8 @@ static void free_transport(struct smb_direct_transport *t) if (t->qp) { ib_drain_qp(t->qp); ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs); - ib_destroy_qp(t->qp); + t->qp = NULL; + rdma_destroy_qp(t->cm_id); } ksmbd_debug(RDMA, "drain the reassembly queue\n"); @@ -1940,8 +1941,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, return 0; err: if (t->qp) { - ib_destroy_qp(t->qp); t->qp = NULL; + rdma_destroy_qp(t->cm_id); } if (t->recv_cq) { ib_destroy_cq(t->recv_cq); diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 0f3aad12e495..d3437f6644e3 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1282,6 +1282,7 @@ out1: err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry); if (err) { + mnt_drop_write(parent_path->mnt); path_put(path); path_put(parent_path); } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index a3fd3cc591bd..0c023941a316 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -465,9 +465,20 @@ static int tracefs_d_revalidate(struct inode *inode, const struct qstr *name, return !(ei && ei->is_freed); } +static int tracefs_d_delete(const struct dentry *dentry) +{ + /* + * We want to keep eventfs dentries around but not tracefs + * ones. eventfs dentries have content in d_fsdata. + * Use d_fsdata to determine if it's a eventfs dentry or not. + */ + return dentry->d_fsdata == NULL; +} + static const struct dentry_operations tracefs_dentry_operations = { .d_revalidate = tracefs_d_revalidate, .d_release = tracefs_d_release, + .d_delete = tracefs_d_delete, }; static int tracefs_fill_super(struct super_block *sb, struct fs_context *fc) @@ -480,7 +491,7 @@ static int tracefs_fill_super(struct super_block *sb, struct fs_context *fc) return err; sb->s_op = &tracefs_super_operations; - sb->s_d_op = &tracefs_dentry_operations; + set_default_d_op(sb, &tracefs_dentry_operations); return 0; } @@ -551,20 +562,9 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent) if (!parent) parent = tracefs_mount->mnt_root; - inode_lock(d_inode(parent)); - if (unlikely(IS_DEADDIR(d_inode(parent)))) - dentry = ERR_PTR(-ENOENT); - else - dentry = lookup_noperm(&QSTR(name), parent); - if (!IS_ERR(dentry) && d_inode(dentry)) { - dput(dentry); - dentry = ERR_PTR(-EEXIST); - } - - if (IS_ERR(dentry)) { - inode_unlock(d_inode(parent)); + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) simple_release_fs(&tracefs_mount, &tracefs_mount_count); - } return dentry; } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index eea718ac66b4..6e4585169f94 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -397,7 +397,7 @@ static int ufs_parse_param(struct fs_context *fc, struct fs_parameter *param) pr_err("ufstype can't be changed during remount\n"); return -EINVAL; } - if (!ctx->flavour) { + if (ctx->flavour) { pr_err("conflicting ufstype options\n"); return -EINVAL; } diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 0bc96ab6580b..241647b060ee 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -189,7 +189,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_blocksize = 1024; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_op = &vboxsf_super_ops; - sb->s_d_op = &vboxsf_dentry_ops; + set_default_d_op(sb, &vboxsf_dentry_ops); iroot = iget_locked(sb, 0); if (!iroot) { diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index a4ac37ba5d51..fa1f03c1331e 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -186,35 +186,32 @@ xfs_allocbt_init_ptr_from_cur( ptr->s = agf->agf_cnt_root; } -STATIC int64_t -xfs_bnobt_key_diff( +STATIC int +xfs_bnobt_cmp_key_with_cur( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { struct xfs_alloc_rec_incore *rec = &cur->bc_rec.a; const struct xfs_alloc_rec *kp = &key->alloc; - return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; + return cmp_int(be32_to_cpu(kp->ar_startblock), + rec->ar_startblock); } -STATIC int64_t -xfs_cntbt_key_diff( +STATIC int +xfs_cntbt_cmp_key_with_cur( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { struct xfs_alloc_rec_incore *rec = &cur->bc_rec.a; const struct xfs_alloc_rec *kp = &key->alloc; - int64_t diff; - diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount; - if (diff) - return diff; - - return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; + return cmp_int(be32_to_cpu(kp->ar_blockcount), rec->ar_blockcount) ?: + cmp_int(be32_to_cpu(kp->ar_startblock), rec->ar_startblock); } -STATIC int64_t -xfs_bnobt_diff_two_keys( +STATIC int +xfs_bnobt_cmp_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2, @@ -222,29 +219,24 @@ xfs_bnobt_diff_two_keys( { ASSERT(!mask || mask->alloc.ar_startblock); - return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) - - be32_to_cpu(k2->alloc.ar_startblock); + return cmp_int(be32_to_cpu(k1->alloc.ar_startblock), + be32_to_cpu(k2->alloc.ar_startblock)); } -STATIC int64_t -xfs_cntbt_diff_two_keys( +STATIC int +xfs_cntbt_cmp_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2, const union xfs_btree_key *mask) { - int64_t diff; - ASSERT(!mask || (mask->alloc.ar_blockcount && mask->alloc.ar_startblock)); - diff = be32_to_cpu(k1->alloc.ar_blockcount) - - be32_to_cpu(k2->alloc.ar_blockcount); - if (diff) - return diff; - - return be32_to_cpu(k1->alloc.ar_startblock) - - be32_to_cpu(k2->alloc.ar_startblock); + return cmp_int(be32_to_cpu(k1->alloc.ar_blockcount), + be32_to_cpu(k2->alloc.ar_blockcount)) ?: + cmp_int(be32_to_cpu(k1->alloc.ar_startblock), + be32_to_cpu(k2->alloc.ar_startblock)); } static xfs_failaddr_t @@ -438,9 +430,9 @@ const struct xfs_btree_ops xfs_bnobt_ops = { .init_high_key_from_rec = xfs_bnobt_init_high_key_from_rec, .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, - .key_diff = xfs_bnobt_key_diff, + .cmp_key_with_cur = xfs_bnobt_cmp_key_with_cur, .buf_ops = &xfs_bnobt_buf_ops, - .diff_two_keys = xfs_bnobt_diff_two_keys, + .cmp_two_keys = xfs_bnobt_cmp_two_keys, .keys_inorder = xfs_bnobt_keys_inorder, .recs_inorder = xfs_bnobt_recs_inorder, .keys_contiguous = xfs_allocbt_keys_contiguous, @@ -468,9 +460,9 @@ const struct xfs_btree_ops xfs_cntbt_ops = { .init_high_key_from_rec = xfs_cntbt_init_high_key_from_rec, .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, - .key_diff = xfs_cntbt_key_diff, + .cmp_key_with_cur = xfs_cntbt_cmp_key_with_cur, .buf_ops = &xfs_cntbt_buf_ops, - .diff_two_keys = xfs_cntbt_diff_two_keys, + .cmp_two_keys = xfs_cntbt_cmp_two_keys, .keys_inorder = xfs_cntbt_keys_inorder, .recs_inorder = xfs_cntbt_recs_inorder, .keys_contiguous = NULL, /* not needed right now */ diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 908d7b050e9c..188feac04b60 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -369,38 +369,26 @@ xfs_bmbt_init_rec_from_cur( xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b); } -STATIC int64_t -xfs_bmbt_key_diff( +STATIC int +xfs_bmbt_cmp_key_with_cur( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { - return (int64_t)be64_to_cpu(key->bmbt.br_startoff) - - cur->bc_rec.b.br_startoff; + return cmp_int(be64_to_cpu(key->bmbt.br_startoff), + cur->bc_rec.b.br_startoff); } -STATIC int64_t -xfs_bmbt_diff_two_keys( +STATIC int +xfs_bmbt_cmp_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2, const union xfs_btree_key *mask) { - uint64_t a = be64_to_cpu(k1->bmbt.br_startoff); - uint64_t b = be64_to_cpu(k2->bmbt.br_startoff); - ASSERT(!mask || mask->bmbt.br_startoff); - /* - * Note: This routine previously casted a and b to int64 and subtracted - * them to generate a result. This lead to problems if b was the - * "maximum" key value (all ones) being signed incorrectly, hence this - * somewhat less efficient version. - */ - if (a > b) - return 1; - if (b > a) - return -1; - return 0; + return cmp_int(be64_to_cpu(k1->bmbt.br_startoff), + be64_to_cpu(k2->bmbt.br_startoff)); } static xfs_failaddr_t @@ -647,8 +635,8 @@ const struct xfs_btree_ops xfs_bmbt_ops = { .init_key_from_rec = xfs_bmbt_init_key_from_rec, .init_high_key_from_rec = xfs_bmbt_init_high_key_from_rec, .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, - .key_diff = xfs_bmbt_key_diff, - .diff_two_keys = xfs_bmbt_diff_two_keys, + .cmp_key_with_cur = xfs_bmbt_cmp_key_with_cur, + .cmp_two_keys = xfs_bmbt_cmp_two_keys, .buf_ops = &xfs_bmbt_buf_ops, .keys_inorder = xfs_bmbt_keys_inorder, .recs_inorder = xfs_bmbt_recs_inorder, diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 299ce7fd11b0..a61211d253f1 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -1985,7 +1985,7 @@ xfs_btree_lookup( int *stat) /* success/failure */ { struct xfs_btree_block *block; /* current btree block */ - int64_t diff; /* difference for the current key */ + int cmp_r; /* current key comparison result */ int error; /* error return value */ int keyno; /* current key number */ int level; /* level in the btree */ @@ -2013,13 +2013,13 @@ xfs_btree_lookup( * on the lookup record, then follow the corresponding block * pointer down to the next level. */ - for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { + for (level = cur->bc_nlevels - 1, cmp_r = 1; level >= 0; level--) { /* Get the block we need to do the lookup on. */ error = xfs_btree_lookup_get_block(cur, level, pp, &block); if (error) goto error0; - if (diff == 0) { + if (cmp_r == 0) { /* * If we already had a key match at a higher level, we * know we need to use the first entry in this block. @@ -2065,15 +2065,16 @@ xfs_btree_lookup( keyno, block, &key); /* - * Compute difference to get next direction: + * Compute comparison result to get next + * direction: * - less than, move right * - greater than, move left * - equal, we're done */ - diff = cur->bc_ops->key_diff(cur, kp); - if (diff < 0) + cmp_r = cur->bc_ops->cmp_key_with_cur(cur, kp); + if (cmp_r < 0) low = keyno + 1; - else if (diff > 0) + else if (cmp_r > 0) high = keyno - 1; else break; @@ -2089,7 +2090,7 @@ xfs_btree_lookup( * If we moved left, need the previous key number, * unless there isn't one. */ - if (diff > 0 && --keyno < 1) + if (cmp_r > 0 && --keyno < 1) keyno = 1; pp = xfs_btree_ptr_addr(cur, keyno, block); @@ -2102,7 +2103,7 @@ xfs_btree_lookup( } /* Done with the search. See if we need to adjust the results. */ - if (dir != XFS_LOOKUP_LE && diff < 0) { + if (dir != XFS_LOOKUP_LE && cmp_r < 0) { keyno++; /* * If ge search and we went off the end of the block, but it's @@ -2125,14 +2126,14 @@ xfs_btree_lookup( *stat = 1; return 0; } - } else if (dir == XFS_LOOKUP_LE && diff > 0) + } else if (dir == XFS_LOOKUP_LE && cmp_r > 0) keyno--; cur->bc_levels[0].ptr = keyno; /* Return if we succeeded or not. */ if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) *stat = 0; - else if (dir != XFS_LOOKUP_EQ || diff == 0) + else if (dir != XFS_LOOKUP_EQ || cmp_r == 0) *stat = 1; else *stat = 0; @@ -5058,7 +5059,7 @@ xfs_btree_simple_query_range( int error; ASSERT(cur->bc_ops->init_high_key_from_rec); - ASSERT(cur->bc_ops->diff_two_keys); + ASSERT(cur->bc_ops->cmp_two_keys); /* * Find the leftmost record. The btree cursor must be set @@ -5352,15 +5353,15 @@ xfs_btree_count_blocks( } /* Compare two btree pointers. */ -int64_t -xfs_btree_diff_two_ptrs( +int +xfs_btree_cmp_two_ptrs( struct xfs_btree_cur *cur, const union xfs_btree_ptr *a, const union xfs_btree_ptr *b) { if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) - return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); - return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); + return cmp_int(be64_to_cpu(a->l), be64_to_cpu(b->l)); + return cmp_int(be32_to_cpu(a->s), be32_to_cpu(b->s)); } struct xfs_btree_has_records { diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 355b304696e6..60e78572e725 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -171,20 +171,23 @@ struct xfs_btree_ops { void (*init_high_key_from_rec)(union xfs_btree_key *key, const union xfs_btree_rec *rec); - /* difference between key value and cursor value */ - int64_t (*key_diff)(struct xfs_btree_cur *cur, - const union xfs_btree_key *key); + /* + * Compare key value and cursor value -- positive if key > cur, + * negative if key < cur, and zero if equal. + */ + int (*cmp_key_with_cur)(struct xfs_btree_cur *cur, + const union xfs_btree_key *key); /* - * Difference between key2 and key1 -- positive if key1 > key2, - * negative if key1 < key2, and zero if equal. If the @mask parameter - * is non NULL, each key field to be used in the comparison must - * contain a nonzero value. + * Compare key1 and key2 -- positive if key1 > key2, negative if + * key1 < key2, and zero if equal. If the @mask parameter is non NULL, + * each key field to be used in the comparison must contain a nonzero + * value. */ - int64_t (*diff_two_keys)(struct xfs_btree_cur *cur, - const union xfs_btree_key *key1, - const union xfs_btree_key *key2, - const union xfs_btree_key *mask); + int (*cmp_two_keys)(struct xfs_btree_cur *cur, + const union xfs_btree_key *key1, + const union xfs_btree_key *key2, + const union xfs_btree_key *mask); const struct xfs_buf_ops *buf_ops; @@ -516,9 +519,9 @@ struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur, int level, struct xfs_buf **bpp); bool xfs_btree_ptr_is_null(struct xfs_btree_cur *cur, const union xfs_btree_ptr *ptr); -int64_t xfs_btree_diff_two_ptrs(struct xfs_btree_cur *cur, - const union xfs_btree_ptr *a, - const union xfs_btree_ptr *b); +int xfs_btree_cmp_two_ptrs(struct xfs_btree_cur *cur, + const union xfs_btree_ptr *a, + const union xfs_btree_ptr *b); void xfs_btree_get_sibling(struct xfs_btree_cur *cur, struct xfs_btree_block *block, union xfs_btree_ptr *ptr, int lr); @@ -546,7 +549,7 @@ xfs_btree_keycmp_lt( const union xfs_btree_key *key1, const union xfs_btree_key *key2) { - return cur->bc_ops->diff_two_keys(cur, key1, key2, NULL) < 0; + return cur->bc_ops->cmp_two_keys(cur, key1, key2, NULL) < 0; } static inline bool @@ -555,7 +558,7 @@ xfs_btree_keycmp_gt( const union xfs_btree_key *key1, const union xfs_btree_key *key2) { - return cur->bc_ops->diff_two_keys(cur, key1, key2, NULL) > 0; + return cur->bc_ops->cmp_two_keys(cur, key1, key2, NULL) > 0; } static inline bool @@ -564,7 +567,7 @@ xfs_btree_keycmp_eq( const union xfs_btree_key *key1, const union xfs_btree_key *key2) { - return cur->bc_ops->diff_two_keys(cur, key1, key2, NULL) == 0; + return cur->bc_ops->cmp_two_keys(cur, key1, key2, NULL) == 0; } static inline bool @@ -602,7 +605,7 @@ xfs_btree_masked_keycmp_lt( const union xfs_btree_key *key2, const union xfs_btree_key *mask) { - return cur->bc_ops->diff_two_keys(cur, key1, key2, mask) < 0; + return cur->bc_ops->cmp_two_keys(cur, key1, key2, mask) < 0; } static inline bool @@ -612,7 +615,7 @@ xfs_btree_masked_keycmp_gt( const union xfs_btree_key *key2, const union xfs_btree_key *mask) { - return cur->bc_ops->diff_two_keys(cur, key1, key2, mask) > 0; + return cur->bc_ops->cmp_two_keys(cur, key1, key2, mask) > 0; } static inline bool diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 9566a7623365..779dac59b1f3 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -112,7 +112,7 @@ typedef struct xfs_sb { uint16_t sb_sectsize; /* volume sector size, bytes */ uint16_t sb_inodesize; /* inode size, bytes */ uint16_t sb_inopblock; /* inodes per block */ - char sb_fname[XFSLABEL_MAX]; /* file system name */ + char sb_fname[XFSLABEL_MAX] __nonstring; /* file system name */ uint8_t sb_blocklog; /* log2 of sb_blocksize */ uint8_t sb_sectlog; /* log2 of sb_sectsize */ uint8_t sb_inodelog; /* log2 of sb_inodesize */ diff --git a/fs/xfs/libxfs/xfs_group.c b/fs/xfs/libxfs/xfs_group.c index e9d76bcdc820..792f76d2e2a0 100644 --- a/fs/xfs/libxfs/xfs_group.c +++ b/fs/xfs/libxfs/xfs_group.c @@ -163,7 +163,8 @@ xfs_group_free( xfs_defer_drain_free(&xg->xg_intents_drain); #ifdef __KERNEL__ - kfree(xg->xg_busy_extents); + if (xfs_group_has_extent_busy(xg->xg_mount, xg->xg_type)) + kfree(xg->xg_busy_extents); #endif if (uninit) @@ -171,7 +172,8 @@ xfs_group_free( /* drop the mount's active reference */ xfs_group_rele(xg); - XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_active_ref) != 0); + XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_active_ref) > 0); + XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_active_ref) < 0); kfree_rcu_mightsleep(xg); } @@ -189,9 +191,11 @@ xfs_group_insert( xg->xg_type = type; #ifdef __KERNEL__ - xg->xg_busy_extents = xfs_extent_busy_alloc(); - if (!xg->xg_busy_extents) - return -ENOMEM; + if (xfs_group_has_extent_busy(mp, type)) { + xg->xg_busy_extents = xfs_extent_busy_alloc(); + if (!xg->xg_busy_extents) + return -ENOMEM; + } spin_lock_init(&xg->xg_state_lock); xfs_hooks_init(&xg->xg_rmap_update_hooks); #endif @@ -210,7 +214,8 @@ xfs_group_insert( out_drain: xfs_defer_drain_free(&xg->xg_intents_drain); #ifdef __KERNEL__ - kfree(xg->xg_busy_extents); + if (xfs_group_has_extent_busy(xg->xg_mount, xg->xg_type)) + kfree(xg->xg_busy_extents); #endif return error; } diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 6f270d8f4270..100afdd66cdd 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -265,17 +265,17 @@ xfs_finobt_init_ptr_from_cur( ptr->s = agi->agi_free_root; } -STATIC int64_t -xfs_inobt_key_diff( +STATIC int +xfs_inobt_cmp_key_with_cur( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { - return (int64_t)be32_to_cpu(key->inobt.ir_startino) - - cur->bc_rec.i.ir_startino; + return cmp_int(be32_to_cpu(key->inobt.ir_startino), + cur->bc_rec.i.ir_startino); } -STATIC int64_t -xfs_inobt_diff_two_keys( +STATIC int +xfs_inobt_cmp_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2, @@ -283,8 +283,8 @@ xfs_inobt_diff_two_keys( { ASSERT(!mask || mask->inobt.ir_startino); - return (int64_t)be32_to_cpu(k1->inobt.ir_startino) - - be32_to_cpu(k2->inobt.ir_startino); + return cmp_int(be32_to_cpu(k1->inobt.ir_startino), + be32_to_cpu(k2->inobt.ir_startino)); } static xfs_failaddr_t @@ -430,9 +430,9 @@ const struct xfs_btree_ops xfs_inobt_ops = { .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec, .init_rec_from_cur = xfs_inobt_init_rec_from_cur, .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, - .key_diff = xfs_inobt_key_diff, + .cmp_key_with_cur = xfs_inobt_cmp_key_with_cur, .buf_ops = &xfs_inobt_buf_ops, - .diff_two_keys = xfs_inobt_diff_two_keys, + .cmp_two_keys = xfs_inobt_cmp_two_keys, .keys_inorder = xfs_inobt_keys_inorder, .recs_inorder = xfs_inobt_recs_inorder, .keys_contiguous = xfs_inobt_keys_contiguous, @@ -460,9 +460,9 @@ const struct xfs_btree_ops xfs_finobt_ops = { .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec, .init_rec_from_cur = xfs_inobt_init_rec_from_cur, .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, - .key_diff = xfs_inobt_key_diff, + .cmp_key_with_cur = xfs_inobt_cmp_key_with_cur, .buf_ops = &xfs_finobt_buf_ops, - .diff_two_keys = xfs_inobt_diff_two_keys, + .cmp_two_keys = xfs_inobt_cmp_two_keys, .keys_inorder = xfs_inobt_keys_inorder, .recs_inorder = xfs_inobt_recs_inorder, .keys_contiguous = xfs_inobt_keys_contiguous, diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 66c7916fb5cd..95de23095030 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -104,7 +104,7 @@ struct xlog_recover_item { struct list_head ri_list; int ri_cnt; /* count of regions found */ int ri_total; /* total regions */ - struct xfs_log_iovec *ri_buf; /* ptr to regions buffer */ + struct kvec *ri_buf; /* ptr to regions buffer */ const struct xlog_recover_item_ops *ri_ops; }; @@ -117,7 +117,7 @@ struct xlog_recover { struct list_head r_itemq; /* q for items */ }; -#define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr) +#define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].iov_base) #define XLOG_RECOVER_CRCPASS 0 #define XLOG_RECOVER_PASS1 1 diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index cebe83f7842a..897784037483 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -2099,9 +2099,7 @@ xfs_refcount_recover_cow_leftovers( * recording the CoW debris we cancel the (empty) transaction * and everything goes away cleanly. */ - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - return error; + tp = xfs_trans_alloc_empty(mp); if (isrt) { xfs_rtgroup_lock(to_rtg(xg), XFS_RTGLOCK_REFCOUNT); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 54505fee1852..06da3ca14727 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -174,8 +174,8 @@ xfs_refcountbt_init_ptr_from_cur( ptr->s = agf->agf_refcount_root; } -STATIC int64_t -xfs_refcountbt_key_diff( +STATIC int +xfs_refcountbt_cmp_key_with_cur( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { @@ -185,11 +185,11 @@ xfs_refcountbt_key_diff( start = xfs_refcount_encode_startblock(irec->rc_startblock, irec->rc_domain); - return (int64_t)be32_to_cpu(kp->rc_startblock) - start; + return cmp_int(be32_to_cpu(kp->rc_startblock), start); } -STATIC int64_t -xfs_refcountbt_diff_two_keys( +STATIC int +xfs_refcountbt_cmp_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2, @@ -197,8 +197,8 @@ xfs_refcountbt_diff_two_keys( { ASSERT(!mask || mask->refc.rc_startblock); - return (int64_t)be32_to_cpu(k1->refc.rc_startblock) - - be32_to_cpu(k2->refc.rc_startblock); + return cmp_int(be32_to_cpu(k1->refc.rc_startblock), + be32_to_cpu(k2->refc.rc_startblock)); } STATIC xfs_failaddr_t @@ -339,9 +339,9 @@ const struct xfs_btree_ops xfs_refcountbt_ops = { .init_high_key_from_rec = xfs_refcountbt_init_high_key_from_rec, .init_rec_from_cur = xfs_refcountbt_init_rec_from_cur, .init_ptr_from_cur = xfs_refcountbt_init_ptr_from_cur, - .key_diff = xfs_refcountbt_key_diff, + .cmp_key_with_cur = xfs_refcountbt_cmp_key_with_cur, .buf_ops = &xfs_refcountbt_buf_ops, - .diff_two_keys = xfs_refcountbt_diff_two_keys, + .cmp_two_keys = xfs_refcountbt_cmp_two_keys, .keys_inorder = xfs_refcountbt_keys_inorder, .recs_inorder = xfs_refcountbt_recs_inorder, .keys_contiguous = xfs_refcountbt_keys_contiguous, diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 2cab694ac58a..bf16aee50d73 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -243,38 +243,22 @@ static inline uint64_t offset_keymask(uint64_t offset) return offset & ~XFS_RMAP_OFF_UNWRITTEN; } -STATIC int64_t -xfs_rmapbt_key_diff( +STATIC int +xfs_rmapbt_cmp_key_with_cur( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { struct xfs_rmap_irec *rec = &cur->bc_rec.r; const struct xfs_rmap_key *kp = &key->rmap; - __u64 x, y; - int64_t d; - d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock; - if (d) - return d; - - x = be64_to_cpu(kp->rm_owner); - y = rec->rm_owner; - if (x > y) - return 1; - else if (y > x) - return -1; - - x = offset_keymask(be64_to_cpu(kp->rm_offset)); - y = offset_keymask(xfs_rmap_irec_offset_pack(rec)); - if (x > y) - return 1; - else if (y > x) - return -1; - return 0; + return cmp_int(be32_to_cpu(kp->rm_startblock), rec->rm_startblock) ?: + cmp_int(be64_to_cpu(kp->rm_owner), rec->rm_owner) ?: + cmp_int(offset_keymask(be64_to_cpu(kp->rm_offset)), + offset_keymask(xfs_rmap_irec_offset_pack(rec))); } -STATIC int64_t -xfs_rmapbt_diff_two_keys( +STATIC int +xfs_rmapbt_cmp_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2, @@ -282,36 +266,31 @@ xfs_rmapbt_diff_two_keys( { const struct xfs_rmap_key *kp1 = &k1->rmap; const struct xfs_rmap_key *kp2 = &k2->rmap; - int64_t d; - __u64 x, y; + int d; /* Doesn't make sense to mask off the physical space part */ ASSERT(!mask || mask->rmap.rm_startblock); - d = (int64_t)be32_to_cpu(kp1->rm_startblock) - - be32_to_cpu(kp2->rm_startblock); + d = cmp_int(be32_to_cpu(kp1->rm_startblock), + be32_to_cpu(kp2->rm_startblock)); if (d) return d; if (!mask || mask->rmap.rm_owner) { - x = be64_to_cpu(kp1->rm_owner); - y = be64_to_cpu(kp2->rm_owner); - if (x > y) - return 1; - else if (y > x) - return -1; + d = cmp_int(be64_to_cpu(kp1->rm_owner), + be64_to_cpu(kp2->rm_owner)); + if (d) + return d; } if (!mask || mask->rmap.rm_offset) { /* Doesn't make sense to allow offset but not owner */ ASSERT(!mask || mask->rmap.rm_owner); - x = offset_keymask(be64_to_cpu(kp1->rm_offset)); - y = offset_keymask(be64_to_cpu(kp2->rm_offset)); - if (x > y) - return 1; - else if (y > x) - return -1; + d = cmp_int(offset_keymask(be64_to_cpu(kp1->rm_offset)), + offset_keymask(be64_to_cpu(kp2->rm_offset))); + if (d) + return d; } return 0; @@ -515,9 +494,9 @@ const struct xfs_btree_ops xfs_rmapbt_ops = { .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec, .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur, .init_ptr_from_cur = xfs_rmapbt_init_ptr_from_cur, - .key_diff = xfs_rmapbt_key_diff, + .cmp_key_with_cur = xfs_rmapbt_cmp_key_with_cur, .buf_ops = &xfs_rmapbt_buf_ops, - .diff_two_keys = xfs_rmapbt_diff_two_keys, + .cmp_two_keys = xfs_rmapbt_cmp_two_keys, .keys_inorder = xfs_rmapbt_keys_inorder, .recs_inorder = xfs_rmapbt_recs_inorder, .keys_contiguous = xfs_rmapbt_keys_contiguous, @@ -632,9 +611,9 @@ const struct xfs_btree_ops xfs_rmapbt_mem_ops = { .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec, .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur, .init_ptr_from_cur = xfbtree_init_ptr_from_cur, - .key_diff = xfs_rmapbt_key_diff, + .cmp_key_with_cur = xfs_rmapbt_cmp_key_with_cur, .buf_ops = &xfs_rmapbt_mem_buf_ops, - .diff_two_keys = xfs_rmapbt_diff_two_keys, + .cmp_two_keys = xfs_rmapbt_cmp_two_keys, .keys_inorder = xfs_rmapbt_keys_inorder, .recs_inorder = xfs_rmapbt_recs_inorder, .keys_contiguous = xfs_rmapbt_keys_contiguous, diff --git a/fs/xfs/libxfs/xfs_rtrefcount_btree.c b/fs/xfs/libxfs/xfs_rtrefcount_btree.c index 3db5e7a4a945..ac11e94b42ae 100644 --- a/fs/xfs/libxfs/xfs_rtrefcount_btree.c +++ b/fs/xfs/libxfs/xfs_rtrefcount_btree.c @@ -156,8 +156,8 @@ xfs_rtrefcountbt_init_ptr_from_cur( ptr->l = 0; } -STATIC int64_t -xfs_rtrefcountbt_key_diff( +STATIC int +xfs_rtrefcountbt_cmp_key_with_cur( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { @@ -167,11 +167,11 @@ xfs_rtrefcountbt_key_diff( start = xfs_refcount_encode_startblock(irec->rc_startblock, irec->rc_domain); - return (int64_t)be32_to_cpu(kp->rc_startblock) - start; + return cmp_int(be32_to_cpu(kp->rc_startblock), start); } -STATIC int64_t -xfs_rtrefcountbt_diff_two_keys( +STATIC int +xfs_rtrefcountbt_cmp_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2, @@ -179,8 +179,8 @@ xfs_rtrefcountbt_diff_two_keys( { ASSERT(!mask || mask->refc.rc_startblock); - return (int64_t)be32_to_cpu(k1->refc.rc_startblock) - - be32_to_cpu(k2->refc.rc_startblock); + return cmp_int(be32_to_cpu(k1->refc.rc_startblock), + be32_to_cpu(k2->refc.rc_startblock)); } static xfs_failaddr_t @@ -387,9 +387,9 @@ const struct xfs_btree_ops xfs_rtrefcountbt_ops = { .init_high_key_from_rec = xfs_rtrefcountbt_init_high_key_from_rec, .init_rec_from_cur = xfs_rtrefcountbt_init_rec_from_cur, .init_ptr_from_cur = xfs_rtrefcountbt_init_ptr_from_cur, - .key_diff = xfs_rtrefcountbt_key_diff, + .cmp_key_with_cur = xfs_rtrefcountbt_cmp_key_with_cur, .buf_ops = &xfs_rtrefcountbt_buf_ops, - .diff_two_keys = xfs_rtrefcountbt_diff_two_keys, + .cmp_two_keys = xfs_rtrefcountbt_cmp_two_keys, .keys_inorder = xfs_rtrefcountbt_keys_inorder, .recs_inorder = xfs_rtrefcountbt_recs_inorder, .keys_contiguous = xfs_rtrefcountbt_keys_contiguous, diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.c b/fs/xfs/libxfs/xfs_rtrmap_btree.c index 9bdc2cbfc113..55f903165769 100644 --- a/fs/xfs/libxfs/xfs_rtrmap_btree.c +++ b/fs/xfs/libxfs/xfs_rtrmap_btree.c @@ -185,38 +185,22 @@ static inline uint64_t offset_keymask(uint64_t offset) return offset & ~XFS_RMAP_OFF_UNWRITTEN; } -STATIC int64_t -xfs_rtrmapbt_key_diff( +STATIC int +xfs_rtrmapbt_cmp_key_with_cur( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { struct xfs_rmap_irec *rec = &cur->bc_rec.r; const struct xfs_rmap_key *kp = &key->rmap; - __u64 x, y; - int64_t d; - d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock; - if (d) - return d; - - x = be64_to_cpu(kp->rm_owner); - y = rec->rm_owner; - if (x > y) - return 1; - else if (y > x) - return -1; - - x = offset_keymask(be64_to_cpu(kp->rm_offset)); - y = offset_keymask(xfs_rmap_irec_offset_pack(rec)); - if (x > y) - return 1; - else if (y > x) - return -1; - return 0; + return cmp_int(be32_to_cpu(kp->rm_startblock), rec->rm_startblock) ?: + cmp_int(be64_to_cpu(kp->rm_owner), rec->rm_owner) ?: + cmp_int(offset_keymask(be64_to_cpu(kp->rm_offset)), + offset_keymask(xfs_rmap_irec_offset_pack(rec))); } -STATIC int64_t -xfs_rtrmapbt_diff_two_keys( +STATIC int +xfs_rtrmapbt_cmp_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2, @@ -224,36 +208,31 @@ xfs_rtrmapbt_diff_two_keys( { const struct xfs_rmap_key *kp1 = &k1->rmap; const struct xfs_rmap_key *kp2 = &k2->rmap; - int64_t d; - __u64 x, y; + int d; /* Doesn't make sense to mask off the physical space part */ ASSERT(!mask || mask->rmap.rm_startblock); - d = (int64_t)be32_to_cpu(kp1->rm_startblock) - - be32_to_cpu(kp2->rm_startblock); + d = cmp_int(be32_to_cpu(kp1->rm_startblock), + be32_to_cpu(kp2->rm_startblock)); if (d) return d; if (!mask || mask->rmap.rm_owner) { - x = be64_to_cpu(kp1->rm_owner); - y = be64_to_cpu(kp2->rm_owner); - if (x > y) - return 1; - else if (y > x) - return -1; + d = cmp_int(be64_to_cpu(kp1->rm_owner), + be64_to_cpu(kp2->rm_owner)); + if (d) + return d; } if (!mask || mask->rmap.rm_offset) { /* Doesn't make sense to allow offset but not owner */ ASSERT(!mask || mask->rmap.rm_owner); - x = offset_keymask(be64_to_cpu(kp1->rm_offset)); - y = offset_keymask(be64_to_cpu(kp2->rm_offset)); - if (x > y) - return 1; - else if (y > x) - return -1; + d = cmp_int(offset_keymask(be64_to_cpu(kp1->rm_offset)), + offset_keymask(be64_to_cpu(kp2->rm_offset))); + if (d) + return d; } return 0; @@ -511,9 +490,9 @@ const struct xfs_btree_ops xfs_rtrmapbt_ops = { .init_high_key_from_rec = xfs_rtrmapbt_init_high_key_from_rec, .init_rec_from_cur = xfs_rtrmapbt_init_rec_from_cur, .init_ptr_from_cur = xfs_rtrmapbt_init_ptr_from_cur, - .key_diff = xfs_rtrmapbt_key_diff, + .cmp_key_with_cur = xfs_rtrmapbt_cmp_key_with_cur, .buf_ops = &xfs_rtrmapbt_buf_ops, - .diff_two_keys = xfs_rtrmapbt_diff_two_keys, + .cmp_two_keys = xfs_rtrmapbt_cmp_two_keys, .keys_inorder = xfs_rtrmapbt_keys_inorder, .recs_inorder = xfs_rtrmapbt_recs_inorder, .keys_contiguous = xfs_rtrmapbt_keys_contiguous, @@ -620,9 +599,9 @@ const struct xfs_btree_ops xfs_rtrmapbt_mem_ops = { .init_high_key_from_rec = xfs_rtrmapbt_init_high_key_from_rec, .init_rec_from_cur = xfs_rtrmapbt_init_rec_from_cur, .init_ptr_from_cur = xfbtree_init_ptr_from_cur, - .key_diff = xfs_rtrmapbt_key_diff, + .cmp_key_with_cur = xfs_rtrmapbt_cmp_key_with_cur, .buf_ops = &xfs_rtrmapbt_mem_buf_ops, - .diff_two_keys = xfs_rtrmapbt_diff_two_keys, + .cmp_two_keys = xfs_rtrmapbt_cmp_two_keys, .keys_inorder = xfs_rtrmapbt_keys_inorder, .recs_inorder = xfs_rtrmapbt_recs_inorder, .keys_contiguous = xfs_rtrmapbt_keys_contiguous, diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index fe678a0438bc..cd6f0ff382a7 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -306,7 +306,7 @@ xchk_btree_block_check_sibling( if (pbp) xchk_buffer_recheck(bs->sc, pbp); - if (xfs_btree_diff_two_ptrs(cur, pp, sibling)) + if (xfs_btree_cmp_two_ptrs(cur, pp, sibling)) xchk_btree_set_corrupt(bs->sc, cur, level); out: xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR); diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 28ad341df8ee..2ef7742be7d3 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -866,11 +866,11 @@ xchk_trans_cancel( sc->tp = NULL; } -int +void xchk_trans_alloc_empty( struct xfs_scrub *sc) { - return xfs_trans_alloc_empty(sc->mp, &sc->tp); + sc->tp = xfs_trans_alloc_empty(sc->mp); } /* @@ -892,7 +892,8 @@ xchk_trans_alloc( return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate, resblks, 0, 0, &sc->tp); - return xchk_trans_alloc_empty(sc); + xchk_trans_alloc_empty(sc); + return 0; } /* Set us up with a transaction and an empty context. */ diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 19877d99f255..ddbc065c798c 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -7,7 +7,7 @@ #define __XFS_SCRUB_COMMON_H__ int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks); -int xchk_trans_alloc_empty(struct xfs_scrub *sc); +void xchk_trans_alloc_empty(struct xfs_scrub *sc); void xchk_trans_cancel(struct xfs_scrub *sc); bool xchk_process_error(struct xfs_scrub *sc, xfs_agnumber_t agno, diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 249313882108..8d3b550990b5 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -1289,9 +1289,7 @@ xrep_dir_scan_dirtree( if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - error = xchk_trans_alloc_empty(sc); - if (error) - return error; + xchk_trans_alloc_empty(sc); while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) { bool flush; @@ -1317,9 +1315,7 @@ xrep_dir_scan_dirtree( if (error) break; - error = xchk_trans_alloc_empty(sc); - if (error) - break; + xchk_trans_alloc_empty(sc); } if (xchk_should_terminate(sc, &error)) diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 9b598c5790ad..cebd0d526926 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -237,7 +237,8 @@ xchk_setup_fscounters( return error; } - return xchk_trans_alloc_empty(sc); + xchk_trans_alloc_empty(sc); + return 0; } /* diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c index e21c16fbd15d..14939d7de349 100644 --- a/fs/xfs/scrub/metapath.c +++ b/fs/xfs/scrub/metapath.c @@ -318,9 +318,7 @@ xchk_metapath( return 0; } - error = xchk_trans_alloc_empty(sc); - if (error) - return error; + xchk_trans_alloc_empty(sc); error = xchk_metapath_ilock_both(mpath); if (error) diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c index 4a47d0aabf73..26721fab5cab 100644 --- a/fs/xfs/scrub/nlinks.c +++ b/fs/xfs/scrub/nlinks.c @@ -555,9 +555,7 @@ xchk_nlinks_collect( * do not take sb_internal. */ xchk_trans_cancel(sc); - error = xchk_trans_alloc_empty(sc); - if (error) - return error; + xchk_trans_alloc_empty(sc); while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) { if (S_ISDIR(VFS_I(ip)->i_mode)) @@ -880,9 +878,7 @@ xchk_nlinks_compare( * inactivation workqueue. */ xchk_trans_cancel(sc); - error = xchk_trans_alloc_empty(sc); - if (error) - return error; + xchk_trans_alloc_empty(sc); /* * Use the inobt to walk all allocated inodes to compare the link diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c index 4ebdee095428..6ef2ee9c3814 100644 --- a/fs/xfs/scrub/nlinks_repair.c +++ b/fs/xfs/scrub/nlinks_repair.c @@ -340,9 +340,7 @@ xrep_nlinks( * We can only push the inactivation workqueues with an empty * transaction. */ - error = xchk_trans_alloc_empty(sc); - if (error) - break; + xchk_trans_alloc_empty(sc); } xchk_iscan_iter_finish(&xnc->compare_iscan); xchk_iscan_teardown(&xnc->compare_iscan); diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index 31bfe10be22a..2949feda6271 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -569,9 +569,7 @@ xrep_parent_scan_dirtree( if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - error = xchk_trans_alloc_empty(sc); - if (error) - return error; + xchk_trans_alloc_empty(sc); while ((error = xchk_iscan_iter(&rp->pscan.iscan, &ip)) == 1) { bool flush; @@ -597,9 +595,7 @@ xrep_parent_scan_dirtree( if (error) break; - error = xchk_trans_alloc_empty(sc); - if (error) - break; + xchk_trans_alloc_empty(sc); } if (xchk_should_terminate(sc, &error)) @@ -1099,9 +1095,7 @@ xrep_parent_flush_xattrs( xrep_tempfile_iounlock(rp->sc); /* Recreate the empty transaction and relock the inode. */ - error = xchk_trans_alloc_empty(rp->sc); - if (error) - return error; + xchk_trans_alloc_empty(rp->sc); xchk_ilock(rp->sc, XFS_ILOCK_EXCL); return 0; } diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c index dc4033b91e44..e4105aaafe84 100644 --- a/fs/xfs/scrub/quotacheck.c +++ b/fs/xfs/scrub/quotacheck.c @@ -505,9 +505,7 @@ xqcheck_collect_counts( * transactions do not take sb_internal. */ xchk_trans_cancel(sc); - error = xchk_trans_alloc_empty(sc); - if (error) - return error; + xchk_trans_alloc_empty(sc); while ((error = xchk_iscan_iter(&xqc->iscan, &ip)) == 1) { error = xqcheck_collect_inode(xqc, ip); diff --git a/fs/xfs/scrub/rcbag_btree.c b/fs/xfs/scrub/rcbag_btree.c index 709356dc6256..9a4ef823c5a7 100644 --- a/fs/xfs/scrub/rcbag_btree.c +++ b/fs/xfs/scrub/rcbag_btree.c @@ -47,29 +47,20 @@ rcbagbt_init_rec_from_cur( bag_rec->rbg_refcount = bag_irec->rbg_refcount; } -STATIC int64_t -rcbagbt_key_diff( +STATIC int +rcbagbt_cmp_key_with_cur( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { struct rcbag_rec *rec = (struct rcbag_rec *)&cur->bc_rec; const struct rcbag_key *kp = (const struct rcbag_key *)key; - if (kp->rbg_startblock > rec->rbg_startblock) - return 1; - if (kp->rbg_startblock < rec->rbg_startblock) - return -1; - - if (kp->rbg_blockcount > rec->rbg_blockcount) - return 1; - if (kp->rbg_blockcount < rec->rbg_blockcount) - return -1; - - return 0; + return cmp_int(kp->rbg_startblock, rec->rbg_startblock) ?: + cmp_int(kp->rbg_blockcount, rec->rbg_blockcount); } -STATIC int64_t -rcbagbt_diff_two_keys( +STATIC int +rcbagbt_cmp_two_keys( struct xfs_btree_cur *cur, const union xfs_btree_key *k1, const union xfs_btree_key *k2, @@ -80,17 +71,8 @@ rcbagbt_diff_two_keys( ASSERT(mask == NULL); - if (kp1->rbg_startblock > kp2->rbg_startblock) - return 1; - if (kp1->rbg_startblock < kp2->rbg_startblock) - return -1; - - if (kp1->rbg_blockcount > kp2->rbg_blockcount) - return 1; - if (kp1->rbg_blockcount < kp2->rbg_blockcount) - return -1; - - return 0; + return cmp_int(kp1->rbg_startblock, kp2->rbg_startblock) ?: + cmp_int(kp1->rbg_blockcount, kp2->rbg_blockcount); } STATIC int @@ -201,9 +183,9 @@ static const struct xfs_btree_ops rcbagbt_mem_ops = { .init_key_from_rec = rcbagbt_init_key_from_rec, .init_rec_from_cur = rcbagbt_init_rec_from_cur, .init_ptr_from_cur = xfbtree_init_ptr_from_cur, - .key_diff = rcbagbt_key_diff, + .cmp_key_with_cur = rcbagbt_cmp_key_with_cur, .buf_ops = &rcbagbt_mem_buf_ops, - .diff_two_keys = rcbagbt_diff_two_keys, + .cmp_two_keys = rcbagbt_cmp_two_keys, .keys_inorder = rcbagbt_keys_inorder, .recs_inorder = rcbagbt_recs_inorder, }; diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index f8f9ed30f56b..d00c18954a26 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -1269,42 +1269,6 @@ xrep_setup_xfbtree( } /* - * Create a dummy transaction for use in a live update hook function. This - * function MUST NOT be called from regular repair code because the current - * process' transaction is saved via the cookie. - */ -int -xrep_trans_alloc_hook_dummy( - struct xfs_mount *mp, - void **cookiep, - struct xfs_trans **tpp) -{ - int error; - - *cookiep = current->journal_info; - current->journal_info = NULL; - - error = xfs_trans_alloc_empty(mp, tpp); - if (!error) - return 0; - - current->journal_info = *cookiep; - *cookiep = NULL; - return error; -} - -/* Cancel a dummy transaction used by a live update hook function. */ -void -xrep_trans_cancel_hook_dummy( - void **cookiep, - struct xfs_trans *tp) -{ - xfs_trans_cancel(tp); - current->journal_info = *cookiep; - *cookiep = NULL; -} - -/* * See if this buffer can pass the given ->verify_struct() function. * * If the buffer already has ops attached and they're not the ones that were diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index af0a3a9e5ed9..9c04295742c8 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -180,10 +180,6 @@ int xrep_quotacheck(struct xfs_scrub *sc); int xrep_reinit_pagf(struct xfs_scrub *sc); int xrep_reinit_pagi(struct xfs_scrub *sc); -int xrep_trans_alloc_hook_dummy(struct xfs_mount *mp, void **cookiep, - struct xfs_trans **tpp); -void xrep_trans_cancel_hook_dummy(void **cookiep, struct xfs_trans *tp); - bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops); void xrep_inode_set_nblocks(struct xfs_scrub *sc, int64_t new_blocks); int xrep_reset_metafile_resv(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c index f5f73078ffe2..17d4a38d735c 100644 --- a/fs/xfs/scrub/rmap_repair.c +++ b/fs/xfs/scrub/rmap_repair.c @@ -951,9 +951,7 @@ end_agscan: sa->agf_bp = NULL; sa->agi_bp = NULL; xchk_trans_cancel(sc); - error = xchk_trans_alloc_empty(sc); - if (error) - return error; + xchk_trans_alloc_empty(sc); /* Iterate all AGs for inodes rmaps. */ while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) { @@ -1612,7 +1610,6 @@ xrep_rmapbt_live_update( struct xfs_mount *mp; struct xfs_btree_cur *mcur; struct xfs_trans *tp; - void *txcookie; int error; rr = container_of(nb, struct xrep_rmap, rhook.rmap_hook.nb); @@ -1623,9 +1620,7 @@ xrep_rmapbt_live_update( trace_xrep_rmap_live_update(pag_group(rr->sc->sa.pag), action, p); - error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp); - if (error) - goto out_abort; + tp = xfs_trans_alloc_empty(mp); mutex_lock(&rr->lock); mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, tp, &rr->rmap_btree); @@ -1639,14 +1634,13 @@ xrep_rmapbt_live_update( if (error) goto out_cancel; - xrep_trans_cancel_hook_dummy(&txcookie, tp); + xfs_trans_cancel(tp); mutex_unlock(&rr->lock); return NOTIFY_DONE; out_cancel: xfbtree_trans_cancel(&rr->rmap_btree, tp); - xrep_trans_cancel_hook_dummy(&txcookie, tp); -out_abort: + xfs_trans_cancel(tp); mutex_unlock(&rr->lock); xchk_iscan_abort(&rr->iscan); out_unlock: diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c index fc2592c53af5..7561941a337a 100644 --- a/fs/xfs/scrub/rtrmap_repair.c +++ b/fs/xfs/scrub/rtrmap_repair.c @@ -580,9 +580,7 @@ xrep_rtrmap_find_rmaps( */ xchk_trans_cancel(sc); xchk_rtgroup_unlock(&sc->sr); - error = xchk_trans_alloc_empty(sc); - if (error) - return error; + xchk_trans_alloc_empty(sc); while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) { error = xrep_rtrmap_scan_inode(rr, ip); @@ -846,7 +844,6 @@ xrep_rtrmapbt_live_update( struct xfs_mount *mp; struct xfs_btree_cur *mcur; struct xfs_trans *tp; - void *txcookie; int error; rr = container_of(nb, struct xrep_rtrmap, rhook.rmap_hook.nb); @@ -857,9 +854,7 @@ xrep_rtrmapbt_live_update( trace_xrep_rmap_live_update(rtg_group(rr->sc->sr.rtg), action, p); - error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp); - if (error) - goto out_abort; + tp = xfs_trans_alloc_empty(mp); mutex_lock(&rr->lock); mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, tp, &rr->rtrmap_btree); @@ -873,14 +868,13 @@ xrep_rtrmapbt_live_update( if (error) goto out_cancel; - xrep_trans_cancel_hook_dummy(&txcookie, tp); + xfs_trans_cancel(tp); mutex_unlock(&rr->lock); return NOTIFY_DONE; out_cancel: xfbtree_trans_cancel(&rr->rtrmap_btree, tp); - xrep_trans_cancel_hook_dummy(&txcookie, tp); -out_abort: + xfs_trans_cancel(tp); xchk_iscan_abort(&rr->iscan); mutex_unlock(&rr->lock); out_unlock: diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 76e24032e99a..3c3b0d25006f 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -876,10 +876,7 @@ xchk_scrubv_open_by_handle( struct xfs_inode *ip; int error; - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - return NULL; - + tp = xfs_trans_alloc_empty(mp); error = xfs_iget(mp, tp, head->svh_ino, XCHK_IGET_FLAGS, 0, &ip); xfs_trans_cancel(tp); if (error) diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index d7c4ced47c15..1e6e9c10cea2 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -2996,7 +2996,7 @@ DEFINE_EVENT(xrep_pptr_salvage_class, name, \ DEFINE_XREP_PPTR_SALVAGE_EVENT(xrep_xattr_salvage_pptr); DEFINE_XREP_PPTR_SALVAGE_EVENT(xrep_xattr_insert_pptr); -TRACE_EVENT(xrep_xattr_class, +DECLARE_EVENT_CLASS(xrep_xattr_class, TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip), TP_ARGS(ip, arg_ip), TP_STRUCT__entry( diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index f683b7a9323f..5eef3bc30bda 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -91,41 +91,37 @@ xfs_attri_log_nameval_alloc( name_len + new_name_len + value_len + new_value_len); - nv->name.i_addr = nv + 1; - nv->name.i_len = name_len; - nv->name.i_type = XLOG_REG_TYPE_ATTR_NAME; - memcpy(nv->name.i_addr, name, name_len); + nv->name.iov_base = nv + 1; + nv->name.iov_len = name_len; + memcpy(nv->name.iov_base, name, name_len); if (new_name_len) { - nv->new_name.i_addr = nv->name.i_addr + name_len; - nv->new_name.i_len = new_name_len; - memcpy(nv->new_name.i_addr, new_name, new_name_len); + nv->new_name.iov_base = nv->name.iov_base + name_len; + nv->new_name.iov_len = new_name_len; + memcpy(nv->new_name.iov_base, new_name, new_name_len); } else { - nv->new_name.i_addr = NULL; - nv->new_name.i_len = 0; + nv->new_name.iov_base = NULL; + nv->new_name.iov_len = 0; } - nv->new_name.i_type = XLOG_REG_TYPE_ATTR_NEWNAME; if (value_len) { - nv->value.i_addr = nv->name.i_addr + name_len + new_name_len; - nv->value.i_len = value_len; - memcpy(nv->value.i_addr, value, value_len); + nv->value.iov_base = nv->name.iov_base + name_len + new_name_len; + nv->value.iov_len = value_len; + memcpy(nv->value.iov_base, value, value_len); } else { - nv->value.i_addr = NULL; - nv->value.i_len = 0; + nv->value.iov_base = NULL; + nv->value.iov_len = 0; } - nv->value.i_type = XLOG_REG_TYPE_ATTR_VALUE; if (new_value_len) { - nv->new_value.i_addr = nv->name.i_addr + name_len + + nv->new_value.iov_base = nv->name.iov_base + name_len + new_name_len + value_len; - nv->new_value.i_len = new_value_len; - memcpy(nv->new_value.i_addr, new_value, new_value_len); + nv->new_value.iov_len = new_value_len; + memcpy(nv->new_value.iov_base, new_value, new_value_len); } else { - nv->new_value.i_addr = NULL; - nv->new_value.i_len = 0; + nv->new_value.iov_base = NULL; + nv->new_value.iov_len = 0; } - nv->new_value.i_type = XLOG_REG_TYPE_ATTR_NEWVALUE; refcount_set(&nv->refcount, 1); return nv; @@ -170,21 +166,21 @@ xfs_attri_item_size( *nvecs += 2; *nbytes += sizeof(struct xfs_attri_log_format) + - xlog_calc_iovec_len(nv->name.i_len); + xlog_calc_iovec_len(nv->name.iov_len); - if (nv->new_name.i_len) { + if (nv->new_name.iov_len) { *nvecs += 1; - *nbytes += xlog_calc_iovec_len(nv->new_name.i_len); + *nbytes += xlog_calc_iovec_len(nv->new_name.iov_len); } - if (nv->value.i_len) { + if (nv->value.iov_len) { *nvecs += 1; - *nbytes += xlog_calc_iovec_len(nv->value.i_len); + *nbytes += xlog_calc_iovec_len(nv->value.iov_len); } - if (nv->new_value.i_len) { + if (nv->new_value.iov_len) { *nvecs += 1; - *nbytes += xlog_calc_iovec_len(nv->new_value.i_len); + *nbytes += xlog_calc_iovec_len(nv->new_value.iov_len); } } @@ -212,31 +208,36 @@ xfs_attri_item_format( * the log recovery. */ - ASSERT(nv->name.i_len > 0); + ASSERT(nv->name.iov_len > 0); attrip->attri_format.alfi_size++; - if (nv->new_name.i_len > 0) + if (nv->new_name.iov_len > 0) attrip->attri_format.alfi_size++; - if (nv->value.i_len > 0) + if (nv->value.iov_len > 0) attrip->attri_format.alfi_size++; - if (nv->new_value.i_len > 0) + if (nv->new_value.iov_len > 0) attrip->attri_format.alfi_size++; xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT, &attrip->attri_format, sizeof(struct xfs_attri_log_format)); - xlog_copy_from_iovec(lv, &vecp, &nv->name); - if (nv->new_name.i_len > 0) - xlog_copy_from_iovec(lv, &vecp, &nv->new_name); + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NAME, nv->name.iov_base, + nv->name.iov_len); - if (nv->value.i_len > 0) - xlog_copy_from_iovec(lv, &vecp, &nv->value); + if (nv->new_name.iov_len > 0) + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NEWNAME, + nv->new_name.iov_base, nv->new_name.iov_len); - if (nv->new_value.i_len > 0) - xlog_copy_from_iovec(lv, &vecp, &nv->new_value); + if (nv->value.iov_len > 0) + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_VALUE, + nv->value.iov_base, nv->value.iov_len); + + if (nv->new_value.iov_len > 0) + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NEWVALUE, + nv->new_value.iov_base, nv->new_value.iov_len); } /* @@ -383,22 +384,22 @@ xfs_attr_log_item( attrp->alfi_ino = args->dp->i_ino; ASSERT(!(attr->xattri_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK)); attrp->alfi_op_flags = attr->xattri_op_flags; - attrp->alfi_value_len = nv->value.i_len; + attrp->alfi_value_len = nv->value.iov_len; switch (xfs_attr_log_item_op(attrp)) { case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: - ASSERT(nv->value.i_len == nv->new_value.i_len); + ASSERT(nv->value.iov_len == nv->new_value.iov_len); attrp->alfi_igen = VFS_I(args->dp)->i_generation; - attrp->alfi_old_name_len = nv->name.i_len; - attrp->alfi_new_name_len = nv->new_name.i_len; + attrp->alfi_old_name_len = nv->name.iov_len; + attrp->alfi_new_name_len = nv->new_name.iov_len; break; case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: case XFS_ATTRI_OP_FLAGS_PPTR_SET: attrp->alfi_igen = VFS_I(args->dp)->i_generation; fallthrough; default: - attrp->alfi_name_len = nv->name.i_len; + attrp->alfi_name_len = nv->name.iov_len; break; } @@ -616,10 +617,7 @@ xfs_attri_iread_extents( struct xfs_trans *tp; int error; - error = xfs_trans_alloc_empty(ip->i_mount, &tp); - if (error) - return error; - + tp = xfs_trans_alloc_empty(ip->i_mount); xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -690,14 +688,14 @@ xfs_attri_recover_work( args->dp = ip; args->geo = mp->m_attr_geo; args->whichfork = XFS_ATTR_FORK; - args->name = nv->name.i_addr; - args->namelen = nv->name.i_len; - args->new_name = nv->new_name.i_addr; - args->new_namelen = nv->new_name.i_len; - args->value = nv->value.i_addr; - args->valuelen = nv->value.i_len; - args->new_value = nv->new_value.i_addr; - args->new_valuelen = nv->new_value.i_len; + args->name = nv->name.iov_base; + args->namelen = nv->name.iov_len; + args->new_name = nv->new_name.iov_base; + args->new_namelen = nv->new_name.iov_len; + args->value = nv->value.iov_base; + args->valuelen = nv->value.iov_len; + args->new_value = nv->new_value.iov_base; + args->new_valuelen = nv->new_value.iov_len; args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK; args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT | XFS_DA_OP_LOGGED; @@ -754,8 +752,8 @@ xfs_attr_recover_work( */ attrp = &attrip->attri_format; if (!xfs_attri_validate(mp, attrp) || - !xfs_attr_namecheck(attrp->alfi_attr_filter, nv->name.i_addr, - nv->name.i_len)) + !xfs_attr_namecheck(attrp->alfi_attr_filter, nv->name.iov_base, + nv->name.iov_len)) return -EFSCORRUPTED; attr = xfs_attri_recover_work(mp, dfp, attrp, &ip, nv); @@ -953,50 +951,50 @@ static inline void * xfs_attri_validate_name_iovec( struct xfs_mount *mp, struct xfs_attri_log_format *attri_formatp, - const struct xfs_log_iovec *iovec, + const struct kvec *iovec, unsigned int name_len) { - if (iovec->i_len != xlog_calc_iovec_len(name_len)) { + if (iovec->iov_len != xlog_calc_iovec_len(name_len)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, sizeof(*attri_formatp)); return NULL; } - if (!xfs_attr_namecheck(attri_formatp->alfi_attr_filter, iovec->i_addr, + if (!xfs_attr_namecheck(attri_formatp->alfi_attr_filter, iovec->iov_base, name_len)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, sizeof(*attri_formatp)); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - iovec->i_addr, iovec->i_len); + iovec->iov_base, iovec->iov_len); return NULL; } - return iovec->i_addr; + return iovec->iov_base; } static inline void * xfs_attri_validate_value_iovec( struct xfs_mount *mp, struct xfs_attri_log_format *attri_formatp, - const struct xfs_log_iovec *iovec, + const struct kvec *iovec, unsigned int value_len) { - if (iovec->i_len != xlog_calc_iovec_len(value_len)) { + if (iovec->iov_len != xlog_calc_iovec_len(value_len)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, sizeof(*attri_formatp)); return NULL; } if ((attri_formatp->alfi_attr_filter & XFS_ATTR_PARENT) && - !xfs_parent_valuecheck(mp, iovec->i_addr, value_len)) { + !xfs_parent_valuecheck(mp, iovec->iov_base, value_len)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, sizeof(*attri_formatp)); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - iovec->i_addr, iovec->i_len); + iovec->iov_base, iovec->iov_len); return NULL; } - return iovec->i_addr; + return iovec->iov_base; } STATIC int @@ -1023,13 +1021,13 @@ xlog_recover_attri_commit_pass2( /* Validate xfs_attri_log_format before the large memory allocation */ len = sizeof(struct xfs_attri_log_format); - if (item->ri_buf[i].i_len != len) { + if (item->ri_buf[i].iov_len != len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } - attri_formatp = item->ri_buf[i].i_addr; + attri_formatp = item->ri_buf[i].iov_base; if (!xfs_attri_validate(mp, attri_formatp)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); @@ -1218,10 +1216,10 @@ xlog_recover_attrd_commit_pass2( { struct xfs_attrd_log_format *attrd_formatp; - attrd_formatp = item->ri_buf[0].i_addr; - if (item->ri_buf[0].i_len != sizeof(struct xfs_attrd_log_format)) { + attrd_formatp = item->ri_buf[0].iov_base; + if (item->ri_buf[0].iov_len != sizeof(struct xfs_attrd_log_format)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_attr_item.h b/fs/xfs/xfs_attr_item.h index e74128cbb722..d108a11b55ae 100644 --- a/fs/xfs/xfs_attr_item.h +++ b/fs/xfs/xfs_attr_item.h @@ -12,10 +12,10 @@ struct xfs_mount; struct kmem_zone; struct xfs_attri_log_nameval { - struct xfs_log_iovec name; - struct xfs_log_iovec new_name; /* PPTR_REPLACE only */ - struct xfs_log_iovec value; - struct xfs_log_iovec new_value; /* PPTR_REPLACE only */ + struct kvec name; + struct kvec new_name; /* PPTR_REPLACE only */ + struct kvec value; + struct kvec new_value; /* PPTR_REPLACE only */ refcount_t refcount; /* name and value follow the end of this struct */ diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 646c515ee355..80f0c4bcc483 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -654,24 +654,24 @@ xlog_recover_bui_commit_pass2( struct xfs_bui_log_format *bui_formatp; size_t len; - bui_formatp = item->ri_buf[0].i_addr; + bui_formatp = item->ri_buf[0].iov_base; - if (item->ri_buf[0].i_len < xfs_bui_log_format_sizeof(0)) { + if (item->ri_buf[0].iov_len < xfs_bui_log_format_sizeof(0)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents); - if (item->ri_buf[0].i_len != len) { + if (item->ri_buf[0].iov_len != len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } @@ -705,10 +705,10 @@ xlog_recover_bud_commit_pass2( { struct xfs_bud_log_format *bud_formatp; - bud_formatp = item->ri_buf[0].i_addr; - if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) { + bud_formatp = item->ri_buf[0].iov_base; + if (item->ri_buf[0].iov_len != sizeof(struct xfs_bud_log_format)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index ba5bd6031ece..f9ef3b2a332a 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1683,7 +1683,7 @@ xfs_free_buftarg( fs_put_dax(btp->bt_daxdev, btp->bt_mount); /* the main block device is closed by kill_block_super */ if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev) - bdev_fput(btp->bt_bdev_file); + bdev_fput(btp->bt_file); kfree(btp); } @@ -1712,8 +1712,8 @@ xfs_configure_buftarg_atomic_writes( max_bytes = 0; } - btp->bt_bdev_awu_min = min_bytes; - btp->bt_bdev_awu_max = max_bytes; + btp->bt_awu_min = min_bytes; + btp->bt_awu_max = max_bytes; } /* Configure a buffer target that abstracts a block device. */ @@ -1738,14 +1738,9 @@ xfs_configure_buftarg( return -EINVAL; } - /* - * Flush the block device pagecache so our bios see anything dirtied - * before mount. - */ if (bdev_can_atomic_write(btp->bt_bdev)) xfs_configure_buftarg_atomic_writes(btp); - - return sync_blockdev(btp->bt_bdev); + return 0; } int @@ -1803,7 +1798,7 @@ xfs_alloc_buftarg( btp = kzalloc(sizeof(*btp), GFP_KERNEL | __GFP_NOFAIL); btp->bt_mount = mp; - btp->bt_bdev_file = bdev_file; + btp->bt_file = bdev_file; btp->bt_bdev = file_bdev(bdev_file); btp->bt_dev = btp->bt_bdev->bd_dev; btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off, diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 15fc56948346..b269e115d9ac 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -94,7 +94,6 @@ void xfs_buf_cache_destroy(struct xfs_buf_cache *bch); */ struct xfs_buftarg { dev_t bt_dev; - struct file *bt_bdev_file; struct block_device *bt_bdev; struct dax_device *bt_daxdev; struct file *bt_file; @@ -112,9 +111,9 @@ struct xfs_buftarg { struct percpu_counter bt_readahead_count; struct ratelimit_state bt_ioerror_rl; - /* Atomic write unit values, bytes */ - unsigned int bt_bdev_awu_min; - unsigned int bt_bdev_awu_max; + /* Hardware atomic write unit values, bytes */ + unsigned int bt_awu_min; + unsigned int bt_awu_max; /* built-in cache, if we're not using the perag one */ struct xfs_buf_cache bt_cache[]; @@ -375,7 +374,6 @@ extern void xfs_buftarg_wait(struct xfs_buftarg *); extern void xfs_buftarg_drain(struct xfs_buftarg *); int xfs_configure_buftarg(struct xfs_buftarg *btp, unsigned int sectorsize); -#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 7fc54725c5f6..8d85b5eee444 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -90,16 +90,16 @@ xfs_buf_item_relse( /* Is this log iovec plausibly large enough to contain the buffer log format? */ bool xfs_buf_log_check_iovec( - struct xfs_log_iovec *iovec) + struct kvec *iovec) { - struct xfs_buf_log_format *blfp = iovec->i_addr; + struct xfs_buf_log_format *blfp = iovec->iov_base; char *bmp_end; char *item_end; - if (offsetof(struct xfs_buf_log_format, blf_data_map) > iovec->i_len) + if (offsetof(struct xfs_buf_log_format, blf_data_map) > iovec->iov_len) return false; - item_end = (char *)iovec->i_addr + iovec->i_len; + item_end = (char *)iovec->iov_base + iovec->iov_len; bmp_end = (char *)&blfp->blf_data_map[blfp->blf_map_size]; return bmp_end <= item_end; } diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 416890b84f8c..3159325dd17b 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -61,7 +61,7 @@ static inline void xfs_buf_dquot_iodone(struct xfs_buf *bp) } #endif /* CONFIG_XFS_QUOTA */ void xfs_buf_iodone(struct xfs_buf *); -bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec); +bool xfs_buf_log_check_iovec(struct kvec *iovec); unsigned int xfs_buf_inval_log_space(unsigned int map_count, unsigned int blocksize); diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index d4c5cef5bc43..5d58e2ae4972 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -159,7 +159,7 @@ STATIC enum xlog_recover_reorder xlog_recover_buf_reorder( struct xlog_recover_item *item) { - struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr; + struct xfs_buf_log_format *buf_f = item->ri_buf[0].iov_base; if (buf_f->blf_flags & XFS_BLF_CANCEL) return XLOG_REORDER_CANCEL_LIST; @@ -173,7 +173,7 @@ xlog_recover_buf_ra_pass2( struct xlog *log, struct xlog_recover_item *item) { - struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr; + struct xfs_buf_log_format *buf_f = item->ri_buf[0].iov_base; xlog_buf_readahead(log, buf_f->blf_blkno, buf_f->blf_len, NULL); } @@ -187,11 +187,11 @@ xlog_recover_buf_commit_pass1( struct xlog *log, struct xlog_recover_item *item) { - struct xfs_buf_log_format *bf = item->ri_buf[0].i_addr; + struct xfs_buf_log_format *bf = item->ri_buf[0].iov_base; if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) { - xfs_err(log->l_mp, "bad buffer log item size (%d)", - item->ri_buf[0].i_len); + xfs_err(log->l_mp, "bad buffer log item size (%zd)", + item->ri_buf[0].iov_len); return -EFSCORRUPTED; } @@ -487,8 +487,8 @@ xlog_recover_do_reg_buffer( nbits = xfs_contig_bits(buf_f->blf_data_map, buf_f->blf_map_size, bit); ASSERT(nbits > 0); - ASSERT(item->ri_buf[i].i_addr != NULL); - ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); + ASSERT(item->ri_buf[i].iov_base != NULL); + ASSERT(item->ri_buf[i].iov_len % XFS_BLF_CHUNK == 0); ASSERT(BBTOB(bp->b_length) >= ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); @@ -500,8 +500,8 @@ xlog_recover_do_reg_buffer( * the log. Hence we need to trim nbits back to the length of * the current region being copied out of the log. */ - if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT)) - nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT; + if (item->ri_buf[i].iov_len < (nbits << XFS_BLF_SHIFT)) + nbits = item->ri_buf[i].iov_len >> XFS_BLF_SHIFT; /* * Do a sanity check if this is a dquot buffer. Just checking @@ -511,18 +511,18 @@ xlog_recover_do_reg_buffer( fa = NULL; if (buf_f->blf_flags & (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { - if (item->ri_buf[i].i_addr == NULL) { + if (item->ri_buf[i].iov_base == NULL) { xfs_alert(mp, "XFS: NULL dquot in %s.", __func__); goto next; } - if (item->ri_buf[i].i_len < size_disk_dquot) { + if (item->ri_buf[i].iov_len < size_disk_dquot) { xfs_alert(mp, - "XFS: dquot too small (%d) in %s.", - item->ri_buf[i].i_len, __func__); + "XFS: dquot too small (%zd) in %s.", + item->ri_buf[i].iov_len, __func__); goto next; } - fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr, -1); + fa = xfs_dquot_verify(mp, item->ri_buf[i].iov_base, -1); if (fa) { xfs_alert(mp, "dquot corrupt at %pS trying to replay into block 0x%llx", @@ -533,7 +533,7 @@ xlog_recover_do_reg_buffer( memcpy(xfs_buf_offset(bp, (uint)bit << XFS_BLF_SHIFT), /* dest */ - item->ri_buf[i].i_addr, /* source */ + item->ri_buf[i].iov_base, /* source */ nbits<<XFS_BLF_SHIFT); /* length */ next: i++; @@ -669,8 +669,8 @@ xlog_recover_do_inode_buffer( if (next_unlinked_offset < reg_buf_offset) continue; - ASSERT(item->ri_buf[item_index].i_addr != NULL); - ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); + ASSERT(item->ri_buf[item_index].iov_base != NULL); + ASSERT((item->ri_buf[item_index].iov_len % XFS_BLF_CHUNK) == 0); ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length)); /* @@ -678,7 +678,7 @@ xlog_recover_do_inode_buffer( * current di_next_unlinked field. Extract its value * and copy it to the buffer copy. */ - logged_nextp = item->ri_buf[item_index].i_addr + + logged_nextp = item->ri_buf[item_index].iov_base + next_unlinked_offset - reg_buf_offset; if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) { xfs_alert(mp, @@ -1002,7 +1002,7 @@ xlog_recover_buf_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t current_lsn) { - struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr; + struct xfs_buf_log_format *buf_f = item->ri_buf[0].iov_base; struct xfs_mount *mp = log->l_mp; struct xfs_buf *bp; int error; diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 94d0873bcd62..ee49f20875af 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -103,24 +103,6 @@ xfs_discard_endio( bio_put(bio); } -static inline struct block_device * -xfs_group_bdev( - const struct xfs_group *xg) -{ - struct xfs_mount *mp = xg->xg_mount; - - switch (xg->xg_type) { - case XG_TYPE_AG: - return mp->m_ddev_targp->bt_bdev; - case XG_TYPE_RTG: - return mp->m_rtdev_targp->bt_bdev; - default: - ASSERT(0); - break; - } - return NULL; -} - /* * Walk the discard list and issue discards on all the busy extents in the * list. We plug and chain the bios so that we only need a single completion @@ -138,11 +120,14 @@ xfs_discard_extents( blk_start_plug(&plug); list_for_each_entry(busyp, &extents->extent_list, list) { - trace_xfs_discard_extent(busyp->group, busyp->bno, - busyp->length); + struct xfs_group *xg = busyp->group; + struct xfs_buftarg *btp = + xfs_group_type_buftarg(xg->xg_mount, xg->xg_type); - error = __blkdev_issue_discard(xfs_group_bdev(busyp->group), - xfs_gbno_to_daddr(busyp->group, busyp->bno), + trace_xfs_discard_extent(xg, busyp->bno, busyp->length); + + error = __blkdev_issue_discard(btp->bt_bdev, + xfs_gbno_to_daddr(xg, busyp->bno), XFS_FSB_TO_BB(mp, busyp->length), GFP_KERNEL, &bio); if (error && error != -EOPNOTSUPP) { @@ -204,9 +189,7 @@ xfs_trim_gather_extents( */ xfs_log_force(mp, XFS_LOG_SYNC); - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - return error; + tp = xfs_trans_alloc_empty(mp); error = xfs_alloc_read_agf(pag, tp, 0, &agbp); if (error) @@ -598,9 +581,7 @@ xfs_trim_rtextents( struct xfs_trans *tp; int error; - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - return error; + tp = xfs_trans_alloc_empty(mp); /* * Walk the free ranges between low and high. The query_range function @@ -716,9 +697,7 @@ xfs_trim_rtgroup_extents( struct xfs_trans *tp; int error; - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - return error; + tp = xfs_trans_alloc_empty(mp); /* * Walk the free ranges between low and high. The query_range function diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c index 2c2720ce6923..89bc9bcaf51e 100644 --- a/fs/xfs/xfs_dquot_item_recover.c +++ b/fs/xfs/xfs_dquot_item_recover.c @@ -34,10 +34,10 @@ xlog_recover_dquot_ra_pass2( if (mp->m_qflags == 0) return; - recddq = item->ri_buf[1].i_addr; + recddq = item->ri_buf[1].iov_base; if (recddq == NULL) return; - if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) + if (item->ri_buf[1].iov_len < sizeof(struct xfs_disk_dquot)) return; type = recddq->d_type & XFS_DQTYPE_REC_MASK; @@ -45,7 +45,7 @@ xlog_recover_dquot_ra_pass2( if (log->l_quotaoffs_flag & type) return; - dq_f = item->ri_buf[0].i_addr; + dq_f = item->ri_buf[0].iov_base; ASSERT(dq_f); ASSERT(dq_f->qlf_len == 1); @@ -79,14 +79,14 @@ xlog_recover_dquot_commit_pass2( if (mp->m_qflags == 0) return 0; - recddq = item->ri_buf[1].i_addr; + recddq = item->ri_buf[1].iov_base; if (recddq == NULL) { xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); return -EFSCORRUPTED; } - if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) { - xfs_alert(log->l_mp, "dquot too small (%d) in %s.", - item->ri_buf[1].i_len, __func__); + if (item->ri_buf[1].iov_len < sizeof(struct xfs_disk_dquot)) { + xfs_alert(log->l_mp, "dquot too small (%zd) in %s.", + item->ri_buf[1].iov_len, __func__); return -EFSCORRUPTED; } @@ -108,7 +108,7 @@ xlog_recover_dquot_commit_pass2( * The other possibility, of course, is that the quota subsystem was * removed since the last mount - ENOSYS. */ - dq_f = item->ri_buf[0].i_addr; + dq_f = item->ri_buf[0].iov_base; ASSERT(dq_f); fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id); if (fa) { @@ -147,7 +147,7 @@ xlog_recover_dquot_commit_pass2( } } - memcpy(ddq, recddq, item->ri_buf[1].i_len); + memcpy(ddq, recddq, item->ri_buf[1].iov_len); if (xfs_has_crc(mp)) { xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); @@ -192,7 +192,7 @@ xlog_recover_quotaoff_commit_pass1( struct xlog *log, struct xlog_recover_item *item) { - struct xfs_qoff_logformat *qoff_f = item->ri_buf[0].i_addr; + struct xfs_qoff_logformat *qoff_f = item->ri_buf[0].iov_base; ASSERT(qoff_f); /* diff --git a/fs/xfs/xfs_exchmaps_item.c b/fs/xfs/xfs_exchmaps_item.c index 264a121c5e16..229cbe0adf17 100644 --- a/fs/xfs/xfs_exchmaps_item.c +++ b/fs/xfs/xfs_exchmaps_item.c @@ -558,12 +558,12 @@ xlog_recover_xmi_commit_pass2( size_t len; len = sizeof(struct xfs_xmi_log_format); - if (item->ri_buf[0].i_len != len) { + if (item->ri_buf[0].iov_len != len) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; } - xmi_formatp = item->ri_buf[0].i_addr; + xmi_formatp = item->ri_buf[0].iov_base; if (xmi_formatp->__pad != 0) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; @@ -598,8 +598,8 @@ xlog_recover_xmd_commit_pass2( { struct xfs_xmd_log_format *xmd_formatp; - xmd_formatp = item->ri_buf[0].i_addr; - if (item->ri_buf[0].i_len != sizeof(struct xfs_xmd_log_format)) { + xmd_formatp = item->ri_buf[0].iov_base; + if (item->ri_buf[0].iov_len != sizeof(struct xfs_xmd_log_format)) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index f069b04e8ea1..3e6e019b6146 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -68,4 +68,12 @@ static inline void xfs_extent_busy_sort(struct list_head *list) list_sort(NULL, list, xfs_extent_busy_ag_cmp); } +/* + * Zoned RTGs don't need to track busy extents, as the actual block freeing only + * happens by a zone reset, which forces out all transactions that touched the + * to be reset zone first. + */ +#define xfs_group_has_extent_busy(mp, type) \ + ((type) == XG_TYPE_AG || !xfs_has_zoned((mp))) + #endif /* __XFS_EXTENT_BUSY_H__ */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index d574f5f639fa..47ee598a9827 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -182,15 +182,18 @@ xfs_efi_init( * It will handle the conversion of formats if necessary. */ STATIC int -xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) +xfs_efi_copy_format( + struct kvec *buf, + struct xfs_efi_log_format *dst_efi_fmt) { - xfs_efi_log_format_t *src_efi_fmt = buf->i_addr; - uint i; - uint len = xfs_efi_log_format_sizeof(src_efi_fmt->efi_nextents); - uint len32 = xfs_efi_log_format32_sizeof(src_efi_fmt->efi_nextents); - uint len64 = xfs_efi_log_format64_sizeof(src_efi_fmt->efi_nextents); + struct xfs_efi_log_format *src_efi_fmt = buf->iov_base; + uint len, len32, len64, i; - if (buf->i_len == len) { + len = xfs_efi_log_format_sizeof(src_efi_fmt->efi_nextents); + len32 = xfs_efi_log_format32_sizeof(src_efi_fmt->efi_nextents); + len64 = xfs_efi_log_format64_sizeof(src_efi_fmt->efi_nextents); + + if (buf->iov_len == len) { memcpy(dst_efi_fmt, src_efi_fmt, offsetof(struct xfs_efi_log_format, efi_extents)); for (i = 0; i < src_efi_fmt->efi_nextents; i++) @@ -198,8 +201,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) &src_efi_fmt->efi_extents[i], sizeof(struct xfs_extent)); return 0; - } else if (buf->i_len == len32) { - xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr; + } else if (buf->iov_len == len32) { + xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->iov_base; dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type; dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size; @@ -212,8 +215,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) src_efi_fmt_32->efi_extents[i].ext_len; } return 0; - } else if (buf->i_len == len64) { - xfs_efi_log_format_64_t *src_efi_fmt_64 = buf->i_addr; + } else if (buf->iov_len == len64) { + xfs_efi_log_format_64_t *src_efi_fmt_64 = buf->iov_base; dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type; dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size; @@ -227,8 +230,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) } return 0; } - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, NULL, buf->i_addr, - buf->i_len); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, NULL, buf->iov_base, + buf->iov_len); return -EFSCORRUPTED; } @@ -865,11 +868,11 @@ xlog_recover_efi_commit_pass2( struct xfs_efi_log_format *efi_formatp; int error; - efi_formatp = item->ri_buf[0].i_addr; + efi_formatp = item->ri_buf[0].iov_base; - if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) { + if (item->ri_buf[0].iov_len < xfs_efi_log_format_sizeof(0)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } @@ -904,11 +907,11 @@ xlog_recover_rtefi_commit_pass2( struct xfs_efi_log_format *efi_formatp; int error; - efi_formatp = item->ri_buf[0].i_addr; + efi_formatp = item->ri_buf[0].iov_base; - if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) { + if (item->ri_buf[0].iov_len < xfs_efi_log_format_sizeof(0)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } @@ -933,7 +936,7 @@ xlog_recover_rtefi_commit_pass2( xfs_lsn_t lsn) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } #endif @@ -958,9 +961,9 @@ xlog_recover_efd_commit_pass2( xfs_lsn_t lsn) { struct xfs_efd_log_format *efd_formatp; - int buflen = item->ri_buf[0].i_len; + int buflen = item->ri_buf[0].iov_len; - efd_formatp = item->ri_buf[0].i_addr; + efd_formatp = item->ri_buf[0].iov_base; if (buflen < sizeof(struct xfs_efd_log_format)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, @@ -968,9 +971,9 @@ xlog_recover_efd_commit_pass2( return -EFSCORRUPTED; } - if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof( + if (item->ri_buf[0].iov_len != xfs_efd_log_format32_sizeof( efd_formatp->efd_nextents) && - item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof( + item->ri_buf[0].iov_len != xfs_efd_log_format64_sizeof( efd_formatp->efd_nextents)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, efd_formatp, buflen); @@ -995,9 +998,9 @@ xlog_recover_rtefd_commit_pass2( xfs_lsn_t lsn) { struct xfs_efd_log_format *efd_formatp; - int buflen = item->ri_buf[0].i_len; + int buflen = item->ri_buf[0].iov_len; - efd_formatp = item->ri_buf[0].i_addr; + efd_formatp = item->ri_buf[0].iov_base; if (buflen < sizeof(struct xfs_efd_log_format)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, @@ -1005,9 +1008,9 @@ xlog_recover_rtefd_commit_pass2( return -EFSCORRUPTED; } - if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof( + if (item->ri_buf[0].iov_len != xfs_efd_log_format32_sizeof( efd_formatp->efd_nextents) && - item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof( + item->ri_buf[0].iov_len != xfs_efd_log_format64_sizeof( efd_formatp->efd_nextents)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, efd_formatp, buflen); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 0b41b18debf3..ed69a65f56d7 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -497,7 +497,7 @@ restart: static ssize_t xfs_zoned_write_space_reserve( - struct xfs_inode *ip, + struct xfs_mount *mp, struct kiocb *iocb, struct iov_iter *from, unsigned int flags, @@ -533,8 +533,8 @@ xfs_zoned_write_space_reserve( * * Any remaining block will be returned after the write. */ - return xfs_zoned_space_reserve(ip, - XFS_B_TO_FSB(ip->i_mount, count) + 1 + 2, flags, ac); + return xfs_zoned_space_reserve(mp, XFS_B_TO_FSB(mp, count) + 1 + 2, + flags, ac); } static int @@ -718,13 +718,13 @@ xfs_file_dio_write_zoned( struct xfs_zone_alloc_ctx ac = { }; ssize_t ret; - ret = xfs_zoned_write_space_reserve(ip, iocb, from, 0, &ac); + ret = xfs_zoned_write_space_reserve(ip->i_mount, iocb, from, 0, &ac); if (ret < 0) return ret; ret = xfs_file_dio_write_aligned(ip, iocb, from, &xfs_zoned_direct_write_iomap_ops, &xfs_dio_zoned_write_ops, &ac); - xfs_zoned_space_unreserve(ip, &ac); + xfs_zoned_space_unreserve(ip->i_mount, &ac); return ret; } @@ -752,7 +752,7 @@ xfs_file_dio_write_atomic( * HW offload should be faster, so try that first if it is already * known that the write length is not too large. */ - if (ocount > xfs_inode_buftarg(ip)->bt_bdev_awu_max) + if (ocount > xfs_inode_buftarg(ip)->bt_awu_max) dops = &xfs_atomic_write_cow_iomap_ops; else dops = &xfs_direct_write_iomap_ops; @@ -1032,7 +1032,7 @@ xfs_file_buffered_write_zoned( struct xfs_zone_alloc_ctx ac = { }; ssize_t ret; - ret = xfs_zoned_write_space_reserve(ip, iocb, from, XFS_ZR_GREEDY, &ac); + ret = xfs_zoned_write_space_reserve(mp, iocb, from, XFS_ZR_GREEDY, &ac); if (ret < 0) return ret; @@ -1073,7 +1073,7 @@ retry: out_unlock: xfs_iunlock(ip, iolock); out_unreserve: - xfs_zoned_space_unreserve(ip, &ac); + xfs_zoned_space_unreserve(ip->i_mount, &ac); if (ret > 0) { XFS_STATS_ADD(mp, xs_write_bytes, ret); ret = generic_write_sync(iocb, ret); @@ -1414,11 +1414,11 @@ xfs_file_zoned_fallocate( struct xfs_inode *ip = XFS_I(file_inode(file)); int error; - error = xfs_zoned_space_reserve(ip, 2, XFS_ZR_RESERVED, &ac); + error = xfs_zoned_space_reserve(ip->i_mount, 2, XFS_ZR_RESERVED, &ac); if (error) return error; error = __xfs_file_fallocate(file, mode, offset, len, &ac); - xfs_zoned_space_unreserve(ip, &ac); + xfs_zoned_space_unreserve(ip->i_mount, &ac); return error; } @@ -1828,12 +1828,12 @@ xfs_write_fault_zoned( * But as the overallocation is limited to less than a folio and will be * release instantly that's just fine. */ - error = xfs_zoned_space_reserve(ip, XFS_B_TO_FSB(ip->i_mount, len), 0, - &ac); + error = xfs_zoned_space_reserve(ip->i_mount, + XFS_B_TO_FSB(ip->i_mount, len), 0, &ac); if (error < 0) return vmf_fs_error(error); ret = __xfs_write_fault(vmf, order, &ac); - xfs_zoned_space_unreserve(ip, &ac); + xfs_zoned_space_unreserve(ip->i_mount, &ac); return ret; } diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 414b27a86458..af68c7de8ee8 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -1270,9 +1270,7 @@ xfs_getfsmap( * buffer locking abilities to detect cycles in the rmapbt * without deadlocking. */ - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - break; + tp = xfs_trans_alloc_empty(mp); info.dev = handlers[i].dev; info.last = false; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index bbc2f2973dcc..4cf7abe50143 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -893,10 +893,7 @@ xfs_metafile_iget( struct xfs_trans *tp; int error; - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - return error; - + tp = xfs_trans_alloc_empty(mp); error = xfs_trans_metafile_iget(tp, ino, metafile_type, ipp); xfs_trans_cancel(tp); return error; diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 4345db501714..f83ec2bd0583 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -158,7 +158,7 @@ xlog_recover_icreate_commit_pass2( int nbufs; int i; - icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; + icl = (struct xfs_icreate_log *)item->ri_buf[0].iov_base; if (icl->icl_type != XFS_LI_ICREATE) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); return -EINVAL; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 761a996a857c..9c39251961a3 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2932,12 +2932,9 @@ xfs_inode_reload_unlinked( struct xfs_inode *ip) { struct xfs_trans *tp; - int error; - - error = xfs_trans_alloc_empty(ip->i_mount, &tp); - if (error) - return error; + int error = 0; + tp = xfs_trans_alloc_empty(ip->i_mount); xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_inode_unlinked_incomplete(ip)) error = xfs_inode_reload_unlinked_bucket(tp, ip); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index d7e2b902ef5c..07fbdcc4cbf5 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -358,7 +358,7 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip) static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip) { - return xfs_inode_buftarg(ip)->bt_bdev_awu_max > 0; + return xfs_inode_buftarg(ip)->bt_awu_max > 0; } /* diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 285e27ff89e2..829675700fcd 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -1182,12 +1182,12 @@ xfs_iflush_shutdown_abort( */ int xfs_inode_item_format_convert( - struct xfs_log_iovec *buf, + struct kvec *buf, struct xfs_inode_log_format *in_f) { - struct xfs_inode_log_format_32 *in_f32 = buf->i_addr; + struct xfs_inode_log_format_32 *in_f32 = buf->iov_base; - if (buf->i_len != sizeof(*in_f32)) { + if (buf->iov_len != sizeof(*in_f32)) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 377e06007804..ba92ce11a011 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -46,8 +46,8 @@ extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); extern void xfs_inode_item_destroy(struct xfs_inode *); extern void xfs_iflush_abort(struct xfs_inode *); extern void xfs_iflush_shutdown_abort(struct xfs_inode *); -extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, - struct xfs_inode_log_format *); +int xfs_inode_item_format_convert(struct kvec *buf, + struct xfs_inode_log_format *in_f); extern struct kmem_cache *xfs_ili_cache; diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 7205fd14f6b3..9d1999d41be1 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -30,13 +30,13 @@ xlog_recover_inode_ra_pass2( struct xlog *log, struct xlog_recover_item *item) { - if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { - struct xfs_inode_log_format *ilfp = item->ri_buf[0].i_addr; + if (item->ri_buf[0].iov_len == sizeof(struct xfs_inode_log_format)) { + struct xfs_inode_log_format *ilfp = item->ri_buf[0].iov_base; xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len, &xfs_inode_buf_ra_ops); } else { - struct xfs_inode_log_format_32 *ilfp = item->ri_buf[0].i_addr; + struct xfs_inode_log_format_32 *ilfp = item->ri_buf[0].iov_base; xlog_buf_readahead(log, ilfp->ilf_blkno, ilfp->ilf_len, &xfs_inode_buf_ra_ops); @@ -326,8 +326,8 @@ xlog_recover_inode_commit_pass2( int need_free = 0; xfs_failaddr_t fa; - if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { - in_f = item->ri_buf[0].i_addr; + if (item->ri_buf[0].iov_len == sizeof(struct xfs_inode_log_format)) { + in_f = item->ri_buf[0].iov_base; } else { in_f = kmalloc(sizeof(struct xfs_inode_log_format), GFP_KERNEL | __GFP_NOFAIL); @@ -366,7 +366,7 @@ xlog_recover_inode_commit_pass2( error = -EFSCORRUPTED; goto out_release; } - ldip = item->ri_buf[1].i_addr; + ldip = item->ri_buf[1].iov_base; if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) { xfs_alert(mp, "%s: Bad inode log record, rec ptr "PTR_FMT", ino %lld", @@ -472,12 +472,12 @@ xlog_recover_inode_commit_pass2( goto out_release; } isize = xfs_log_dinode_size(mp); - if (unlikely(item->ri_buf[1].i_len > isize)) { + if (unlikely(item->ri_buf[1].iov_len > isize)) { XFS_CORRUPTION_ERROR("Bad log dinode size", XFS_ERRLEVEL_LOW, mp, ldip, sizeof(*ldip)); xfs_alert(mp, - "Bad inode 0x%llx log dinode size 0x%x", - in_f->ilf_ino, item->ri_buf[1].i_len); + "Bad inode 0x%llx log dinode size 0x%zx", + in_f->ilf_ino, item->ri_buf[1].iov_len); error = -EFSCORRUPTED; goto out_release; } @@ -500,8 +500,8 @@ xlog_recover_inode_commit_pass2( if (in_f->ilf_size == 2) goto out_owner_change; - len = item->ri_buf[2].i_len; - src = item->ri_buf[2].i_addr; + len = item->ri_buf[2].iov_len; + src = item->ri_buf[2].iov_base; ASSERT(in_f->ilf_size <= 4); ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); ASSERT(!(fields & XFS_ILOG_DFORK) || @@ -538,8 +538,8 @@ xlog_recover_inode_commit_pass2( } else { attr_index = 2; } - len = item->ri_buf[attr_index].i_len; - src = item->ri_buf[attr_index].i_addr; + len = item->ri_buf[attr_index].iov_len; + src = item->ri_buf[attr_index].iov_base; ASSERT(len == xlog_calc_iovec_len(in_f->ilf_asize)); switch (in_f->ilf_fields & XFS_ILOG_AFORK) { diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d250f7f74e3b..c3e8c5c1084f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -990,9 +990,8 @@ xfs_ioc_getlabel( BUILD_BUG_ON(sizeof(sbp->sb_fname) > FSLABEL_MAX); /* 1 larger than sb_fname, so this ensures a trailing NUL char */ - memset(label, 0, sizeof(label)); spin_lock(&mp->m_sb_lock); - strncpy(label, sbp->sb_fname, XFSLABEL_MAX); + memtostr_pad(label, sbp->sb_fname); spin_unlock(&mp->m_sb_lock); if (copy_to_user(user_label, label, sizeof(label))) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ff05e6b1b0bb..ec30b78bf5c4 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -827,7 +827,7 @@ xfs_bmap_hw_atomic_write_possible( /* * The ->iomap_begin caller should ensure this, but check anyway. */ - return len <= xfs_inode_buftarg(ip)->bt_bdev_awu_max; + return len <= xfs_inode_buftarg(ip)->bt_awu_max; } static int diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 8cddbb7c149b..149b5460fbfd 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -665,7 +665,7 @@ xfs_get_atomic_write_max_opt( * less than our out of place write limit, but we don't want to exceed * the awu_max. */ - return min(awu_max, xfs_inode_buftarg(ip)->bt_bdev_awu_max); + return min(awu_max, xfs_inode_buftarg(ip)->bt_awu_max); } static void @@ -970,7 +970,7 @@ xfs_setattr_size( * change. */ if (xfs_is_zoned_inode(ip)) { - error = xfs_zoned_space_reserve(ip, 1, + error = xfs_zoned_space_reserve(mp, 1, XFS_ZR_NOWAIT | XFS_ZR_RESERVED, &ac); if (error) { if (error == -EAGAIN) @@ -998,7 +998,7 @@ xfs_setattr_size( } if (xfs_is_zoned_inode(ip)) - xfs_zoned_space_unreserve(ip, &ac); + xfs_zoned_space_unreserve(mp, &ac); if (error) return error; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 1fa1c0564b0c..c8c9b8d8309f 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -239,14 +239,10 @@ xfs_bulkstat_one( * Grab an empty transaction so that we can use its recursive buffer * locking abilities to detect cycles in the inobt without deadlocking. */ - error = xfs_trans_alloc_empty(breq->mp, &tp); - if (error) - goto out; - + tp = xfs_trans_alloc_empty(breq->mp); error = xfs_bulkstat_one_int(breq->mp, breq->idmap, tp, breq->startino, &bc); xfs_trans_cancel(tp); -out: kfree(bc.buf); /* @@ -331,17 +327,13 @@ xfs_bulkstat( * Grab an empty transaction so that we can use its recursive buffer * locking abilities to detect cycles in the inobt without deadlocking. */ - error = xfs_trans_alloc_empty(breq->mp, &tp); - if (error) - goto out; - + tp = xfs_trans_alloc_empty(breq->mp); if (breq->flags & XFS_IBULK_SAME_AG) iwalk_flags |= XFS_IWALK_SAME_AG; error = xfs_iwalk(breq->mp, tp, breq->startino, iwalk_flags, xfs_bulkstat_iwalk, breq->icount, &bc); xfs_trans_cancel(tp); -out: kfree(bc.buf); /* @@ -464,14 +456,10 @@ xfs_inumbers( * Grab an empty transaction so that we can use its recursive buffer * locking abilities to detect cycles in the inobt without deadlocking. */ - error = xfs_trans_alloc_empty(breq->mp, &tp); - if (error) - goto out; - + tp = xfs_trans_alloc_empty(breq->mp); error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags, xfs_inumbers_walk, breq->icount, &ic); xfs_trans_cancel(tp); -out: /* * We found some inode groups, so clear the error status and return diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 7db3ece370b1..c1c31d1a8e21 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -377,11 +377,8 @@ xfs_iwalk_run_callbacks( if (!has_more) return 0; - if (iwag->drop_trans) { - error = xfs_trans_alloc_empty(mp, &iwag->tp); - if (error) - return error; - } + if (iwag->drop_trans) + iwag->tp = xfs_trans_alloc_empty(mp); /* ...and recreate the cursor just past where we left off. */ error = xfs_ialloc_read_agi(iwag->pag, iwag->tp, 0, agi_bpp); @@ -617,9 +614,7 @@ xfs_iwalk_ag_work( * Grab an empty transaction so that we can use its recursive buffer * locking abilities to detect cycles in the inobt without deadlocking. */ - error = xfs_trans_alloc_empty(mp, &iwag->tp); - if (error) - goto out; + iwag->tp = xfs_trans_alloc_empty(mp); iwag->drop_trans = 1; error = xfs_iwalk_ag(iwag); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 793468b4d30d..c8a57e21a1d3 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -109,14 +109,14 @@ xlog_prepare_iovec( vec = &lv->lv_iovecp[0]; } - len = lv->lv_buf_len + sizeof(struct xlog_op_header); + len = lv->lv_buf_used + sizeof(struct xlog_op_header); if (!IS_ALIGNED(len, sizeof(uint64_t))) { - lv->lv_buf_len = round_up(len, sizeof(uint64_t)) - + lv->lv_buf_used = round_up(len, sizeof(uint64_t)) - sizeof(struct xlog_op_header); } vec->i_type = type; - vec->i_addr = lv->lv_buf + lv->lv_buf_len; + vec->i_addr = lv->lv_buf + lv->lv_buf_used; oph = vec->i_addr; oph->oh_clientid = XFS_TRANSACTION; @@ -1931,9 +1931,9 @@ xlog_print_trans( if (!lv) continue; xfs_warn(mp, " niovecs = %d", lv->lv_niovecs); - xfs_warn(mp, " size = %d", lv->lv_size); + xfs_warn(mp, " alloc_size = %d", lv->lv_alloc_size); xfs_warn(mp, " bytes = %d", lv->lv_bytes); - xfs_warn(mp, " buf len = %d", lv->lv_buf_len); + xfs_warn(mp, " buf used= %d", lv->lv_buf_used); /* dump each iovec for the log item */ vec = lv->lv_iovecp; @@ -3092,16 +3092,16 @@ xfs_log_force_seq( */ void xfs_log_ticket_put( - xlog_ticket_t *ticket) + struct xlog_ticket *ticket) { ASSERT(atomic_read(&ticket->t_ref) > 0); if (atomic_dec_and_test(&ticket->t_ref)) kmem_cache_free(xfs_log_ticket_cache, ticket); } -xlog_ticket_t * +struct xlog_ticket * xfs_log_ticket_get( - xlog_ticket_t *ticket) + struct xlog_ticket *ticket) { ASSERT(atomic_read(&ticket->t_ref) > 0); atomic_inc(&ticket->t_ref); diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 13455854365f..af6daf4f6792 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -16,8 +16,8 @@ struct xfs_log_vec { struct xfs_log_item *lv_item; /* owner */ char *lv_buf; /* formatted buffer */ int lv_bytes; /* accounted space in buffer */ - int lv_buf_len; /* aligned size of buffer */ - int lv_size; /* size of allocated lv */ + int lv_buf_used; /* buffer space used so far */ + int lv_alloc_size; /* size of allocated lv */ }; #define XFS_LOG_VEC_ORDERED (-1) @@ -64,12 +64,13 @@ xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, oph->oh_len = cpu_to_be32(len); len += sizeof(struct xlog_op_header); - lv->lv_buf_len += len; + lv->lv_buf_used += len; lv->lv_bytes += len; vec->i_len = len; /* Catch buffer overruns */ - ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lv + lv->lv_size); + ASSERT((void *)lv->lv_buf + lv->lv_bytes <= + (void *)lv + lv->lv_alloc_size); } /* @@ -87,13 +88,6 @@ xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, return buf; } -static inline void * -xlog_copy_from_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, - const struct xfs_log_iovec *src) -{ - return xlog_copy_iovec(lv, vecp, src->i_type, src->i_addr, src->i_len); -} - /* * By comparing each component, we don't have to worry about extra * endian issues in treating two 32 bit numbers as one 64 bit number diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index a80cb6b9969a..f443757e93c2 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -275,7 +275,7 @@ xlog_cil_alloc_shadow_bufs( struct xfs_log_vec *lv; int niovecs = 0; int nbytes = 0; - int buf_size; + int alloc_size; bool ordered = false; /* Skip items which aren't dirty in this transaction. */ @@ -316,14 +316,14 @@ xlog_cil_alloc_shadow_bufs( * that space to ensure we can align it appropriately and not * overrun the buffer. */ - buf_size = nbytes + xlog_cil_iovec_space(niovecs); + alloc_size = nbytes + xlog_cil_iovec_space(niovecs); /* * if we have no shadow buffer, or it is too small, we need to * reallocate it. */ if (!lip->li_lv_shadow || - buf_size > lip->li_lv_shadow->lv_size) { + alloc_size > lip->li_lv_shadow->lv_alloc_size) { /* * We free and allocate here as a realloc would copy * unnecessary data. We don't use kvzalloc() for the @@ -332,15 +332,15 @@ xlog_cil_alloc_shadow_bufs( * storage. */ kvfree(lip->li_lv_shadow); - lv = xlog_kvmalloc(buf_size); + lv = xlog_kvmalloc(alloc_size); memset(lv, 0, xlog_cil_iovec_space(niovecs)); INIT_LIST_HEAD(&lv->lv_list); lv->lv_item = lip; - lv->lv_size = buf_size; + lv->lv_alloc_size = alloc_size; if (ordered) - lv->lv_buf_len = XFS_LOG_VEC_ORDERED; + lv->lv_buf_used = XFS_LOG_VEC_ORDERED; else lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1]; lip->li_lv_shadow = lv; @@ -348,9 +348,9 @@ xlog_cil_alloc_shadow_bufs( /* same or smaller, optimise common overwrite case */ lv = lip->li_lv_shadow; if (ordered) - lv->lv_buf_len = XFS_LOG_VEC_ORDERED; + lv->lv_buf_used = XFS_LOG_VEC_ORDERED; else - lv->lv_buf_len = 0; + lv->lv_buf_used = 0; lv->lv_bytes = 0; } @@ -370,30 +370,30 @@ xlog_cil_alloc_shadow_bufs( STATIC void xfs_cil_prepare_item( struct xlog *log, + struct xfs_log_item *lip, struct xfs_log_vec *lv, - struct xfs_log_vec *old_lv, int *diff_len) { /* Account for the new LV being passed in */ - if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) + if (lv->lv_buf_used != XFS_LOG_VEC_ORDERED) *diff_len += lv->lv_bytes; /* * If there is no old LV, this is the first time we've seen the item in * this CIL context and so we need to pin it. If we are replacing the - * old_lv, then remove the space it accounts for and make it the shadow + * old lv, then remove the space it accounts for and make it the shadow * buffer for later freeing. In both cases we are now switching to the * shadow buffer, so update the pointer to it appropriately. */ - if (!old_lv) { + if (!lip->li_lv) { if (lv->lv_item->li_ops->iop_pin) lv->lv_item->li_ops->iop_pin(lv->lv_item); lv->lv_item->li_lv_shadow = NULL; - } else if (old_lv != lv) { - ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); + } else if (lip->li_lv != lv) { + ASSERT(lv->lv_buf_used != XFS_LOG_VEC_ORDERED); - *diff_len -= old_lv->lv_bytes; - lv->lv_item->li_lv_shadow = old_lv; + *diff_len -= lip->li_lv->lv_bytes; + lv->lv_item->li_lv_shadow = lip->li_lv; } /* attach new log vector to log item */ @@ -452,10 +452,8 @@ xlog_cil_insert_format_items( } list_for_each_entry(lip, &tp->t_items, li_trans) { - struct xfs_log_vec *lv; - struct xfs_log_vec *old_lv = NULL; - struct xfs_log_vec *shadow; - bool ordered = false; + struct xfs_log_vec *lv = lip->li_lv; + struct xfs_log_vec *shadow = lip->li_lv_shadow; /* Skip items which aren't dirty in this transaction. */ if (!test_bit(XFS_LI_DIRTY, &lip->li_flags)) @@ -465,22 +463,23 @@ xlog_cil_insert_format_items( * The formatting size information is already attached to * the shadow lv on the log item. */ - shadow = lip->li_lv_shadow; - if (shadow->lv_buf_len == XFS_LOG_VEC_ORDERED) - ordered = true; + if (shadow->lv_buf_used == XFS_LOG_VEC_ORDERED) { + if (!lv) { + lv = shadow; + lv->lv_item = lip; + } + ASSERT(shadow->lv_alloc_size == lv->lv_alloc_size); + xfs_cil_prepare_item(log, lip, lv, diff_len); + continue; + } /* Skip items that do not have any vectors for writing */ - if (!shadow->lv_niovecs && !ordered) + if (!shadow->lv_niovecs) continue; /* compare to existing item size */ - old_lv = lip->li_lv; - if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) { + if (lv && shadow->lv_alloc_size <= lv->lv_alloc_size) { /* same or smaller, optimise common overwrite case */ - lv = lip->li_lv; - - if (ordered) - goto insert; /* * set the item up as though it is a new insertion so @@ -492,7 +491,7 @@ xlog_cil_insert_format_items( lv->lv_niovecs = shadow->lv_niovecs; /* reset the lv buffer information for new formatting */ - lv->lv_buf_len = 0; + lv->lv_buf_used = 0; lv->lv_bytes = 0; lv->lv_buf = (char *)lv + xlog_cil_iovec_space(lv->lv_niovecs); @@ -500,17 +499,11 @@ xlog_cil_insert_format_items( /* switch to shadow buffer! */ lv = shadow; lv->lv_item = lip; - if (ordered) { - /* track as an ordered logvec */ - ASSERT(lip->li_lv == NULL); - goto insert; - } } ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); lip->li_ops->iop_format(lip, lv); -insert: - xfs_cil_prepare_item(log, lv, old_lv, diff_len); + xfs_cil_prepare_item(log, lip, lv, diff_len); } } @@ -1245,7 +1238,7 @@ xlog_cil_build_lv_chain( lv->lv_order_id = item->li_order_id; /* we don't write ordered log vectors */ - if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) + if (lv->lv_buf_used != XFS_LOG_VEC_ORDERED) *num_bytes += lv->lv_bytes; *num_iovecs += lv->lv_niovecs; list_add_tail(&lv->lv_list, &ctx->lv_chain); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 39a102cc1b43..a9a7a271c15b 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -144,7 +144,7 @@ enum xlog_iclog_state { #define XLOG_COVER_OPS 5 -typedef struct xlog_ticket { +struct xlog_ticket { struct list_head t_queue; /* reserve/write queue */ struct task_struct *t_task; /* task that owns this ticket */ xlog_tid_t t_tid; /* transaction identifier */ @@ -155,7 +155,7 @@ typedef struct xlog_ticket { char t_cnt; /* current unit count */ uint8_t t_flags; /* properties of reservation */ int t_iclog_hdrs; /* iclog hdrs in t_curr_res */ -} xlog_ticket_t; +}; /* * - A log record header is 512 bytes. There is plenty of room to grow the diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 2f76531842f8..e6ed9e09c027 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2131,15 +2131,15 @@ xlog_recover_add_to_cont_trans( item = list_entry(trans->r_itemq.prev, struct xlog_recover_item, ri_list); - old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; - old_len = item->ri_buf[item->ri_cnt-1].i_len; + old_ptr = item->ri_buf[item->ri_cnt-1].iov_base; + old_len = item->ri_buf[item->ri_cnt-1].iov_len; ptr = kvrealloc(old_ptr, len + old_len, GFP_KERNEL); if (!ptr) return -ENOMEM; memcpy(&ptr[old_len], dp, len); - item->ri_buf[item->ri_cnt-1].i_len += len; - item->ri_buf[item->ri_cnt-1].i_addr = ptr; + item->ri_buf[item->ri_cnt-1].iov_len += len; + item->ri_buf[item->ri_cnt-1].iov_base = ptr; trace_xfs_log_recover_item_add_cont(log, trans, item, 0); return 0; } @@ -2223,7 +2223,7 @@ xlog_recover_add_to_trans( } item->ri_total = in_f->ilf_size; - item->ri_buf = kzalloc(item->ri_total * sizeof(xfs_log_iovec_t), + item->ri_buf = kcalloc(item->ri_total, sizeof(*item->ri_buf), GFP_KERNEL | __GFP_NOFAIL); } @@ -2237,8 +2237,8 @@ xlog_recover_add_to_trans( } /* Description region is ri_buf[0] */ - item->ri_buf[item->ri_cnt].i_addr = ptr; - item->ri_buf[item->ri_cnt].i_len = len; + item->ri_buf[item->ri_cnt].iov_base = ptr; + item->ri_buf[item->ri_cnt].iov_len = len; item->ri_cnt++; trace_xfs_log_recover_item_add(log, trans, item, 0); return 0; @@ -2262,7 +2262,7 @@ xlog_recover_free_trans( /* Free the regions in the item. */ list_del(&item->ri_list); for (i = 0; i < item->ri_cnt; i++) - kvfree(item->ri_buf[i].i_addr); + kvfree(item->ri_buf[i].iov_base); /* Free the item itself */ kfree(item->ri_buf); kfree(item); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 29276fe60df9..0b690bc119d7 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -171,19 +171,16 @@ xfs_readsb( ASSERT(mp->m_ddev_targp != NULL); /* - * For the initial read, we must guess at the sector - * size based on the block device. It's enough to - * get the sb_sectsize out of the superblock and - * then reread with the proper length. - * We don't verify it yet, because it may not be complete. + * In the first pass, use the device sector size to just read enough + * of the superblock to extract the XFS sector size. + * + * The device sector size must be smaller than or equal to the XFS + * sector size and thus we can always read the superblock. Once we know + * the XFS sector size, re-read it and run the buffer verifier. */ - sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); + sector_size = mp->m_ddev_targp->bt_logical_sectorsize; buf_ops = NULL; - /* - * Allocate a (locked) buffer to hold the superblock. This will be kept - * around at all times to optimize access to the superblock. - */ reread: error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), &bp, buf_ops); @@ -247,6 +244,10 @@ reread: /* no need to be quiet anymore, so reset the buf ops */ bp->b_ops = &xfs_sb_buf_ops; + /* + * Keep a pointer of the sb buffer around instead of caching it in the + * buffer cache because we access it frequently. + */ mp->m_sb_bp = bp; xfs_buf_unlock(bp); return 0; @@ -678,68 +679,46 @@ static inline unsigned int max_pow_of_two_factor(const unsigned int nr) } /* - * If the data device advertises atomic write support, limit the size of data - * device atomic writes to the greatest power-of-two factor of the AG size so - * that every atomic write unit aligns with the start of every AG. This is - * required so that the per-AG allocations for an atomic write will always be + * If the underlying device advertises atomic write support, limit the size of + * atomic writes to the greatest power-of-two factor of the group size so + * that every atomic write unit aligns with the start of every group. This is + * required so that the allocations for an atomic write will always be * aligned compatibly with the alignment requirements of the storage. * - * If the data device doesn't advertise atomic writes, then there are no - * alignment restrictions and the largest out-of-place write we can do - * ourselves is the number of blocks that user files can allocate from any AG. - */ -static inline xfs_extlen_t xfs_calc_perag_awu_max(struct xfs_mount *mp) -{ - if (mp->m_ddev_targp->bt_bdev_awu_min > 0) - return max_pow_of_two_factor(mp->m_sb.sb_agblocks); - return rounddown_pow_of_two(mp->m_ag_max_usable); -} - -/* - * Reflink on the realtime device requires rtgroups, and atomic writes require - * reflink. - * - * If the realtime device advertises atomic write support, limit the size of - * data device atomic writes to the greatest power-of-two factor of the rtgroup - * size so that every atomic write unit aligns with the start of every rtgroup. - * This is required so that the per-rtgroup allocations for an atomic write - * will always be aligned compatibly with the alignment requirements of the - * storage. - * - * If the rt device doesn't advertise atomic writes, then there are no - * alignment restrictions and the largest out-of-place write we can do - * ourselves is the number of blocks that user files can allocate from any - * rtgroup. + * If the device doesn't advertise atomic writes, then there are no alignment + * restrictions and the largest out-of-place write we can do ourselves is the + * number of blocks that user files can allocate from any group. */ -static inline xfs_extlen_t xfs_calc_rtgroup_awu_max(struct xfs_mount *mp) +static xfs_extlen_t +xfs_calc_group_awu_max( + struct xfs_mount *mp, + enum xfs_group_type type) { - struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG]; + struct xfs_groups *g = &mp->m_groups[type]; + struct xfs_buftarg *btp = xfs_group_type_buftarg(mp, type); - if (rgs->blocks == 0) + if (g->blocks == 0) return 0; - if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev_awu_min > 0) - return max_pow_of_two_factor(rgs->blocks); - return rounddown_pow_of_two(rgs->blocks); + if (btp && btp->bt_awu_min > 0) + return max_pow_of_two_factor(g->blocks); + return rounddown_pow_of_two(g->blocks); } /* Compute the maximum atomic write unit size for each section. */ static inline void xfs_calc_atomic_write_unit_max( - struct xfs_mount *mp) + struct xfs_mount *mp, + enum xfs_group_type type) { - struct xfs_groups *ags = &mp->m_groups[XG_TYPE_AG]; - struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG]; + struct xfs_groups *g = &mp->m_groups[type]; const xfs_extlen_t max_write = xfs_calc_atomic_write_max(mp); const xfs_extlen_t max_ioend = xfs_reflink_max_atomic_cow(mp); - const xfs_extlen_t max_agsize = xfs_calc_perag_awu_max(mp); - const xfs_extlen_t max_rgsize = xfs_calc_rtgroup_awu_max(mp); - - ags->awu_max = min3(max_write, max_ioend, max_agsize); - rgs->awu_max = min3(max_write, max_ioend, max_rgsize); + const xfs_extlen_t max_gsize = xfs_calc_group_awu_max(mp, type); - trace_xfs_calc_atomic_write_unit_max(mp, max_write, max_ioend, - max_agsize, max_rgsize); + g->awu_max = min3(max_write, max_ioend, max_gsize); + trace_xfs_calc_atomic_write_unit_max(mp, type, max_write, max_ioend, + max_gsize, g->awu_max); } /* @@ -757,7 +736,8 @@ xfs_set_max_atomic_write_opt( max(mp->m_groups[XG_TYPE_AG].blocks, mp->m_groups[XG_TYPE_RTG].blocks); const xfs_extlen_t max_group_write = - max(xfs_calc_perag_awu_max(mp), xfs_calc_rtgroup_awu_max(mp)); + max(xfs_calc_group_awu_max(mp, XG_TYPE_AG), + xfs_calc_group_awu_max(mp, XG_TYPE_RTG)); int error; if (new_max_bytes == 0) @@ -813,7 +793,8 @@ set_limit: return error; } - xfs_calc_atomic_write_unit_max(mp); + xfs_calc_atomic_write_unit_max(mp, XG_TYPE_AG); + xfs_calc_atomic_write_unit_max(mp, XG_TYPE_RTG); mp->m_awu_max_bytes = new_max_bytes; return 0; } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d85084f9f317..97de44c32272 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -802,4 +802,21 @@ static inline void xfs_mod_sb_delalloc(struct xfs_mount *mp, int64_t delta) int xfs_set_max_atomic_write_opt(struct xfs_mount *mp, unsigned long long new_max_bytes); +static inline struct xfs_buftarg * +xfs_group_type_buftarg( + struct xfs_mount *mp, + enum xfs_group_type type) +{ + switch (type) { + case XG_TYPE_AG: + return mp->m_ddev_targp; + case XG_TYPE_RTG: + return mp->m_rtdev_targp; + default: + ASSERT(0); + break; + } + return NULL; +} + #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index 3545dc1d953c..fbeddcac4792 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -253,8 +253,7 @@ xfs_dax_notify_dev_failure( return -EOPNOTSUPP; } - error = xfs_dax_translate_range(type == XG_TYPE_RTG ? - mp->m_rtdev_targp : mp->m_ddev_targp, + error = xfs_dax_translate_range(xfs_group_type_buftarg(mp, type), offset, len, &daddr, &bblen); if (error) return error; @@ -280,10 +279,7 @@ xfs_dax_notify_dev_failure( kernel_frozen = xfs_dax_notify_failure_freeze(mp) == 0; } - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - goto out; - + tp = xfs_trans_alloc_empty(mp); start_gno = xfs_fsb_to_gno(mp, start_bno, type); end_gno = xfs_fsb_to_gno(mp, end_bno, type); while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) { @@ -354,7 +350,6 @@ xfs_dax_notify_dev_failure( error = -EFSCORRUPTED; } -out: /* Thaw the fs if it has been frozen before. */ if (mf_flags & MF_MEM_PRE_REMOVE) xfs_dax_notify_failure_thaw(mp, kernel_frozen); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index fa135ac26471..23ba84ec919a 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -660,10 +660,7 @@ xfs_qm_load_metadir_qinos( struct xfs_trans *tp; int error; - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - return error; - + tp = xfs_trans_alloc_empty(mp); error = xfs_dqinode_load_parent(tp, &qi->qi_dirip); if (error == -ENOENT) { /* no quota dir directory, but we'll create one later */ @@ -1755,10 +1752,7 @@ xfs_qm_qino_load( struct xfs_inode *dp = NULL; int error; - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - return error; - + tp = xfs_trans_alloc_empty(mp); if (xfs_has_metadir(mp)) { error = xfs_dqinode_load_parent(tp, &dp); if (error) diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 076501123d89..3728234699a2 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -717,18 +717,18 @@ xlog_recover_cui_commit_pass2( struct xfs_cui_log_format *cui_formatp; size_t len; - cui_formatp = item->ri_buf[0].i_addr; + cui_formatp = item->ri_buf[0].iov_base; - if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) { + if (item->ri_buf[0].iov_len < xfs_cui_log_format_sizeof(0)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents); - if (item->ri_buf[0].i_len != len) { + if (item->ri_buf[0].iov_len != len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } @@ -759,18 +759,18 @@ xlog_recover_rtcui_commit_pass2( struct xfs_cui_log_format *cui_formatp; size_t len; - cui_formatp = item->ri_buf[0].i_addr; + cui_formatp = item->ri_buf[0].iov_base; - if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) { + if (item->ri_buf[0].iov_len < xfs_cui_log_format_sizeof(0)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents); - if (item->ri_buf[0].i_len != len) { + if (item->ri_buf[0].iov_len != len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } @@ -791,7 +791,7 @@ xlog_recover_rtcui_commit_pass2( xfs_lsn_t lsn) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } #endif @@ -817,10 +817,10 @@ xlog_recover_cud_commit_pass2( { struct xfs_cud_log_format *cud_formatp; - cud_formatp = item->ri_buf[0].i_addr; - if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { + cud_formatp = item->ri_buf[0].iov_base; + if (item->ri_buf[0].iov_len != sizeof(struct xfs_cud_log_format)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } @@ -843,10 +843,10 @@ xlog_recover_rtcud_commit_pass2( { struct xfs_cud_log_format *cud_formatp; - cud_formatp = item->ri_buf[0].i_addr; - if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { + cud_formatp = item->ri_buf[0].iov_base; + if (item->ri_buf[0].iov_len != sizeof(struct xfs_cud_log_format)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index c99700318ec2..15f0903f6fd4 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -746,18 +746,18 @@ xlog_recover_rui_commit_pass2( struct xfs_rui_log_format *rui_formatp; size_t len; - rui_formatp = item->ri_buf[0].i_addr; + rui_formatp = item->ri_buf[0].iov_base; - if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) { + if (item->ri_buf[0].iov_len < xfs_rui_log_format_sizeof(0)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents); - if (item->ri_buf[0].i_len != len) { + if (item->ri_buf[0].iov_len != len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } @@ -788,18 +788,18 @@ xlog_recover_rtrui_commit_pass2( struct xfs_rui_log_format *rui_formatp; size_t len; - rui_formatp = item->ri_buf[0].i_addr; + rui_formatp = item->ri_buf[0].iov_base; - if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) { + if (item->ri_buf[0].iov_len < xfs_rui_log_format_sizeof(0)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents); - if (item->ri_buf[0].i_len != len) { + if (item->ri_buf[0].iov_len != len) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } @@ -820,7 +820,7 @@ xlog_recover_rtrui_commit_pass2( xfs_lsn_t lsn) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, - item->ri_buf[0].i_addr, item->ri_buf[0].i_len); + item->ri_buf[0].iov_base, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } #endif @@ -846,10 +846,10 @@ xlog_recover_rud_commit_pass2( { struct xfs_rud_log_format *rud_formatp; - rud_formatp = item->ri_buf[0].i_addr; - if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) { + rud_formatp = item->ri_buf[0].iov_base; + if (item->ri_buf[0].iov_len != sizeof(struct xfs_rud_log_format)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, - rud_formatp, item->ri_buf[0].i_len); + rud_formatp, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } @@ -872,10 +872,10 @@ xlog_recover_rtrud_commit_pass2( { struct xfs_rud_log_format *rud_formatp; - rud_formatp = item->ri_buf[0].i_addr; - if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) { + rud_formatp = item->ri_buf[0].iov_base; + if (item->ri_buf[0].iov_len != sizeof(struct xfs_rud_log_format)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, - rud_formatp, item->ri_buf[0].i_len); + rud_formatp, item->ri_buf[0].iov_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 736eb0924573..6907e871fa15 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -729,9 +729,7 @@ xfs_rtginode_ensure( if (rtg->rtg_inodes[type]) return 0; - error = xfs_trans_alloc_empty(rtg_mount(rtg), &tp); - if (error) - return error; + tp = xfs_trans_alloc_empty(rtg_mount(rtg)); error = xfs_rtginode_load(rtg, type, tp); xfs_trans_cancel(tp); @@ -1305,9 +1303,7 @@ xfs_growfs_rt_prep_groups( if (!mp->m_rtdirip) { struct xfs_trans *tp; - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - return error; + tp = xfs_trans_alloc_empty(mp); error = xfs_rtginode_load_parent(tp); xfs_trans_cancel(tp); @@ -1674,10 +1670,7 @@ xfs_rtmount_inodes( struct xfs_rtgroup *rtg = NULL; int error; - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - return error; - + tp = xfs_trans_alloc_empty(mp); if (xfs_has_rtgroups(mp) && mp->m_sb.sb_rgcount > 0) { error = xfs_rtginode_load_parent(tp); if (error) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ba45d801df1c..e1794e3e3156 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -171,36 +171,33 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list); DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list); TRACE_EVENT(xfs_calc_atomic_write_unit_max, - TP_PROTO(struct xfs_mount *mp, unsigned int max_write, - unsigned int max_ioend, unsigned int max_agsize, - unsigned int max_rgsize), - TP_ARGS(mp, max_write, max_ioend, max_agsize, max_rgsize), + TP_PROTO(struct xfs_mount *mp, enum xfs_group_type type, + unsigned int max_write, unsigned int max_ioend, + unsigned int max_gsize, unsigned int awu_max), + TP_ARGS(mp, type, max_write, max_ioend, max_gsize, awu_max), TP_STRUCT__entry( __field(dev_t, dev) + __field(enum xfs_group_type, type) __field(unsigned int, max_write) __field(unsigned int, max_ioend) - __field(unsigned int, max_agsize) - __field(unsigned int, max_rgsize) - __field(unsigned int, data_awu_max) - __field(unsigned int, rt_awu_max) + __field(unsigned int, max_gsize) + __field(unsigned int, awu_max) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; + __entry->type = type; __entry->max_write = max_write; __entry->max_ioend = max_ioend; - __entry->max_agsize = max_agsize; - __entry->max_rgsize = max_rgsize; - __entry->data_awu_max = mp->m_groups[XG_TYPE_AG].awu_max; - __entry->rt_awu_max = mp->m_groups[XG_TYPE_RTG].awu_max; + __entry->max_gsize = max_gsize; + __entry->awu_max = awu_max; ), - TP_printk("dev %d:%d max_write %u max_ioend %u max_agsize %u max_rgsize %u data_awu_max %u rt_awu_max %u", + TP_printk("dev %d:%d %s max_write %u max_ioend %u max_gsize %u awu_max %u", MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->type, XG_TYPE_STRINGS), __entry->max_write, __entry->max_ioend, - __entry->max_agsize, - __entry->max_rgsize, - __entry->data_awu_max, - __entry->rt_awu_max) + __entry->max_gsize, + __entry->awu_max) ); TRACE_EVENT(xfs_calc_max_atomic_write_fsblocks, @@ -428,8 +425,8 @@ DECLARE_EVENT_CLASS(xfs_zone_alloc_class, __field(dev_t, dev) __field(xfs_rgnumber_t, rgno) __field(xfs_rgblock_t, used) + __field(xfs_rgblock_t, allocated) __field(xfs_rgblock_t, written) - __field(xfs_rgblock_t, write_pointer) __field(xfs_rgblock_t, rgbno) __field(xfs_extlen_t, len) ), @@ -437,17 +434,17 @@ DECLARE_EVENT_CLASS(xfs_zone_alloc_class, __entry->dev = rtg_mount(oz->oz_rtg)->m_super->s_dev; __entry->rgno = rtg_rgno(oz->oz_rtg); __entry->used = rtg_rmap(oz->oz_rtg)->i_used_blocks; + __entry->allocated = oz->oz_allocated; __entry->written = oz->oz_written; - __entry->write_pointer = oz->oz_write_pointer; __entry->rgbno = rgbno; __entry->len = len; ), - TP_printk("dev %d:%d rgno 0x%x used 0x%x written 0x%x wp 0x%x rgbno 0x%x len 0x%x", + TP_printk("dev %d:%d rgno 0x%x used 0x%x alloced 0x%x written 0x%x rgbno 0x%x len 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rgno, __entry->used, + __entry->allocated, __entry->written, - __entry->write_pointer, __entry->rgbno, __entry->len) ); @@ -1082,7 +1079,9 @@ DEFINE_INODE_EVENT(xfs_get_acl); #endif DEFINE_INODE_EVENT(xfs_vm_bmap); DEFINE_INODE_EVENT(xfs_file_ioctl); +#ifdef CONFIG_COMPAT DEFINE_INODE_EVENT(xfs_file_compat_ioctl); +#endif DEFINE_INODE_EVENT(xfs_ioctl_setattr); DEFINE_INODE_EVENT(xfs_dir_fsync); DEFINE_INODE_EVENT(xfs_file_fsync); @@ -1397,7 +1396,6 @@ DEFINE_EVENT(xfs_dquot_class, name, \ TP_ARGS(dqp)) DEFINE_DQUOT_EVENT(xfs_dqadjust); DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); -DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy); DEFINE_DQUOT_EVENT(xfs_dqreclaim_done); DEFINE_DQUOT_EVENT(xfs_dqattach_found); @@ -1602,7 +1600,6 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant); DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_sub); DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_exit); DEFINE_LOGGRANT_EVENT(xfs_log_cil_wait); -DEFINE_LOGGRANT_EVENT(xfs_log_cil_return); DECLARE_EVENT_CLASS(xfs_log_item_class, TP_PROTO(struct xfs_log_item *lip), @@ -1865,8 +1862,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof); DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write); -DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten); -DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append); DEFINE_SIMPLE_IO_EVENT(xfs_file_splice_read); DEFINE_SIMPLE_IO_EVENT(xfs_zoned_map_blocks); @@ -1899,31 +1894,6 @@ DEFINE_EVENT(xfs_itrunc_class, name, \ DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_start); DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_end); -TRACE_EVENT(xfs_pagecache_inval, - TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), - TP_ARGS(ip, start, finish), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_fsize_t, size) - __field(xfs_off_t, start) - __field(xfs_off_t, finish) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->size = ip->i_disk_size; - __entry->start = start; - __entry->finish = finish; - ), - TP_printk("dev %d:%d ino 0x%llx disize 0x%llx start 0x%llx finish 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->start, - __entry->finish) -); - TRACE_EVENT(xfs_bunmap, TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t fileoff, xfs_filblks_t len, int flags, unsigned long caller_ip), @@ -2269,14 +2239,12 @@ DEFINE_EVENT(xfs_alloc_class, name, \ DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); -DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); DEFINE_ALLOC_EVENT(xfs_alloc_near_first); DEFINE_ALLOC_EVENT(xfs_alloc_cur); DEFINE_ALLOC_EVENT(xfs_alloc_cur_right); DEFINE_ALLOC_EVENT(xfs_alloc_cur_left); DEFINE_ALLOC_EVENT(xfs_alloc_cur_lookup); DEFINE_ALLOC_EVENT(xfs_alloc_cur_lookup_done); -DEFINE_ALLOC_EVENT(xfs_alloc_near_error); DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); @@ -2471,13 +2439,8 @@ DEFINE_ATTR_EVENT(xfs_attr_leaf_toosmall); DEFINE_ATTR_EVENT(xfs_attr_node_addname); DEFINE_ATTR_EVENT(xfs_attr_node_get); DEFINE_ATTR_EVENT(xfs_attr_node_replace); -DEFINE_ATTR_EVENT(xfs_attr_node_removename); - -DEFINE_ATTR_EVENT(xfs_attr_fillstate); -DEFINE_ATTR_EVENT(xfs_attr_refillstate); DEFINE_ATTR_EVENT(xfs_attr_rmtval_get); -DEFINE_ATTR_EVENT(xfs_attr_rmtval_set); #define DEFINE_DA_EVENT(name) \ DEFINE_EVENT(xfs_da_class, name, \ @@ -2895,7 +2858,6 @@ DEFINE_EVENT(xfs_rtdiscard_class, name, \ TP_ARGS(mp, rtbno, len)) DEFINE_RTDISCARD_EVENT(xfs_discard_rtextent); DEFINE_RTDISCARD_EVENT(xfs_discard_rttoosmall); -DEFINE_RTDISCARD_EVENT(xfs_discard_rtrelax); DECLARE_EVENT_CLASS(xfs_btree_cur_class, TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp), @@ -4212,36 +4174,6 @@ DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_extent_error); DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_src); DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_dest); -/* dedupe tracepoints */ -DEFINE_DOUBLE_IO_EVENT(xfs_reflink_compare_extents); -DEFINE_INODE_ERROR_EVENT(xfs_reflink_compare_extents_error); - -/* ioctl tracepoints */ -TRACE_EVENT(xfs_ioctl_clone, - TP_PROTO(struct inode *src, struct inode *dest), - TP_ARGS(src, dest), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(unsigned long, src_ino) - __field(loff_t, src_isize) - __field(unsigned long, dest_ino) - __field(loff_t, dest_isize) - ), - TP_fast_assign( - __entry->dev = src->i_sb->s_dev; - __entry->src_ino = src->i_ino; - __entry->src_isize = i_size_read(src); - __entry->dest_ino = dest->i_ino; - __entry->dest_isize = i_size_read(dest); - ), - TP_printk("dev %d:%d ino 0x%lx isize 0x%llx -> ino 0x%lx isize 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->src_ino, - __entry->src_isize, - __entry->dest_ino, - __entry->dest_isize) -); - /* unshare tracepoints */ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_unshare); DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error); @@ -4249,7 +4181,6 @@ DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error); /* copy on write */ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); -DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); @@ -5034,7 +4965,6 @@ DEFINE_ICLOG_EVENT(xlog_iclog_switch); DEFINE_ICLOG_EVENT(xlog_iclog_sync); DEFINE_ICLOG_EVENT(xlog_iclog_syncing); DEFINE_ICLOG_EVENT(xlog_iclog_sync_done); -DEFINE_ICLOG_EVENT(xlog_iclog_want_sync); DEFINE_ICLOG_EVENT(xlog_iclog_wait_on); DEFINE_ICLOG_EVENT(xlog_iclog_write); @@ -5083,7 +5013,6 @@ DEFINE_DAS_STATE_EVENT(xfs_attr_sf_addname_return); DEFINE_DAS_STATE_EVENT(xfs_attr_set_iter_return); DEFINE_DAS_STATE_EVENT(xfs_attr_leaf_addname_return); DEFINE_DAS_STATE_EVENT(xfs_attr_node_addname_return); -DEFINE_DAS_STATE_EVENT(xfs_attr_remove_iter_return); DEFINE_DAS_STATE_EVENT(xfs_attr_rmtval_alloc); DEFINE_DAS_STATE_EVENT(xfs_attr_rmtval_remove_return); DEFINE_DAS_STATE_EVENT(xfs_attr_defer_add); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index b4a07af513ba..ece374d622b3 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -134,18 +134,14 @@ xfs_trans_dup( } /* - * This is called to reserve free disk blocks and log space for the - * given transaction. This must be done before allocating any resources - * within the transaction. + * This is called to reserve free disk blocks and log space for the given + * transaction before allocating any resources within the transaction. * * This will return ENOSPC if there are not enough blocks available. * It will sleep waiting for available log space. - * The only valid value for the flags parameter is XFS_RES_LOG_PERM, which - * is used by long running transactions. If any one of the reservations - * fails then they will all be backed out. * - * This does not do quota reservations. That typically is done by the - * caller afterwards. + * This does not do quota reservations. That typically is done by the caller + * afterwards. */ static int xfs_trans_reserve( @@ -158,10 +154,12 @@ xfs_trans_reserve( int error = 0; bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; + ASSERT(resp->tr_logres > 0); + /* - * Attempt to reserve the needed disk blocks by decrementing - * the number needed from the number available. This will - * fail if the count would go below zero. + * Attempt to reserve the needed disk blocks by decrementing the number + * needed from the number available. This will fail if the count would + * go below zero. */ if (blocks > 0) { error = xfs_dec_fdblocks(mp, blocks, rsvd); @@ -173,42 +171,20 @@ xfs_trans_reserve( /* * Reserve the log space needed for this transaction. */ - if (resp->tr_logres > 0) { - bool permanent = false; - - ASSERT(tp->t_log_res == 0 || - tp->t_log_res == resp->tr_logres); - ASSERT(tp->t_log_count == 0 || - tp->t_log_count == resp->tr_logcount); - - if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) { - tp->t_flags |= XFS_TRANS_PERM_LOG_RES; - permanent = true; - } else { - ASSERT(tp->t_ticket == NULL); - ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); - } - - if (tp->t_ticket != NULL) { - ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES); - error = xfs_log_regrant(mp, tp->t_ticket); - } else { - error = xfs_log_reserve(mp, resp->tr_logres, - resp->tr_logcount, - &tp->t_ticket, permanent); - } - - if (error) - goto undo_blocks; + if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) + tp->t_flags |= XFS_TRANS_PERM_LOG_RES; + error = xfs_log_reserve(mp, resp->tr_logres, resp->tr_logcount, + &tp->t_ticket, (tp->t_flags & XFS_TRANS_PERM_LOG_RES)); + if (error) + goto undo_blocks; - tp->t_log_res = resp->tr_logres; - tp->t_log_count = resp->tr_logcount; - } + tp->t_log_res = resp->tr_logres; + tp->t_log_count = resp->tr_logcount; /* - * Attempt to reserve the needed realtime extents by decrementing - * the number needed from the number available. This will - * fail if the count would go below zero. + * Attempt to reserve the needed realtime extents by decrementing the + * number needed from the number available. This will fail if the + * count would go below zero. */ if (rtextents > 0) { error = xfs_dec_frextents(mp, rtextents); @@ -221,18 +197,11 @@ xfs_trans_reserve( return 0; - /* - * Error cases jump to one of these labels to undo any - * reservations which have already been performed. - */ undo_log: - if (resp->tr_logres > 0) { - xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); - tp->t_ticket = NULL; - tp->t_log_res = 0; - tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES; - } - + xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); + tp->t_ticket = NULL; + tp->t_log_res = 0; + tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES; undo_blocks: if (blocks > 0) { xfs_add_fdblocks(mp, blocks); @@ -241,6 +210,28 @@ undo_blocks: return error; } +static struct xfs_trans * +__xfs_trans_alloc( + struct xfs_mount *mp, + uint flags) +{ + struct xfs_trans *tp; + + ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || xfs_has_lazysbcount(mp)); + + tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); + if (!(flags & XFS_TRANS_NO_WRITECOUNT)) + sb_start_intwrite(mp->m_super); + xfs_trans_set_context(tp); + tp->t_flags = flags; + tp->t_mountp = mp; + INIT_LIST_HEAD(&tp->t_items); + INIT_LIST_HEAD(&tp->t_busy); + INIT_LIST_HEAD(&tp->t_dfops); + tp->t_highest_agno = NULLAGNUMBER; + return tp; +} + int xfs_trans_alloc( struct xfs_mount *mp, @@ -254,33 +245,16 @@ xfs_trans_alloc( bool want_retry = true; int error; + ASSERT(resp->tr_logres > 0); + /* * Allocate the handle before we do our freeze accounting and setting up * GFP_NOFS allocation context so that we avoid lockdep false positives * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ retry: - tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); - if (!(flags & XFS_TRANS_NO_WRITECOUNT)) - sb_start_intwrite(mp->m_super); - xfs_trans_set_context(tp); - - /* - * Zero-reservation ("empty") transactions can't modify anything, so - * they're allowed to run while we're frozen. - */ - WARN_ON(resp->tr_logres > 0 && - mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); - ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || - xfs_has_lazysbcount(mp)); - - tp->t_flags = flags; - tp->t_mountp = mp; - INIT_LIST_HEAD(&tp->t_items); - INIT_LIST_HEAD(&tp->t_busy); - INIT_LIST_HEAD(&tp->t_dfops); - tp->t_highest_agno = NULLAGNUMBER; - + WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); + tp = __xfs_trans_alloc(mp, flags); error = xfs_trans_reserve(tp, resp, blocks, rtextents); if (error == -ENOSPC && want_retry) { xfs_trans_cancel(tp); @@ -324,14 +298,11 @@ retry: * where we can be grabbing buffers at the same time that freeze is trying to * drain the buffer LRU list. */ -int +struct xfs_trans * xfs_trans_alloc_empty( - struct xfs_mount *mp, - struct xfs_trans **tpp) + struct xfs_mount *mp) { - struct xfs_trans_res resv = {0}; - - return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp); + return __xfs_trans_alloc(mp, XFS_TRANS_NO_WRITECOUNT); } /* @@ -1026,51 +997,57 @@ xfs_trans_cancel( } /* - * Roll from one trans in the sequence of PERMANENT transactions to - * the next: permanent transactions are only flushed out when - * committed with xfs_trans_commit(), but we still want as soon - * as possible to let chunks of it go to the log. So we commit the - * chunk we've been working on and get a new transaction to continue. + * Roll from one trans in the sequence of PERMANENT transactions to the next: + * permanent transactions are only flushed out when committed with + * xfs_trans_commit(), but we still want as soon as possible to let chunks of it + * go to the log. So we commit the chunk we've been working on and get a new + * transaction to continue. */ int xfs_trans_roll( struct xfs_trans **tpp) { - struct xfs_trans *trans = *tpp; - struct xfs_trans_res tres; + struct xfs_trans *tp = *tpp; + unsigned int log_res = tp->t_log_res; + unsigned int log_count = tp->t_log_count; int error; - trace_xfs_trans_roll(trans, _RET_IP_); + trace_xfs_trans_roll(tp, _RET_IP_); + + ASSERT(log_res > 0); /* * Copy the critical parameters from one trans to the next. */ - tres.tr_logres = trans->t_log_res; - tres.tr_logcount = trans->t_log_count; - - *tpp = xfs_trans_dup(trans); + *tpp = xfs_trans_dup(tp); /* * Commit the current transaction. - * If this commit failed, then it'd just unlock those items that - * are not marked ihold. That also means that a filesystem shutdown - * is in progress. The caller takes the responsibility to cancel - * the duplicate transaction that gets returned. + * + * If this commit failed, then it'd just unlock those items that are not + * marked ihold. That also means that a filesystem shutdown is in + * progress. The caller takes the responsibility to cancel the + * duplicate transaction that gets returned. */ - error = __xfs_trans_commit(trans, true); + error = __xfs_trans_commit(tp, true); if (error) return error; /* * Reserve space in the log for the next transaction. - * This also pushes items in the "AIL", the list of logged items, - * out to disk if they are taking up space at the tail of the log - * that we want to use. This requires that either nothing be locked - * across this call, or that anything that is locked be logged in - * the prior and the next transactions. + * + * This also pushes items in the AIL out to disk if they are taking up + * space at the tail of the log that we want to use. This requires that + * either nothing be locked across this call, or that anything that is + * locked be logged in the prior and the next transactions. */ - tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; - return xfs_trans_reserve(*tpp, &tres, 0, 0); + tp = *tpp; + error = xfs_log_regrant(tp->t_mountp, tp->t_ticket); + if (error) + return error; + tp->t_log_res = log_res; + tp->t_log_count = log_count; + return 0; } /* @@ -1146,9 +1123,18 @@ xfs_trans_reserve_more( unsigned int blocks, unsigned int rtextents) { - struct xfs_trans_res resv = { }; - - return xfs_trans_reserve(tp, &resv, blocks, rtextents); + bool rsvd = tp->t_flags & XFS_TRANS_RESERVE; + + if (blocks && xfs_dec_fdblocks(tp->t_mountp, blocks, rsvd)) + return -ENOSPC; + if (rtextents && xfs_dec_frextents(tp->t_mountp, rtextents)) { + if (blocks) + xfs_add_fdblocks(tp->t_mountp, blocks); + return -ENOSPC; + } + tp->t_blk_res += blocks; + tp->t_rtx_res += rtextents; + return 0; } /* @@ -1163,14 +1149,13 @@ xfs_trans_reserve_more_inode( unsigned int rblocks, bool force_quota) { - struct xfs_trans_res resv = { }; struct xfs_mount *mp = ip->i_mount; unsigned int rtx = xfs_extlen_to_rtxlen(mp, rblocks); int error; xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); - error = xfs_trans_reserve(tp, &resv, dblocks, rtx); + error = xfs_trans_reserve_more(tp, dblocks, rtx); if (error) return error; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 2b366851e9a4..7fb860f645a3 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -15,7 +15,6 @@ struct xfs_efd_log_item; struct xfs_efi_log_item; struct xfs_inode; struct xfs_item_ops; -struct xfs_log_iovec; struct xfs_mount; struct xfs_trans; struct xfs_trans_res; @@ -168,8 +167,7 @@ int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp, struct xfs_trans **tpp); int xfs_trans_reserve_more(struct xfs_trans *tp, unsigned int blocks, unsigned int rtextents); -int xfs_trans_alloc_empty(struct xfs_mount *mp, - struct xfs_trans **tpp); +struct xfs_trans *xfs_trans_alloc_empty(struct xfs_mount *mp); void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); int xfs_trans_get_buf_map(struct xfs_trans *tp, struct xfs_buftarg *target, diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 0f641a9091ec..ac5cecec9aa1 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -243,7 +243,7 @@ __xfs_xattr_put_listent( offset = context->buffer + context->count; memcpy(offset, prefix, prefix_len); offset += prefix_len; - strncpy(offset, (char *)name, namelen); /* real name */ + memcpy(offset, (char *)name, namelen); /* real name */ offset += namelen; *offset = '\0'; diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 01315ed75502..33f7eee521a8 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -434,7 +434,7 @@ xfs_init_open_zone( spin_lock_init(&oz->oz_alloc_lock); atomic_set(&oz->oz_ref, 1); oz->oz_rtg = rtg; - oz->oz_write_pointer = write_pointer; + oz->oz_allocated = write_pointer; oz->oz_written = write_pointer; oz->oz_write_hint = write_hint; oz->oz_is_gc = is_gc; @@ -569,7 +569,7 @@ xfs_try_use_zone( struct xfs_open_zone *oz, bool lowspace) { - if (oz->oz_write_pointer == rtg_blocks(oz->oz_rtg)) + if (oz->oz_allocated == rtg_blocks(oz->oz_rtg)) return false; if (!lowspace && !xfs_good_hint_match(oz, file_hint)) return false; @@ -654,13 +654,6 @@ static inline bool xfs_zoned_pack_tight(struct xfs_inode *ip) !(ip->i_diflags & XFS_DIFLAG_APPEND); } -/* - * Pick a new zone for writes. - * - * If we aren't using up our budget of open zones just open a new one from the - * freelist. Else try to find one that matches the expected data lifetime. If - * we don't find one that is good pick any zone that is available. - */ static struct xfs_open_zone * xfs_select_zone_nowait( struct xfs_mount *mp, @@ -688,7 +681,8 @@ xfs_select_zone_nowait( goto out_unlock; /* - * See if we can open a new zone and use that. + * See if we can open a new zone and use that so that data for different + * files is mixed as little as possible. */ oz = xfs_try_open_zone(mp, write_hint); if (oz) @@ -744,25 +738,25 @@ xfs_zone_alloc_blocks( { struct xfs_rtgroup *rtg = oz->oz_rtg; struct xfs_mount *mp = rtg_mount(rtg); - xfs_rgblock_t rgbno; + xfs_rgblock_t allocated; spin_lock(&oz->oz_alloc_lock); count_fsb = min3(count_fsb, XFS_MAX_BMBT_EXTLEN, - (xfs_filblks_t)rtg_blocks(rtg) - oz->oz_write_pointer); + (xfs_filblks_t)rtg_blocks(rtg) - oz->oz_allocated); if (!count_fsb) { spin_unlock(&oz->oz_alloc_lock); return 0; } - rgbno = oz->oz_write_pointer; - oz->oz_write_pointer += count_fsb; + allocated = oz->oz_allocated; + oz->oz_allocated += count_fsb; spin_unlock(&oz->oz_alloc_lock); - trace_xfs_zone_alloc_blocks(oz, rgbno, count_fsb); + trace_xfs_zone_alloc_blocks(oz, allocated, count_fsb); *sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0); *is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *sector); if (!*is_seq) - *sector += XFS_FSB_TO_BB(mp, rgbno); + *sector += XFS_FSB_TO_BB(mp, allocated); return XFS_FSB_TO_B(mp, count_fsb); } @@ -983,7 +977,7 @@ xfs_zone_rgbno_is_valid( lockdep_assert_held(&rtg_rmap(rtg)->i_lock); if (rtg->rtg_open_zone) - return rgbno < rtg->rtg_open_zone->oz_write_pointer; + return rgbno < rtg->rtg_open_zone->oz_allocated; return !xa_get_mark(&rtg_mount(rtg)->m_groups[XG_TYPE_RTG].xa, rtg_rgno(rtg), XFS_RTG_FREE); } @@ -1017,7 +1011,7 @@ xfs_init_zone( { struct xfs_mount *mp = rtg_mount(rtg); struct xfs_zone_info *zi = mp->m_zone_info; - uint64_t used = rtg_rmap(rtg)->i_used_blocks; + uint32_t used = rtg_rmap(rtg)->i_used_blocks; xfs_rgblock_t write_pointer, highest_rgbno; int error; @@ -1114,24 +1108,27 @@ xfs_get_zone_info_cb( } /* - * Calculate the max open zone limit based on the of number of - * backing zones available + * Calculate the max open zone limit based on the of number of backing zones + * available. */ static inline uint32_t xfs_max_open_zones( struct xfs_mount *mp) { unsigned int max_open, max_open_data_zones; + /* - * We need two zones for every open data zone, - * one in reserve as we don't reclaim open zones. One data zone - * and its spare is included in XFS_MIN_ZONES. + * We need two zones for every open data zone, one in reserve as we + * don't reclaim open zones. One data zone and its spare is included + * in XFS_MIN_ZONES to support at least one user data writer. */ max_open_data_zones = (mp->m_sb.sb_rgcount - XFS_MIN_ZONES) / 2 + 1; max_open = max_open_data_zones + XFS_OPEN_GC_ZONES; /* - * Cap the max open limit to 1/4 of available space + * Cap the max open limit to 1/4 of available space. Without this we'd + * run out of easy reclaim targets too quickly and storage devices don't + * handle huge numbers of concurrent write streams overly well. */ max_open = min(max_open, mp->m_sb.sb_rgcount / 4); diff --git a/fs/xfs/xfs_zone_alloc.h b/fs/xfs/xfs_zone_alloc.h index ecf39106704c..4db02816d0fd 100644 --- a/fs/xfs/xfs_zone_alloc.h +++ b/fs/xfs/xfs_zone_alloc.h @@ -23,9 +23,9 @@ struct xfs_zone_alloc_ctx { */ #define XFS_ZR_RESERVED (1U << 2) -int xfs_zoned_space_reserve(struct xfs_inode *ip, xfs_filblks_t count_fsb, +int xfs_zoned_space_reserve(struct xfs_mount *mp, xfs_filblks_t count_fsb, unsigned int flags, struct xfs_zone_alloc_ctx *ac); -void xfs_zoned_space_unreserve(struct xfs_inode *ip, +void xfs_zoned_space_unreserve(struct xfs_mount *mp, struct xfs_zone_alloc_ctx *ac); void xfs_zoned_add_available(struct xfs_mount *mp, xfs_filblks_t count_fsb); diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 9c00fc5baa30..064cd1a857a0 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -328,10 +328,7 @@ xfs_zone_gc_query( iter->rec_idx = 0; iter->rec_count = 0; - error = xfs_trans_alloc_empty(mp, &tp); - if (error) - return error; - + tp = xfs_trans_alloc_empty(mp); xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); cur = xfs_rtrmapbt_init_cursor(tp, rtg); error = xfs_rmap_query_range(cur, &ri_low, &ri_high, @@ -533,8 +530,7 @@ xfs_zone_gc_steal_open( spin_lock(&zi->zi_open_zones_lock); list_for_each_entry(oz, &zi->zi_open_zones, oz_entry) { - if (!found || - oz->oz_write_pointer < found->oz_write_pointer) + if (!found || oz->oz_allocated < found->oz_allocated) found = oz; } @@ -584,7 +580,7 @@ xfs_zone_gc_ensure_target( { struct xfs_open_zone *oz = mp->m_zone_info->zi_open_gc_zone; - if (!oz || oz->oz_write_pointer == rtg_blocks(oz->oz_rtg)) + if (!oz || oz->oz_allocated == rtg_blocks(oz->oz_rtg)) return xfs_zone_gc_select_target(mp); return oz; } @@ -605,7 +601,7 @@ xfs_zone_gc_space_available( oz = xfs_zone_gc_ensure_target(data->mp); if (!oz) return false; - return oz->oz_write_pointer < rtg_blocks(oz->oz_rtg) && + return oz->oz_allocated < rtg_blocks(oz->oz_rtg) && xfs_zone_gc_scratch_available(data); } @@ -647,7 +643,7 @@ xfs_zone_gc_alloc_blocks( */ spin_lock(&mp->m_sb_lock); *count_fsb = min(*count_fsb, - rtg_blocks(oz->oz_rtg) - oz->oz_write_pointer); + rtg_blocks(oz->oz_rtg) - oz->oz_allocated); *count_fsb = min3(*count_fsb, mp->m_free[XC_FREE_RTEXTENTS].res_avail, mp->m_free[XC_FREE_RTAVAILABLE].res_avail); @@ -661,8 +657,8 @@ xfs_zone_gc_alloc_blocks( *daddr = xfs_gbno_to_daddr(&oz->oz_rtg->rtg_group, 0); *is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *daddr); if (!*is_seq) - *daddr += XFS_FSB_TO_BB(mp, oz->oz_write_pointer); - oz->oz_write_pointer += *count_fsb; + *daddr += XFS_FSB_TO_BB(mp, oz->oz_allocated); + oz->oz_allocated += *count_fsb; atomic_inc(&oz->oz_ref); return oz; } diff --git a/fs/xfs/xfs_zone_info.c b/fs/xfs/xfs_zone_info.c index 733bcc2f8645..07e30c596975 100644 --- a/fs/xfs/xfs_zone_info.c +++ b/fs/xfs/xfs_zone_info.c @@ -32,7 +32,7 @@ xfs_show_open_zone( { seq_printf(m, "\t zone %d, wp %u, written %u, used %u, hint %s\n", rtg_rgno(oz->oz_rtg), - oz->oz_write_pointer, oz->oz_written, + oz->oz_allocated, oz->oz_written, rtg_rmap(oz->oz_rtg)->i_used_blocks, xfs_write_hint_to_str(oz->oz_write_hint)); } diff --git a/fs/xfs/xfs_zone_priv.h b/fs/xfs/xfs_zone_priv.h index ab696975a993..35e6de3d25ed 100644 --- a/fs/xfs/xfs_zone_priv.h +++ b/fs/xfs/xfs_zone_priv.h @@ -11,18 +11,18 @@ struct xfs_open_zone { atomic_t oz_ref; /* - * oz_write_pointer is the write pointer at which space is handed out - * for conventional zones, or simple the count of blocks handed out - * so far for sequential write required zones and is protected by - * oz_alloc_lock/ + * oz_allocated is the amount of space already allocated out of the zone + * and is protected by oz_alloc_lock. + * + * For conventional zones it also is the offset of the next write. */ spinlock_t oz_alloc_lock; - xfs_rgblock_t oz_write_pointer; + xfs_rgblock_t oz_allocated; /* - * oz_written is the number of blocks for which we've received a - * write completion. oz_written must always be <= oz_write_pointer - * and is protected by the ILOCK of the rmap inode. + * oz_written is the number of blocks for which we've received a write + * completion. oz_written must always be <= oz_allocated and is + * protected by the ILOCK of the rmap inode. */ xfs_rgblock_t oz_written; diff --git a/fs/xfs/xfs_zone_space_resv.c b/fs/xfs/xfs_zone_space_resv.c index 93c9a7721139..1313c55b8cbe 100644 --- a/fs/xfs/xfs_zone_space_resv.c +++ b/fs/xfs/xfs_zone_space_resv.c @@ -117,11 +117,10 @@ xfs_zoned_space_wait_error( static int xfs_zoned_reserve_available( - struct xfs_inode *ip, + struct xfs_mount *mp, xfs_filblks_t count_fsb, unsigned int flags) { - struct xfs_mount *mp = ip->i_mount; struct xfs_zone_info *zi = mp->m_zone_info; struct xfs_zone_reservation reservation = { .task = current, @@ -198,11 +197,10 @@ xfs_zoned_reserve_available( */ static int xfs_zoned_reserve_extents_greedy( - struct xfs_inode *ip, + struct xfs_mount *mp, xfs_filblks_t *count_fsb, unsigned int flags) { - struct xfs_mount *mp = ip->i_mount; struct xfs_zone_info *zi = mp->m_zone_info; s64 len = *count_fsb; int error = -ENOSPC; @@ -220,12 +218,11 @@ xfs_zoned_reserve_extents_greedy( int xfs_zoned_space_reserve( - struct xfs_inode *ip, + struct xfs_mount *mp, xfs_filblks_t count_fsb, unsigned int flags, struct xfs_zone_alloc_ctx *ac) { - struct xfs_mount *mp = ip->i_mount; int error; ASSERT(ac->reserved_blocks == 0); @@ -234,11 +231,11 @@ xfs_zoned_space_reserve( error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb, flags & XFS_ZR_RESERVED); if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1) - error = xfs_zoned_reserve_extents_greedy(ip, &count_fsb, flags); + error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags); if (error) return error; - error = xfs_zoned_reserve_available(ip, count_fsb, flags); + error = xfs_zoned_reserve_available(mp, count_fsb, flags); if (error) { xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb); return error; @@ -249,12 +246,10 @@ xfs_zoned_space_reserve( void xfs_zoned_space_unreserve( - struct xfs_inode *ip, + struct xfs_mount *mp, struct xfs_zone_alloc_ctx *ac) { if (ac->reserved_blocks > 0) { - struct xfs_mount *mp = ip->i_mount; - xfs_zoned_add_available(mp, ac->reserved_blocks); xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks); } diff --git a/include/asm-generic/param.h b/include/asm-generic/param.h index 8d3009dd28ff..8348c116aa3b 100644 --- a/include/asm-generic/param.h +++ b/include/asm-generic/param.h @@ -6,6 +6,6 @@ # undef HZ # define HZ CONFIG_HZ /* Internal kernel timer frequency */ -# define USER_HZ 100 /* some user interfaces are */ +# define USER_HZ __USER_HZ /* some user interfaces are */ # define CLOCKS_PER_SEC (USER_HZ) /* in "ticks" like times() */ #endif /* __ASM_GENERIC_PARAM_H */ diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 9689a7c5dd36..513837632b7d 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -160,6 +160,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm, u64 new_size, struct list_head *blocks); +void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear); + void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block); void drm_buddy_free_list(struct drm_buddy *mm, diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 5c3b2aa3e69d..d344d41e6cfe 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -300,6 +300,9 @@ struct drm_file { * * Mapping of mm object handles to object pointers. Used by the GEM * subsystem. Protected by @table_lock. + * + * Note that allocated entries might be NULL as a transient state when + * creating or deleting a handle. */ struct idr object_idr; diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index 668077009fce..38b24fc8978d 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -23,6 +23,7 @@ #ifndef __DRM_FRAMEBUFFER_H__ #define __DRM_FRAMEBUFFER_H__ +#include <linux/bits.h> #include <linux/ctype.h> #include <linux/list.h> #include <linux/sched.h> @@ -100,6 +101,8 @@ struct drm_framebuffer_funcs { unsigned num_clips); }; +#define DRM_FRAMEBUFFER_HAS_HANDLE_REF(_i) BIT(0u + (_i)) + /** * struct drm_framebuffer - frame buffer object * @@ -189,6 +192,10 @@ struct drm_framebuffer { */ int flags; /** + * @internal_flags: Framebuffer flags like DRM_FRAMEBUFFER_HAS_HANDLE_REF. + */ + unsigned int internal_flags; + /** * @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock. */ struct list_head filp_head; diff --git a/include/dt-bindings/clock/sun8i-v3s-ccu.h b/include/dt-bindings/clock/sun8i-v3s-ccu.h index 014ac6123d17..c4055629c9f9 100644 --- a/include/dt-bindings/clock/sun8i-v3s-ccu.h +++ b/include/dt-bindings/clock/sun8i-v3s-ccu.h @@ -96,7 +96,7 @@ #define CLK_TCON0 64 #define CLK_CSI_MISC 65 #define CLK_CSI0_MCLK 66 -#define CLK_CSI1_SCLK 67 +#define CLK_CSI_SCLK 67 #define CLK_CSI1_MCLK 68 #define CLK_VE 69 #define CLK_AC_DIG 70 diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 784ebe4607a4..50b47eba7d01 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -113,7 +113,7 @@ /* KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 */ #define ARM_SMCCC_VENDOR_HYP_UID_KVM UUID_INIT(\ - 0xb66fb428, 0xc52e, 0xe911, \ + 0x28b46fb6, 0x2ec5, 0x11e9, \ 0xa9, 0xca, 0x4b, 0x56, \ 0x4d, 0x00, 0x3a, 0x74) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a59880c809c7..181a0deadc9e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -269,11 +269,16 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER) * however we constrain this to what we can validate and test. */ #define BLK_MAX_BLOCK_SIZE SZ_64K +#else +#define BLK_MAX_BLOCK_SIZE PAGE_SIZE +#endif + /* blk_validate_limits() validates bsize, so drivers don't usually need to */ static inline int blk_validate_block_size(unsigned long bsize) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 96a3a0d6a60e..6378370a952f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -82,6 +82,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e29823c701ac..cc3e1c1a3454 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -237,7 +237,6 @@ extern void d_instantiate_new(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); -extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); @@ -245,6 +244,9 @@ extern struct dentry * d_alloc_anon(struct super_block *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, wait_queue_head_t *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); +/* weird procfs mess; *NOT* exported */ +extern struct dentry * d_splice_alias_ops(struct inode *, struct dentry *, + const struct dentry_operations *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent, const struct qstr *name); @@ -604,4 +606,6 @@ static inline struct dentry *d_next_sibling(const struct dentry *dentry) return hlist_entry_safe(dentry->d_sib.next, struct dentry, d_sib); } +void set_default_d_op(struct super_block *, const struct dentry_operations *); + #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 040c0036320f..ea5b14fe424d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1402,6 +1402,7 @@ struct super_block { char s_sysfs_name[UUID_STRING_LEN + 1]; unsigned int s_max_links; + unsigned int s_d_flags; /* default d_flags for dentries */ /* * The next field is for VFS *only*. No filesystems have any business @@ -1415,7 +1416,7 @@ struct super_block { */ const char *s_subtype; - const struct dentry_operations *s_d_op; /* default d_op for dentries */ + const struct dentry_operations *__s_d_op; /* default d_op for dentries */ struct shrinker *s_shrink; /* per-sb shrinker handle */ @@ -3599,6 +3600,8 @@ extern int simple_rename(struct mnt_idmap *, struct inode *, unsigned int); extern void simple_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); +extern void locked_recursive_removal(struct dentry *, + void (*callback)(struct dentry *)); extern int noop_fsync(struct file *, loff_t, loff_t, int); extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); @@ -3611,7 +3614,6 @@ extern struct inode *alloc_anon_inode(struct super_block *); struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, const struct inode *context_inode); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); -extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); @@ -3625,6 +3627,7 @@ extern int simple_fill_super(struct super_block *, unsigned long, const struct tree_descr *); extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); +struct dentry *simple_start_creating(struct dentry *, const char *); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 22f39e5e2ff1..996be3c2cff0 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -663,18 +663,6 @@ static inline bool ieee80211_s1g_has_cssid(__le16 fc) } /** - * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon - * @fc: frame control bytes in little-endian byteorder - * Return: whether or not the frame is an S1G short beacon, - * i.e. it is an S1G beacon with 'next TBTT' flag set - */ -static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) -{ - return ieee80211_is_s1g_beacon(fc) && - (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); -} - -/** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an ATIM frame @@ -4901,6 +4889,39 @@ static inline bool ieee80211_is_ftm(struct sk_buff *skb) return false; } +/** + * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon + * @fc: frame control bytes in little-endian byteorder + * @variable: pointer to the beacon frame elements + * @variable_len: length of the frame elements + * Return: whether or not the frame is an S1G short beacon. As per + * IEEE80211-2024 11.1.3.10.1, The S1G beacon compatibility element shall + * always be present as the first element in beacon frames generated at a + * TBTT (Target Beacon Transmission Time), so any frame not containing + * this element must have been generated at a TSBTT (Target Short Beacon + * Transmission Time) that is not a TBTT. Additionally, short beacons are + * prohibited from containing the S1G beacon compatibility element as per + * IEEE80211-2024 9.3.4.3 Table 9-76, so if we have an S1G beacon with + * either no elements or the first element is not the beacon compatibility + * element, we have a short beacon. + */ +static inline bool ieee80211_is_s1g_short_beacon(__le16 fc, const u8 *variable, + size_t variable_len) +{ + if (!ieee80211_is_s1g_beacon(fc)) + return false; + + /* + * If the frame does not contain at least 1 element (this is perfectly + * valid in a short beacon) and is an S1G beacon, we have a short + * beacon. + */ + if (variable_len < 2) + return true; + + return variable[0] != WLAN_EID_S1G_BCN_COMPAT; +} + struct element { u8 id; u8 datalen; diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index 55cfebc658e6..8a2f652a05ff 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -119,6 +119,7 @@ int icc_std_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, struct icc_node *icc_node_create_dyn(void); struct icc_node *icc_node_create(int id); void icc_node_destroy(int id); +int icc_node_set_name(struct icc_node *node, const struct icc_provider *provider, const char *name); int icc_link_nodes(struct icc_node *src_node, struct icc_node **dst_node); int icc_link_create(struct icc_node *node, const int dst_id); void icc_node_add(struct icc_node *node, struct icc_provider *provider); @@ -152,6 +153,12 @@ static inline void icc_node_destroy(int id) { } +static inline int icc_node_set_name(struct icc_node *node, const struct icc_provider *provider, + const char *name) +{ + return -EOPNOTSUPP; +} + static inline int icc_link_nodes(struct icc_node *src_node, struct icc_node **dst_node) { return -EOPNOTSUPP; diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 2922635986f5..a7efcec2e3d0 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -698,6 +698,8 @@ struct io_kiocb { struct hlist_node hash_node; /* For IOPOLL setup queues, with hybrid polling */ u64 iopoll_start; + /* for private io_kiocb freeing */ + struct rcu_head rcu_head; }; /* internal polling, see IORING_FEAT_FAST_POLL */ struct async_poll *apoll; diff --git a/include/linux/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h index dd8d1d138544..224ac28e88d7 100644 --- a/include/linux/irqchip/irq-msi-lib.h +++ b/include/linux/irqchip/irq-msi-lib.h @@ -17,6 +17,7 @@ #define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI) +struct msi_domain_info; int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); diff --git a/include/linux/ism.h b/include/linux/ism.h index 5428edd90982..8358b4cd7ba6 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -28,6 +28,7 @@ struct ism_dmb { struct ism_dev { spinlock_t lock; /* protects the ism device */ + spinlock_t cmd_lock; /* serializes cmds */ struct list_head list; struct pci_dev *pdev; diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ef2ba0c667a..fa538feaa8d9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2568,6 +2568,11 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) return percpu_counter_read_positive(&mm->rss_stat[member]); } +static inline unsigned long get_mm_counter_sum(struct mm_struct *mm, int member) +{ + return percpu_counter_sum_positive(&mm->rss_stat[member]); +} + void mm_trace_rss_stat(struct mm_struct *mm, int member); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 437769e061b7..13add0c2c407 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -154,6 +154,7 @@ struct phy_attrs { * @id: id of the phy device * @ops: function pointers for performing phy operations * @mutex: mutex to protect phy_ops + * @lockdep_key: lockdep information for this mutex * @init_count: used to protect when the PHY is used by multiple consumers * @power_count: used to protect when the PHY is used by multiple consumers * @attrs: used to specify PHY specific attributes @@ -165,6 +166,7 @@ struct phy { int id; const struct phy_ops *ops; struct mutex mutex; + struct lock_class_key lockdep_key; int init_count; int power_count; struct phy_attrs attrs; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 7803edaa8ff8..0cca01b5607b 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -888,15 +888,23 @@ static inline int power_supply_is_system_supplied(void) { return -ENOSYS; } extern int power_supply_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val); +int power_supply_get_property_direct(struct power_supply *psy, enum power_supply_property psp, + union power_supply_propval *val); #if IS_ENABLED(CONFIG_POWER_SUPPLY) extern int power_supply_set_property(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val); +int power_supply_set_property_direct(struct power_supply *psy, enum power_supply_property psp, + const union power_supply_propval *val); #else static inline int power_supply_set_property(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val) { return 0; } +static inline int power_supply_set_property_direct(struct power_supply *psy, + enum power_supply_property psp, + const union power_supply_propval *val) +{ return 0; } #endif extern void power_supply_external_power_changed(struct power_supply *psy); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index ea62201c74c4..de1d24f19f76 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -27,6 +27,8 @@ enum { PROC_ENTRY_proc_read_iter = 1U << 1, PROC_ENTRY_proc_compat_ioctl = 1U << 2, + + PROC_ENTRY_FORCE_LOOKUP = 1U << 7, }; struct proc_ops { diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 0b3a36bdaa90..0f5f94137f6d 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -594,6 +594,7 @@ struct sev_data_snp_addr { * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the * purpose of guest-assisted migration. * @rsvd: reserved + * @desired_tsc_khz: hypervisor desired mean TSC freq in kHz of the guest * @gosvw: guest OS-visible workarounds, as defined by hypervisor */ struct sev_data_snp_launch_start { @@ -603,6 +604,7 @@ struct sev_data_snp_launch_start { u32 ma_en:1; /* In */ u32 imi_en:1; /* In */ u32 rsvd:30; + u32 desired_tsc_khz; /* In */ u8 gosvw[16]; /* In */ } __packed; diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..aa9c5be7a632 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -548,10 +548,6 @@ struct sched_statistics { u64 nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; -#ifdef CONFIG_NUMA_BALANCING - u64 numa_task_migrated; - u64 numa_task_swapped; -#endif u64 nr_wakeups; u64 nr_wakeups_sync; diff --git a/include/linux/security.h b/include/linux/security.h index dba349629229..386463b5e848 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2211,7 +2211,6 @@ struct dentry *securityfs_create_symlink(const char *name, const char *target, const struct inode_operations *iops); extern void securityfs_remove(struct dentry *dentry); -extern void securityfs_recursive_remove(struct dentry *dentry); #else /* CONFIG_SECURITYFS */ @@ -2243,6 +2242,8 @@ static inline void securityfs_remove(struct dentry *dentry) #endif +#define securityfs_recursive_remove securityfs_remove + #ifdef CONFIG_BPF_SYSCALL union bpf_attr; struct bpf_map; diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h index 51cab2def9ec..876130091384 100644 --- a/include/linux/sprintf.h +++ b/include/linux/sprintf.h @@ -4,6 +4,7 @@ #include <linux/compiler_attributes.h> #include <linux/types.h> +#include <linux/stdarg.h> int num_to_str(char *buf, int size, unsigned long long num, unsigned int width); diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index c4b0eb2b2f04..ada17b57ca44 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -69,15 +69,17 @@ enum rpc_reject_stat { }; enum rpc_auth_stat { - RPC_AUTH_OK = 0, - RPC_AUTH_BADCRED = 1, - RPC_AUTH_REJECTEDCRED = 2, - RPC_AUTH_BADVERF = 3, - RPC_AUTH_REJECTEDVERF = 4, - RPC_AUTH_TOOWEAK = 5, + RPC_AUTH_OK = 0, /* success */ + RPC_AUTH_BADCRED = 1, /* bad credential (seal broken) */ + RPC_AUTH_REJECTEDCRED = 2, /* client must begin new session */ + RPC_AUTH_BADVERF = 3, /* bad verifier (seal broken) */ + RPC_AUTH_REJECTEDVERF = 4, /* verifier expired or replayed */ + RPC_AUTH_TOOWEAK = 5, /* rejected for security reasons */ + RPC_AUTH_INVALIDRESP = 6, /* bogus response verifier */ + RPC_AUTH_FAILED = 7, /* reason unknown */ /* RPCSEC_GSS errors */ - RPCSEC_GSS_CREDPROBLEM = 13, - RPCSEC_GSS_CTXPROBLEM = 14 + RPCSEC_GSS_CREDPROBLEM = 13, /* no credentials for user */ + RPCSEC_GSS_CTXPROBLEM = 14 /* problem with context */ }; #define RPC_MAXNETNAMELEN 256 diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 3b35b6f6533a..2cb406f8ff4e 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -98,7 +98,7 @@ static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) { } struct rpc_clnt; -extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); +extern int rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); extern int rpc_remove_client_dir(struct rpc_clnt *); extern void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh); @@ -127,9 +127,9 @@ extern void rpc_remove_cache_dir(struct dentry *); struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags); void rpc_destroy_pipe_data(struct rpc_pipe *pipe); -extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *, +extern int rpc_mkpipe_dentry(struct dentry *, const char *, void *, struct rpc_pipe *); -extern int rpc_unlink(struct dentry *); +extern void rpc_unlink(struct rpc_pipe *); extern int register_rpc_pipefs(void); extern void unregister_rpc_pipefs(void); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 48666b83fe68..40cbe81360ed 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -245,10 +245,10 @@ struct svc_rqst { * initialisation success. */ - unsigned long bc_to_initval; - unsigned int bc_to_retries; - void ** rq_lease_breaker; /* The v4 client breaking a lease */ + unsigned long bc_to_initval; + unsigned int bc_to_retries; unsigned int rq_status_counter; /* RPC processing counter */ + void **rq_lease_breaker; /* The v4 client breaking a lease */ }; /* bits for rq_flags */ diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 2e111153f7cd..4b92fec23a49 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -86,7 +86,6 @@ struct auth_domain { enum svc_auth_status { SVC_GARBAGE = 1, - SVC_SYSERR, SVC_VALID, SVC_NEGATIVE, SVC_OK, diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index a2ab813a9800..e3358c630ba1 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -119,6 +119,8 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) #define rpc_autherr_badverf cpu_to_be32(RPC_AUTH_BADVERF) #define rpc_autherr_rejectedverf cpu_to_be32(RPC_AUTH_REJECTEDVERF) #define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK) +#define rpc_autherr_invalidresp cpu_to_be32(RPC_AUTH_INVALIDRESP) +#define rpc_autherr_failed cpu_to_be32(RPC_AUTH_FAILED) #define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM) #define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM) @@ -242,8 +244,7 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); -extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, - struct page **pages, struct rpc_rqst *rqst); +void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes); extern void __xdr_commit_encode(struct xdr_stream *xdr); diff --git a/include/linux/tpm.h b/include/linux/tpm.h index a3d8305e88a5..9894c104dc93 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -182,7 +182,7 @@ struct tpm_chip { unsigned long duration[TPM_NUM_DURATIONS]; /* jiffies */ bool duration_adjusted; - struct dentry *bios_dir[TPM_NUM_EVENT_LOG_FILES]; + struct dentry *bios_dir; const struct attribute_group *groups[3 + TPM_MAX_HASHES]; unsigned int groups_cnt; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 91a3ce9a2687..9e15a088ba38 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -66,8 +66,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NUMA_HINT_FAULTS, NUMA_HINT_FAULTS_LOCAL, NUMA_PAGE_MIGRATE, - NUMA_TASK_MIGRATE, - NUMA_TASK_SWAP, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d56e6e135158..d40e978126e3 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -243,8 +243,8 @@ int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -#ifdef CONFIG_BPF_SYSCALL extern struct proto vsock_proto; +#ifdef CONFIG_BPF_SYSCALL int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void __init vsock_bpf_build_proto(void); #else diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 82cbd54443ac..c79901f2dc2a 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -377,6 +377,8 @@ enum { * This quirk must be set before hci_register_dev is called. */ HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, + + __HCI_NUM_QUIRKS, }; /* HCI device flags */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9fc8f544e20e..f79f59e67114 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -464,7 +464,7 @@ struct hci_dev { unsigned int auto_accept_delay; - unsigned long quirks; + DECLARE_BITMAP(quirk_flags, __HCI_NUM_QUIRKS); atomic_t cmd_cnt; unsigned int acl_cnt; @@ -656,6 +656,10 @@ struct hci_dev { u8 (*classify_pkt_type)(struct hci_dev *hdev, struct sk_buff *skb); }; +#define hci_set_quirk(hdev, nr) set_bit((nr), (hdev)->quirk_flags) +#define hci_clear_quirk(hdev, nr) clear_bit((nr), (hdev)->quirk_flags) +#define hci_test_quirk(hdev, nr) test_bit((nr), (hdev)->quirk_flags) + #define HCI_PHY_HANDLE(handle) (handle & 0xff) enum conn_reasons { @@ -829,20 +833,20 @@ extern struct mutex hci_cb_list_lock; #define hci_dev_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), (hdev)->dev_flags) -#define hci_dev_clear_volatile_flags(hdev) \ - do { \ - hci_dev_clear_flag(hdev, HCI_LE_SCAN); \ - hci_dev_clear_flag(hdev, HCI_LE_ADV); \ - hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\ - hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \ - hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \ +#define hci_dev_clear_volatile_flags(hdev) \ + do { \ + hci_dev_clear_flag((hdev), HCI_LE_SCAN); \ + hci_dev_clear_flag((hdev), HCI_LE_ADV); \ + hci_dev_clear_flag((hdev), HCI_LL_RPA_RESOLUTION); \ + hci_dev_clear_flag((hdev), HCI_PERIODIC_INQ); \ + hci_dev_clear_flag((hdev), HCI_QUALITY_REPORT); \ } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \ - (hdev->le_states[4] & 0x08) && /* Central */ \ - (hdev->le_states[4] & 0x40) && /* Peripheral */ \ - (hdev->le_states[3] & 0x10)) /* Simultaneous */ + (!hci_test_quirk((hdev), HCI_QUIRK_BROKEN_LE_STATES) && \ + ((hdev)->le_states[4] & 0x08) && /* Central */ \ + ((hdev)->le_states[4] & 0x40) && /* Peripheral */ \ + ((hdev)->le_states[3] & 0x10)) /* Simultaneous */ /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); @@ -1350,8 +1354,7 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != BIS_LINK || bacmp(&c->dst, BDADDR_ANY) || - c->state != state) + if (c->type != BIS_LINK || c->state != state) continue; if (handle == c->iso_qos.bcast.big) { @@ -1932,8 +1935,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn); ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M)) #define le_coded_capable(dev) (((dev)->le_features[1] & HCI_LE_PHY_CODED) && \ - !test_bit(HCI_QUIRK_BROKEN_LE_CODED, \ - &(dev)->quirks)) + !hci_test_quirk((dev), \ + HCI_QUIRK_BROKEN_LE_CODED)) #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) @@ -1941,31 +1944,31 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) #define privacy_mode_capable(dev) (ll_privacy_capable(dev) && \ - (hdev->commands[39] & 0x04)) + ((dev)->commands[39] & 0x04)) #define read_key_size_capable(dev) \ ((dev)->commands[20] & 0x10 && \ - !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE)) #define read_voice_setting_capable(dev) \ ((dev)->commands[9] & 0x04 && \ - !test_bit(HCI_QUIRK_BROKEN_READ_VOICE_SETTING, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_READ_VOICE_SETTING)) /* Use enhanced synchronous connection if command is supported and its quirk * has not been set. */ #define enhanced_sync_conn_capable(dev) \ (((dev)->commands[29] & 0x08) && \ - !test_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN)) /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ ((dev)->commands[37] & 0x40) && \ - !test_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_EXT_SCAN)) /* Use ext create connection if command is supported */ #define use_ext_conn(dev) (((dev)->commands[37] & 0x80) && \ - !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_EXT_CREATE_CONN)) /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) @@ -1980,8 +1983,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn); */ #define use_enhanced_conn_complete(dev) ((ll_privacy_capable(dev) || \ ext_adv_capable(dev)) && \ - !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, \ - &(dev)->quirks)) + !hci_test_quirk((dev), \ + HCI_QUIRK_BROKEN_EXT_CREATE_CONN)) /* Periodic advertising support */ #define per_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_PERIODIC_ADV)) @@ -1998,7 +2001,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define sync_recv_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER) #define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \ - (!test_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &(dev)->quirks))) + (!hci_test_quirk((dev), HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG))) /* ----- HCI protocols ----- */ #define HCI_PROTO_DEFER 0x01 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d1848dc8ec99..10248d527616 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2690,7 +2690,7 @@ struct cfg80211_scan_request { s8 tsf_report_link_id; /* keep last */ - struct ieee80211_channel *channels[] __counted_by(n_channels); + struct ieee80211_channel *channels[]; }; static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 3f02a45773e8..ca26274196b9 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -306,8 +306,19 @@ static inline bool nf_ct_is_expired(const struct nf_conn *ct) /* use after obtaining a reference count */ static inline bool nf_ct_should_gc(const struct nf_conn *ct) { - return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && - !nf_ct_is_dying(ct); + if (!nf_ct_is_confirmed(ct)) + return false; + + /* load ct->timeout after is_confirmed() test. + * Pairs with __nf_conntrack_confirm() which: + * 1. Increases ct->timeout value + * 2. Inserts ct into rcu hlist + * 3. Sets the confirmed bit + * 4. Unlocks the hlist lock + */ + smp_acquire__after_ctrl_dep(); + + return nf_ct_is_expired(ct) && !nf_ct_is_dying(ct); } #define NF_CT_DAY (86400 * HZ) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d711642e78b5..c003cd194fa2 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -370,7 +370,7 @@ static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) { - if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + if (!pskb_may_pull(skb, ETH_HLEN + PPPOE_SES_HLEN)) return false; *inner_proto = __nf_flow_pppoe_proto(skb); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e4d8e451e935..5e49619ae49c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1142,11 +1142,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); -struct nft_hook; -void nf_tables_chain_device_notify(const struct nft_chain *chain, - const struct nft_hook *hook, - const struct net_device *dev, int event); - enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, NFT_CHAIN_T_ROUTE, diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index d7b7b6cd4aa1..8a75c73fc555 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -114,7 +114,6 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct netlink_ext_ack *extack); void qdisc_put_rtab(struct qdisc_rate_table *tab); void qdisc_put_stab(struct qdisc_size_table *tab); -void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc); bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq, spinlock_t *root_lock, bool validate); @@ -290,4 +289,28 @@ static inline bool tc_qdisc_stats_dump(struct Qdisc *sch, return true; } +static inline void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) +{ + if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { + pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", + txt, qdisc->ops->id, qdisc->handle >> 16); + qdisc->flags |= TCQ_F_WARN_NONWC; + } +} + +static inline unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + unsigned int len; + + skb = sch->ops->peek(sch); + if (unlikely(skb == NULL)) { + qdisc_warn_nonwc("qdisc_peek_len", sch); + return 0; + } + len = qdisc_pkt_len(skb); + + return len; +} + #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a21e276dbe44..f3014e4f54fc 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -441,7 +441,6 @@ int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo); int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo); void xfrm_flush_gc(void); -void xfrm_state_delete_tunnel(struct xfrm_state *x); struct xfrm_type { struct module *owner; @@ -474,7 +473,7 @@ struct xfrm_type_offload { int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); -void xfrm_set_type_offload(struct xfrm_state *x); +void xfrm_set_type_offload(struct xfrm_state *x, bool try_load); static inline void xfrm_unset_type_offload(struct xfrm_state *x) { if (!x->type_offload) @@ -916,7 +915,7 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) xfrm_pol_put(pols[i]); } -void __xfrm_state_destroy(struct xfrm_state *, bool); +void __xfrm_state_destroy(struct xfrm_state *); static inline void __xfrm_state_put(struct xfrm_state *x) { @@ -926,13 +925,7 @@ static inline void __xfrm_state_put(struct xfrm_state *x) static inline void xfrm_state_put(struct xfrm_state *x) { if (refcount_dec_and_test(&x->refcnt)) - __xfrm_state_destroy(x, false); -} - -static inline void xfrm_state_put_sync(struct xfrm_state *x) -{ - if (refcount_dec_and_test(&x->refcnt)) - __xfrm_state_destroy(x, true); + __xfrm_state_destroy(x); } static inline void xfrm_state_hold(struct xfrm_state *x) @@ -1770,7 +1763,7 @@ struct xfrmk_spdinfo { struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num); int xfrm_state_delete(struct xfrm_state *x); -int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync); +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); int xfrm_dev_policy_flush(struct net *net, struct net_device *dev, bool task_valid); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index bebc252db865..d54fe354b390 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -144,7 +144,8 @@ FLUSH_STATES #define EXTENT_FLAGS \ { EXTENT_DIRTY, "DIRTY"}, \ { EXTENT_LOCKED, "LOCKED"}, \ - { EXTENT_NEW, "NEW"}, \ + { EXTENT_DIRTY_LOG1, "DIRTY_LOG1"}, \ + { EXTENT_DIRTY_LOG2, "DIRTY_LOG2"}, \ { EXTENT_DELALLOC, "DELALLOC"}, \ { EXTENT_DEFRAG, "DEFRAG"}, \ { EXTENT_BOUNDARY, "BOUNDARY"}, \ @@ -1095,7 +1096,7 @@ TRACE_EVENT(btrfs_cow_block, TP_fast_assign_btrfs(root->fs_info, __entry->root_objectid = btrfs_root_id(root); __entry->buf_start = buf->start; - __entry->refs = atomic_read(&buf->refs); + __entry->refs = refcount_read(&buf->refs); __entry->cow_start = cow->start; __entry->buf_level = btrfs_header_level(buf); __entry->cow_level = btrfs_header_level(cow); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 73e96ccbe830..64a382fbc31a 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -55,6 +55,7 @@ EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_dirty, "DIRTY ") \ EM(netfs_rreq_trace_done, "DONE ") \ + EM(netfs_rreq_trace_end_copy_to_cache, "END-C2C") \ EM(netfs_rreq_trace_free, "FREE ") \ EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \ EM(netfs_rreq_trace_recollect, "RECLLCT") \ @@ -559,6 +560,35 @@ TRACE_EVENT(netfs_write, __entry->start, __entry->start + __entry->len - 1) ); +TRACE_EVENT(netfs_copy2cache, + TP_PROTO(const struct netfs_io_request *rreq, + const struct netfs_io_request *creq), + + TP_ARGS(rreq, creq), + + TP_STRUCT__entry( + __field(unsigned int, rreq) + __field(unsigned int, creq) + __field(unsigned int, cookie) + __field(unsigned int, ino) + ), + + TP_fast_assign( + struct netfs_inode *__ctx = netfs_inode(rreq->inode); + struct fscache_cookie *__cookie = netfs_i_cookie(__ctx); + __entry->rreq = rreq->debug_id; + __entry->creq = creq->debug_id; + __entry->cookie = __cookie ? __cookie->debug_id : 0; + __entry->ino = rreq->inode->i_ino; + ), + + TP_printk("R=%08x CR=%08x c=%08x i=%x ", + __entry->rreq, + __entry->creq, + __entry->cookie, + __entry->ino) + ); + TRACE_EVENT(netfs_collect, TP_PROTO(const struct netfs_io_request *wreq), diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 378d2dfc7392..de6f6d25767c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -322,20 +322,24 @@ EM(rxrpc_call_put_kernel, "PUT kernel ") \ EM(rxrpc_call_put_poke, "PUT poke ") \ EM(rxrpc_call_put_recvmsg, "PUT recvmsg ") \ + EM(rxrpc_call_put_release_recvmsg_q, "PUT rls-rcmq") \ EM(rxrpc_call_put_release_sock, "PUT rls-sock") \ EM(rxrpc_call_put_release_sock_tba, "PUT rls-sk-a") \ EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \ - EM(rxrpc_call_put_unnotify, "PUT unnotify") \ EM(rxrpc_call_put_userid_exists, "PUT u-exists") \ EM(rxrpc_call_put_userid, "PUT user-id ") \ EM(rxrpc_call_see_accept, "SEE accept ") \ EM(rxrpc_call_see_activate_client, "SEE act-clnt") \ + EM(rxrpc_call_see_already_released, "SEE alrdy-rl") \ EM(rxrpc_call_see_connect_failed, "SEE con-fail") \ EM(rxrpc_call_see_connected, "SEE connect ") \ EM(rxrpc_call_see_conn_abort, "SEE conn-abt") \ + EM(rxrpc_call_see_discard, "SEE discard ") \ EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ + EM(rxrpc_call_see_notify_released, "SEE nfy-rlsd") \ + EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ EM(rxrpc_call_see_waiting_call, "SEE q-conn ") \ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index aad697da1580..750ecce56930 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1691,7 +1691,6 @@ SVC_RQST_FLAG_LIST __print_flags(flags, "|", SVC_RQST_FLAG_LIST) TRACE_DEFINE_ENUM(SVC_GARBAGE); -TRACE_DEFINE_ENUM(SVC_SYSERR); TRACE_DEFINE_ENUM(SVC_VALID); TRACE_DEFINE_ENUM(SVC_NEGATIVE); TRACE_DEFINE_ENUM(SVC_OK); @@ -1704,7 +1703,6 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE); #define show_svc_auth_status(status) \ __print_symbolic(status, \ { SVC_GARBAGE, "SVC_GARBAGE" }, \ - { SVC_SYSERR, "SVC_SYSERR" }, \ { SVC_VALID, "SVC_VALID" }, \ { SVC_NEGATIVE, "SVC_NEGATIVE" }, \ { SVC_OK, "SVC_OK" }, \ @@ -2123,22 +2121,35 @@ TRACE_EVENT(svc_xprt_accept, ) ); -TRACE_EVENT(svc_wake_up, - TP_PROTO(int pid), +DECLARE_EVENT_CLASS(svc_pool_thread_event, + TP_PROTO(const struct svc_pool *pool, pid_t pid), - TP_ARGS(pid), + TP_ARGS(pool, pid), TP_STRUCT__entry( - __field(int, pid) + __field(unsigned int, pool_id) + __field(pid_t, pid) ), TP_fast_assign( + __entry->pool_id = pool->sp_id; __entry->pid = pid; ), - TP_printk("pid=%d", __entry->pid) + TP_printk("pool=%u pid=%d", __entry->pool_id, __entry->pid) ); +#define DEFINE_SVC_POOL_THREAD_EVENT(name) \ + DEFINE_EVENT(svc_pool_thread_event, svc_pool_thread_##name, \ + TP_PROTO( \ + const struct svc_pool *pool, pid_t pid \ + ), \ + TP_ARGS(pool, pid)) + +DEFINE_SVC_POOL_THREAD_EVENT(wake); +DEFINE_SVC_POOL_THREAD_EVENT(running); +DEFINE_SVC_POOL_THREAD_EVENT(noidle); + TRACE_EVENT(svc_alloc_arg_err, TP_PROTO( unsigned int requested, diff --git a/include/uapi/asm-generic/param.h b/include/uapi/asm-generic/param.h index baad02ea7f93..3ed505dfea13 100644 --- a/include/uapi/asm-generic/param.h +++ b/include/uapi/asm-generic/param.h @@ -2,8 +2,12 @@ #ifndef _UAPI__ASM_GENERIC_PARAM_H #define _UAPI__ASM_GENERIC_PARAM_H +#ifndef __USER_HZ +#define __USER_HZ 100 +#endif + #ifndef HZ -#define HZ 100 +#define HZ __USER_HZ #endif #ifndef EXEC_PAGESIZE diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index dd02160015b2..8e710bbb688e 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -616,8 +616,11 @@ struct btrfs_ioctl_clone_range_args { #define BTRFS_DEFRAG_RANGE_COMPRESS 1 #define BTRFS_DEFRAG_RANGE_START_IO 2 #define BTRFS_DEFRAG_RANGE_COMPRESS_LEVEL 4 +/* Request no compression on the range (uncompress if necessary). */ +#define BTRFS_DEFRAG_RANGE_NOCOMPRESS 8 #define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS | \ BTRFS_DEFRAG_RANGE_COMPRESS_LEVEL | \ + BTRFS_DEFRAG_RANGE_NOCOMPRESS | \ BTRFS_DEFRAG_RANGE_START_IO) struct btrfs_ioctl_defrag_range_args { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 37891580d05d..7a4c35ff03fe 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -467,6 +467,10 @@ struct kvm_run { __u64 leaf; __u64 r11, r12, r13, r14; } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; }; } tdx; /* Fix the size of the union. */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 518ba144544c..2beb30be2c5f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -142,8 +142,6 @@ enum nf_tables_msg_types { NFT_MSG_DESTROYOBJ, NFT_MSG_DESTROYFLOWTABLE, NFT_MSG_GETSETELEM_RESET, - NFT_MSG_NEWDEV, - NFT_MSG_DELDEV, NFT_MSG_MAX, }; @@ -1786,18 +1784,10 @@ enum nft_synproxy_attributes { * enum nft_device_attributes - nf_tables device netlink attributes * * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) - * @NFTA_DEVICE_TABLE: table containing the flowtable or chain hooking into the device (NLA_STRING) - * @NFTA_DEVICE_FLOWTABLE: flowtable hooking into the device (NLA_STRING) - * @NFTA_DEVICE_CHAIN: chain hooking into the device (NLA_STRING) - * @NFTA_DEVICE_SPEC: hook spec matching the device (NLA_STRING) */ enum nft_devices_attributes { NFTA_DEVICE_UNSPEC, NFTA_DEVICE_NAME, - NFTA_DEVICE_TABLE, - NFTA_DEVICE_FLOWTABLE, - NFTA_DEVICE_CHAIN, - NFTA_DEVICE_SPEC, __NFTA_DEVICE_MAX }; #define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 50d807af2649..6cd58cd2a6f0 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -25,8 +25,6 @@ enum nfnetlink_groups { #define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA NFNLGRP_NFTRACE, #define NFNLGRP_NFTRACE NFNLGRP_NFTRACE - NFNLGRP_NFT_DEV, -#define NFNLGRP_NFT_DEV NFNLGRP_NFT_DEV __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 73648d26a622..5111ec040c53 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1666,12 +1666,11 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags) io_req_flags_t io_file_get_flags(struct file *file) { - struct inode *inode = file_inode(file); io_req_flags_t res = 0; BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1); - if (S_ISREG(inode->i_mode) && !(inode->i_flags & S_ANON_INODE)) + if (S_ISREG(file_inode(file)->i_mode)) res |= REQ_F_ISREG; if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT)) res |= REQ_F_SUPPORT_NOWAIT; diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 71400d6cefc8..4c2578f2efcb 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -82,7 +82,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, io_tw_token_t tw) spin_unlock(&ctx->msg_lock); } if (req) - kmem_cache_free(req_cachep, req); + kfree_rcu(req, rcu_head); percpu_ref_put(&ctx->refs); } @@ -90,7 +90,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, int res, u32 cflags, u64 user_data) { if (!READ_ONCE(ctx->submitter_task)) { - kmem_cache_free(req_cachep, req); + kfree_rcu(req, rcu_head); return -EOWNERDEAD; } req->opcode = IORING_OP_NOP; diff --git a/io_uring/net.c b/io_uring/net.c index 43a43522f406..bec8c6ed0a93 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1738,9 +1738,11 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) int ret; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; - if (unlikely(req->flags & REQ_F_FAIL)) { - ret = -ECONNRESET; - goto out; + if (connect->in_progress) { + struct poll_table_struct pt = { ._key = EPOLLERR }; + + if (vfs_poll(req->file, &pt) & EPOLLERR) + goto get_sock_err; } file_flags = force_nonblock ? O_NONBLOCK : 0; @@ -1765,8 +1767,10 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) * which means the previous result is good. For both of these, * grab the sock_error() and use that for the completion. */ - if (ret == -EBADFD || ret == -EISCONN) + if (ret == -EBADFD || ret == -EISCONN) { +get_sock_err: ret = sock_error(sock_from_file(req->file)->sk); + } } if (ret == -ERESTARTSYS) ret = -EINTR; diff --git a/io_uring/poll.c b/io_uring/poll.c index 0526062e2f81..20e9b46a4adf 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -273,8 +273,6 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw) return IOU_POLL_REISSUE; } } - if (unlikely(req->cqe.res & EPOLLERR)) - req_set_fail(req); if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 085eeed8cd50..4a7011c799f0 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -76,6 +76,8 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq, int dmabuf_fd = area_reg->dmabuf_fd; int i, ret; + if (off) + return -EINVAL; if (WARN_ON_ONCE(!ifq->dev)) return -EFAULT; if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER)) @@ -106,7 +108,7 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq, for_each_sgtable_dma_sg(mem->sgt, sg, i) total_size += sg_dma_len(sg); - if (total_size < off + len) { + if (total_size != len) { ret = -EINVAL; goto err; } @@ -863,10 +865,7 @@ static int io_pp_zc_init(struct page_pool *pp) static void io_pp_zc_destroy(struct page_pool *pp) { struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); - struct io_zcrx_area *area = ifq->area; - if (WARN_ON_ONCE(area->free_count != area->nia.num_niovs)) - return; percpu_ref_put(&ifq->ctx->refs); } diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 82ed2d3c9846..482af449e00d 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -411,6 +411,7 @@ static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = MQUEUE_MAGIC; sb->s_op = &mqueue_super_ops; + sb->s_d_flags = DCACHE_DONTCACHE; inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL); if (IS_ERR(inode)) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index b71e428ad936..ad6df48b540c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -884,6 +884,13 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, if (fmt[i] == 'p') { sizeof_cur_arg = sizeof(long); + if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) || + ispunct(fmt[i + 1])) { + if (tmp_buf) + cur_arg = raw_args[num_spec]; + goto nocopy_fmt; + } + if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') && fmt[i + 2] == 's') { fmt_ptype = fmt[i + 1]; @@ -891,11 +898,9 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, goto fmt_str; } - if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) || - ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' || + if (fmt[i + 1] == 'K' || fmt[i + 1] == 'x' || fmt[i + 1] == 's' || fmt[i + 1] == 'S') { - /* just kernel pointers */ if (tmp_buf) cur_arg = raw_args[num_spec]; i++; diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c index 941d0d2427e3..8e61dc555415 100644 --- a/kernel/bpf/sysfs_btf.c +++ b/kernel/bpf/sysfs_btf.c @@ -21,7 +21,7 @@ static int btf_sysfs_vmlinux_mmap(struct file *filp, struct kobject *kobj, { unsigned long pages = PAGE_ALIGN(attr->size) >> PAGE_SHIFT; size_t vm_size = vma->vm_end - vma->vm_start; - phys_addr_t addr = virt_to_phys(__start_BTF); + phys_addr_t addr = __pa_symbol(__start_BTF); unsigned long pfn = addr >> PAGE_SHIFT; if (attr->private != __start_BTF || !PAGE_ALIGNED(addr)) diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c index 507b8f19a262..dd9417425d92 100644 --- a/kernel/cgroup/legacy_freezer.c +++ b/kernel/cgroup/legacy_freezer.c @@ -66,15 +66,9 @@ static struct freezer *parent_freezer(struct freezer *freezer) bool cgroup_freezing(struct task_struct *task) { bool ret; - unsigned int state; rcu_read_lock(); - /* Check if the cgroup is still FREEZING, but not FROZEN. The extra - * !FROZEN check is required, because the FREEZING bit is not cleared - * when the state FROZEN is reached. - */ - state = task_freezer(task)->state; - ret = (state & CGROUP_FREEZING) && !(state & CGROUP_FROZEN); + ret = task_freezer(task)->state & CGROUP_FREEZING; rcu_read_unlock(); return ret; diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 8df0dfaaca18..67af8a55185d 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -222,7 +222,10 @@ void __init dma_contiguous_reserve(phys_addr_t limit) if (size_cmdline != -1) { selected_size = size_cmdline; selected_base = base_cmdline; - selected_limit = min_not_zero(limit_cmdline, limit); + + /* Hornor the user setup dma address limit */ + selected_limit = limit_cmdline ?: limit; + if (base_cmdline + size_cmdline == limit_cmdline) fixed = true; } else { diff --git a/kernel/events/core.c b/kernel/events/core.c index 0db36b2b2448..22fdf0c187cd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7204,18 +7204,18 @@ void perf_event_wakeup(struct perf_event *event) static void perf_sigtrap(struct perf_event *event) { /* - * We'd expect this to only occur if the irq_work is delayed and either - * ctx->task or current has changed in the meantime. This can be the - * case on architectures that do not implement arch_irq_work_raise(). + * Both perf_pending_task() and perf_pending_irq() can race with the + * task exiting. */ - if (WARN_ON_ONCE(event->ctx->task != current)) + if (current->flags & PF_EXITING) return; /* - * Both perf_pending_task() and perf_pending_irq() can race with the - * task exiting. + * We'd expect this to only occur if the irq_work is delayed and either + * ctx->task or current has changed in the meantime. This can be the + * case on architectures that do not implement arch_irq_work_raise(). */ - if (current->flags & PF_EXITING) + if (WARN_ON_ONCE(event->ctx->task != current)) return; send_sig_perf((void __user *)event->pending_addr, diff --git a/kernel/freezer.c b/kernel/freezer.c index 8d530d0949ff..6a96149aede9 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -201,18 +201,9 @@ static int __restore_freezer_state(struct task_struct *p, void *arg) void __thaw_task(struct task_struct *p) { - unsigned long flags; - - spin_lock_irqsave(&freezer_lock, flags); - if (WARN_ON_ONCE(freezing(p))) - goto unlock; - - if (!frozen(p) || task_call_func(p, __restore_freezer_state, NULL)) - goto unlock; - - wake_up_state(p, TASK_FROZEN); -unlock: - spin_unlock_irqrestore(&freezer_lock, flags); + guard(spinlock_irqsave)(&freezer_lock); + if (frozen(p) && !task_call_func(p, __restore_freezer_state, NULL)) + wake_up_state(p, TASK_FROZEN); } /** diff --git a/kernel/module/main.c b/kernel/module/main.c index 413ac6ea3702..c2c08007029d 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1573,8 +1573,14 @@ static int apply_relocations(struct module *mod, const struct load_info *info) if (infosec >= info->hdr->e_shnum) continue; - /* Don't bother with non-allocated sections */ - if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC)) + /* + * Don't bother with non-allocated sections. + * An exception is the percpu section, which has separate allocations + * for individual CPUs. We relocate the percpu section in the initial + * ELF template and subsequently copy it to the per-CPU destinations. + */ + if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC) && + (!infosec || infosec != info->index.pcpu)) continue; if (info->sechdrs[i].sh_flags & SHF_RELA_LIVEPATCH) @@ -2696,9 +2702,8 @@ static int find_module_sections(struct module *mod, struct load_info *info) static int move_module(struct module *mod, struct load_info *info) { - int i; - enum mod_mem_type t = 0; - int ret = -ENOMEM; + int i, ret; + enum mod_mem_type t = MOD_MEM_NUM_TYPES; bool codetag_section_found = false; for_each_mod_mem_type(type) { @@ -2776,7 +2781,7 @@ static int move_module(struct module *mod, struct load_info *info) return 0; out_err: module_memory_restore_rox(mod); - for (t--; t >= 0; t--) + while (t--) module_memory_free(mod, t); if (codetag_section_found) codetag_free_module_sections(mod); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index bb608b68fb30..b4ca17c2fecf 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -384,6 +384,7 @@ static int suspend_prepare(suspend_state_t state) return 0; dpm_save_failed_step(SUSPEND_FREEZE); + filesystems_thaw(); pm_notifier_call_chain(PM_POST_SUSPEND); Restore: pm_restore_console(); @@ -540,7 +541,6 @@ int suspend_devices_and_enter(suspend_state_t state) return error; Recover_platform: - pm_restore_gfp_mask(); platform_recover(state); goto Resume_devices; } @@ -593,8 +593,6 @@ static int enter_state(suspend_state_t state) ksys_sync_helper(); trace_suspend_resume(TPS("sync_filesystems"), 0, false); } - if (filesystem_freeze_enabled) - filesystems_freeze(); pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]); pm_suspend_clear_flags(); @@ -614,7 +612,6 @@ static int enter_state(suspend_state_t state) pm_pr_dbg("Finishing wakeup.\n"); suspend_finish(); Unlock: - filesystems_thaw(); mutex_unlock(&system_transition_mutex); return error; } diff --git a/kernel/resource.c b/kernel/resource.c index 8d3e6ed0bdc1..f9bb5481501a 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1279,8 +1279,9 @@ static int __request_region_locked(struct resource *res, struct resource *parent * become unavailable to other users. Conflicts are * not expected. Warn to aid debugging if encountered. */ - if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) { - pr_warn("Unaddressable device %s %pR conflicts with %pR", + if (parent == &iomem_resource && + conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) { + pr_warn("Unaddressable device %s %pR conflicts with %pR\n", conflict->name, conflict, res); } if (conflict != parent) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ec68fc686bd7..81c6df746df1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3362,10 +3362,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) #ifdef CONFIG_NUMA_BALANCING static void __migrate_swap_task(struct task_struct *p, int cpu) { - __schedstat_inc(p->stats.numa_task_swapped); - count_vm_numa_event(NUMA_TASK_SWAP); - count_memcg_event_mm(p->mm, NUMA_TASK_SWAP); - if (task_on_rq_queued(p)) { struct rq *src_rq, *dst_rq; struct rq_flags srf, drf; @@ -7939,9 +7935,8 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) return -EINVAL; - __schedstat_inc(p->stats.numa_task_migrated); - count_vm_numa_event(NUMA_TASK_MIGRATE); - count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE); + /* TODO: This is not properly updating schedstats */ + trace_sched_move_numa(p, curr_cpu, target_cpu); return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 9d71baf08075..557246880a7e 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1210,10 +1210,6 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P_SCHEDSTAT(nr_failed_migrations_running); P_SCHEDSTAT(nr_failed_migrations_hot); P_SCHEDSTAT(nr_forced_migrations); -#ifdef CONFIG_NUMA_BALANCING - P_SCHEDSTAT(numa_task_migrated); - P_SCHEDSTAT(numa_task_swapped); -#endif P_SCHEDSTAT(nr_wakeups); P_SCHEDSTAT(nr_wakeups_sync); P_SCHEDSTAT(nr_wakeups_migrate); diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index b498d867ba21..7dd5cbcb7a06 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1272,7 +1272,8 @@ static inline struct rq *scx_locked_rq(void) #define SCX_CALL_OP(sch, mask, op, rq, args...) \ do { \ - update_locked_rq(rq); \ + if (rq) \ + update_locked_rq(rq); \ if (mask) { \ scx_kf_allow(mask); \ (sch)->ops.op(args); \ @@ -1280,14 +1281,16 @@ do { \ } else { \ (sch)->ops.op(args); \ } \ - update_locked_rq(NULL); \ + if (rq) \ + update_locked_rq(NULL); \ } while (0) #define SCX_CALL_OP_RET(sch, mask, op, rq, args...) \ ({ \ __typeof__((sch)->ops.op(args)) __ret; \ \ - update_locked_rq(rq); \ + if (rq) \ + update_locked_rq(rq); \ if (mask) { \ scx_kf_allow(mask); \ __ret = (sch)->ops.op(args); \ @@ -1295,7 +1298,8 @@ do { \ } else { \ __ret = (sch)->ops.op(args); \ } \ - update_locked_rq(NULL); \ + if (rq) \ + update_locked_rq(NULL); \ __ret; \ }) diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index 6d29d3cbc670..001fb88a8481 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -903,7 +903,7 @@ s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags, * selection optimizations and simply check whether the previously * used CPU is idle and within the allowed cpumask. */ - if (p->nr_cpus_allowed == 1) { + if (p->nr_cpus_allowed == 1 || is_migration_disabled(p)) { if (cpumask_test_cpu(prev_cpu, allowed ?: p->cpus_ptr) && scx_idle_test_and_clear_cpu(prev_cpu)) cpu = prev_cpu; diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index c48900b856a2..52ca8e268cfc 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -80,7 +80,7 @@ long calc_load_fold_active(struct rq *this_rq, long adjust) long nr_active, delta = 0; nr_active = this_rq->nr_running - adjust; - nr_active += (int)this_rq->nr_uninterruptible; + nr_active += (long)this_rq->nr_uninterruptible; if (nr_active != this_rq->calc_load_active) { delta = nr_active - this_rq->calc_load_active; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 475bb5998295..83e3aa917142 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1149,7 +1149,7 @@ struct rq { * one CPU and if it got migrated afterwards it may decrease * it on another CPU. Always updated under the runqueue lock: */ - unsigned int nr_uninterruptible; + unsigned long nr_uninterruptible; union { struct task_struct __rcu *donor; /* Scheduler context */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index a009c91f7b05..83c65f3afcca 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1256,7 +1256,7 @@ int get_device_system_crosststamp(int (*get_time_fn) struct system_time_snapshot *history_begin, struct system_device_crosststamp *xtstamp) { - struct system_counterval_t system_counterval; + struct system_counterval_t system_counterval = {}; struct timekeeper *tk = &tk_core.timekeeper; u64 cycles, now, interval_start; unsigned int clock_was_set_seq = 0; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 120531268abf..d01e5c910ce1 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3136,7 +3136,10 @@ __register_event(struct trace_event_call *call, struct module *mod) if (ret < 0) return ret; + down_write(&trace_event_sem); list_add(&call->list, &ftrace_events); + up_write(&trace_event_sem); + if (call->flags & TRACE_EVENT_FL_DYNAMIC) atomic_set(&call->refcnt, 0); else @@ -3750,6 +3753,8 @@ __trace_add_event_dirs(struct trace_array *tr) struct trace_event_call *call; int ret; + lockdep_assert_held(&trace_event_sem); + list_for_each_entry(call, &ftrace_events, list) { ret = __trace_add_new_event(call, tr); if (ret < 0) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 6819b93309ce..fd259da0aa64 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -637,8 +637,8 @@ __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, u entry = ring_buffer_event_data(event); - memcpy(&entry->caller, fstack->calls, size); entry->size = fstack->nr_entries; + memcpy(&entry->caller, fstack->calls, size); trace_buffer_unlock_commit_nostack(buffer, event); } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 424751cdf31f..40830a3ecd96 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -657,7 +657,7 @@ static int parse_btf_arg(char *varname, ret = query_btf_context(ctx); if (ret < 0 || ctx->nr_params == 0) { trace_probe_log_err(ctx->offset, NO_BTF_ENTRY); - return PTR_ERR(params); + return -ENOENT; } } params = ctx->params; diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 3a74d63a959e..0142bc916f73 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -135,6 +135,9 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl struct codetag_bytes n; unsigned int i, nr = 0; + if (IS_ERR_OR_NULL(alloc_tag_cttype)) + return 0; + if (can_sleep) codetag_lock_module_list(alloc_tag_cttype, true); else if (!codetag_trylock_module_list(alloc_tag_cttype)) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 00524e55a21e..ef66be963798 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5319,6 +5319,7 @@ static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, struct maple_enode *start; if (mte_is_leaf(enode)) { + mte_set_node_dead(enode); node->type = mte_node_type(enode); goto free_leaf; } diff --git a/mm/damon/core.c b/mm/damon/core.c index b217e0120e09..339116ea30e3 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -754,6 +754,19 @@ static struct damos_quota_goal *damos_nth_quota_goal( return NULL; } +static void damos_commit_quota_goal_union( + struct damos_quota_goal *dst, struct damos_quota_goal *src) +{ + switch (dst->metric) { + case DAMOS_QUOTA_NODE_MEM_USED_BP: + case DAMOS_QUOTA_NODE_MEM_FREE_BP: + dst->nid = src->nid; + break; + default: + break; + } +} + static void damos_commit_quota_goal( struct damos_quota_goal *dst, struct damos_quota_goal *src) { @@ -762,6 +775,7 @@ static void damos_commit_quota_goal( if (dst->metric == DAMOS_QUOTA_USER_INPUT) dst->current_value = src->current_value; /* keep last_psi_total as is, since it will be updated in next cycle */ + damos_commit_quota_goal_union(dst, src); } /** @@ -795,6 +809,7 @@ int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src) src_goal->metric, src_goal->target_value); if (!new_goal) return -ENOMEM; + damos_commit_quota_goal_union(new_goal, src_goal); damos_add_quota_goal(dst, new_goal); } return 0; @@ -1449,6 +1464,7 @@ static unsigned long damon_get_intervals_score(struct damon_ctx *c) } } target_access_events = max_access_events * goal_bp / 10000; + target_access_events = target_access_events ? : 1; return access_events * 10000 / target_access_events; } @@ -2355,9 +2371,8 @@ static void kdamond_usleep(unsigned long usecs) * * If there is a &struct damon_call_control request that registered via * &damon_call() on @ctx, do or cancel the invocation of the function depending - * on @cancel. @cancel is set when the kdamond is deactivated by DAMOS - * watermarks, or the kdamond is already out of the main loop and therefore - * will be terminated. + * on @cancel. @cancel is set when the kdamond is already out of the main loop + * and therefore will be terminated. */ static void kdamond_call(struct damon_ctx *ctx, bool cancel) { @@ -2405,7 +2420,7 @@ static int kdamond_wait_activation(struct damon_ctx *ctx) if (ctx->callback.after_wmarks_check && ctx->callback.after_wmarks_check(ctx)) break; - kdamond_call(ctx, true); + kdamond_call(ctx, false); damos_walk_cancel(ctx); } return -EBUSY; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9dc95eac558c..a0d285d20992 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2340,12 +2340,15 @@ struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, struct folio *folio; spin_lock_irq(&hugetlb_lock); + if (!h->resv_huge_pages) { + spin_unlock_irq(&hugetlb_lock); + return NULL; + } + folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, preferred_nid, nmask); - if (folio) { - VM_BUG_ON(!h->resv_huge_pages); + if (folio) h->resv_huge_pages--; - } spin_unlock_irq(&hugetlb_lock); return folio; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 8357e1a33699..62c01b4527eb 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -370,36 +370,6 @@ static inline bool init_task_stack_addr(const void *addr) sizeof(init_thread_union.stack)); } -/* - * This function is invoked with report_lock (a raw_spinlock) held. A - * PREEMPT_RT kernel cannot call find_vm_area() as it will acquire a sleeping - * rt_spinlock. - * - * For !RT kernel, the PROVE_RAW_LOCK_NESTING config option will print a - * lockdep warning for this raw_spinlock -> spinlock dependency. This config - * option is enabled by default to ensure better test coverage to expose this - * kind of RT kernel problem. This lockdep splat, however, can be suppressed - * by using DEFINE_WAIT_OVERRIDE_MAP() if it serves a useful purpose and the - * invalid PREEMPT_RT case has been taken care of. - */ -static inline struct vm_struct *kasan_find_vm_area(void *addr) -{ - static DEFINE_WAIT_OVERRIDE_MAP(vmalloc_map, LD_WAIT_SLEEP); - struct vm_struct *va; - - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - return NULL; - - /* - * Suppress lockdep warning and fetch vmalloc area of the - * offending address. - */ - lock_map_acquire_try(&vmalloc_map); - va = find_vm_area(addr); - lock_map_release(&vmalloc_map); - return va; -} - static void print_address_description(void *addr, u8 tag, struct kasan_report_info *info) { @@ -429,19 +399,10 @@ static void print_address_description(void *addr, u8 tag, } if (is_vmalloc_addr(addr)) { - struct vm_struct *va = kasan_find_vm_area(addr); - - if (va) { - pr_err("The buggy address belongs to the virtual mapping at\n" - " [%px, %px) created by:\n" - " %pS\n", - va->addr, va->addr + va->size, va->caller); - pr_err("\n"); - - page = vmalloc_to_page(addr); - } else { - pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr); - } + pr_err("The buggy address belongs to a"); + if (!vmalloc_dump_obj(addr)) + pr_cont(" vmalloc virtual mapping\n"); + page = vmalloc_to_page(addr); } if (page) { @@ -3669,10 +3669,10 @@ static ssize_t advisor_mode_show(struct kobject *kobj, { const char *output; - if (ksm_advisor == KSM_ADVISOR_NONE) - output = "[none] scan-time"; - else if (ksm_advisor == KSM_ADVISOR_SCAN_TIME) + if (ksm_advisor == KSM_ADVISOR_SCAN_TIME) output = "none [scan-time]"; + else + output = "[none] scan-time"; return sysfs_emit(buf, "%s\n", output); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 902da8a9c643..70fdeda1120b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -474,8 +474,6 @@ static const unsigned int memcg_vm_event_stat[] = { NUMA_PAGE_MIGRATE, NUMA_PTE_UPDATES, NUMA_HINT_FAULTS, - NUMA_TASK_MIGRATE, - NUMA_TASK_SWAP, #endif }; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b91a33fb6c69..225dddff091d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1561,6 +1561,10 @@ static int get_hwpoison_page(struct page *p, unsigned long flags) return ret; } +/* + * The caller must guarantee the folio isn't large folio, except hugetlb. + * try_to_unmap() can't handle it. + */ int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill) { enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON; diff --git a/mm/migrate.c b/mm/migrate.c index 8cf0f9c9599d..2c88f3b33833 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2399,6 +2399,7 @@ set_status: static int get_compat_pages_array(const void __user *chunk_pages[], const void __user * __user *pages, + unsigned long chunk_offset, unsigned long chunk_nr) { compat_uptr_t __user *pages32 = (compat_uptr_t __user *)pages; @@ -2406,7 +2407,7 @@ static int get_compat_pages_array(const void __user *chunk_pages[], int i; for (i = 0; i < chunk_nr; i++) { - if (get_user(p, pages32 + i)) + if (get_user(p, pages32 + chunk_offset + i)) return -EFAULT; chunk_pages[i] = compat_ptr(p); } @@ -2425,27 +2426,28 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, #define DO_PAGES_STAT_CHUNK_NR 16UL const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; int chunk_status[DO_PAGES_STAT_CHUNK_NR]; + unsigned long chunk_offset = 0; while (nr_pages) { unsigned long chunk_nr = min(nr_pages, DO_PAGES_STAT_CHUNK_NR); if (in_compat_syscall()) { if (get_compat_pages_array(chunk_pages, pages, - chunk_nr)) + chunk_offset, chunk_nr)) break; } else { - if (copy_from_user(chunk_pages, pages, + if (copy_from_user(chunk_pages, pages + chunk_offset, chunk_nr * sizeof(*chunk_pages))) break; } do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status); - if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status))) + if (copy_to_user(status + chunk_offset, chunk_status, + chunk_nr * sizeof(*status))) break; - pages += chunk_nr; - status += chunk_nr; + chunk_offset += chunk_nr; nr_pages -= chunk_nr; } return nr_pages ? -EFAULT : 0; diff --git a/mm/rmap.c b/mm/rmap.c index fb63d9256f09..1320b88fab74 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1845,23 +1845,32 @@ void folio_remove_rmap_pud(struct folio *folio, struct page *page, #endif } -/* We support batch unmapping of PTEs for lazyfree large folios */ -static inline bool can_batch_unmap_folio_ptes(unsigned long addr, - struct folio *folio, pte_t *ptep) +static inline unsigned int folio_unmap_pte_batch(struct folio *folio, + struct page_vma_mapped_walk *pvmw, + enum ttu_flags flags, pte_t pte) { const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY; - int max_nr = folio_nr_pages(folio); - pte_t pte = ptep_get(ptep); + unsigned long end_addr, addr = pvmw->address; + struct vm_area_struct *vma = pvmw->vma; + unsigned int max_nr; + + if (flags & TTU_HWPOISON) + return 1; + if (!folio_test_large(folio)) + return 1; + /* We may only batch within a single VMA and a single page table. */ + end_addr = pmd_addr_end(addr, vma->vm_end); + max_nr = (end_addr - addr) >> PAGE_SHIFT; + + /* We only support lazyfree batching for now ... */ if (!folio_test_anon(folio) || folio_test_swapbacked(folio)) - return false; + return 1; if (pte_unused(pte)) - return false; - if (pte_pfn(pte) != folio_pfn(folio)) - return false; + return 1; - return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL, - NULL, NULL) == max_nr; + return folio_pte_batch(folio, addr, pvmw->pte, pte, max_nr, fpb_flags, + NULL, NULL, NULL); } /* @@ -2024,9 +2033,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, if (pte_dirty(pteval)) folio_mark_dirty(folio); } else if (likely(pte_present(pteval))) { - if (folio_test_large(folio) && !(flags & TTU_HWPOISON) && - can_batch_unmap_folio_ptes(address, folio, pvmw.pte)) - nr_pages = folio_nr_pages(folio); + nr_pages = folio_unmap_pte_batch(folio, &pvmw, flags, pteval); end_addr = address + nr_pages * PAGE_SIZE; flush_cache_range(vma, address, end_addr); @@ -2206,13 +2213,16 @@ discard: hugetlb_remove_rmap(folio); } else { folio_remove_rmap_ptes(folio, subpage, nr_pages, vma); - folio_ref_sub(folio, nr_pages - 1); } if (vma->vm_flags & VM_LOCKED) mlock_drain_local(); - folio_put(folio); - /* We have already batched the entire folio */ - if (nr_pages > 1) + folio_put_refs(folio, nr_pages); + + /* + * If we are sure that we batched the entire folio and cleared + * all PTEs, we can just optimize and stop right here. + */ + if (nr_pages == folio_nr_pages(folio)) goto walk_done; continue; walk_abort: diff --git a/mm/secretmem.c b/mm/secretmem.c index 9a11a38a6770..b7c5592d6711 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -201,7 +201,7 @@ static struct file *secretmem_file_create(unsigned long flags) return ERR_CAST(inode); file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", - O_RDWR, &secretmem_fops); + O_RDWR | O_LARGEFILE, &secretmem_fops); if (IS_ERR(file)) goto err_free_inode; @@ -215,6 +215,8 @@ static struct file *secretmem_file_create(unsigned long flags) inode->i_mode |= S_IFREG; inode->i_size = 0; + atomic_inc(&secretmem_users); + return file; err_free_inode: @@ -248,9 +250,6 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) goto err_put_fd; } - file->f_flags |= O_LARGEFILE; - - atomic_inc(&secretmem_users); fd_install(fd, file); return fd; @@ -261,7 +260,15 @@ err_put_fd: static int secretmem_init_fs_context(struct fs_context *fc) { - return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM; + struct pseudo_fs_context *ctx; + + ctx = init_pseudo(fc, SECRETMEM_MAGIC); + if (!ctx) + return -ENOMEM; + + fc->s_iflags |= SB_I_NOEXEC; + fc->s_iflags |= SB_I_NODEV; + return 0; } static struct file_system_type secretmem_fs = { @@ -279,9 +286,6 @@ static int __init secretmem_init(void) if (IS_ERR(secretmem_mnt)) return PTR_ERR(secretmem_mnt); - /* prevent secretmem mappings from ever getting PROT_EXEC */ - secretmem_mnt->mnt_flags |= MNT_NOEXEC; - return 0; } fs_initcall(secretmem_init); diff --git a/mm/shmem.c b/mm/shmem.c index 3a5a65b1f41a..cbcb573cde06 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4984,7 +4984,6 @@ static void shmem_put_super(struct super_block *sb) static const struct dentry_operations shmem_ci_dentry_ops = { .d_hash = generic_ci_d_hash, .d_compare = generic_ci_d_compare, - .d_delete = always_delete_dentry, }; #endif @@ -5032,7 +5031,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) if (ctx->encoding) { sb->s_encoding = ctx->encoding; - sb->s_d_op = &shmem_ci_dentry_ops; + set_default_d_op(sb, &shmem_ci_dentry_ops); if (ctx->strict_encoding) sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL; } @@ -5041,6 +5040,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) #else sb->s_flags |= SB_NOUSER; #endif /* CONFIG_TMPFS */ + sb->s_d_flags |= DCACHE_DONTCACHE; sbinfo->max_blocks = ctx->blocks; sbinfo->max_inodes = ctx->inodes; sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ab986dd09b6a..6dbcdceecae1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -514,6 +514,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr, pgtbl_mod_mask *mask) { + int err = 0; pte_t *pte; /* @@ -530,12 +531,18 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, do { struct page *page = pages[*nr]; - if (WARN_ON(!pte_none(ptep_get(pte)))) - return -EBUSY; - if (WARN_ON(!page)) - return -ENOMEM; - if (WARN_ON(!pfn_valid(page_to_pfn(page)))) - return -EINVAL; + if (WARN_ON(!pte_none(ptep_get(pte)))) { + err = -EBUSY; + break; + } + if (WARN_ON(!page)) { + err = -ENOMEM; + break; + } + if (WARN_ON(!pfn_valid(page_to_pfn(page)))) { + err = -EINVAL; + break; + } set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); (*nr)++; @@ -543,7 +550,8 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, arch_leave_lazy_mmu_mode(); *mask |= PGTBL_PTE_MODIFIED; - return 0; + + return err; } static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr, diff --git a/mm/vmscan.c b/mm/vmscan.c index f8dfd2864bbf..424412680cfc 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1138,6 +1138,14 @@ retry: goto keep; if (folio_contain_hwpoisoned_page(folio)) { + /* + * unmap_poisoned_folio() can't handle large + * folio, just skip it. memory_failure() will + * handle it if the UCE is triggered again. + */ + if (folio_test_large(folio)) + goto keep_locked; + unmap_poisoned_folio(folio, folio_pfn(folio), false); folio_unlock(folio); folio_put(folio); diff --git a/mm/vmstat.c b/mm/vmstat.c index 429ae5339bfe..a78d70ddeacd 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1346,8 +1346,6 @@ const char * const vmstat_text[] = { "numa_hint_faults", "numa_hint_faults_local", "numa_pages_migrated", - "numa_task_migrated", - "numa_task_swapped", #endif #ifdef CONFIG_MIGRATION "pgmigrate_success", diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 999b513c7fdf..f3e2215f95eb 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1043,6 +1043,9 @@ static struct zspage *alloc_zspage(struct zs_pool *pool, if (!zspage) return NULL; + if (!IS_ENABLED(CONFIG_COMPACTION)) + gfp &= ~__GFP_MOVABLE; + zspage->magic = ZSPAGE_MAGIC; zspage->pool = pool; zspage->class = class->index; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 06908e37c3d9..9a6df8c1daf9 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -357,6 +357,35 @@ static int __vlan_device_event(struct net_device *dev, unsigned long event) return err; } +static void vlan_vid0_add(struct net_device *dev) +{ + struct vlan_info *vlan_info; + int err; + + if (!(dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) + return; + + pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name); + + err = vlan_vid_add(dev, htons(ETH_P_8021Q), 0); + if (err) + return; + + vlan_info = rtnl_dereference(dev->vlan_info); + vlan_info->auto_vid0 = true; +} + +static void vlan_vid0_del(struct net_device *dev) +{ + struct vlan_info *vlan_info = rtnl_dereference(dev->vlan_info); + + if (!vlan_info || !vlan_info->auto_vid0) + return; + + vlan_info->auto_vid0 = false; + vlan_vid_del(dev, htons(ETH_P_8021Q), 0); +} + static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -378,15 +407,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, return notifier_from_errno(err); } - if ((event == NETDEV_UP) && - (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { - pr_info("adding VLAN 0 to HW filter on device %s\n", - dev->name); - vlan_vid_add(dev, htons(ETH_P_8021Q), 0); - } - if (event == NETDEV_DOWN && - (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) - vlan_vid_del(dev, htons(ETH_P_8021Q), 0); + if (event == NETDEV_UP) + vlan_vid0_add(dev); + else if (event == NETDEV_DOWN) + vlan_vid0_del(dev); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 5eaf38875554..c7ffe591d593 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -33,6 +33,7 @@ struct vlan_info { struct vlan_group grp; struct list_head vid_list; unsigned int nr_vids; + bool auto_vid0; struct rcu_head rcu; }; diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 9c787e2e4b17..4744e3fd4544 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -35,6 +35,7 @@ #include <linux/seq_file.h> #include <linux/export.h> #include <linux/etherdevice.h> +#include <linux/refcount.h> int sysctl_aarp_expiry_time = AARP_EXPIRY_TIME; int sysctl_aarp_tick_time = AARP_TICK_TIME; @@ -44,6 +45,7 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME; /* Lists of aarp entries */ /** * struct aarp_entry - AARP entry + * @refcnt: Reference count * @last_sent: Last time we xmitted the aarp request * @packet_queue: Queue of frames wait for resolution * @status: Used for proxy AARP @@ -55,6 +57,7 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME; * @next: Next entry in chain */ struct aarp_entry { + refcount_t refcnt; /* These first two are only used for unresolved entries */ unsigned long last_sent; struct sk_buff_head packet_queue; @@ -79,6 +82,17 @@ static DEFINE_RWLOCK(aarp_lock); /* Used to walk the list and purge/kick entries. */ static struct timer_list aarp_timer; +static inline void aarp_entry_get(struct aarp_entry *a) +{ + refcount_inc(&a->refcnt); +} + +static inline void aarp_entry_put(struct aarp_entry *a) +{ + if (refcount_dec_and_test(&a->refcnt)) + kfree(a); +} + /* * Delete an aarp queue * @@ -87,7 +101,7 @@ static struct timer_list aarp_timer; static void __aarp_expire(struct aarp_entry *a) { skb_queue_purge(&a->packet_queue); - kfree(a); + aarp_entry_put(a); } /* @@ -380,9 +394,11 @@ static void aarp_purge(void) static struct aarp_entry *aarp_alloc(void) { struct aarp_entry *a = kmalloc(sizeof(*a), GFP_ATOMIC); + if (!a) + return NULL; - if (a) - skb_queue_head_init(&a->packet_queue); + refcount_set(&a->refcnt, 1); + skb_queue_head_init(&a->packet_queue); return a; } @@ -477,6 +493,7 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa) entry->dev = atif->dev; write_lock_bh(&aarp_lock); + aarp_entry_get(entry); hash = sa->s_node % (AARP_HASH_SIZE - 1); entry->next = proxies[hash]; @@ -502,6 +519,7 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa) retval = 1; } + aarp_entry_put(entry); write_unlock_bh(&aarp_lock); out: return retval; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 73ea7e67f05a..30242fe10341 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -576,6 +576,7 @@ static int atrtr_create(struct rtentry *r, struct net_device *devhint) /* Fill in the routing entry */ rt->target = ta->sat_addr; + dev_put(rt->dev); /* Release old device */ dev_hold(devhint); rt->dev = devhint; rt->flags = r->rt_flags; diff --git a/net/atm/clip.c b/net/atm/clip.c index b234dc3bcb0d..f7a5565e794e 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -45,7 +45,8 @@ #include <net/atmclip.h> static struct net_device *clip_devs; -static struct atm_vcc *atmarpd; +static struct atm_vcc __rcu *atmarpd; +static DEFINE_MUTEX(atmarpd_lock); static struct timer_list idle_timer; static const struct neigh_ops clip_neigh_ops; @@ -53,24 +54,35 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) { struct sock *sk; struct atmarp_ctrl *ctrl; + struct atm_vcc *vcc; struct sk_buff *skb; + int err = 0; pr_debug("(%d)\n", type); - if (!atmarpd) - return -EUNATCH; + + rcu_read_lock(); + vcc = rcu_dereference(atmarpd); + if (!vcc) { + err = -EUNATCH; + goto unlock; + } skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC); - if (!skb) - return -ENOMEM; + if (!skb) { + err = -ENOMEM; + goto unlock; + } ctrl = skb_put(skb, sizeof(struct atmarp_ctrl)); ctrl->type = type; ctrl->itf_num = itf; ctrl->ip = ip; - atm_force_charge(atmarpd, skb->truesize); + atm_force_charge(vcc, skb->truesize); - sk = sk_atm(atmarpd); + sk = sk_atm(vcc); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); - return 0; +unlock: + rcu_read_unlock(); + return err; } static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry) @@ -417,6 +429,8 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) if (!vcc->push) return -EBADFD; + if (vcc->user_back) + return -EINVAL; clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL); if (!clip_vcc) return -ENOMEM; @@ -607,17 +621,27 @@ static void atmarpd_close(struct atm_vcc *vcc) { pr_debug("\n"); - rtnl_lock(); - atmarpd = NULL; + mutex_lock(&atmarpd_lock); + RCU_INIT_POINTER(atmarpd, NULL); + mutex_unlock(&atmarpd_lock); + + synchronize_rcu(); skb_queue_purge(&sk_atm(vcc)->sk_receive_queue); - rtnl_unlock(); pr_debug("(done)\n"); module_put(THIS_MODULE); } +static int atmarpd_send(struct atm_vcc *vcc, struct sk_buff *skb) +{ + atm_return_tx(vcc, skb); + dev_kfree_skb_any(skb); + return 0; +} + static const struct atmdev_ops atmarpd_dev_ops = { - .close = atmarpd_close + .close = atmarpd_close, + .send = atmarpd_send }; @@ -631,15 +655,18 @@ static struct atm_dev atmarpd_dev = { static int atm_init_atmarp(struct atm_vcc *vcc) { - rtnl_lock(); + if (vcc->push == clip_push) + return -EINVAL; + + mutex_lock(&atmarpd_lock); if (atmarpd) { - rtnl_unlock(); + mutex_unlock(&atmarpd_lock); return -EADDRINUSE; } mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ); - atmarpd = vcc; + rcu_assign_pointer(atmarpd, vcc); set_bit(ATM_VF_META, &vcc->flags); set_bit(ATM_VF_READY, &vcc->flags); /* allow replies and avoid getting closed if signaling dies */ @@ -648,13 +675,14 @@ static int atm_init_atmarp(struct atm_vcc *vcc) vcc->push = NULL; vcc->pop = NULL; /* crash */ vcc->push_oam = NULL; /* crash */ - rtnl_unlock(); + mutex_unlock(&atmarpd_lock); return 0; } static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct atm_vcc *vcc = ATM_SD(sock); + struct sock *sk = sock->sk; int err = 0; switch (cmd) { @@ -675,14 +703,18 @@ static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = clip_create(arg); break; case ATMARPD_CTRL: + lock_sock(sk); err = atm_init_atmarp(vcc); if (!err) { sock->state = SS_CONNECTED; __module_get(THIS_MODULE); } + release_sock(sk); break; case ATMARP_MKIP: + lock_sock(sk); err = clip_mkip(vcc, arg); + release_sock(sk); break; case ATMARP_SETENTRY: err = clip_setentry(vcc, (__force __be32)arg); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 14d7221b8ac0..441cb1700f99 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2654,7 +2654,7 @@ int hci_register_dev(struct hci_dev *hdev) /* Devices that are marked for raw-only usage are unconfigured * and should not be included in normal operation. */ - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE)) hci_dev_set_flag(hdev, HCI_UNCONFIGURED); /* Mark Remote Wakeup connection flag as supported if driver has wakeup @@ -2784,7 +2784,7 @@ int hci_register_suspend_notifier(struct hci_dev *hdev) int ret = 0; if (!hdev->suspend_notifier.notifier_call && - !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + !hci_test_quirk(hdev, HCI_QUIRK_NO_SUSPEND_NOTIFIER)) { hdev->suspend_notifier.notifier_call = hci_suspend_notifier; ret = register_pm_notifier(&hdev->suspend_notifier); } diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index f625074d1f00..99e2e9fc70e8 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -38,7 +38,7 @@ static ssize_t __name ## _read(struct file *file, \ struct hci_dev *hdev = file->private_data; \ char buf[3]; \ \ - buf[0] = test_bit(__quirk, &hdev->quirks) ? 'Y' : 'N'; \ + buf[0] = test_bit(__quirk, hdev->quirk_flags) ? 'Y' : 'N'; \ buf[1] = '\n'; \ buf[2] = '\0'; \ return simple_read_from_buffer(user_buf, count, ppos, buf, 2); \ @@ -59,10 +59,10 @@ static ssize_t __name ## _write(struct file *file, \ if (err) \ return err; \ \ - if (enable == test_bit(__quirk, &hdev->quirks)) \ + if (enable == test_bit(__quirk, hdev->quirk_flags)) \ return -EALREADY; \ \ - change_bit(__quirk, &hdev->quirks); \ + change_bit(__quirk, hdev->quirk_flags); \ \ return count; \ } \ @@ -1356,7 +1356,7 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, * for the vendor callback. Instead just store the desired value and * the setting will be programmed when the controller gets powered on. */ - if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG) && (!test_bit(HCI_RUNNING, &hdev->flags) || hci_dev_test_flag(hdev, HCI_USER_CHANNEL))) goto done; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4d5ace9d245d..cf4b30ac9e0e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -908,8 +908,8 @@ static u8 hci_cc_read_local_ext_features(struct hci_dev *hdev, void *data, return rp->status; if (hdev->max_page < rp->max_page) { - if (test_bit(HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2, - &hdev->quirks)) + if (hci_test_quirk(hdev, + HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2)) bt_dev_warn(hdev, "broken local ext features page 2"); else hdev->max_page = rp->max_page; @@ -936,7 +936,7 @@ static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data, hdev->acl_pkts = __le16_to_cpu(rp->acl_max_pkt); hdev->sco_pkts = __le16_to_cpu(rp->sco_max_pkt); - if (test_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks)) { + if (hci_test_quirk(hdev, HCI_QUIRK_FIXUP_BUFFER_SIZE)) { hdev->sco_mtu = 64; hdev->sco_pkts = 8; } @@ -2971,7 +2971,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data, * state to indicate completion. */ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) || - !test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) + !hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY)) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); goto unlock; } @@ -2990,7 +2990,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data, * state to indicate completion. */ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) || - !test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) + !hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY)) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); } @@ -3614,8 +3614,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, /* We skip the WRITE_AUTH_PAYLOAD_TIMEOUT for ATS2851 based controllers * to avoid unexpected SMP command errors when pairing. */ - if (test_bit(HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT, - &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT)) goto notify; /* Set the default Authenticated Payload Timeout after @@ -5914,7 +5913,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, * while we have an existing one in peripheral role. */ if (hdev->conn_hash.le_num_peripheral > 0 && - (test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) || + (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_LE_STATES) || !(hdev->le_states[3] & 0x10))) return NULL; @@ -6310,8 +6309,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK; legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type); - if (test_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, - &hdev->quirks)) { + if (hci_test_quirk(hdev, + HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY)) { info->primary_phy &= 0x1f; info->secondary_phy &= 0x1f; } @@ -6966,7 +6965,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bis->iso_qos.bcast.in.sdu = le16_to_cpu(ev->max_pdu); if (!ev->status) { + bis->state = BT_CONNECTED; set_bit(HCI_CONN_BIG_SYNC, &bis->flags); + hci_debugfs_create_conn(bis); + hci_conn_add_sysfs(bis); hci_iso_setup_path(bis); } } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 77b3691f3423..7938c004071c 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -393,7 +393,7 @@ static void le_scan_disable(struct work_struct *work) if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED) goto _return; - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) { + if (hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY)) { if (!test_bit(HCI_INQUIRY, &hdev->flags) && hdev->discovery.state != DISCOVERY_RESOLVING) goto discov_stopped; @@ -1345,7 +1345,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) * Command Disallowed error, so we must first disable the * instance if it is active. */ - if (adv && !adv->pending) { + if (adv) { err = hci_disable_ext_adv_instance_sync(hdev, instance); if (err) return err; @@ -3587,7 +3587,7 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) return; - if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_BDADDR_PROPERTY_BROKEN)) baswap(&hdev->public_addr, &ba); else bacpy(&hdev->public_addr, &ba); @@ -3662,7 +3662,7 @@ static int hci_init0_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, ""); /* Reset */ - if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { + if (!hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE)) { err = hci_reset_sync(hdev); if (err) return err; @@ -3675,7 +3675,7 @@ static int hci_unconf_init_sync(struct hci_dev *hdev) { int err; - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE)) return 0; err = hci_init0_sync(hdev); @@ -3718,7 +3718,7 @@ static int hci_read_local_cmds_sync(struct hci_dev *hdev) * supported commands. */ if (hdev->hci_ver > BLUETOOTH_VER_1_1 && - !test_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks)) + !hci_test_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_COMMANDS)) return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL, HCI_CMD_TIMEOUT); @@ -3732,7 +3732,7 @@ static int hci_init1_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, ""); /* Reset */ - if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { + if (!hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE)) { err = hci_reset_sync(hdev); if (err) return err; @@ -3795,7 +3795,7 @@ static int hci_set_event_filter_sync(struct hci_dev *hdev, u8 flt_type, if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; - if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL)) return 0; memset(&cp, 0, sizeof(cp)); @@ -3822,7 +3822,7 @@ static int hci_clear_event_filter_sync(struct hci_dev *hdev) * a hci_set_event_filter_sync() call succeeds, but we do * the check both for parity and as a future reminder. */ - if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL)) return 0; return hci_set_event_filter_sync(hdev, HCI_FLT_CLEAR_ALL, 0x00, @@ -3846,7 +3846,7 @@ static int hci_write_sync_flowctl_sync(struct hci_dev *hdev) /* Check if the controller supports SCO and HCI_OP_WRITE_SYNC_FLOWCTL */ if (!lmp_sco_capable(hdev) || !(hdev->commands[10] & BIT(4)) || - !test_bit(HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED, &hdev->quirks)) + !hci_test_quirk(hdev, HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED)) return 0; memset(&cp, 0, sizeof(cp)); @@ -3921,7 +3921,7 @@ static int hci_write_inquiry_mode_sync(struct hci_dev *hdev) u8 mode; if (!lmp_inq_rssi_capable(hdev) && - !test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) + !hci_test_quirk(hdev, HCI_QUIRK_FIXUP_INQUIRY_MODE)) return 0; /* If Extended Inquiry Result events are supported, then @@ -4111,7 +4111,7 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev) } if (lmp_inq_rssi_capable(hdev) || - test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_FIXUP_INQUIRY_MODE)) events[4] |= 0x02; /* Inquiry Result with RSSI */ if (lmp_ext_feat_capable(hdev)) @@ -4163,7 +4163,7 @@ static int hci_read_stored_link_key_sync(struct hci_dev *hdev) struct hci_cp_read_stored_link_key cp; if (!(hdev->commands[6] & 0x20) || - test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY)) return 0; memset(&cp, 0, sizeof(cp)); @@ -4212,7 +4212,7 @@ static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev) { if (!(hdev->commands[18] & 0x04) || !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || - test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING, @@ -4226,7 +4226,7 @@ static int hci_read_page_scan_type_sync(struct hci_dev *hdev) * this command in the bit mask of supported commands. */ if (!(hdev->commands[13] & 0x01) || - test_bit(HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_PAGE_SCAN_TYPE, @@ -4421,7 +4421,7 @@ static int hci_le_read_adv_tx_power_sync(struct hci_dev *hdev) static int hci_le_read_tx_power_sync(struct hci_dev *hdev) { if (!(hdev->commands[38] & 0x80) || - test_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_TRANSMIT_POWER, @@ -4464,7 +4464,7 @@ static int hci_le_set_rpa_timeout_sync(struct hci_dev *hdev) __le16 timeout = cpu_to_le16(hdev->rpa_timeout); if (!(hdev->commands[35] & 0x04) || - test_bit(HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_RPA_TIMEOUT, @@ -4609,7 +4609,7 @@ static int hci_delete_stored_link_key_sync(struct hci_dev *hdev) * just disable this command. */ if (!(hdev->commands[6] & 0x80) || - test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY)) return 0; memset(&cp, 0, sizeof(cp)); @@ -4735,7 +4735,7 @@ static int hci_set_err_data_report_sync(struct hci_dev *hdev) if (!(hdev->commands[18] & 0x08) || !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || - test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) + hci_test_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING)) return 0; if (enabled == hdev->err_data_reporting) @@ -4948,7 +4948,7 @@ static int hci_dev_setup_sync(struct hci_dev *hdev) size_t i; if (!hci_dev_test_flag(hdev, HCI_SETUP) && - !test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) + !hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP)) return 0; bt_dev_dbg(hdev, ""); @@ -4959,7 +4959,7 @@ static int hci_dev_setup_sync(struct hci_dev *hdev) ret = hdev->setup(hdev); for (i = 0; i < ARRAY_SIZE(hci_broken_table); i++) { - if (test_bit(hci_broken_table[i].quirk, &hdev->quirks)) + if (hci_test_quirk(hdev, hci_broken_table[i].quirk)) bt_dev_warn(hdev, "%s", hci_broken_table[i].desc); } @@ -4967,10 +4967,10 @@ static int hci_dev_setup_sync(struct hci_dev *hdev) * BD_ADDR invalid before creating the HCI device or in * its setup callback. */ - invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) || - test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + invalid_bdaddr = hci_test_quirk(hdev, HCI_QUIRK_INVALID_BDADDR) || + hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY); if (!ret) { - if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks) && + if (hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY) && !bacmp(&hdev->public_addr, BDADDR_ANY)) hci_dev_get_bd_addr_from_property(hdev); @@ -4992,7 +4992,7 @@ static int hci_dev_setup_sync(struct hci_dev *hdev) * In case any of them is set, the controller has to * start up as unconfigured. */ - if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || + if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) || invalid_bdaddr) hci_dev_set_flag(hdev, HCI_UNCONFIGURED); @@ -5052,7 +5052,7 @@ static int hci_dev_init_sync(struct hci_dev *hdev) * then they need to be reprogrammed after the init procedure * completed. */ - if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag) ret = hdev->set_diag(hdev, true); @@ -5309,7 +5309,7 @@ int hci_dev_close_sync(struct hci_dev *hdev) /* Reset device */ skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); - if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) && + if (hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE) && !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { set_bit(HCI_INIT, &hdev->flags); hci_reset_sync(hdev); @@ -5493,7 +5493,7 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, { struct hci_cp_disconnect cp; - if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) { + if (conn->type == BIS_LINK) { /* This is a BIS connection, hci_conn_del will * do the necessary cleanup. */ @@ -5959,7 +5959,7 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval) own_addr_type = ADDR_LE_DEV_PUBLIC; if (hci_is_adv_monitoring(hdev) || - (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && + (hci_test_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER) && hdev->discovery.result_filtering)) { /* Duplicate filter should be disabled when some advertisement * monitor is activated, otherwise AdvMon can only receive one @@ -6022,8 +6022,7 @@ int hci_start_discovery_sync(struct hci_dev *hdev) * and LE scanning are done sequentially with separate * timeouts. */ - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, - &hdev->quirks)) { + if (hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY)) { timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); /* During simultaneous discovery, we double LE scan * interval. We must leave some time for the controller @@ -6100,7 +6099,7 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev) /* Some fake CSR controllers lock up after setting this type of * filter, so avoid sending the request altogether. */ - if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL)) return 0; /* Always clear event filter when starting */ @@ -6815,8 +6814,8 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, return 0; } - /* No privacy so use a public address. */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; + /* No privacy, use the current address */ + hci_copy_identity_address(hdev, rand_addr, own_addr_type); return 0; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 40daa38276f3..805c752ac0a9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3520,12 +3520,28 @@ done: /* Configure output options and let the other side know * which ones we don't like. */ - /* If MTU is not provided in configure request, use the most recently - * explicitly or implicitly accepted value for the other direction, - * or the default value. + /* If MTU is not provided in configure request, try adjusting it + * to the current output MTU if it has been set + * + * Bluetooth Core 6.1, Vol 3, Part A, Section 4.5 + * + * Each configuration parameter value (if any is present) in an + * L2CAP_CONFIGURATION_RSP packet reflects an ‘adjustment’ to a + * configuration parameter value that has been sent (or, in case + * of default values, implied) in the corresponding + * L2CAP_CONFIGURATION_REQ packet. */ - if (mtu == 0) - mtu = chan->imtu ? chan->imtu : L2CAP_DEFAULT_MTU; + if (!mtu) { + /* Only adjust for ERTM channels as for older modes the + * remote stack may not be able to detect that the + * adjustment causing it to silently drop packets. + */ + if (chan->mode == L2CAP_MODE_ERTM && + chan->omtu && chan->omtu != L2CAP_DEFAULT_MTU) + mtu = chan->omtu; + else + mtu = L2CAP_DEFAULT_MTU; + } if (mtu < L2CAP_DEFAULT_MIN_MTU) result = L2CAP_CONF_UNACCEPT; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 5aa55fa69594..82d943c4cb50 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1703,6 +1703,9 @@ static void l2cap_sock_resume_cb(struct l2cap_chan *chan) { struct sock *sk = chan->data; + if (!sk) + return; + if (test_and_clear_bit(FLAG_PENDING_SECURITY, &chan->flags)) { sk->sk_state = BT_CONNECTED; chan->state = BT_CONNECTED; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1485b455ade4..63dba0503653 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -464,7 +464,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, /* Devices marked as raw-only are neither configured * nor unconfigured controllers. */ - if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) + if (hci_test_quirk(d, HCI_QUIRK_RAW_DEVICE)) continue; if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) { @@ -522,7 +522,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, /* Devices marked as raw-only are neither configured * nor unconfigured controllers. */ - if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) + if (hci_test_quirk(d, HCI_QUIRK_RAW_DEVICE)) continue; if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) { @@ -576,7 +576,7 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, /* Devices marked as raw-only are neither configured * nor unconfigured controllers. */ - if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) + if (hci_test_quirk(d, HCI_QUIRK_RAW_DEVICE)) continue; if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) @@ -612,12 +612,12 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, static bool is_configured(struct hci_dev *hdev) { - if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) && + if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) && !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED)) return false; - if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) || - test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) && + if ((hci_test_quirk(hdev, HCI_QUIRK_INVALID_BDADDR) || + hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY)) && !bacmp(&hdev->public_addr, BDADDR_ANY)) return false; @@ -628,12 +628,12 @@ static __le32 get_missing_options(struct hci_dev *hdev) { u32 options = 0; - if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) && + if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) && !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED)) options |= MGMT_OPTION_EXTERNAL_CONFIG; - if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) || - test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) && + if ((hci_test_quirk(hdev, HCI_QUIRK_INVALID_BDADDR) || + hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY)) && !bacmp(&hdev->public_addr, BDADDR_ANY)) options |= MGMT_OPTION_PUBLIC_ADDRESS; @@ -669,7 +669,7 @@ static int read_config_info(struct sock *sk, struct hci_dev *hdev, memset(&rp, 0, sizeof(rp)); rp.manufacturer = cpu_to_le16(hdev->manufacturer); - if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG)) options |= MGMT_OPTION_EXTERNAL_CONFIG; if (hdev->set_bdaddr) @@ -828,8 +828,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (lmp_sc_capable(hdev)) settings |= MGMT_SETTING_SECURE_CONN; - if (test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED)) settings |= MGMT_SETTING_WIDEBAND_SPEECH; } @@ -841,8 +840,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_ADVERTISING; } - if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || - hdev->set_bdaddr) + if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) || hdev->set_bdaddr) settings |= MGMT_SETTING_CONFIGURATION; if (cis_central_capable(hdev)) @@ -4307,7 +4305,7 @@ static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev, bt_dev_dbg(hdev, "sock %p", sk); - if (!test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks)) + if (!hci_test_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_WIDEBAND_SPEECH, MGMT_STATUS_NOT_SUPPORTED); @@ -7935,7 +7933,7 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, MGMT_STATUS_INVALID_PARAMS); - if (!test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks)) + if (!hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG)) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, MGMT_STATUS_NOT_SUPPORTED); @@ -9338,7 +9336,7 @@ void mgmt_index_added(struct hci_dev *hdev) { struct mgmt_ev_ext_index ev; - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE)) return; if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { @@ -9362,7 +9360,7 @@ void mgmt_index_removed(struct hci_dev *hdev) struct mgmt_ev_ext_index ev; struct cmd_lookup match = { NULL, hdev, MGMT_STATUS_INVALID_INDEX }; - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE)) return; mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match); @@ -10089,7 +10087,7 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir, if (hdev->discovery.rssi != HCI_RSSI_INVALID && (rssi == HCI_RSSI_INVALID || (rssi < hdev->discovery.rssi && - !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)))) + !hci_test_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER)))) return false; if (hdev->discovery.uuid_count != 0) { @@ -10107,7 +10105,7 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir, /* If duplicate filtering does not report RSSI changes, then restart * scanning to ensure updated result with updated RSSI values. */ - if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)) { + if (hci_test_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER)) { /* Validate RSSI value against the RSSI threshold once more. */ if (hdev->discovery.rssi != HCI_RSSI_INVALID && rssi < hdev->discovery.rssi) diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index 5a8ccc491b14..c560d8467669 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -989,7 +989,7 @@ static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb) handle_data = msft_find_handle_data(hdev, ev->monitor_handle, false); - if (!test_bit(HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER, &hdev->quirks)) { + if (!hci_test_quirk(hdev, HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER)) { if (!handle_data) return; mgmt_handle = handle_data->mgmt_handle; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 47f359f24d1f..8115d42fc15b 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1379,7 +1379,7 @@ static void smp_timeout(struct work_struct *work) bt_dev_dbg(conn->hcon->hdev, "conn %p", conn); - hci_disconnect(conn->hcon, HCI_ERROR_REMOTE_USER_TERM); + hci_disconnect(conn->hcon, HCI_ERROR_AUTH_FAILURE); } static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) @@ -2977,8 +2977,25 @@ static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb) if (code > SMP_CMD_MAX) goto drop; - if (smp && !test_and_clear_bit(code, &smp->allow_cmd)) + if (smp && !test_and_clear_bit(code, &smp->allow_cmd)) { + /* If there is a context and the command is not allowed consider + * it a failure so the session is cleanup properly. + */ + switch (code) { + case SMP_CMD_IDENT_INFO: + case SMP_CMD_IDENT_ADDR_INFO: + case SMP_CMD_SIGN_INFO: + /* 3.6.1. Key distribution and generation + * + * A device may reject a distributed key by sending the + * Pairing Failed command with the reason set to + * "Key Rejected". + */ + smp_failure(conn, SMP_KEY_REJECTED); + break; + } goto drop; + } /* If we don't have a context the only allowed commands are * pairing request and security request. diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 87a59ec2c9f0..c5da53dfab04 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -138,6 +138,7 @@ struct smp_cmd_keypress_notify { #define SMP_NUMERIC_COMP_FAILED 0x0c #define SMP_BREDR_PAIRING_IN_PROGRESS 0x0d #define SMP_CROSS_TRANSP_NOT_ALLOWED 0x0e +#define SMP_KEY_REJECTED 0x0f #define SMP_MIN_ENC_KEY_SIZE 7 #define SMP_MAX_ENC_KEY_SIZE 16 diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 95d7355a0407..9a910cf0256e 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -17,6 +17,9 @@ static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p, if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload)) return false; + if (br_multicast_igmp_type(skb)) + return false; + return (p->flags & BR_TX_FWD_OFFLOAD) && (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom); } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 5a4fb2539b08..9a45aed508d1 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -54,6 +54,7 @@ static int ipcomp4_err(struct sk_buff *skb, u32 info) } /* We always hold one tunnel user reference to indicate a tunnel */ +static struct lock_class_key xfrm_state_lock_key; static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) { struct net *net = xs_net(x); @@ -62,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t = xfrm_state_alloc(net); if (!t) goto out; + lockdep_set_class(&t->lock, &xfrm_state_lock_key); t->id.proto = IPPROTO_IPIP; t->id.spi = x->props.saddr.a4; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f64f8276a73c..461a9ab540af 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1176,7 +1176,7 @@ restart: goto do_error; while (msg_data_left(msg)) { - ssize_t copy = 0; + int copy = 0; skb = tcp_write_queue_tail(sk); if (skb) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 12c2e6fc85c6..68bc79eb9019 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5181,7 +5181,9 @@ end: skb_condense(skb); skb_set_owner_r(skb, sk); } - tcp_rcvbuf_grow(sk); + /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ + if (sk->sk_socket) + tcp_rcvbuf_grow(sk); } static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index d293087b426d..be5c2294610e 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -359,6 +359,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, flush |= skb->ip_summed != p->ip_summed; flush |= skb->csum_level != p->csum_level; flush |= NAPI_GRO_CB(p)->count >= 64; + skb_set_network_header(skb, skb_gro_receive_network_offset(skb)); if (flush || skb_gro_receive_list(p, skb)) mss = 1; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 85b5aa82d7d7..e0a6bfa95118 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -767,6 +767,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, NAPI_GRO_CB(skb)->flush = 1; return NULL; } + skb_set_network_header(skb, skb_gro_receive_network_offset(skb)); ret = skb_gro_receive_list(p, skb); } else { skb_gro_postpull_rcsum(skb, uh, diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 0d31a8c108d4..f28cfd88eaf5 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -202,6 +202,9 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; + /* set the transport header to ESP */ + skb_set_transport_header(skb, offset); + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ba2ec7c870cc..870a0bd6c2ba 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3525,11 +3525,9 @@ static void addrconf_gre_config(struct net_device *dev) ASSERT_RTNL(); - idev = ipv6_find_idev(dev); - if (IS_ERR(idev)) { - pr_debug("%s: add_dev failed\n", __func__); + idev = addrconf_add_dev(dev); + if (IS_ERR(idev)) return; - } /* Generate the IPv6 link-local address using addrconf_addr_gen(), * unless we have an IPv4 GRE device not bound to an IP address and @@ -3543,9 +3541,6 @@ static void addrconf_gre_config(struct net_device *dev) } add_v4_addrs(idev); - - if (dev->flags & IFF_POINTOPOINT) - addrconf_add_mroute(dev); } #endif diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 72d4858dec18..8607569de34f 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -71,6 +71,7 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } +static struct lock_class_key xfrm_state_lock_key; static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct net *net = xs_net(x); @@ -79,6 +80,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) t = xfrm_state_alloc(net); if (!t) goto out; + lockdep_set_class(&t->lock, &xfrm_state_lock_key); t->id.proto = IPPROTO_IPV6; t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 65831b4fee1f..616bf4c0c8fd 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -807,8 +807,8 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) } else { im->mca_crcount = idev->mc_qrv; } - in6_dev_put(pmc->idev); ip6_mc_clear_src(pmc); + in6_dev_put(pmc->idev); kfree_rcu(pmc, rcu); } } diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 7c05ac846646..eccfa4203e96 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -129,13 +129,13 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, struct dst_entry *cache_dst) { struct ipv6_rpl_sr_hdr *isrh, *csrh; - const struct ipv6hdr *oldhdr; + struct ipv6hdr oldhdr; struct ipv6hdr *hdr; unsigned char *buf; size_t hdrlen; int err; - oldhdr = ipv6_hdr(skb); + memcpy(&oldhdr, ipv6_hdr(skb), sizeof(oldhdr)); buf = kcalloc(struct_size(srh, segments.addr, srh->segments_left), 2, GFP_ATOMIC); if (!buf) @@ -147,7 +147,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, memcpy(isrh, srh, sizeof(*isrh)); memcpy(isrh->rpl_segaddr, &srh->rpl_segaddr[1], (srh->segments_left - 1) * 16); - isrh->rpl_segaddr[srh->segments_left - 1] = oldhdr->daddr; + isrh->rpl_segaddr[srh->segments_left - 1] = oldhdr.daddr; ipv6_rpl_srh_compress(csrh, isrh, &srh->rpl_segaddr[0], isrh->segments_left - 1); @@ -169,7 +169,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, skb_mac_header_rebuild(skb); hdr = ipv6_hdr(skb); - memmove(hdr, oldhdr, sizeof(*hdr)); + memmove(hdr, &oldhdr, sizeof(*hdr)); isrh = (void *)hdr + sizeof(*hdr); memcpy(isrh, csrh, hdrlen); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 841c81abaaf4..9005fc156a20 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -202,6 +202,9 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; + /* set the transport header to ESP */ + skb_set_transport_header(skb, offset); + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index bf140ef781c1..5120a763da0d 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -334,8 +334,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); unsigned int i; + xfrm_state_flush(net, IPSEC_PROTO_ANY, false); xfrm_flush_gc(); - xfrm_state_flush(net, 0, false, true); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); diff --git a/net/key/af_key.c b/net/key/af_key.c index efc2a91f4c48..b5d761700776 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1766,7 +1766,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m if (proto == 0) return -EINVAL; - err = xfrm_state_flush(net, proto, true, false); + err = xfrm_state_flush(net, proto, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - go quietly */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d9d88f2f2831..954795b0fe48 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1959,6 +1959,20 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, ieee80211_sta_init_nss(link_sta); if (params->opmode_notif_used) { + enum nl80211_chan_width width = link->conf->chanreq.oper.width; + + switch (width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_160: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_320: /* not VHT, allowed for HE/EHT */ + break; + default: + return -EINVAL; + } + /* returned value is only needed for rc update, but the * rc isn't initialized here yet, so ignore it */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7c27f3cd841c..c01634fdba78 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1150,6 +1150,8 @@ static void ieee80211_sdata_init(struct ieee80211_local *local, { sdata->local = local; + INIT_LIST_HEAD(&sdata->key_list); + /* * Initialize the default link, so we can use link_id 0 for non-MLD, * and that continues to work for non-MLD-aware drivers that use just @@ -2210,8 +2212,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ieee80211_init_frag_cache(&sdata->frags); - INIT_LIST_HEAD(&sdata->key_list); - wiphy_delayed_work_init(&sdata->dec_tailroom_needed_wk, ieee80211_delayed_tailroom_dec); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2d46d4af60d7..0ed68182f79b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3934,6 +3934,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, lockdep_assert_wiphy(local->hw.wiphy); + if (frame_buf) + memset(frame_buf, 0, IEEE80211_DEAUTH_FRAME_LEN); + if (WARN_ON(!ap_sta)) return; @@ -7195,6 +7198,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, struct ieee80211_bss_conf *bss_conf = link->conf; struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg; struct ieee80211_mgmt *mgmt = (void *) hdr; + struct ieee80211_ext *ext = NULL; size_t baselen; struct ieee802_11_elems *elems; struct ieee80211_local *local = sdata->local; @@ -7220,7 +7224,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, /* Process beacon from the current BSS */ bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type); if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { - struct ieee80211_ext *ext = (void *) mgmt; + ext = (void *)mgmt; variable = ext->u.s1g_beacon.variable + ieee80211_s1g_optional_len(ext->frame_control); } @@ -7407,7 +7411,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, } if ((ncrc == link->u.mgd.beacon_crc && link->u.mgd.beacon_crc_valid) || - ieee80211_is_s1g_short_beacon(mgmt->frame_control)) + (ext && ieee80211_is_s1g_short_beacon(ext->frame_control, + parse_params.start, + parse_params.len))) goto free; link->u.mgd.beacon_crc = ncrc; link->u.mgd.beacon_crc_valid = true; @@ -10699,8 +10705,8 @@ static void ieee80211_ml_epcs(struct ieee80211_sub_if_data *sdata, */ for_each_mle_subelement(sub, (const u8 *)elems->ml_epcs, elems->ml_epcs_len) { + struct ieee802_11_elems *link_elems __free(kfree) = NULL; struct ieee80211_link_data *link; - struct ieee802_11_elems *link_elems __free(kfree); u8 *pos = (void *)sub->data; u16 control; ssize_t len; diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 96584b39215e..c5e0f7f46004 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -758,7 +758,6 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, { const struct element *elem, *sub; size_t profile_len = 0; - bool found = false; if (!bss || !bss->transmitted_bss) return profile_len; @@ -809,15 +808,14 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, index[2], new_bssid); if (ether_addr_equal(new_bssid, bss->bssid)) { - found = true; elems->bssid_index_len = index[1]; elems->bssid_index = (void *)&index[2]; - break; + return profile_len; } } } - return found ? profile_len : 0; + return 0; } static void diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a125995ed252..e66da651678a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2144,11 +2144,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0); wake_up: - - if (local->virt_monitors > 0 && - local->virt_monitors == local->open_count) - ieee80211_add_virtual_monitor(local); - /* * Clear the WLAN_STA_BLOCK_BA flag so new aggregation * sessions can be established after a resume. @@ -2202,6 +2197,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) } } + if (local->virt_monitors > 0 && + local->virt_monitors == local->open_count) + ieee80211_add_virtual_monitor(local); + if (!suspended) return 0; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 421ced031289..1f898888b223 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -978,8 +978,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (subflow->mp_join) goto reset; subflow->mp_capable = 0; + if (!mptcp_try_fallback(ssk)) + goto reset; pr_fallback(msk); - mptcp_do_fallback(ssk); return false; } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index feb01747d7d8..420d416e2603 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -765,8 +765,14 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) pr_debug("fail_seq=%llu\n", fail_seq); - if (!READ_ONCE(msk->allow_infinite_fallback)) + /* After accepting the fail, we can't create any other subflows */ + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); return; + } + msk->allow_subflows = false; + spin_unlock_bh(&msk->fallback_lock); if (!subflow->fail_tout) { pr_debug("send MP_FAIL response and infinite map\n"); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index edf14c2c2062..6a817a13b154 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -560,10 +560,9 @@ static bool mptcp_check_data_fin(struct sock *sk) static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk) { - if (READ_ONCE(msk->allow_infinite_fallback)) { + if (mptcp_try_fallback(ssk)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONFALLBACK); - mptcp_do_fallback(ssk); } else { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET); mptcp_subflow_reset(ssk); @@ -792,7 +791,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk) { mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq); - WRITE_ONCE(msk->allow_infinite_fallback, false); + msk->allow_infinite_fallback = false; mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); } @@ -803,6 +802,14 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_state != TCP_ESTABLISHED) return false; + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_subflows) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + mptcp_subflow_joined(msk, ssk); + spin_unlock_bh(&msk->fallback_lock); + /* attach to msk socket only after we are sure we will deal with it * at close time */ @@ -811,7 +818,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); - mptcp_subflow_joined(msk, ssk); mptcp_stop_tout_timer(sk); __mptcp_propagate_sndbuf(sk, ssk); return true; @@ -1136,10 +1142,14 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk, mpext->infinite_map = 1; mpext->data_len = 0; + if (!mptcp_try_fallback(ssk)) { + mptcp_subflow_reset(ssk); + return; + } + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX); mptcp_subflow_ctx(ssk)->send_infinite_map = 0; pr_fallback(msk); - mptcp_do_fallback(ssk); } #define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1)) @@ -2543,9 +2553,9 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) static void __mptcp_retrans(struct sock *sk) { + struct mptcp_sendmsg_info info = { .data_lock_held = true, }; struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; - struct mptcp_sendmsg_info info = {}; struct mptcp_data_frag *dfrag; struct sock *ssk; int ret, err; @@ -2590,6 +2600,18 @@ static void __mptcp_retrans(struct sock *sk) info.sent = 0; info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent; + + /* + * make the whole retrans decision, xmit, disallow + * fallback atomic + */ + spin_lock_bh(&msk->fallback_lock); + if (__mptcp_check_fallback(msk)) { + spin_unlock_bh(&msk->fallback_lock); + release_sock(ssk); + return; + } + while (info.sent < info.limit) { ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) @@ -2603,8 +2625,9 @@ static void __mptcp_retrans(struct sock *sk) len = max(copied, len); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); - WRITE_ONCE(msk->allow_infinite_fallback, false); + msk->allow_infinite_fallback = false; } + spin_unlock_bh(&msk->fallback_lock); release_sock(ssk); } @@ -2730,7 +2753,8 @@ static void __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); - WRITE_ONCE(msk->allow_infinite_fallback, true); + msk->allow_infinite_fallback = true; + msk->allow_subflows = true; msk->recovery = false; msk->subflow_id = 1; msk->last_data_sent = tcp_jiffies32; @@ -2738,6 +2762,7 @@ static void __mptcp_init_sock(struct sock *sk) msk->last_ack_recv = tcp_jiffies32; mptcp_pm_data_init(msk); + spin_lock_init(&msk->fallback_lock); /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); @@ -3117,7 +3142,16 @@ static int mptcp_disconnect(struct sock *sk, int flags) * subflow */ mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE); + + /* The first subflow is already in TCP_CLOSE status, the following + * can't overlap with a fallback anymore + */ + spin_lock_bh(&msk->fallback_lock); + msk->allow_subflows = true; + msk->allow_infinite_fallback = true; WRITE_ONCE(msk->flags, 0); + spin_unlock_bh(&msk->fallback_lock); + msk->cb_flags = 0; msk->recovery = false; WRITE_ONCE(msk->can_ack, false); @@ -3524,7 +3558,13 @@ bool mptcp_finish_join(struct sock *ssk) /* active subflow, already present inside the conn_list */ if (!list_empty(&subflow->node)) { + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_subflows) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } mptcp_subflow_joined(msk, ssk); + spin_unlock_bh(&msk->fallback_lock); mptcp_propagate_sndbuf(parent, ssk); return true; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3dd11dd3ba16..6ec245fd2778 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -346,10 +346,16 @@ struct mptcp_sock { u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; u8 scaling_ratio; + bool allow_subflows; u32 subflow_id; u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; + + spinlock_t fallback_lock; /* protects fallback, + * allow_infinite_fallback and + * allow_join + */ }; #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) @@ -1216,15 +1222,22 @@ static inline bool mptcp_check_fallback(const struct sock *sk) return __mptcp_check_fallback(msk); } -static inline void __mptcp_do_fallback(struct mptcp_sock *msk) +static inline bool __mptcp_try_fallback(struct mptcp_sock *msk) { if (__mptcp_check_fallback(msk)) { pr_debug("TCP fallback already done (msk=%p)\n", msk); - return; + return true; } - if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback))) - return; + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + + msk->allow_subflows = false; set_bit(MPTCP_FALLBACK_DONE, &msk->flags); + spin_unlock_bh(&msk->fallback_lock); + return true; } static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk) @@ -1236,14 +1249,15 @@ static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk) TCPF_SYN_RECV | TCPF_LISTEN)); } -static inline void mptcp_do_fallback(struct sock *ssk) +static inline bool mptcp_try_fallback(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; struct mptcp_sock *msk; msk = mptcp_sk(sk); - __mptcp_do_fallback(msk); + if (!__mptcp_try_fallback(msk)) + return false; if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) { gfp_t saved_allocation = ssk->sk_allocation; @@ -1255,6 +1269,7 @@ static inline void mptcp_do_fallback(struct sock *ssk) tcp_shutdown(ssk, SEND_SHUTDOWN); ssk->sk_allocation = saved_allocation; } + return true; } #define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a) @@ -1264,7 +1279,7 @@ static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk, { pr_fallback(msk); subflow->request_mptcp = 0; - __mptcp_do_fallback(msk); + WARN_ON_ONCE(!__mptcp_try_fallback(msk)); } static inline bool mptcp_check_infinite_map(struct sk_buff *skb) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 15613d691bfe..1802bc5435a1 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -544,9 +544,11 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) mptcp_get_options(skb, &mp_opt); if (subflow->request_mptcp) { if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) { + if (!mptcp_try_fallback(sk)) + goto do_reset; + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); - mptcp_do_fallback(sk); pr_fallback(msk); goto fallback; } @@ -1300,20 +1302,29 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss mptcp_schedule_work(sk); } -static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) +static bool mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); unsigned long fail_tout; + /* we are really failing, prevent any later subflow join */ + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + msk->allow_subflows = false; + spin_unlock_bh(&msk->fallback_lock); + /* graceful failure can happen only on the MPC subflow */ if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first))) - return; + return false; /* since the close timeout take precedence on the fail one, * no need to start the latter when the first is already set */ if (sock_flag((struct sock *)msk, SOCK_DEAD)) - return; + return true; /* we don't need extreme accuracy here, use a zero fail_tout as special * value meaning no fail timeout at all; @@ -1325,6 +1336,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) tcp_send_ack(ssk); mptcp_reset_tout_timer(msk, subflow->fail_tout); + return true; } static bool subflow_check_data_avail(struct sock *ssk) @@ -1385,17 +1397,16 @@ fallback: (subflow->mp_join || subflow->valid_csum_seen)) { subflow->send_mp_fail = 1; - if (!READ_ONCE(msk->allow_infinite_fallback)) { + if (!mptcp_subflow_fail(msk, ssk)) { subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; goto reset; } - mptcp_subflow_fail(msk, ssk); WRITE_ONCE(subflow->data_avail, true); return true; } - if (!READ_ONCE(msk->allow_infinite_fallback)) { + if (!mptcp_try_fallback(ssk)) { /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ @@ -1413,8 +1424,6 @@ reset: WRITE_ONCE(subflow->data_avail, false); return false; } - - mptcp_do_fallback(ssk); } skb = skb_peek(&ssk->sk_receive_queue); @@ -1679,7 +1688,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, /* discard the subflow socket */ mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); - WRITE_ONCE(msk->allow_infinite_fallback, false); mptcp_stop_tout_timer(sk); return 0; @@ -1851,7 +1859,7 @@ static void subflow_state_change(struct sock *sk) msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { - mptcp_do_fallback(sk); + WARN_ON_ONCE(!mptcp_try_fallback(sk)); pr_fallback(msk); subflow->conn_finished = 1; mptcp_propagate_state(parent, sk, subflow, NULL); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 201d3c4ec623..e51f0b441109 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1124,6 +1124,12 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &nf_conntrack_hash[repl_idx]); + /* confirmed bit must be set after hlist add, not before: + * loser_ct can still be visible to other cpu due to + * SLAB_TYPESAFE_BY_RCU. + */ + smp_mb__before_atomic(); + set_bit(IPS_CONFIRMED_BIT, &loser_ct->status); NF_CT_STAT_INC(net, clash_resolve); return NF_ACCEPT; @@ -1260,8 +1266,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) * user context, else we insert an already 'dead' hash, blocking * further use of that particular connection -JM. */ - ct->status |= IPS_CONFIRMED; - if (unlikely(nf_ct_is_dying(ct))) { NF_CT_STAT_INC(net, insert_failed); goto dying; @@ -1293,7 +1297,7 @@ chaintoolong: } } - /* Timer relative to confirmation time, not original + /* Timeout is relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ ct->timeout += nfct_time_stamp; @@ -1301,11 +1305,21 @@ chaintoolong: __nf_conntrack_insert_prepare(ct); /* Since the lookup is lockless, hash insertion must be done after - * starting the timer and setting the CONFIRMED bit. The RCU barriers - * guarantee that no other CPU can find the conntrack before the above - * stores are visible. + * setting ct->timeout. The RCU barriers guarantee that no other CPU + * can find the conntrack before the above stores are visible. */ __nf_conntrack_hash_insert(ct, hash, reply_hash); + + /* IPS_CONFIRMED unset means 'ct not (yet) in hash', conntrack lookups + * skip entries that lack this bit. This happens when a CPU is looking + * at a stale entry that is being recycled due to SLAB_TYPESAFE_BY_RCU + * or when another CPU encounters this entry right after the insertion + * but before the set-confirm-bit below. This bit must not be set until + * after __nf_conntrack_hash_insert(). + */ + smp_mb__before_atomic(); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 24c71ecb2179..a7240736f98e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9686,64 +9686,6 @@ struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook *hook, } EXPORT_SYMBOL_GPL(nft_hook_find_ops_rcu); -static void -nf_tables_device_notify(const struct nft_table *table, int attr, - const char *name, const struct nft_hook *hook, - const struct net_device *dev, int event) -{ - struct net *net = dev_net(dev); - struct nlmsghdr *nlh; - struct sk_buff *skb; - u16 flags = 0; - - if (!nfnetlink_has_listeners(net, NFNLGRP_NFT_DEV)) - return; - - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb) - goto err; - - event = event == NETDEV_REGISTER ? NFT_MSG_NEWDEV : NFT_MSG_DELDEV; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, 0, 0, event, flags, table->family, - NFNETLINK_V0, nft_base_seq(net)); - if (!nlh) - goto err; - - if (nla_put_string(skb, NFTA_DEVICE_TABLE, table->name) || - nla_put_string(skb, attr, name) || - nla_put(skb, NFTA_DEVICE_SPEC, hook->ifnamelen, hook->ifname) || - nla_put_string(skb, NFTA_DEVICE_NAME, dev->name)) - goto err; - - nlmsg_end(skb, nlh); - nfnetlink_send(skb, net, 0, NFNLGRP_NFT_DEV, - nlmsg_report(nlh), GFP_KERNEL); - return; -err: - if (skb) - kfree_skb(skb); - nfnetlink_set_err(net, 0, NFNLGRP_NFT_DEV, -ENOBUFS); -} - -void -nf_tables_chain_device_notify(const struct nft_chain *chain, - const struct nft_hook *hook, - const struct net_device *dev, int event) -{ - nf_tables_device_notify(chain->table, NFTA_DEVICE_CHAIN, - chain->name, hook, dev, event); -} - -static void -nf_tables_flowtable_device_notify(const struct nft_flowtable *ft, - const struct nft_hook *hook, - const struct net_device *dev, int event) -{ - nf_tables_device_notify(ft->table, NFTA_DEVICE_FLOWTABLE, - ft->name, hook, dev, event); -} - static int nft_flowtable_event(unsigned long event, struct net_device *dev, struct nft_flowtable *flowtable, bool changename) { @@ -9791,7 +9733,6 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev, list_add_tail_rcu(&ops->list, &hook->ops_list); break; } - nf_tables_flowtable_device_notify(flowtable, hook, dev, event); break; } return 0; diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index ae3fe87195ab..a88abae5a9de 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -127,6 +127,9 @@ static int nf_trace_fill_ct_info(struct sk_buff *nlskb, if (nla_put_be32(nlskb, NFTA_TRACE_CT_ID, (__force __be32)id)) return -1; + /* Kernel implementation detail, withhold this from userspace for now */ + status &= ~IPS_NAT_CLASH; + if (status && nla_put_be32(nlskb, NFTA_TRACE_CT_STATUS, htonl(status))) return -1; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index ac77fc21632d..e598a2a252b0 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -86,7 +86,6 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES, - [NFNLGRP_NFT_DEV] = NFNL_SUBSYS_NFTABLES, }; static struct nfnl_net *nfnl_pernet(struct net *net) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 846d48ba8965..b16185e9a6dd 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -363,8 +363,6 @@ static int nft_netdev_event(unsigned long event, struct net_device *dev, list_add_tail_rcu(&ops->list, &hook->ops_list); break; } - nf_tables_chain_device_notify(&basechain->chain, - hook, dev, event); break; } return 0; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index e8972a857e51..6332a0e06596 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) WARN_ON(skb->sk != NULL); skb->sk = sk; skb->destructor = netlink_skb_destructor; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); sk_mem_charge(sk, skb->truesize); } @@ -1212,41 +1211,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast) int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk) { + DECLARE_WAITQUEUE(wait, current); struct netlink_sock *nlk; + unsigned int rmem; nlk = nlk_sk(sk); + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state))) { - DECLARE_WAITQUEUE(wait, current); - if (!*timeo) { - if (!ssk || netlink_is_kernel(ssk)) - netlink_overrun(sk); - sock_put(sk); - kfree_skb(skb); - return -EAGAIN; - } - - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&nlk->wait, &wait); + if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) && + !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { + netlink_skb_set_owner_r(skb, sk); + return 0; + } - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state)) && - !sock_flag(sk, SOCK_DEAD)) - *timeo = schedule_timeout(*timeo); + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&nlk->wait, &wait); + if (!*timeo) { + if (!ssk || netlink_is_kernel(ssk)) + netlink_overrun(sk); sock_put(sk); + kfree_skb(skb); + return -EAGAIN; + } - if (signal_pending(current)) { - kfree_skb(skb); - return sock_intr_errno(*timeo); - } - return 1; + __set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&nlk->wait, &wait); + rmem = atomic_read(&sk->sk_rmem_alloc); + + if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) || + test_bit(NETLINK_S_CONGESTED, &nlk->state)) && + !sock_flag(sk, SOCK_DEAD)) + *timeo = schedule_timeout(*timeo); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&nlk->wait, &wait); + sock_put(sk); + + if (signal_pending(current)) { + kfree_skb(skb); + return sock_intr_errno(*timeo); } - netlink_skb_set_owner_r(skb, sk); - return 0; + + return 1; } static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) @@ -1307,6 +1313,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { ret = skb->len; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; netlink_deliver_tap_kernel(sk, ssk, skb); @@ -1383,13 +1390,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check); static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) { struct netlink_sock *nlk = nlk_sk(sk); + unsigned int rmem, rcvbuf; - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); + rcvbuf = READ_ONCE(sk->sk_rcvbuf); + + if ((rmem == skb->truesize || rmem <= rcvbuf) && !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { netlink_skb_set_owner_r(skb, sk); __netlink_sendskb(sk, skb); - return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); + return rmem > (rcvbuf >> 1); } + + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); return -1; } @@ -2245,6 +2258,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken) struct netlink_ext_ack extack = {}; struct netlink_callback *cb; struct sk_buff *skb = NULL; + unsigned int rmem, rcvbuf; size_t max_recvmsg_len; struct module *module; int err = -ENOBUFS; @@ -2258,9 +2272,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken) goto errout_skb; } - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) - goto errout_skb; - /* NLMSG_GOODSIZE is small to avoid high order allocations being * required, but it makes sense to _attempt_ a 32KiB allocation * to reduce number of system calls on dump operations, if user @@ -2283,6 +2294,13 @@ static int netlink_dump(struct sock *sk, bool lock_taken) if (!skb) goto errout_skb; + rcvbuf = READ_ONCE(sk->sk_rcvbuf); + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); + if (rmem != skb->truesize && rmem >= rcvbuf) { + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + goto errout_skb; + } + /* Trim skb to allocated size. User is expected to provide buffer as * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at * netlink_recvmsg())). dump will pack as many smaller messages as diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3d43f3eae759..be608f07441f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2785,7 +2785,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen, copylen = 0; - long timeo = 0; + long timeo; mutex_lock(&po->pg_vec_lock); @@ -2839,22 +2839,28 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz) size_max = dev->mtu + reserve + VLAN_HLEN; + timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); reinit_completion(&po->skb_completion); do { ph = packet_current_frame(po, &po->tx_ring, TP_STATUS_SEND_REQUEST); if (unlikely(ph == NULL)) { - if (need_wait && skb) { - timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); + /* Note: packet_read_pending() might be slow if we + * have to call it as it's per_cpu variable, but in + * fast-path we don't have to call it, only when ph + * is NULL, we need to check the pending_refcnt. + */ + if (need_wait && packet_read_pending(&po->tx_ring)) { timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo); if (timeo <= 0) { err = !timeo ? -ETIMEDOUT : -ERESTARTSYS; goto out_put; } - } - /* check for additional frames */ - continue; + /* check for additional frames */ + continue; + } else + break; } skb = NULL; @@ -2943,14 +2949,7 @@ tpacket_error: } packet_increment_head(&po->tx_ring); len_sum += tp_len; - } while (likely((ph != NULL) || - /* Note: packet_read_pending() might be slow if we have - * to call it as it's per_cpu variable, but in fast-path - * we already short-circuit the loop with the first - * condition, and luckily don't have to go that path - * anyway. - */ - (need_wait && packet_read_pending(&po->tx_ring)))); + } while (1); err = len_sum; goto out_put; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 53a858478e22..62527e1ebb88 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -826,6 +826,7 @@ static struct sock *pep_sock_accept(struct sock *sk, } /* Check for duplicate pipe handle */ + pn_skb_get_dst_sockaddr(skb, &dst); newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle); if (unlikely(newsk)) { __sock_put(newsk); @@ -850,7 +851,6 @@ static struct sock *pep_sock_accept(struct sock *sk, newsk->sk_destruct = pipe_destruct; newpn = pep_sk(newsk); - pn_skb_get_dst_sockaddr(skb, &dst); pn_skb_get_src_sockaddr(skb, &src); newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); newpn->pn_sk.dobject = pn_sockaddr_get_object(&src); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5bd3922c310d..5b7342d43486 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -44,6 +44,7 @@ enum rxrpc_skb_mark { RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */ RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */ RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */ + RXRPC_SKB_MARK_REJECT_CONN_ABORT, /* Reject with connection ABORT (code in skb->priority) */ }; /* @@ -361,12 +362,15 @@ struct rxrpc_local { struct list_head new_client_calls; /* Newly created client calls need connection */ spinlock_t client_call_lock; /* Lock for ->new_client_calls */ struct sockaddr_rxrpc srx; /* local address */ - /* Provide a kvec table sufficiently large to manage either a DATA - * packet with a maximum set of jumbo subpackets or a PING ACK padded - * out to 64K with zeropages for PMTUD. - */ - struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ? - 1 + RXRPC_MAX_NR_JUMBO : 3 + 16]; + union { + /* Provide a kvec table sufficiently large to manage either a + * DATA packet with a maximum set of jumbo subpackets or a PING + * ACK padded out to 64K with zeropages for PMTUD. + */ + struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ? + 1 + RXRPC_MAX_NR_JUMBO : 3 + 16]; + struct bio_vec bvec[3 + 16]; + }; }; /* @@ -1250,6 +1254,8 @@ int rxrpc_encap_rcv(struct sock *, struct sk_buff *); void rxrpc_error_report(struct sock *); bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, s32 abort_code, int err); +bool rxrpc_direct_conn_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, + s32 abort_code, int err); int rxrpc_io_thread(void *data); void rxrpc_post_response(struct rxrpc_connection *conn, struct sk_buff *skb); static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local) @@ -1380,6 +1386,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, const struct sockaddr_rxrpc *); struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, gfp_t gfp); +void rxrpc_assess_MTU_size(struct rxrpc_local *local, struct rxrpc_peer *peer); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t, enum rxrpc_peer_trace); void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index a4b363b47cca..00982a030744 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -149,6 +149,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, id_in_use: write_unlock(&rx->call_lock); + rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EBADSLT); rxrpc_cleanup_call(call); _leave(" = -EBADSLT"); return -EBADSLT; @@ -218,6 +219,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) tail = b->call_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_call *call = b->call_backlog[tail]; + rxrpc_see_call(call, rxrpc_call_see_discard); rcu_assign_pointer(call->socket, rx); if (rx->app_ops && rx->app_ops->discard_new_call) { @@ -254,6 +256,9 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, unsigned short call_tail, conn_tail, peer_tail; unsigned short call_count, conn_count; + if (!b) + return NULL; + /* #calls >= #conns >= #peers must hold true. */ call_head = smp_load_acquire(&b->call_backlog_head); call_tail = b->call_backlog_tail; @@ -369,8 +374,8 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local, spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { - rxrpc_direct_abort(skb, rxrpc_abort_shut_down, - RX_INVALID_OPERATION, -ESHUTDOWN); + rxrpc_direct_conn_abort(skb, rxrpc_abort_shut_down, + RX_INVALID_OPERATION, -ESHUTDOWN); goto no_call; } @@ -402,6 +407,7 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local, spin_unlock(&rx->incoming_lock); read_unlock_irq(&local->services_lock); + rxrpc_assess_MTU_size(local, call->peer); if (hlist_unhashed(&call->error_link)) { spin_lock_irq(&call->peer->lock); @@ -416,12 +422,12 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local, unsupported_service: read_unlock_irq(&local->services_lock); - return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, - RX_INVALID_OPERATION, -EOPNOTSUPP); + return rxrpc_direct_conn_abort(skb, rxrpc_abort_service_not_offered, + RX_INVALID_OPERATION, -EOPNOTSUPP); unsupported_security: read_unlock_irq(&local->services_lock); - return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, - RX_INVALID_OPERATION, -EKEYREJECTED); + return rxrpc_direct_conn_abort(skb, rxrpc_abort_service_not_offered, + RX_INVALID_OPERATION, -EKEYREJECTED); no_call: spin_unlock(&rx->incoming_lock); read_unlock_irq(&local->services_lock); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 15067ff7b1f2..918f41d97a2f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -561,7 +561,7 @@ static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call) void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; - bool put = false, putu = false; + bool putu = false; _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); @@ -573,23 +573,13 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) rxrpc_put_call_slot(call); - /* Make sure we don't get any more notifications */ + /* Note that at this point, the call may still be on or may have been + * added back on to the socket receive queue. recvmsg() must discard + * released calls. The CALL_RELEASED flag should prevent further + * notifications. + */ spin_lock_irq(&rx->recvmsg_lock); - - if (!list_empty(&call->recvmsg_link)) { - _debug("unlinking once-pending call %p { e=%lx f=%lx }", - call, call->events, call->flags); - list_del(&call->recvmsg_link); - put = true; - } - - /* list_empty() must return false in rxrpc_notify_socket() */ - call->recvmsg_link.next = NULL; - call->recvmsg_link.prev = NULL; - spin_unlock_irq(&rx->recvmsg_lock); - if (put) - rxrpc_put_call(call, rxrpc_call_put_unnotify); write_lock(&rx->call_lock); @@ -638,6 +628,12 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) rxrpc_put_call(call, rxrpc_call_put_release_sock); } + while ((call = list_first_entry_or_null(&rx->recvmsg_q, + struct rxrpc_call, recvmsg_link))) { + list_del_init(&call->recvmsg_link); + rxrpc_put_call(call, rxrpc_call_put_release_recvmsg_q); + } + _leave(""); } diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 27b650d30f4d..e939ecf417c4 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -97,6 +97,20 @@ bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, return false; } +/* + * Directly produce a connection abort from a packet. + */ +bool rxrpc_direct_conn_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, + s32 abort_code, int err) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_abort(0, why, sp->hdr.cid, 0, sp->hdr.seq, abort_code, err); + skb->mark = RXRPC_SKB_MARK_REJECT_CONN_ABORT; + skb->priority = abort_code; + return false; +} + static bool rxrpc_bad_message(struct sk_buff *skb, enum rxrpc_abort_reason why) { return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EBADMSG); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0af19bcdc80a..8b5903b6e481 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -814,6 +814,9 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) __be32 code; int ret, ioc; + if (sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) + return; /* Never abort an abort. */ + rxrpc_see_skb(skb, rxrpc_skb_see_reject); iov[0].iov_base = &whdr; @@ -826,7 +829,13 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) msg.msg_controllen = 0; msg.msg_flags = 0; - memset(&whdr, 0, sizeof(whdr)); + whdr = (struct rxrpc_wire_header) { + .epoch = htonl(sp->hdr.epoch), + .cid = htonl(sp->hdr.cid), + .callNumber = htonl(sp->hdr.callNumber), + .serviceId = htons(sp->hdr.serviceId), + .flags = ~sp->hdr.flags & RXRPC_CLIENT_INITIATED, + }; switch (skb->mark) { case RXRPC_SKB_MARK_REJECT_BUSY: @@ -834,6 +843,9 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) size = sizeof(whdr); ioc = 1; break; + case RXRPC_SKB_MARK_REJECT_CONN_ABORT: + whdr.callNumber = 0; + fallthrough; case RXRPC_SKB_MARK_REJECT_ABORT: whdr.type = RXRPC_PACKET_TYPE_ABORT; code = htonl(skb->priority); @@ -847,14 +859,6 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { msg.msg_namelen = srx.transport_len; - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.serviceId = htons(sp->hdr.serviceId); - whdr.flags = sp->hdr.flags; - whdr.flags ^= RXRPC_CLIENT_INITIATED; - whdr.flags &= RXRPC_CLIENT_INITIATED; - iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size); ret = do_udp_sendmsg(local->socket, &msg, size); if (ret < 0) @@ -924,7 +928,7 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response { struct rxrpc_skb_priv *sp = rxrpc_skb(response); struct scatterlist sg[16]; - struct bio_vec bvec[16]; + struct bio_vec *bvec = conn->local->bvec; struct msghdr msg; size_t len = sp->resp.len; __be32 wserial; @@ -938,6 +942,9 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response if (ret < 0) goto fail; nr_sg = ret; + ret = -EIO; + if (WARN_ON_ONCE(nr_sg > ARRAY_SIZE(conn->local->bvec))) + goto fail; for (int i = 0; i < nr_sg; i++) bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index e2f35e6c04d6..366431b0736c 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -149,8 +149,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, * assess the MTU size for the network interface through which this peer is * reached */ -static void rxrpc_assess_MTU_size(struct rxrpc_local *local, - struct rxrpc_peer *peer) +void rxrpc_assess_MTU_size(struct rxrpc_local *local, struct rxrpc_peer *peer) { struct net *net = local->net; struct dst_entry *dst; @@ -277,8 +276,6 @@ static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, peer->hdrsize += sizeof(struct rxrpc_wire_header); peer->max_data = peer->if_mtu - peer->hdrsize; - - rxrpc_assess_MTU_size(local, peer); } /* @@ -297,6 +294,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, if (peer) { memcpy(&peer->srx, srx, sizeof(*srx)); rxrpc_init_peer(local, peer, hash_key); + rxrpc_assess_MTU_size(local, peer); } _leave(" = %p", peer); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 86a27fb55a1c..7fa7e77f6bb9 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -29,6 +29,10 @@ void rxrpc_notify_socket(struct rxrpc_call *call) if (!list_empty(&call->recvmsg_link)) return; + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { + rxrpc_see_call(call, rxrpc_call_see_notify_released); + return; + } rcu_read_lock(); @@ -447,6 +451,16 @@ try_again: goto try_again; } + rxrpc_see_call(call, rxrpc_call_see_recvmsg); + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { + rxrpc_see_call(call, rxrpc_call_see_already_released); + list_del_init(&call->recvmsg_link); + spin_unlock_irq(&rx->recvmsg_lock); + release_sock(&rx->sk); + trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0); + rxrpc_put_call(call, rxrpc_call_put_recvmsg); + goto try_again; + } if (!(flags & MSG_PEEK)) list_del_init(&call->recvmsg_link); else @@ -470,8 +484,13 @@ try_again: release_sock(&rx->sk); - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) - BUG(); + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { + rxrpc_see_call(call, rxrpc_call_see_already_released); + mutex_unlock(&call->user_mutex); + if (!(flags & MSG_PEEK)) + rxrpc_put_call(call, rxrpc_call_put_recvmsg); + goto try_again; + } ret = rxrpc_recvmsg_user_id(call, msg, flags); if (ret < 0) diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 078d91a6b77f..2bfbf2b2bb37 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -140,15 +140,15 @@ const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx, sec = rxrpc_security_lookup(sp->hdr.securityIndex); if (!sec) { - rxrpc_direct_abort(skb, rxrpc_abort_unsupported_security, - RX_INVALID_OPERATION, -EKEYREJECTED); + rxrpc_direct_conn_abort(skb, rxrpc_abort_unsupported_security, + RX_INVALID_OPERATION, -EKEYREJECTED); return NULL; } if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE && !rx->securities) { - rxrpc_direct_abort(skb, rxrpc_abort_no_service_key, - sec->no_key_abort, -EKEYREJECTED); + rxrpc_direct_conn_abort(skb, rxrpc_abort_no_service_key, + sec->no_key_abort, -EKEYREJECTED); return NULL; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d8a33486c511..d7c767b861a4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -336,17 +336,22 @@ out: return q; } -static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) +static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid, + struct netlink_ext_ack *extack) { unsigned long cl; const struct Qdisc_class_ops *cops = p->ops->cl_ops; - if (cops == NULL) - return NULL; + if (cops == NULL) { + NL_SET_ERR_MSG(extack, "Parent qdisc is not classful"); + return ERR_PTR(-EOPNOTSUPP); + } cl = cops->find(p, classid); - if (cl == 0) - return NULL; + if (cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class not found"); + return ERR_PTR(-ENOENT); + } return cops->leaf(p, cl); } @@ -596,16 +601,6 @@ out: qdisc_skb_cb(skb)->pkt_len = pkt_len; } -void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) -{ - if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { - pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", - txt, qdisc->ops->id, qdisc->handle >> 16); - qdisc->flags |= TCQ_F_WARN_NONWC; - } -} -EXPORT_SYMBOL(qdisc_warn_nonwc); - static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, @@ -1490,7 +1485,7 @@ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid"); return -ENOENT; } - q = qdisc_leaf(p, clid); + q = qdisc_leaf(p, clid, extack); } else if (dev_ingress_queue(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } @@ -1501,6 +1496,8 @@ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); return -ENOENT; } + if (IS_ERR(q)) + return PTR_ERR(q); if (tcm->tcm_handle && q->handle != tcm->tcm_handle) { NL_SET_ERR_MSG(extack, "Invalid handle"); @@ -1602,7 +1599,9 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; } - q = qdisc_leaf(p, clid); + q = qdisc_leaf(p, clid, extack); + if (IS_ERR(q)) + return PTR_ERR(q); } else if (dev_ingress_queue_create(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 5a7745170e84..d8fd35da32a7 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -835,22 +835,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) } } -static unsigned int -qdisc_peek_len(struct Qdisc *sch) -{ - struct sk_buff *skb; - unsigned int len; - - skb = sch->ops->peek(sch); - if (unlikely(skb == NULL)) { - qdisc_warn_nonwc("qdisc_peek_len", sch); - return 0; - } - len = qdisc_pkt_len(skb); - - return len; -} - static void hfsc_adjust_levels(struct hfsc_class *cl) { diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 14bf71f57057..c968ea763774 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -821,7 +821,9 @@ static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio) u32 *pid; } stk[TC_HTB_MAXDEPTH], *sp = stk; - BUG_ON(!hprio->row.rb_node); + if (unlikely(!hprio->row.rb_node)) + return NULL; + sp->root = hprio->row.rb_node; sp->pptr = &hprio->ptr; sp->pid = &hprio->last_ptr_id; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index bf1282cb22eb..2255355e51d3 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -412,7 +412,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, bool existing = false; struct nlattr *tb[TCA_QFQ_MAX + 1]; struct qfq_aggregate *new_agg = NULL; - u32 weight, lmax, inv_w; + u32 weight, lmax, inv_w, old_weight, old_lmax; int err; int delta_w; @@ -443,12 +443,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; - if (cl != NULL && - lmax == cl->agg->lmax && - weight == cl->agg->class_weight) - return 0; /* nothing to change */ + if (cl != NULL) { + sch_tree_lock(sch); + old_weight = cl->agg->class_weight; + old_lmax = cl->agg->lmax; + sch_tree_unlock(sch); + if (lmax == old_lmax && weight == old_weight) + return 0; /* nothing to change */ + } - delta_w = weight - (cl ? cl->agg->class_weight : 0); + delta_w = weight - (cl ? old_weight : 0); if (q->wsum + delta_w > QFQ_MAX_WSUM) { NL_SET_ERR_MSG_FMT_MOD(extack, @@ -532,9 +536,6 @@ destroy_class: static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) { - struct qfq_sched *q = qdisc_priv(sch); - - qfq_rm_from_agg(q, cl); gen_kill_estimator(&cl->rate_est); qdisc_put(cl->qdisc); kfree(cl); @@ -555,6 +556,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg, qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); + qfq_rm_from_agg(q, cl); sch_tree_unlock(sch); @@ -625,6 +627,7 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, { struct qfq_class *cl = (struct qfq_class *)arg; struct nlattr *nest; + u32 class_weight, lmax; tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle = cl->common.classid; @@ -633,8 +636,13 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; - if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) || - nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax)) + + sch_tree_lock(sch); + class_weight = cl->agg->class_weight; + lmax = cl->agg->lmax; + sch_tree_unlock(sch); + if (nla_put_u32(skb, TCA_QFQ_WEIGHT, class_weight) || + nla_put_u32(skb, TCA_QFQ_LMAX, lmax)) goto nla_put_failure; return nla_nest_end(skb, nest); @@ -651,8 +659,10 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, memset(&xstats, 0, sizeof(xstats)); + sch_tree_lock(sch); xstats.weight = cl->agg->class_weight; xstats.lmax = cl->agg->lmax; + sch_tree_unlock(sch); if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || @@ -989,7 +999,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ list_del_init(&cl->alist); - else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { + else if (cl->deficit < qdisc_peek_len(cl->qdisc)) { cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); } @@ -1491,6 +1501,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) { + qfq_rm_from_agg(q, cl); qfq_destroy_class(sch, cl); } } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 3760131f1484..1882bab8e00e 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -30,6 +30,10 @@ #include <linux/splice.h> #include <net/sock.h> +#include <net/inet_common.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#endif #include <net/tcp.h> #include <net/smc.h> #include <asm/ioctls.h> @@ -360,6 +364,16 @@ static void smc_destruct(struct sock *sk) return; if (!sock_flag(sk, SOCK_DEAD)) return; + switch (sk->sk_family) { + case AF_INET: + inet_sock_destruct(sk); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + inet6_sock_destruct(sk); + break; +#endif + } } static struct lock_class_key smc_key; diff --git a/net/smc/smc.h b/net/smc/smc.h index 78ae10d06ed2..2c9084963739 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -283,10 +283,10 @@ struct smc_connection { }; struct smc_sock { /* smc sock container */ - struct sock sk; -#if IS_ENABLED(CONFIG_IPV6) - struct ipv6_pinfo *pinet6; -#endif + union { + struct sock sk; + struct inet_sock icsk_inet; + }; struct socket *clcsock; /* internal tcp socket */ void (*clcsk_state_change)(struct sock *sk); /* original stat_change fct. */ diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 7b943fbafcc3..5c095cb8cb20 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -887,25 +887,16 @@ static void gss_pipe_dentry_destroy(struct dentry *dir, struct rpc_pipe_dir_object *pdo) { struct gss_pipe *gss_pipe = pdo->pdo_data; - struct rpc_pipe *pipe = gss_pipe->pipe; - if (pipe->dentry != NULL) { - rpc_unlink(pipe->dentry); - pipe->dentry = NULL; - } + rpc_unlink(gss_pipe->pipe); } static int gss_pipe_dentry_create(struct dentry *dir, struct rpc_pipe_dir_object *pdo) { struct gss_pipe *p = pdo->pdo_data; - struct dentry *dentry; - dentry = rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - p->pipe->dentry = dentry; - return 0; + return rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe); } static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = { diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 73a90ad873fb..e82212f6b562 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1628,7 +1628,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp) int ret; struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); - rqstp->rq_auth_stat = rpc_autherr_badcred; + rqstp->rq_auth_stat = rpc_autherr_failed; if (!svcdata) svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL); if (!svcdata) @@ -1638,6 +1638,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp) svcdata->rsci = NULL; gc = &svcdata->clcred; + rqstp->rq_auth_stat = rpc_autherr_badcred; if (!svcauth_gss_decode_credbody(&rqstp->rq_arg_stream, gc, &rpcstart)) goto auth_err; if (gc->gc_v != RPC_GSS_VERSION) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 21426c3049d3..8ca354ecfd02 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -112,47 +112,46 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) } } -static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, +static int rpc_setup_pipedir_sb(struct super_block *sb, struct rpc_clnt *clnt) { static uint32_t clntid; const char *dir_name = clnt->cl_program->pipe_dir_name; char name[15]; - struct dentry *dir, *dentry; + struct dentry *dir; + int err; dir = rpc_d_lookup_sb(sb, dir_name); if (dir == NULL) { pr_info("RPC: pipefs directory doesn't exist: %s\n", dir_name); - return dir; + return -ENOENT; } for (;;) { snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); name[sizeof(name) - 1] = '\0'; - dentry = rpc_create_client_dir(dir, name, clnt); - if (!IS_ERR(dentry)) + err = rpc_create_client_dir(dir, name, clnt); + if (!err) break; - if (dentry == ERR_PTR(-EEXIST)) + if (err == -EEXIST) continue; printk(KERN_INFO "RPC: Couldn't create pipefs entry" - " %s/%s, error %ld\n", - dir_name, name, PTR_ERR(dentry)); + " %s/%s, error %d\n", + dir_name, name, err); break; } dput(dir); - return dentry; + return err; } static int rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt) { - struct dentry *dentry; - clnt->pipefs_sb = pipefs_sb; if (clnt->cl_program->pipe_dir_name != NULL) { - dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); + int err = rpc_setup_pipedir_sb(pipefs_sb, clnt); + if (err && err != -ENOENT) + return err; } return 0; } @@ -180,16 +179,9 @@ static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event) static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event, struct super_block *sb) { - struct dentry *dentry; - switch (event) { case RPC_PIPEFS_MOUNT: - dentry = rpc_setup_pipedir_sb(sb, clnt); - if (!dentry) - return -ENOENT; - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - break; + return rpc_setup_pipedir_sb(sb, clnt); case RPC_PIPEFS_UMOUNT: __rpc_clnt_remove_pipedir(clnt); break; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 98f78cd55905..0bd1df2ebb47 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -168,8 +168,9 @@ rpc_inode_setowner(struct inode *inode, void *private) } static void -rpc_close_pipes(struct inode *inode) +rpc_close_pipes(struct dentry *dentry) { + struct inode *inode = dentry->d_inode; struct rpc_pipe *pipe = RPC_I(inode)->pipe; int need_release; LIST_HEAD(free_list); @@ -484,60 +485,6 @@ rpc_get_inode(struct super_block *sb, umode_t mode) return inode; } -static int __rpc_create_common(struct inode *dir, struct dentry *dentry, - umode_t mode, - const struct file_operations *i_fop, - void *private) -{ - struct inode *inode; - - d_drop(dentry); - inode = rpc_get_inode(dir->i_sb, mode); - if (!inode) - goto out_err; - inode->i_ino = iunique(dir->i_sb, 100); - if (i_fop) - inode->i_fop = i_fop; - if (private) - rpc_inode_setowner(inode, private); - d_add(dentry, inode); - return 0; -out_err: - printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %pd\n", - __FILE__, __func__, dentry); - dput(dentry); - return -ENOMEM; -} - -static int __rpc_create(struct inode *dir, struct dentry *dentry, - umode_t mode, - const struct file_operations *i_fop, - void *private) -{ - int err; - - err = __rpc_create_common(dir, dentry, S_IFREG | mode, i_fop, private); - if (err) - return err; - fsnotify_create(dir, dentry); - return 0; -} - -static int __rpc_mkdir(struct inode *dir, struct dentry *dentry, - umode_t mode, - const struct file_operations *i_fop, - void *private) -{ - int err; - - err = __rpc_create_common(dir, dentry, S_IFDIR | mode, i_fop, private); - if (err) - return err; - inc_nlink(dir); - fsnotify_mkdir(dir, dentry); - return 0; -} - static void init_pipe(struct rpc_pipe *pipe) { @@ -574,119 +521,60 @@ struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags) } EXPORT_SYMBOL_GPL(rpc_mkpipe_data); -static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry, - umode_t mode, - const struct file_operations *i_fop, - void *private, - struct rpc_pipe *pipe) +static int rpc_new_file(struct dentry *parent, + const char *name, + umode_t mode, + const struct file_operations *i_fop, + void *private) { - struct rpc_inode *rpci; - int err; + struct dentry *dentry = simple_start_creating(parent, name); + struct inode *dir = parent->d_inode; + struct inode *inode; - err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private); - if (err) - return err; - rpci = RPC_I(d_inode(dentry)); - rpci->private = private; - rpci->pipe = pipe; + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + inode = rpc_get_inode(dir->i_sb, S_IFREG | mode); + if (unlikely(!inode)) { + dput(dentry); + inode_unlock(dir); + return -ENOMEM; + } + inode->i_ino = iunique(dir->i_sb, 100); + if (i_fop) + inode->i_fop = i_fop; + rpc_inode_setowner(inode, private); + d_instantiate(dentry, inode); fsnotify_create(dir, dentry); + inode_unlock(dir); return 0; } -static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) -{ - int ret; - - dget(dentry); - ret = simple_rmdir(dir, dentry); - d_drop(dentry); - if (!ret) - fsnotify_rmdir(dir, dentry); - dput(dentry); - return ret; -} - -static int __rpc_unlink(struct inode *dir, struct dentry *dentry) -{ - int ret; - - dget(dentry); - ret = simple_unlink(dir, dentry); - d_drop(dentry); - if (!ret) - fsnotify_unlink(dir, dentry); - dput(dentry); - return ret; -} - -static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) +static struct dentry *rpc_new_dir(struct dentry *parent, + const char *name, + umode_t mode) { - struct inode *inode = d_inode(dentry); - - rpc_close_pipes(inode); - return __rpc_unlink(dir, dentry); -} + struct dentry *dentry = simple_start_creating(parent, name); + struct inode *dir = parent->d_inode; + struct inode *inode; -static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, - const char *name) -{ - struct qstr q = QSTR(name); - struct dentry *dentry = try_lookup_noperm(&q, parent); - if (!dentry) { - dentry = d_alloc(parent, &q); - if (!dentry) - return ERR_PTR(-ENOMEM); - } - if (d_really_is_negative(dentry)) + if (IS_ERR(dentry)) return dentry; - dput(dentry); - return ERR_PTR(-EEXIST); -} -/* - * FIXME: This probably has races. - */ -static void __rpc_depopulate(struct dentry *parent, - const struct rpc_filelist *files, - int start, int eof) -{ - struct inode *dir = d_inode(parent); - struct dentry *dentry; - struct qstr name; - int i; - - for (i = start; i < eof; i++) { - name.name = files[i].name; - name.len = strlen(files[i].name); - dentry = try_lookup_noperm(&name, parent); - - if (dentry == NULL) - continue; - if (d_really_is_negative(dentry)) - goto next; - switch (d_inode(dentry)->i_mode & S_IFMT) { - default: - BUG(); - case S_IFREG: - __rpc_unlink(dir, dentry); - break; - case S_IFDIR: - __rpc_rmdir(dir, dentry); - } -next: + inode = rpc_get_inode(dir->i_sb, S_IFDIR | mode); + if (unlikely(!inode)) { dput(dentry); + inode_unlock(dir); + return ERR_PTR(-ENOMEM); } -} -static void rpc_depopulate(struct dentry *parent, - const struct rpc_filelist *files, - int start, int eof) -{ - struct inode *dir = d_inode(parent); - - inode_lock_nested(dir, I_MUTEX_CHILD); - __rpc_depopulate(parent, files, start, eof); + inode->i_ino = iunique(dir->i_sb, 100); + inc_nlink(dir); + d_instantiate(dentry, inode); + fsnotify_mkdir(dir, dentry); inode_unlock(dir); + + return dentry; } static int rpc_populate(struct dentry *parent, @@ -694,92 +582,39 @@ static int rpc_populate(struct dentry *parent, int start, int eof, void *private) { - struct inode *dir = d_inode(parent); struct dentry *dentry; int i, err; - inode_lock(dir); for (i = start; i < eof; i++) { - dentry = __rpc_lookup_create_exclusive(parent, files[i].name); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out_bad; switch (files[i].mode & S_IFMT) { default: BUG(); case S_IFREG: - err = __rpc_create(dir, dentry, + err = rpc_new_file(parent, + files[i].name, files[i].mode, files[i].i_fop, private); + if (err) + goto out_bad; break; case S_IFDIR: - err = __rpc_mkdir(dir, dentry, - files[i].mode, - NULL, - private); + dentry = rpc_new_dir(parent, + files[i].name, + files[i].mode); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto out_bad; + } } - if (err != 0) - goto out_bad; } - inode_unlock(dir); return 0; out_bad: - __rpc_depopulate(parent, files, start, eof); - inode_unlock(dir); printk(KERN_WARNING "%s: %s failed to populate directory %pd\n", __FILE__, __func__, parent); return err; } -static struct dentry *rpc_mkdir_populate(struct dentry *parent, - const char *name, umode_t mode, void *private, - int (*populate)(struct dentry *, void *), void *args_populate) -{ - struct dentry *dentry; - struct inode *dir = d_inode(parent); - int error; - - inode_lock_nested(dir, I_MUTEX_PARENT); - dentry = __rpc_lookup_create_exclusive(parent, name); - if (IS_ERR(dentry)) - goto out; - error = __rpc_mkdir(dir, dentry, mode, NULL, private); - if (error != 0) - goto out_err; - if (populate != NULL) { - error = populate(dentry, args_populate); - if (error) - goto err_rmdir; - } -out: - inode_unlock(dir); - return dentry; -err_rmdir: - __rpc_rmdir(dir, dentry); -out_err: - dentry = ERR_PTR(error); - goto out; -} - -static int rpc_rmdir_depopulate(struct dentry *dentry, - void (*depopulate)(struct dentry *)) -{ - struct dentry *parent; - struct inode *dir; - int error; - - parent = dget_parent(dentry); - dir = d_inode(parent); - inode_lock_nested(dir, I_MUTEX_PARENT); - if (depopulate != NULL) - depopulate(dentry); - error = __rpc_rmdir(dir, dentry); - inode_unlock(dir); - dput(parent); - return error; -} - /** * rpc_mkpipe_dentry - make an rpc_pipefs file for kernel<->userspace * communication @@ -799,11 +634,13 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * The @private argument passed here will be available to all these methods * from the file pointer, via RPC_I(file_inode(file))->private. */ -struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, +int rpc_mkpipe_dentry(struct dentry *parent, const char *name, void *private, struct rpc_pipe *pipe) { - struct dentry *dentry; struct inode *dir = d_inode(parent); + struct dentry *dentry; + struct inode *inode; + struct rpc_inode *rpci; umode_t umode = S_IFIFO | 0600; int err; @@ -812,48 +649,53 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, if (pipe->ops->downcall == NULL) umode &= ~0222; - inode_lock_nested(dir, I_MUTEX_PARENT); - dentry = __rpc_lookup_create_exclusive(parent, name); - if (IS_ERR(dentry)) - goto out; - err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops, - private, pipe); - if (err) - goto out_err; -out: + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto failed; + } + + inode = rpc_get_inode(dir->i_sb, umode); + if (unlikely(!inode)) { + dput(dentry); + inode_unlock(dir); + err = -ENOMEM; + goto failed; + } + inode->i_ino = iunique(dir->i_sb, 100); + inode->i_fop = &rpc_pipe_fops; + rpci = RPC_I(inode); + rpci->private = private; + rpci->pipe = pipe; + rpc_inode_setowner(inode, private); + d_instantiate(dentry, inode); + pipe->dentry = dentry; + fsnotify_create(dir, dentry); inode_unlock(dir); - return dentry; -out_err: - dentry = ERR_PTR(err); - printk(KERN_WARNING "%s: %s() failed to create pipe %pd/%s (errno = %d)\n", - __FILE__, __func__, parent, name, - err); - goto out; + return 0; + +failed: + pr_warn("%s() failed to create pipe %pd/%s (errno = %d)\n", + __func__, parent, name, err); + return err; } EXPORT_SYMBOL_GPL(rpc_mkpipe_dentry); /** * rpc_unlink - remove a pipe - * @dentry: dentry for the pipe, as returned from rpc_mkpipe + * @pipe: the pipe to be removed * * After this call, lookups will no longer find the pipe, and any * attempts to read or write using preexisting opens of the pipe will * return -EPIPE. */ -int -rpc_unlink(struct dentry *dentry) +void +rpc_unlink(struct rpc_pipe *pipe) { - struct dentry *parent; - struct inode *dir; - int error = 0; - - parent = dget_parent(dentry); - dir = d_inode(parent); - inode_lock_nested(dir, I_MUTEX_PARENT); - error = __rpc_rmpipe(dir, dentry); - inode_unlock(dir); - dput(parent); - return error; + if (pipe->dentry) { + simple_recursive_removal(pipe->dentry, rpc_close_pipes); + pipe->dentry = NULL; + } } EXPORT_SYMBOL_GPL(rpc_unlink); @@ -1010,31 +852,6 @@ rpc_destroy_pipe_dir_objects(struct rpc_pipe_dir_head *pdh) pdo->pdo_ops->destroy(dir, pdo); } -enum { - RPCAUTH_info, - RPCAUTH_EOF -}; - -static const struct rpc_filelist authfiles[] = { - [RPCAUTH_info] = { - .name = "info", - .i_fop = &rpc_info_operations, - .mode = S_IFREG | 0400, - }, -}; - -static int rpc_clntdir_populate(struct dentry *dentry, void *private) -{ - return rpc_populate(dentry, - authfiles, RPCAUTH_info, RPCAUTH_EOF, - private); -} - -static void rpc_clntdir_depopulate(struct dentry *dentry) -{ - rpc_depopulate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF); -} - /** * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs * @dentry: the parent of new directory @@ -1046,19 +863,27 @@ static void rpc_clntdir_depopulate(struct dentry *dentry) * information about the client, together with any "pipes" that may * later be created using rpc_mkpipe(). */ -struct dentry *rpc_create_client_dir(struct dentry *dentry, - const char *name, - struct rpc_clnt *rpc_client) +int rpc_create_client_dir(struct dentry *dentry, + const char *name, + struct rpc_clnt *rpc_client) { struct dentry *ret; + int err; - ret = rpc_mkdir_populate(dentry, name, 0555, NULL, - rpc_clntdir_populate, rpc_client); - if (!IS_ERR(ret)) { - rpc_client->cl_pipedir_objects.pdh_dentry = ret; - rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects); + ret = rpc_new_dir(dentry, name, 0555); + if (IS_ERR(ret)) + return PTR_ERR(ret); + err = rpc_new_file(ret, "info", S_IFREG | 0400, + &rpc_info_operations, rpc_client); + if (err) { + pr_warn("%s failed to populate directory %pd\n", + __func__, ret); + simple_recursive_removal(ret, NULL); + return err; } - return ret; + rpc_client->cl_pipedir_objects.pdh_dentry = ret; + rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects); + return 0; } /** @@ -1073,7 +898,8 @@ int rpc_remove_client_dir(struct rpc_clnt *rpc_client) return 0; rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects); rpc_client->cl_pipedir_objects.pdh_dentry = NULL; - return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); + simple_recursive_removal(dentry, NULL); + return 0; } static const struct rpc_filelist cache_pipefs_files[3] = { @@ -1094,28 +920,25 @@ static const struct rpc_filelist cache_pipefs_files[3] = { }, }; -static int rpc_cachedir_populate(struct dentry *dentry, void *private) -{ - return rpc_populate(dentry, - cache_pipefs_files, 0, 3, - private); -} - -static void rpc_cachedir_depopulate(struct dentry *dentry) -{ - rpc_depopulate(dentry, cache_pipefs_files, 0, 3); -} - struct dentry *rpc_create_cache_dir(struct dentry *parent, const char *name, umode_t umode, struct cache_detail *cd) { - return rpc_mkdir_populate(parent, name, umode, NULL, - rpc_cachedir_populate, cd); + struct dentry *dentry; + + dentry = rpc_new_dir(parent, name, umode); + if (!IS_ERR(dentry)) { + int error = rpc_populate(dentry, cache_pipefs_files, 0, 3, cd); + if (error) { + simple_recursive_removal(dentry, NULL); + return ERR_PTR(error); + } + } + return dentry; } void rpc_remove_cache_dir(struct dentry *dentry) { - rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate); + simple_recursive_removal(dentry, NULL); } /* @@ -1141,7 +964,6 @@ enum { RPCAUTH_nfsd4_cb, RPCAUTH_cache, RPCAUTH_nfsd, - RPCAUTH_gssd, RPCAUTH_RootEOF }; @@ -1178,10 +1000,6 @@ static const struct rpc_filelist files[] = { .name = "nfsd", .mode = S_IFDIR | 0555, }, - [RPCAUTH_gssd] = { - .name = "gssd", - .mode = S_IFDIR | 0555, - }, }; /* @@ -1241,13 +1059,6 @@ void rpc_put_sb_net(const struct net *net) } EXPORT_SYMBOL_GPL(rpc_put_sb_net); -static const struct rpc_filelist gssd_dummy_clnt_dir[] = { - [0] = { - .name = "clntXX", - .mode = S_IFDIR | 0555, - }, -}; - static ssize_t dummy_downcall(struct file *filp, const char __user *src, size_t len) { @@ -1276,14 +1087,6 @@ rpc_dummy_info_show(struct seq_file *m, void *v) } DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info); -static const struct rpc_filelist gssd_dummy_info_file[] = { - [0] = { - .name = "info", - .i_fop = &rpc_dummy_info_fops, - .mode = S_IFREG | 0400, - }, -}; - /** * rpc_gssd_dummy_populate - create a dummy gssd pipe * @root: root of the rpc_pipefs filesystem @@ -1292,69 +1095,32 @@ static const struct rpc_filelist gssd_dummy_info_file[] = { * Create a dummy set of directories and a pipe that gssd can hold open to * indicate that it is up and running. */ -static struct dentry * +static int rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) { - int ret = 0; - struct dentry *gssd_dentry; - struct dentry *clnt_dentry = NULL; - struct dentry *pipe_dentry = NULL; - - /* We should never get this far if "gssd" doesn't exist */ - gssd_dentry = try_lookup_noperm(&QSTR(files[RPCAUTH_gssd].name), root); - if (!gssd_dentry) - return ERR_PTR(-ENOENT); - - ret = rpc_populate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1, NULL); - if (ret) { - pipe_dentry = ERR_PTR(ret); - goto out; - } + struct dentry *gssd_dentry, *clnt_dentry; + int err; - clnt_dentry = try_lookup_noperm(&QSTR(gssd_dummy_clnt_dir[0].name), - gssd_dentry); - if (!clnt_dentry) { - __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); - pipe_dentry = ERR_PTR(-ENOENT); - goto out; - } + gssd_dentry = rpc_new_dir(root, "gssd", 0555); + if (IS_ERR(gssd_dentry)) + return -ENOENT; - ret = rpc_populate(clnt_dentry, gssd_dummy_info_file, 0, 1, NULL); - if (ret) { - __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); - pipe_dentry = ERR_PTR(ret); - goto out; - } + clnt_dentry = rpc_new_dir(gssd_dentry, "clntXX", 0555); + if (IS_ERR(clnt_dentry)) + return -ENOENT; - pipe_dentry = rpc_mkpipe_dentry(clnt_dentry, "gssd", NULL, pipe_data); - if (IS_ERR(pipe_dentry)) { - __rpc_depopulate(clnt_dentry, gssd_dummy_info_file, 0, 1); - __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); - } -out: - dput(clnt_dentry); - dput(gssd_dentry); - return pipe_dentry; -} - -static void -rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry) -{ - struct dentry *clnt_dir = pipe_dentry->d_parent; - struct dentry *gssd_dir = clnt_dir->d_parent; - - dget(pipe_dentry); - __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry); - __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1); - __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1); - dput(pipe_dentry); + err = rpc_new_file(clnt_dentry, "info", 0400, + &rpc_dummy_info_fops, NULL); + if (!err) + err = rpc_mkpipe_dentry(clnt_dentry, "gssd", NULL, pipe_data); + return err; } static int rpc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; - struct dentry *root, *gssd_dentry; + struct dentry *root; struct net *net = sb->s_fs_info; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int err; @@ -1363,7 +1129,7 @@ rpc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = RPCAUTH_GSSMAGIC; sb->s_op = &s_ops; - sb->s_d_op = &simple_dentry_operations; + sb->s_d_flags = DCACHE_DONTCACHE; sb->s_time_gran = 1; inode = rpc_get_inode(sb, S_IFDIR | 0555); @@ -1373,11 +1139,9 @@ rpc_fill_super(struct super_block *sb, struct fs_context *fc) if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) return -ENOMEM; - gssd_dentry = rpc_gssd_dummy_populate(root, sn->gssd_dummy); - if (IS_ERR(gssd_dentry)) { - __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); - return PTR_ERR(gssd_dentry); - } + err = rpc_gssd_dummy_populate(root, sn->gssd_dummy); + if (err) + return err; dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n", net->ns.inum, NET_NAME(net)); @@ -1386,18 +1150,6 @@ rpc_fill_super(struct super_block *sb, struct fs_context *fc) err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_MOUNT, sb); - if (err) - goto err_depopulate; - mutex_unlock(&sn->pipefs_sb_lock); - return 0; - -err_depopulate: - rpc_gssd_dummy_depopulate(gssd_dentry); - blocking_notifier_call_chain(&rpc_pipefs_notifier_list, - RPC_PIPEFS_UMOUNT, - sb); - sn->pipefs_sb = NULL; - __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); mutex_unlock(&sn->pipefs_sb_lock); return err; } diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 1b2b84feeec6..4e92e2a50168 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -27,97 +27,60 @@ struct xdr_skb_reader { struct sk_buff *skb; unsigned int offset; + bool need_checksum; size_t count; __wsum csum; }; -typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, - size_t len); - /** * xdr_skb_read_bits - copy some data bits from skb to internal buffer * @desc: sk_buff copy helper * @to: copy destination * @len: number of bytes to copy * - * Possibly called several times to iterate over an sk_buff and copy - * data out of it. + * Possibly called several times to iterate over an sk_buff and copy data out of + * it. */ static size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len) { - if (len > desc->count) - len = desc->count; - if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len))) - return 0; - desc->count -= len; - desc->offset += len; - return len; -} + len = min(len, desc->count); + + if (desc->need_checksum) { + __wsum csum; + + csum = skb_copy_and_csum_bits(desc->skb, desc->offset, to, len); + desc->csum = csum_block_add(desc->csum, csum, desc->offset); + } else { + if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len))) + return 0; + } -/** - * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer - * @desc: sk_buff copy helper - * @to: copy destination - * @len: number of bytes to copy - * - * Same as skb_read_bits, but calculate a checksum at the same time. - */ -static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len) -{ - unsigned int pos; - __wsum csum2; - - if (len > desc->count) - len = desc->count; - pos = desc->offset; - csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len); - desc->csum = csum_block_add(desc->csum, csum2, pos); desc->count -= len; desc->offset += len; return len; } -/** - * xdr_partial_copy_from_skb - copy data out of an skb - * @xdr: target XDR buffer - * @base: starting offset - * @desc: sk_buff copy helper - * @copy_actor: virtual method for copying data - * - */ static ssize_t -xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor) +xdr_partial_copy_from_skb(struct xdr_buf *xdr, struct xdr_skb_reader *desc) { - struct page **ppage = xdr->pages; - unsigned int len, pglen = xdr->page_len; - ssize_t copied = 0; - size_t ret; - - len = xdr->head[0].iov_len; - if (base < len) { - len -= base; - ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); - copied += ret; - if (ret != len || !desc->count) - goto out; - base = 0; - } else - base -= len; - - if (unlikely(pglen == 0)) - goto copy_tail; - if (unlikely(base >= pglen)) { - base -= pglen; - goto copy_tail; - } - if (base || xdr->page_base) { - pglen -= base; - base += xdr->page_base; - ppage += base >> PAGE_SHIFT; - base &= ~PAGE_MASK; - } - do { + struct page **ppage = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + unsigned int poff = xdr->page_base & ~PAGE_MASK; + unsigned int pglen = xdr->page_len; + ssize_t copied = 0; + size_t ret; + + if (xdr->head[0].iov_len == 0) + return 0; + + ret = xdr_skb_read_bits(desc, xdr->head[0].iov_base, + xdr->head[0].iov_len); + if (ret != xdr->head[0].iov_len || !desc->count) + return ret; + copied += ret; + + while (pglen) { + unsigned int len = min(PAGE_SIZE - poff, pglen); char *kaddr; /* ACL likes to be lazy in allocating pages - ACLs @@ -126,36 +89,29 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb *ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN); if (unlikely(*ppage == NULL)) { if (copied == 0) - copied = -ENOMEM; - goto out; + return -ENOMEM; + return copied; } } - len = PAGE_SIZE; kaddr = kmap_atomic(*ppage); - if (base) { - len -= base; - if (pglen < len) - len = pglen; - ret = copy_actor(desc, kaddr + base, len); - base = 0; - } else { - if (pglen < len) - len = pglen; - ret = copy_actor(desc, kaddr, len); - } + ret = xdr_skb_read_bits(desc, kaddr + poff, len); flush_dcache_page(*ppage); kunmap_atomic(kaddr); + copied += ret; if (ret != len || !desc->count) - goto out; + return copied; ppage++; - } while ((pglen -= len) != 0); -copy_tail: - len = xdr->tail[0].iov_len; - if (base < len) - copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); -out: + pglen -= len; + poff = 0; + } + + if (xdr->tail[0].iov_len) { + copied += xdr_skb_read_bits(desc, xdr->tail[0].iov_base, + xdr->tail[0].iov_len); + } + return copied; } @@ -169,17 +125,22 @@ out: */ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) { - struct xdr_skb_reader desc; - - desc.skb = skb; - desc.offset = 0; - desc.count = skb->len - desc.offset; + struct xdr_skb_reader desc = { + .skb = skb, + .count = skb->len - desc.offset, + }; - if (skb_csum_unnecessary(skb)) - goto no_checksum; + if (skb_csum_unnecessary(skb)) { + if (xdr_partial_copy_from_skb(xdr, &desc) < 0) + return -1; + if (desc.count) + return -1; + return 0; + } + desc.need_checksum = true; desc.csum = csum_partial(skb->data, desc.offset, skb->csum); - if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0) + if (xdr_partial_copy_from_skb(xdr, &desc) < 0) return -1; if (desc.offset != skb->len) { __wsum csum2; @@ -194,14 +155,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev, skb); return 0; -no_checksum: - if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0) - return -1; - if (desc.count) - return -1; - return 0; } -EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr); static inline int xprt_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 9c93b854e809..b1fab3a69544 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -751,14 +751,16 @@ void svc_pool_wake_idle_thread(struct svc_pool *pool) WRITE_ONCE(rqstp->rq_qtime, ktime_get()); if (!task_is_running(rqstp->rq_task)) { wake_up_process(rqstp->rq_task); - trace_svc_wake_up(rqstp->rq_task->pid); + trace_svc_pool_thread_wake(pool, rqstp->rq_task->pid); percpu_counter_inc(&pool->sp_threads_woken); + } else { + trace_svc_pool_thread_running(pool, rqstp->rq_task->pid); } rcu_read_unlock(); return; } rcu_read_unlock(); - + trace_svc_pool_thread_noidle(pool, 0); } EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread); @@ -1332,6 +1334,9 @@ svc_process_common(struct svc_rqst *rqstp) int pr, rc; __be32 *p; + /* Reset the accept_stat for the RPC */ + rqstp->rq_accept_statp = NULL; + /* Will be turned off only when NFSv4 Sessions are used */ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); clear_bit(RQ_DROPME, &rqstp->rq_flags); @@ -1373,8 +1378,6 @@ svc_process_common(struct svc_rqst *rqstp) case SVC_GARBAGE: rqstp->rq_auth_stat = rpc_autherr_badcred; goto err_bad_auth; - case SVC_SYSERR: - goto err_system_err; case SVC_DENIED: goto err_bad_auth; case SVC_CLOSE: @@ -1385,7 +1388,8 @@ svc_process_common(struct svc_rqst *rqstp) goto sendit; default: pr_warn_once("Unexpected svc_auth_status (%d)\n", auth_res); - goto err_system_err; + rqstp->rq_auth_stat = rpc_autherr_failed; + goto err_bad_auth; } if (progp == NULL) @@ -1512,12 +1516,6 @@ err_bad_proc: serv->sv_stats->rpcbadfmt++; *rqstp->rq_accept_statp = rpc_proc_unavail; goto sendit; - -err_system_err: - if (serv->sv_stats) - serv->sv_stats->rpcbadfmt++; - *rqstp->rq_accept_statp = rpc_system_err; - goto sendit; } /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e1c85123b445..46c156b121db 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1197,7 +1197,7 @@ err_noclose: * that the pages backing @xdr are unchanging. */ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp, - rpc_fraghdr marker, unsigned int *sentp) + rpc_fraghdr marker, int *sentp) { struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES, @@ -1247,8 +1247,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) struct xdr_buf *xdr = &rqstp->rq_res; rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len); - unsigned int sent; - int err; + int sent, err; svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt); rqstp->rq_xprt_ctxt = NULL; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 2ea00e354ba6..1346fdf33835 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -993,21 +993,18 @@ EXPORT_SYMBOL_GPL(xdr_init_encode); * xdr_init_encode_pages - Initialize an xdr_stream for encoding into pages * @xdr: pointer to xdr_stream struct * @buf: pointer to XDR buffer into which to encode data - * @pages: list of pages to decode into - * @rqst: pointer to controlling rpc_rqst, for debugging * */ -void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, - struct page **pages, struct rpc_rqst *rqst) +void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf) { xdr_reset_scratch_buffer(xdr); xdr->buf = buf; - xdr->page_ptr = pages; + xdr->page_ptr = buf->pages; xdr->iov = NULL; - xdr->p = page_address(*pages); + xdr->p = page_address(*xdr->page_ptr); xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); - xdr->rqst = rqst; + xdr->rqst = NULL; } EXPORT_SYMBOL_GPL(xdr_init_encode_pages); diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 8ee0c07d00e9..ffe577bf6b51 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -704,8 +704,10 @@ static void tipc_topsrv_stop(struct net *net) for (id = 0; srv->idr_in_use; id++) { con = idr_find(&srv->conn_idr, id); if (con) { + conn_get(con); spin_unlock_bh(&srv->idr_lock); tipc_conn_close(con); + conn_put(con); spin_lock_bh(&srv->idr_lock); } } diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 65b0da6fdf6a..095cf31bae0b 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -512,9 +512,8 @@ static int tls_strp_read_sock(struct tls_strparser *strp) if (inq < strp->stm.full_len) return tls_strp_read_copy(strp, true); + tls_strp_load_anchor_with_queue(strp, inq); if (!strp->stm.full_len) { - tls_strp_load_anchor_with_queue(strp, inq); - sz = tls_rx_msg_size(strp, strp->anchor); if (sz < 0) { tls_strp_abort_strp(strp, sz); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2e7a3034e965..1053662725f8 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -407,6 +407,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept); static bool vsock_use_local_transport(unsigned int remote_cid) { + lockdep_assert_held(&vsock_register_mutex); + if (!transport_local) return false; @@ -464,6 +466,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) remote_flags = vsk->remote_addr.svm_flags; + mutex_lock(&vsock_register_mutex); + switch (sk->sk_type) { case SOCK_DGRAM: new_transport = transport_dgram; @@ -479,12 +483,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) new_transport = transport_h2g; break; default: - return -ESOCKTNOSUPPORT; + ret = -ESOCKTNOSUPPORT; + goto err; } if (vsk->transport) { - if (vsk->transport == new_transport) - return 0; + if (vsk->transport == new_transport) { + ret = 0; + goto err; + } /* transport->release() must be called with sock lock acquired. * This path can only be taken during vsock_connect(), where we @@ -508,8 +515,16 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) /* We increase the module refcnt to prevent the transport unloading * while there are open sockets assigned to it. */ - if (!new_transport || !try_module_get(new_transport->module)) - return -ENODEV; + if (!new_transport || !try_module_get(new_transport->module)) { + ret = -ENODEV; + goto err; + } + + /* It's safe to release the mutex after a successful try_module_get(). + * Whichever transport `new_transport` points at, it won't go away until + * the last module_put() below or in vsock_deassign_transport(). + */ + mutex_unlock(&vsock_register_mutex); if (sk->sk_type == SOCK_SEQPACKET) { if (!new_transport->seqpacket_allow || @@ -528,12 +543,31 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) vsk->transport = new_transport; return 0; +err: + mutex_unlock(&vsock_register_mutex); + return ret; } EXPORT_SYMBOL_GPL(vsock_assign_transport); +/* + * Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks. + * Otherwise we may race with module removal. Do not use on `vsk->transport`. + */ +static u32 vsock_registered_transport_cid(const struct vsock_transport **transport) +{ + u32 cid = VMADDR_CID_ANY; + + mutex_lock(&vsock_register_mutex); + if (*transport) + cid = (*transport)->get_local_cid(); + mutex_unlock(&vsock_register_mutex); + + return cid; +} + bool vsock_find_cid(unsigned int cid) { - if (transport_g2h && cid == transport_g2h->get_local_cid()) + if (cid == vsock_registered_transport_cid(&transport_g2h)) return true; if (transport_h2g && cid == VMADDR_CID_HOST) @@ -2536,18 +2570,19 @@ static long vsock_dev_do_ioctl(struct file *filp, unsigned int cmd, void __user *ptr) { u32 __user *p = ptr; - u32 cid = VMADDR_CID_ANY; int retval = 0; + u32 cid; switch (cmd) { case IOCTL_VM_SOCKETS_GET_LOCAL_CID: /* To be compatible with the VMCI behavior, we prioritize the * guest CID instead of well-know host CID (VMADDR_CID_HOST). */ - if (transport_g2h) - cid = transport_g2h->get_local_cid(); - else if (transport_h2g) - cid = transport_h2g->get_local_cid(); + cid = vsock_registered_transport_cid(&transport_g2h); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_h2g); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_local); if (put_user(cid, p) != 0) retval = -EFAULT; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 85f139016da2..50202d170f3a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -229,6 +229,7 @@ static int validate_beacon_head(const struct nlattr *attr, unsigned int len = nla_len(attr); const struct element *elem; const struct ieee80211_mgmt *mgmt = (void *)data; + const struct ieee80211_ext *ext; unsigned int fixedlen, hdrlen; bool s1g_bcn; @@ -237,8 +238,10 @@ static int validate_beacon_head(const struct nlattr *attr, s1g_bcn = ieee80211_is_s1g_beacon(mgmt->frame_control); if (s1g_bcn) { - fixedlen = offsetof(struct ieee80211_ext, - u.s1g_beacon.variable); + ext = (struct ieee80211_ext *)mgmt; + fixedlen = + offsetof(struct ieee80211_ext, u.s1g_beacon.variable) + + ieee80211_s1g_optional_len(ext->frame_control); hdrlen = offsetof(struct ieee80211_ext, u.s1g_beacon); } else { fixedlen = offsetof(struct ieee80211_mgmt, diff --git a/net/wireless/util.c b/net/wireless/util.c index ed868c0f7ca8..1ad5a6bdfd75 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -820,6 +820,52 @@ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr) } EXPORT_SYMBOL(ieee80211_is_valid_amsdu); + +/* + * Detects if an MSDU frame was maliciously converted into an A-MSDU + * frame by an adversary. This is done by parsing the received frame + * as if it were a regular MSDU, even though the A-MSDU flag is set. + * + * For non-mesh interfaces, detection involves checking whether the + * payload, when interpreted as an MSDU, begins with a valid RFC1042 + * header. This is done by comparing the A-MSDU subheader's destination + * address to the start of the RFC1042 header. + * + * For mesh interfaces, the MSDU includes a 6-byte Mesh Control field + * and an optional variable-length Mesh Address Extension field before + * the RFC1042 header. The position of the RFC1042 header must therefore + * be calculated based on the mesh header length. + * + * Since this function intentionally parses an A-MSDU frame as an MSDU, + * it only assumes that the A-MSDU subframe header is present, and + * beyond this it performs its own bounds checks under the assumption + * that the frame is instead parsed as a non-aggregated MSDU. + */ +static bool +is_amsdu_aggregation_attack(struct ethhdr *eth, struct sk_buff *skb, + enum nl80211_iftype iftype) +{ + int offset; + + /* Non-mesh case can be directly compared */ + if (iftype != NL80211_IFTYPE_MESH_POINT) + return ether_addr_equal(eth->h_dest, rfc1042_header); + + offset = __ieee80211_get_mesh_hdrlen(eth->h_dest[0]); + if (offset == 6) { + /* Mesh case with empty address extension field */ + return ether_addr_equal(eth->h_source, rfc1042_header); + } else if (offset + ETH_ALEN <= skb->len) { + /* Mesh case with non-empty address extension field */ + u8 temp[ETH_ALEN]; + + skb_copy_bits(skb, offset, temp, ETH_ALEN); + return ether_addr_equal(temp, rfc1042_header); + } + + return false; +} + void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, const unsigned int extra_headroom, @@ -861,8 +907,10 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, /* the last MSDU has no padding */ if (subframe_len > remaining) goto purge; - /* mitigate A-MSDU aggregation injection attacks */ - if (ether_addr_equal(hdr.eth.h_dest, rfc1042_header)) + /* mitigate A-MSDU aggregation injection attacks, to be + * checked when processing first subframe (offset == 0). + */ + if (offset == 0 && is_amsdu_aggregation_attack(&hdr.eth, skb, iftype)) goto purge; offset += sizeof(struct ethhdr); diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 81fd486b5e56..d2819baea414 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -305,7 +305,6 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, return -EINVAL; } - xfrm_set_type_offload(x); if (!x->type_offload) { NL_SET_ERR_MSG(extack, "Type doesn't support offload"); dev_put(dev); diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index cb1e12740c87..330a05286a56 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -875,7 +875,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], return -EINVAL; } - if (p.collect_md) { + if (p.collect_md || xi->p.collect_md) { NL_SET_ERR_MSG(extack, "collect_md can't be changed"); return -EINVAL; } @@ -886,11 +886,6 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], } else { if (xi->dev != dev) return -EEXIST; - if (xi->p.collect_md) { - NL_SET_ERR_MSG(extack, - "device can't be changed to collect_md"); - return -EINVAL; - } } return xfrmi_update(xi, &p); diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 907c3ccb440d..43fdc6ed8dd1 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -97,7 +97,7 @@ static int ipcomp_input_done2(struct sk_buff *skb, int err) struct ip_comp_hdr *ipch = ip_comp_hdr(skb); const int plen = skb->len; - skb_reset_transport_header(skb); + skb->transport_header = skb->network_header + sizeof(*ipch); return ipcomp_post_acomp(skb, err, 0) ?: skb->len < (plen + sizeof(ip_comp_hdr)) ? -EINVAL : @@ -313,7 +313,6 @@ void ipcomp_destroy(struct xfrm_state *x) struct ipcomp_data *ipcd = x->data; if (!ipcd) return; - xfrm_state_delete_tunnel(x); ipcomp_free_data(ipcd); kfree(ipcd); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 77cc418ad69e..97ff756191ba 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -424,11 +424,10 @@ void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, } EXPORT_SYMBOL(xfrm_unregister_type_offload); -void xfrm_set_type_offload(struct xfrm_state *x) +void xfrm_set_type_offload(struct xfrm_state *x, bool try_load) { const struct xfrm_type_offload *type = NULL; struct xfrm_state_afinfo *afinfo; - bool try_load = true; retry: afinfo = xfrm_state_get_afinfo(x->props.family); @@ -593,7 +592,7 @@ void xfrm_state_free(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_free); -static void ___xfrm_state_destroy(struct xfrm_state *x) +static void xfrm_state_gc_destroy(struct xfrm_state *x) { if (x->mode_cbs && x->mode_cbs->destroy_state) x->mode_cbs->destroy_state(x); @@ -607,6 +606,7 @@ static void ___xfrm_state_destroy(struct xfrm_state *x) kfree(x->coaddr); kfree(x->replay_esn); kfree(x->preplay_esn); + xfrm_unset_type_offload(x); if (x->type) { x->type->destructor(x); xfrm_put_type(x->type); @@ -631,7 +631,7 @@ static void xfrm_state_gc_task(struct work_struct *work) synchronize_rcu(); hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) - ___xfrm_state_destroy(x); + xfrm_state_gc_destroy(x); } static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) @@ -780,8 +780,6 @@ void xfrm_dev_state_free(struct xfrm_state *x) struct xfrm_dev_offload *xso = &x->xso; struct net_device *dev = READ_ONCE(xso->dev); - xfrm_unset_type_offload(x); - if (dev && dev->xfrmdev_ops) { spin_lock_bh(&xfrm_state_dev_gc_lock); if (!hlist_unhashed(&x->dev_gclist)) @@ -797,22 +795,18 @@ void xfrm_dev_state_free(struct xfrm_state *x) } #endif -void __xfrm_state_destroy(struct xfrm_state *x, bool sync) +void __xfrm_state_destroy(struct xfrm_state *x) { WARN_ON(x->km.state != XFRM_STATE_DEAD); - if (sync) { - synchronize_rcu(); - ___xfrm_state_destroy(x); - } else { - spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->gclist, &xfrm_state_gc_list); - spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&xfrm_state_gc_work); - } + spin_lock_bh(&xfrm_state_gc_lock); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); + spin_unlock_bh(&xfrm_state_gc_lock); + schedule_work(&xfrm_state_gc_work); } EXPORT_SYMBOL(__xfrm_state_destroy); +static void xfrm_state_delete_tunnel(struct xfrm_state *x); int __xfrm_state_delete(struct xfrm_state *x) { struct net *net = xs_net(x); @@ -840,6 +834,8 @@ int __xfrm_state_delete(struct xfrm_state *x) xfrm_dev_state_delete(x); + xfrm_state_delete_tunnel(x); + /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that * is what we are dropping here. @@ -921,7 +917,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool } #endif -int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync) +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) { int i, err = 0, cnt = 0; @@ -943,10 +939,7 @@ restart: err = xfrm_state_delete(x); xfrm_audit_state_delete(x, err ? 0 : 1, task_valid); - if (sync) - xfrm_state_put_sync(x); - else - xfrm_state_put(x); + xfrm_state_put(x); if (!err) cnt++; @@ -1307,14 +1300,8 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision) static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, const struct flowi *fl, unsigned short family, struct xfrm_state **best, int *acq_in_progress, - int *error) + int *error, unsigned int pcpu_id) { - /* We need the cpu id just as a lookup key, - * we don't require it to be stable. - */ - unsigned int pcpu_id = get_cpu(); - put_cpu(); - /* Resolution logic: * 1. There is a valid state with matching selector. Done. * 2. Valid state with inappropriate selector. Skip. @@ -1381,14 +1368,15 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, /* We need the cpu id just as a lookup key, * we don't require it to be stable. */ - pcpu_id = get_cpu(); - put_cpu(); + pcpu_id = raw_smp_processor_id(); to_put = NULL; sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); rcu_read_lock(); + xfrm_hash_ptrs_get(net, &state_ptrs); + hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && @@ -1400,7 +1388,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) xfrm_state_look_at(pol, x, fl, encap_family, - &best, &acquire_in_progress, &error); + &best, &acquire_in_progress, &error, pcpu_id); } if (best) @@ -1417,7 +1405,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) xfrm_state_look_at(pol, x, fl, family, - &best, &acquire_in_progress, &error); + &best, &acquire_in_progress, &error, pcpu_id); } cached: @@ -1429,8 +1417,6 @@ cached: else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */ WARN_ON(1); - xfrm_hash_ptrs_get(net, &state_ptrs); - h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask); hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) { #ifdef CONFIG_XFRM_OFFLOAD @@ -1460,7 +1446,7 @@ cached: tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) xfrm_state_look_at(pol, x, fl, family, - &best, &acquire_in_progress, &error); + &best, &acquire_in_progress, &error, pcpu_id); } if (best || acquire_in_progress) goto found; @@ -1495,7 +1481,7 @@ cached: tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) xfrm_state_look_at(pol, x, fl, family, - &best, &acquire_in_progress, &error); + &best, &acquire_in_progress, &error, pcpu_id); } found: @@ -3077,20 +3063,17 @@ void xfrm_flush_gc(void) } EXPORT_SYMBOL(xfrm_flush_gc); -/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ -void xfrm_state_delete_tunnel(struct xfrm_state *x) +static void xfrm_state_delete_tunnel(struct xfrm_state *x) { if (x->tunnel) { struct xfrm_state *t = x->tunnel; - if (atomic_read(&t->tunnel_users) == 2) + if (atomic_dec_return(&t->tunnel_users) == 1) xfrm_state_delete(t); - atomic_dec(&t->tunnel_users); - xfrm_state_put_sync(t); + xfrm_state_put(t); x->tunnel = NULL; } } -EXPORT_SYMBOL(xfrm_state_delete_tunnel); u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) { @@ -3295,8 +3278,8 @@ void xfrm_state_fini(struct net *net) unsigned int sz; flush_work(&net->xfrm.state_hash_work); + xfrm_state_flush(net, IPSEC_PROTO_ANY, false); flush_work(&xfrm_state_gc_work); - xfrm_state_flush(net, 0, false, true); WARN_ON(!list_empty(&net->xfrm.state_all)); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 59f258daf830..684239018bec 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -977,6 +977,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, /* override default values from above */ xfrm_update_ae_params(x, attrs, 0); + xfrm_set_type_offload(x, attrs[XFRMA_OFFLOAD_DEV]); /* configure the hardware if offload is requested */ if (attrs[XFRMA_OFFLOAD_DEV]) { err = xfrm_dev_state_add(net, x, @@ -2634,7 +2635,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_usersa_flush *p = nlmsg_data(nlh); int err; - err = xfrm_state_flush(net, p->proto, true, false); + err = xfrm_state_flush(net, p->proto, true); if (err) { if (err == -ESRCH) /* empty table */ return 0; diff --git a/rust/Makefile b/rust/Makefile index 27dec7904c3a..115b63b7d1e3 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -194,6 +194,7 @@ quiet_cmd_rustdoc_test = RUSTDOC T $< RUST_MODFILE=test.rs \ OBJTREE=$(abspath $(objtree)) \ $(RUSTDOC) --test $(rust_common_flags) \ + -Zcrate-attr='feature(used_with_arg)' \ @$(objtree)/include/generated/rustc_cfg \ $(rustc_target_flags) $(rustdoc_test_target_flags) \ $(rustdoc_test_quiet) \ diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index 624d7a4c83ea..14c1aa402951 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -66,7 +66,7 @@ impl<T: drm::Driver> Device<T> { open: Some(drm::File::<T::File>::open_callback), postclose: Some(drm::File::<T::File>::postclose_callback), unload: None, - release: None, + release: Some(Self::release), master_set: None, master_drop: None, debugfs_init: None, @@ -162,6 +162,16 @@ impl<T: drm::Driver> Device<T> { // SAFETY: `ptr` is valid by the safety requirements of this function. unsafe { &*ptr.cast() } } + + extern "C" fn release(ptr: *mut bindings::drm_device) { + // SAFETY: `ptr` is a valid pointer to a `struct drm_device` and embedded in `Self`. + let this = unsafe { Self::from_drm_device(ptr) }; + + // SAFETY: + // - When `release` runs it is guaranteed that there is no further access to `this`. + // - `this` is valid for dropping. + unsafe { core::ptr::drop_in_place(this) }; + } } impl<T: drm::Driver> Deref for Device<T> { diff --git a/rust/kernel/drm/driver.rs b/rust/kernel/drm/driver.rs index acb638086131..af93d46d03d3 100644 --- a/rust/kernel/drm/driver.rs +++ b/rust/kernel/drm/driver.rs @@ -10,7 +10,6 @@ use crate::{ drm, error::{to_result, Result}, prelude::*, - str::CStr, types::ARef, }; use macros::vtable; diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index 2494c96e105f..4fe621f35716 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -202,7 +202,7 @@ macro_rules! module_firmware { }; #[link_section = ".modinfo"] - #[used] + #[used(compiler)] static __MODULE_FIRMWARE: [u8; $($builder)*::create(__MODULE_FIRMWARE_PREFIX) .build_length()] = $($builder)*::create(__MODULE_FIRMWARE_PREFIX).build(); }; diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 8d228c237954..21ef202ab0db 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -231,14 +231,14 @@ macro_rules! try_init { ($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? { $($fields:tt)* }) => { - ::pin_init::try_init!($(&$this in)? $t $(::<$($generics),* $(,)?>)? { + ::pin_init::try_init!($(&$this in)? $t $(::<$($generics),*>)? { $($fields)* }? $crate::error::Error) }; ($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? { $($fields:tt)* }? $err:ty) => { - ::pin_init::try_init!($(&$this in)? $t $(::<$($generics),* $(,)?>)? { + ::pin_init::try_init!($(&$this in)? $t $(::<$($generics),*>)? { $($fields)* }? $err) }; @@ -291,14 +291,14 @@ macro_rules! try_pin_init { ($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? { $($fields:tt)* }) => { - ::pin_init::try_pin_init!($(&$this in)? $t $(::<$($generics),* $(,)?>)? { + ::pin_init::try_pin_init!($(&$this in)? $t $(::<$($generics),*>)? { $($fields)* }? $crate::error::Error) }; ($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? { $($fields:tt)* }? $err:ty) => { - ::pin_init::try_pin_init!($(&$this in)? $t $(::<$($generics),* $(,)?>)? { + ::pin_init::try_pin_init!($(&$this in)? $t $(::<$($generics),*>)? { $($fields)* }? $err) }; diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 4b8cdcb21e77..b9e65905e121 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -302,7 +302,7 @@ macro_rules! kunit_unsafe_test_suite { is_init: false, }; - #[used] + #[used(compiler)] #[allow(unused_unsafe)] #[cfg_attr(not(target_os = "macos"), link_section = ".kunit_test_suites")] static mut KUNIT_TEST_SUITE_ENTRY: *const ::kernel::bindings::kunit_suite = diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 6b4774b2b1c3..e13d6ed88fa6 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -34,6 +34,9 @@ // Expected to become stable. #![feature(arbitrary_self_types)] // +// To be determined. +#![feature(used_with_arg)] +// // `feature(derive_coerce_pointee)` is expected to become stable. Before Rust // 1.84.0, it did not exist, so enable the predecessor features. #![cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, feature(derive_coerce_pointee))] diff --git a/rust/macros/module.rs b/rust/macros/module.rs index 2ddd2eeb2852..75efc6eeeafc 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -57,7 +57,7 @@ impl<'a> ModInfoBuilder<'a> { {cfg} #[doc(hidden)] #[cfg_attr(not(target_os = \"macos\"), link_section = \".modinfo\")] - #[used] + #[used(compiler)] pub static __{module}_{counter}: [u8; {length}] = *{string}; ", cfg = if builtin { @@ -249,7 +249,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { // key or a new section. For the moment, keep it simple. #[cfg(MODULE)] #[doc(hidden)] - #[used] + #[used(compiler)] static __IS_RUST_MODULE: () = (); static mut __MOD: ::core::mem::MaybeUninit<{type_}> = @@ -273,7 +273,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { #[cfg(MODULE)] #[doc(hidden)] - #[used] + #[used(compiler)] #[link_section = \".init.data\"] static __UNIQUE_ID___addressable_init_module: unsafe extern \"C\" fn() -> i32 = init_module; @@ -293,7 +293,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { #[cfg(MODULE)] #[doc(hidden)] - #[used] + #[used(compiler)] #[link_section = \".exit.data\"] static __UNIQUE_ID___addressable_cleanup_module: extern \"C\" fn() = cleanup_module; @@ -303,7 +303,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { #[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))] #[doc(hidden)] #[link_section = \"{initcall_section}\"] - #[used] + #[used(compiler)] pub static __{ident}_initcall: extern \"C\" fn() -> ::kernel::ffi::c_int = __{ident}_init; diff --git a/samples/damon/mtier.c b/samples/damon/mtier.c index 36d2cd933f5a..c94254b77fc9 100644 --- a/samples/damon/mtier.c +++ b/samples/damon/mtier.c @@ -164,8 +164,12 @@ static int damon_sample_mtier_enable_store( if (enable == enabled) return 0; - if (enable) - return damon_sample_mtier_start(); + if (enable) { + err = damon_sample_mtier_start(); + if (err) + enable = false; + return err; + } damon_sample_mtier_stop(); return 0; } diff --git a/samples/damon/prcl.c b/samples/damon/prcl.c index 056b1b21a0fe..5597e6a08ab2 100644 --- a/samples/damon/prcl.c +++ b/samples/damon/prcl.c @@ -122,8 +122,12 @@ static int damon_sample_prcl_enable_store( if (enable == enabled) return 0; - if (enable) - return damon_sample_prcl_start(); + if (enable) { + err = damon_sample_prcl_start(); + if (err) + enable = false; + return err; + } damon_sample_prcl_stop(); return 0; } diff --git a/samples/damon/wsse.c b/samples/damon/wsse.c index 11be25803274..e20238a249e7 100644 --- a/samples/damon/wsse.c +++ b/samples/damon/wsse.c @@ -102,8 +102,12 @@ static int damon_sample_wsse_enable_store( if (enable == enabled) return 0; - if (enable) - return damon_sample_wsse_start(); + if (enable) { + err = damon_sample_wsse_start(); + if (err) + enable = false; + return err; + } damon_sample_wsse_stop(); return 0; } diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a6461ea411f7..ba71b27aa363 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -312,10 +312,11 @@ $(obj)/%.lst: $(obj)/%.c FORCE # - Stable since Rust 1.82.0: `feature(asm_const)`, `feature(raw_ref_op)`. # - Stable since Rust 1.87.0: `feature(asm_goto)`. # - Expected to become stable: `feature(arbitrary_self_types)`. +# - To be determined: `feature(used_with_arg)`. # # Please see https://github.com/Rust-for-Linux/linux/issues/2 for details on # the unstable features in use. -rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,raw_ref_op +rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,raw_ref_op,used_with_arg # `--out-dir` is required to avoid temporaries being created by `rustc` in the # current working directory, which may be not accessible in the out-of-tree diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index fd6bd69c5096..f795302ddfa8 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -20,6 +20,7 @@ #include <linux/of_fdt.h> #include <linux/page_ext.h> #include <linux/radix-tree.h> +#include <linux/maple_tree.h> #include <linux/slab.h> #include <linux/threads.h> #include <linux/vmalloc.h> @@ -93,6 +94,12 @@ LX_GDBPARSED(RADIX_TREE_MAP_SIZE) LX_GDBPARSED(RADIX_TREE_MAP_SHIFT) LX_GDBPARSED(RADIX_TREE_MAP_MASK) +/* linux/maple_tree.h */ +LX_VALUE(MAPLE_NODE_SLOTS) +LX_VALUE(MAPLE_RANGE64_SLOTS) +LX_VALUE(MAPLE_ARANGE64_SLOTS) +LX_GDBPARSED(MAPLE_NODE_MASK) + /* linux/vmalloc.h */ LX_VALUE(VM_IOREMAP) LX_VALUE(VM_ALLOC) diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py index 616a5f26377a..f4f715a8f0e3 100644 --- a/scripts/gdb/linux/interrupts.py +++ b/scripts/gdb/linux/interrupts.py @@ -7,7 +7,7 @@ import gdb from linux import constants from linux import cpus from linux import utils -from linux import radixtree +from linux import mapletree irq_desc_type = utils.CachedType("struct irq_desc") @@ -23,12 +23,12 @@ def irqd_is_level(desc): def show_irq_desc(prec, irq): text = "" - desc = radixtree.lookup(gdb.parse_and_eval("&irq_desc_tree"), irq) + desc = mapletree.mtree_load(gdb.parse_and_eval("&sparse_irqs"), irq) if desc is None: return text - desc = desc.cast(irq_desc_type.get_type()) - if desc is None: + desc = desc.cast(irq_desc_type.get_type().pointer()) + if desc == 0: return text if irq_settings_is_hidden(desc): @@ -110,7 +110,7 @@ def x86_show_mce(prec, var, pfx, desc): pvar = gdb.parse_and_eval(var) text = "%*s: " % (prec, pfx) for cpu in cpus.each_online_cpu(): - text += "%10u " % (cpus.per_cpu(pvar, cpu)) + text += "%10u " % (cpus.per_cpu(pvar, cpu).dereference()) text += " %s\n" % (desc) return text @@ -142,7 +142,7 @@ def x86_show_interupts(prec): if constants.LX_CONFIG_X86_MCE: text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions") - text == x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls") + text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls") text += show_irq_err_count(prec) @@ -221,8 +221,8 @@ class LxInterruptList(gdb.Command): gdb.write("CPU%-8d" % cpu) gdb.write("\n") - if utils.gdb_eval_or_none("&irq_desc_tree") is None: - return + if utils.gdb_eval_or_none("&sparse_irqs") is None: + raise gdb.GdbError("Unable to find the sparse IRQ tree, is CONFIG_SPARSE_IRQ enabled?") for irq in range(nr_irqs): gdb.write(show_irq_desc(prec, irq)) diff --git a/scripts/gdb/linux/mapletree.py b/scripts/gdb/linux/mapletree.py new file mode 100644 index 000000000000..d52d51c0a03f --- /dev/null +++ b/scripts/gdb/linux/mapletree.py @@ -0,0 +1,252 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Maple tree helpers +# +# Copyright (c) 2025 Broadcom +# +# Authors: +# Florian Fainelli <florian.fainelli@broadcom.com> + +import gdb + +from linux import utils +from linux import constants +from linux import xarray + +maple_tree_root_type = utils.CachedType("struct maple_tree") +maple_node_type = utils.CachedType("struct maple_node") +maple_enode_type = utils.CachedType("void") + +maple_dense = 0 +maple_leaf_64 = 1 +maple_range_64 = 2 +maple_arange_64 = 3 + +class Mas(object): + ma_active = 0 + ma_start = 1 + ma_root = 2 + ma_none = 3 + ma_pause = 4 + ma_overflow = 5 + ma_underflow = 6 + ma_error = 7 + + def __init__(self, mt, first, end): + if mt.type == maple_tree_root_type.get_type().pointer(): + self.tree = mt.dereference() + elif mt.type != maple_tree_root_type.get_type(): + raise gdb.GdbError("must be {} not {}" + .format(maple_tree_root_type.get_type().pointer(), mt.type)) + self.tree = mt + self.index = first + self.last = end + self.node = None + self.status = self.ma_start + self.min = 0 + self.max = -1 + + def is_start(self): + # mas_is_start() + return self.status == self.ma_start + + def is_ptr(self): + # mas_is_ptr() + return self.status == self.ma_root + + def is_none(self): + # mas_is_none() + return self.status == self.ma_none + + def root(self): + # mas_root() + return self.tree['ma_root'].cast(maple_enode_type.get_type().pointer()) + + def start(self): + # mas_start() + if self.is_start() is False: + return None + + self.min = 0 + self.max = ~0 + + while True: + self.depth = 0 + root = self.root() + if xarray.xa_is_node(root): + self.depth = 0 + self.status = self.ma_active + self.node = mte_safe_root(root) + self.offset = 0 + if mte_dead_node(self.node) is True: + continue + + return None + + self.node = None + # Empty tree + if root is None: + self.status = self.ma_none + self.offset = constants.LX_MAPLE_NODE_SLOTS + return None + + # Single entry tree + self.status = self.ma_root + self.offset = constants.LX_MAPLE_NODE_SLOTS + + if self.index != 0: + return None + + return root + + return None + + def reset(self): + # mas_reset() + self.status = self.ma_start + self.node = None + +def mte_safe_root(node): + if node.type != maple_enode_type.get_type().pointer(): + raise gdb.GdbError("{} must be {} not {}" + .format(mte_safe_root.__name__, maple_enode_type.get_type().pointer(), node.type)) + ulong_type = utils.get_ulong_type() + indirect_ptr = node.cast(ulong_type) & ~0x2 + val = indirect_ptr.cast(maple_enode_type.get_type().pointer()) + return val + +def mte_node_type(entry): + ulong_type = utils.get_ulong_type() + val = None + if entry.type == maple_enode_type.get_type().pointer(): + val = entry.cast(ulong_type) + elif entry.type == ulong_type: + val = entry + else: + raise gdb.GdbError("{} must be {} not {}" + .format(mte_node_type.__name__, maple_enode_type.get_type().pointer(), entry.type)) + return (val >> 0x3) & 0xf + +def ma_dead_node(node): + if node.type != maple_node_type.get_type().pointer(): + raise gdb.GdbError("{} must be {} not {}" + .format(ma_dead_node.__name__, maple_node_type.get_type().pointer(), node.type)) + ulong_type = utils.get_ulong_type() + parent = node['parent'] + indirect_ptr = node['parent'].cast(ulong_type) & ~constants.LX_MAPLE_NODE_MASK + return indirect_ptr == node + +def mte_to_node(enode): + ulong_type = utils.get_ulong_type() + if enode.type == maple_enode_type.get_type().pointer(): + indirect_ptr = enode.cast(ulong_type) + elif enode.type == ulong_type: + indirect_ptr = enode + else: + raise gdb.GdbError("{} must be {} not {}" + .format(mte_to_node.__name__, maple_enode_type.get_type().pointer(), enode.type)) + indirect_ptr = indirect_ptr & ~constants.LX_MAPLE_NODE_MASK + return indirect_ptr.cast(maple_node_type.get_type().pointer()) + +def mte_dead_node(enode): + if enode.type != maple_enode_type.get_type().pointer(): + raise gdb.GdbError("{} must be {} not {}" + .format(mte_dead_node.__name__, maple_enode_type.get_type().pointer(), enode.type)) + node = mte_to_node(enode) + return ma_dead_node(node) + +def ma_is_leaf(tp): + result = tp < maple_range_64 + return tp < maple_range_64 + +def mt_pivots(t): + if t == maple_dense: + return 0 + elif t == maple_leaf_64 or t == maple_range_64: + return constants.LX_MAPLE_RANGE64_SLOTS - 1 + elif t == maple_arange_64: + return constants.LX_MAPLE_ARANGE64_SLOTS - 1 + +def ma_pivots(node, t): + if node.type != maple_node_type.get_type().pointer(): + raise gdb.GdbError("{}: must be {} not {}" + .format(ma_pivots.__name__, maple_node_type.get_type().pointer(), node.type)) + if t == maple_arange_64: + return node['ma64']['pivot'] + elif t == maple_leaf_64 or t == maple_range_64: + return node['mr64']['pivot'] + else: + return None + +def ma_slots(node, tp): + if node.type != maple_node_type.get_type().pointer(): + raise gdb.GdbError("{}: must be {} not {}" + .format(ma_slots.__name__, maple_node_type.get_type().pointer(), node.type)) + if tp == maple_arange_64: + return node['ma64']['slot'] + elif tp == maple_range_64 or tp == maple_leaf_64: + return node['mr64']['slot'] + elif tp == maple_dense: + return node['slot'] + else: + return None + +def mt_slot(mt, slots, offset): + ulong_type = utils.get_ulong_type() + return slots[offset].cast(ulong_type) + +def mtree_lookup_walk(mas): + ulong_type = utils.get_ulong_type() + n = mas.node + + while True: + node = mte_to_node(n) + tp = mte_node_type(n) + pivots = ma_pivots(node, tp) + end = mt_pivots(tp) + offset = 0 + while True: + if pivots[offset] >= mas.index: + break + if offset >= end: + break + offset += 1 + + slots = ma_slots(node, tp) + n = mt_slot(mas.tree, slots, offset) + if ma_dead_node(node) is True: + mas.reset() + return None + break + + if ma_is_leaf(tp) is True: + break + + return n + +def mtree_load(mt, index): + ulong_type = utils.get_ulong_type() + # MT_STATE(...) + mas = Mas(mt, index, index) + entry = None + + while True: + entry = mas.start() + if mas.is_none(): + return None + + if mas.is_ptr(): + if index != 0: + entry = None + return entry + + entry = mtree_lookup_walk(mas) + if entry is None and mas.is_start(): + continue + else: + break + + if xarray.xa_is_zero(entry): + return None + + return entry diff --git a/scripts/gdb/linux/vfs.py b/scripts/gdb/linux/vfs.py index b5fbb18ccb77..9e921b645a68 100644 --- a/scripts/gdb/linux/vfs.py +++ b/scripts/gdb/linux/vfs.py @@ -22,7 +22,7 @@ def dentry_name(d): if parent == d or parent == 0: return "" p = dentry_name(d['d_parent']) + "/" - return p + d['d_shortname']['string'].string() + return p + d['d_name']['name'].string() class DentryName(gdb.Function): """Return string of the full path of a dentry. diff --git a/scripts/gdb/linux/xarray.py b/scripts/gdb/linux/xarray.py new file mode 100644 index 000000000000..f4477b5def75 --- /dev/null +++ b/scripts/gdb/linux/xarray.py @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Xarray helpers +# +# Copyright (c) 2025 Broadcom +# +# Authors: +# Florian Fainelli <florian.fainelli@broadcom.com> + +import gdb + +from linux import utils +from linux import constants + +def xa_is_internal(entry): + ulong_type = utils.get_ulong_type() + return ((entry.cast(ulong_type) & 3) == 2) + +def xa_mk_internal(v): + return ((v << 2) | 2) + +def xa_is_zero(entry): + ulong_type = utils.get_ulong_type() + return entry.cast(ulong_type) == xa_mk_internal(257) + +def xa_is_node(entry): + ulong_type = utils.get_ulong_type() + return xa_is_internal(entry) and (entry.cast(ulong_type) > 4096) diff --git a/security/apparmor/file.c b/security/apparmor/file.c index d52a5b14dad4..f494217112c9 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -604,7 +604,7 @@ int aa_file_perm(const char *op, const struct cred *subj_cred, rcu_read_unlock(); /* TODO: label cross check */ - if (file->f_path.mnt && path_mediated_fs(file->f_path.dentry)) + if (path_mediated_fs(file->f_path.dentry)) error = __file_path_perm(op, subj_cred, label, flabel, file, request, denied, in_atomic); diff --git a/security/inode.c b/security/inode.c index 3913501621fa..43382ef8896e 100644 --- a/security/inode.c +++ b/security/inode.c @@ -112,18 +112,20 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode, struct dentry *dentry; struct inode *dir, *inode; int error; + bool pinned = false; if (!(mode & S_IFMT)) mode = (mode & S_IALLUGO) | S_IFREG; pr_debug("securityfs: creating file '%s'\n",name); - error = simple_pin_fs(&fs_type, &mount, &mount_count); - if (error) - return ERR_PTR(error); - - if (!parent) + if (!parent) { + error = simple_pin_fs(&fs_type, &mount, &mount_count); + if (error) + return ERR_PTR(error); + pinned = true; parent = mount->mnt_root; + } dir = d_inode(parent); @@ -159,7 +161,6 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode, inode->i_fop = fops; } d_instantiate(dentry, inode); - dget(dentry); inode_unlock(dir); return dentry; @@ -168,7 +169,8 @@ out1: dentry = ERR_PTR(error); out: inode_unlock(dir); - simple_release_fs(&mount, &mount_count); + if (pinned) + simple_release_fs(&mount, &mount_count); return dentry; } @@ -279,6 +281,12 @@ struct dentry *securityfs_create_symlink(const char *name, } EXPORT_SYMBOL_GPL(securityfs_create_symlink); +static void remove_one(struct dentry *victim) +{ + if (victim->d_parent == victim->d_sb->s_root) + simple_release_fs(&mount, &mount_count); +} + /** * securityfs_remove - removes a file or directory from the securityfs filesystem * @@ -291,43 +299,11 @@ EXPORT_SYMBOL_GPL(securityfs_create_symlink); * This function is required to be called in order for the file to be * removed. No automatic cleanup of files will happen when a module is * removed; you are responsible here. - */ -void securityfs_remove(struct dentry *dentry) -{ - struct inode *dir; - - if (IS_ERR_OR_NULL(dentry)) - return; - - dir = d_inode(dentry->d_parent); - inode_lock(dir); - if (simple_positive(dentry)) { - if (d_is_dir(dentry)) - simple_rmdir(dir, dentry); - else - simple_unlink(dir, dentry); - dput(dentry); - } - inode_unlock(dir); - simple_release_fs(&mount, &mount_count); -} -EXPORT_SYMBOL_GPL(securityfs_remove); - -static void remove_one(struct dentry *victim) -{ - simple_release_fs(&mount, &mount_count); -} - -/** - * securityfs_recursive_remove - recursively removes a file or directory - * - * @dentry: a pointer to a the dentry of the file or directory to be removed. * - * This function recursively removes a file or directory in securityfs that was - * previously created with a call to another securityfs function (like - * securityfs_create_file() or variants thereof.) + * AV: when applied to directory it will take all children out; no need to call + * it for descendents if ancestor is getting killed. */ -void securityfs_recursive_remove(struct dentry *dentry) +void securityfs_remove(struct dentry *dentry) { if (IS_ERR_OR_NULL(dentry)) return; @@ -336,7 +312,7 @@ void securityfs_recursive_remove(struct dentry *dentry) simple_recursive_removal(dentry, remove_one); simple_release_fs(&mount, &mount_count); } -EXPORT_SYMBOL_GPL(securityfs_recursive_remove); +EXPORT_SYMBOL_GPL(securityfs_remove); #ifdef CONFIG_SECURITY static struct dentry *lsm_dentry; diff --git a/security/integrity/evm/evm_secfs.c b/security/integrity/evm/evm_secfs.c index 9b907c2fee60..b0d2aad27850 100644 --- a/security/integrity/evm/evm_secfs.c +++ b/security/integrity/evm/evm_secfs.c @@ -17,7 +17,6 @@ #include "evm.h" static struct dentry *evm_dir; -static struct dentry *evm_init_tpm; static struct dentry *evm_symlink; #ifdef CONFIG_EVM_ADD_XATTRS @@ -286,7 +285,7 @@ static int evm_init_xattrs(void) { evm_xattrs = securityfs_create_file("evm_xattrs", 0660, evm_dir, NULL, &evm_xattr_ops); - if (!evm_xattrs || IS_ERR(evm_xattrs)) + if (IS_ERR(evm_xattrs)) return -EFAULT; return 0; @@ -301,21 +300,22 @@ static int evm_init_xattrs(void) int __init evm_init_secfs(void) { int error = 0; + struct dentry *dentry; evm_dir = securityfs_create_dir("evm", integrity_dir); - if (!evm_dir || IS_ERR(evm_dir)) + if (IS_ERR(evm_dir)) return -EFAULT; - evm_init_tpm = securityfs_create_file("evm", 0660, - evm_dir, NULL, &evm_key_ops); - if (!evm_init_tpm || IS_ERR(evm_init_tpm)) { + dentry = securityfs_create_file("evm", 0660, + evm_dir, NULL, &evm_key_ops); + if (IS_ERR(dentry)) { error = -EFAULT; goto out; } evm_symlink = securityfs_create_symlink("evm", NULL, "integrity/evm/evm", NULL); - if (!evm_symlink || IS_ERR(evm_symlink)) { + if (IS_ERR(evm_symlink)) { error = -EFAULT; goto out; } @@ -328,7 +328,6 @@ int __init evm_init_secfs(void) return 0; out: securityfs_remove(evm_symlink); - securityfs_remove(evm_init_tpm); securityfs_remove(evm_dir); return error; } diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index e4a79a9b2d58..87045b09f120 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -116,28 +116,6 @@ void ima_putc(struct seq_file *m, void *data, int datalen) seq_putc(m, *(char *)data++); } -static struct dentry **ascii_securityfs_measurement_lists __ro_after_init; -static struct dentry **binary_securityfs_measurement_lists __ro_after_init; -static int securityfs_measurement_list_count __ro_after_init; - -static void lookup_template_data_hash_algo(int *algo_idx, enum hash_algo *algo, - struct seq_file *m, - struct dentry **lists) -{ - struct dentry *dentry; - int i; - - dentry = file_dentry(m->file); - - for (i = 0; i < securityfs_measurement_list_count; i++) { - if (dentry == lists[i]) { - *algo_idx = i; - *algo = ima_algo_array[i].algo; - break; - } - } -} - /* print format: * 32bit-le=pcr# * char[n]=template digest @@ -160,9 +138,10 @@ int ima_measurements_show(struct seq_file *m, void *v) algo_idx = ima_sha1_idx; algo = HASH_ALGO_SHA1; - if (m->file != NULL) - lookup_template_data_hash_algo(&algo_idx, &algo, m, - binary_securityfs_measurement_lists); + if (m->file != NULL) { + algo_idx = (unsigned long)file_inode(m->file)->i_private; + algo = ima_algo_array[algo_idx].algo; + } /* get entry */ e = qe->entry; @@ -256,9 +235,10 @@ static int ima_ascii_measurements_show(struct seq_file *m, void *v) algo_idx = ima_sha1_idx; algo = HASH_ALGO_SHA1; - if (m->file != NULL) - lookup_template_data_hash_algo(&algo_idx, &algo, m, - ascii_securityfs_measurement_lists); + if (m->file != NULL) { + algo_idx = (unsigned long)file_inode(m->file)->i_private; + algo = ima_algo_array[algo_idx].algo; + } /* get entry */ e = qe->entry; @@ -396,11 +376,6 @@ out: static struct dentry *ima_dir; static struct dentry *ima_symlink; -static struct dentry *binary_runtime_measurements; -static struct dentry *ascii_runtime_measurements; -static struct dentry *runtime_measurements_count; -static struct dentry *violations; -static struct dentry *ima_policy; enum ima_fs_flags { IMA_FS_BUSY, @@ -417,64 +392,33 @@ static const struct seq_operations ima_policy_seqops = { }; #endif -static void __init remove_securityfs_measurement_lists(struct dentry **lists) -{ - int i; - - if (lists) { - for (i = 0; i < securityfs_measurement_list_count; i++) - securityfs_remove(lists[i]); - - kfree(lists); - } -} - static int __init create_securityfs_measurement_lists(void) { - char file_name[NAME_MAX + 1]; - struct dentry *dentry; - u16 algo; - int i; - - securityfs_measurement_list_count = NR_BANKS(ima_tpm_chip); + int count = NR_BANKS(ima_tpm_chip); if (ima_sha1_idx >= NR_BANKS(ima_tpm_chip)) - securityfs_measurement_list_count++; + count++; - ascii_securityfs_measurement_lists = - kcalloc(securityfs_measurement_list_count, sizeof(struct dentry *), - GFP_KERNEL); - if (!ascii_securityfs_measurement_lists) - return -ENOMEM; - - binary_securityfs_measurement_lists = - kcalloc(securityfs_measurement_list_count, sizeof(struct dentry *), - GFP_KERNEL); - if (!binary_securityfs_measurement_lists) - return -ENOMEM; - - for (i = 0; i < securityfs_measurement_list_count; i++) { - algo = ima_algo_array[i].algo; + for (int i = 0; i < count; i++) { + u16 algo = ima_algo_array[i].algo; + char file_name[NAME_MAX + 1]; + struct dentry *dentry; sprintf(file_name, "ascii_runtime_measurements_%s", hash_algo_name[algo]); dentry = securityfs_create_file(file_name, S_IRUSR | S_IRGRP, - ima_dir, NULL, + ima_dir, (void *)(uintptr_t)i, &ima_ascii_measurements_ops); if (IS_ERR(dentry)) return PTR_ERR(dentry); - ascii_securityfs_measurement_lists[i] = dentry; - sprintf(file_name, "binary_runtime_measurements_%s", hash_algo_name[algo]); dentry = securityfs_create_file(file_name, S_IRUSR | S_IRGRP, - ima_dir, NULL, + ima_dir, (void *)(uintptr_t)i, &ima_measurements_ops); if (IS_ERR(dentry)) return PTR_ERR(dentry); - - binary_securityfs_measurement_lists[i] = dentry; } return 0; @@ -533,8 +477,7 @@ static int ima_release_policy(struct inode *inode, struct file *file) ima_update_policy(); #if !defined(CONFIG_IMA_WRITE_POLICY) && !defined(CONFIG_IMA_READ_POLICY) - securityfs_remove(ima_policy); - ima_policy = NULL; + securityfs_remove(file->f_path.dentry); #elif defined(CONFIG_IMA_WRITE_POLICY) clear_bit(IMA_FS_BUSY, &ima_fs_flags); #elif defined(CONFIG_IMA_READ_POLICY) @@ -553,11 +496,9 @@ static const struct file_operations ima_measure_policy_ops = { int __init ima_fs_init(void) { + struct dentry *dentry; int ret; - ascii_securityfs_measurement_lists = NULL; - binary_securityfs_measurement_lists = NULL; - ima_dir = securityfs_create_dir("ima", integrity_dir); if (IS_ERR(ima_dir)) return PTR_ERR(ima_dir); @@ -573,57 +514,45 @@ int __init ima_fs_init(void) if (ret != 0) goto out; - binary_runtime_measurements = - securityfs_create_symlink("binary_runtime_measurements", ima_dir, + dentry = securityfs_create_symlink("binary_runtime_measurements", ima_dir, "binary_runtime_measurements_sha1", NULL); - if (IS_ERR(binary_runtime_measurements)) { - ret = PTR_ERR(binary_runtime_measurements); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); goto out; } - ascii_runtime_measurements = - securityfs_create_symlink("ascii_runtime_measurements", ima_dir, + dentry = securityfs_create_symlink("ascii_runtime_measurements", ima_dir, "ascii_runtime_measurements_sha1", NULL); - if (IS_ERR(ascii_runtime_measurements)) { - ret = PTR_ERR(ascii_runtime_measurements); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); goto out; } - runtime_measurements_count = - securityfs_create_file("runtime_measurements_count", + dentry = securityfs_create_file("runtime_measurements_count", S_IRUSR | S_IRGRP, ima_dir, NULL, &ima_measurements_count_ops); - if (IS_ERR(runtime_measurements_count)) { - ret = PTR_ERR(runtime_measurements_count); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); goto out; } - violations = - securityfs_create_file("violations", S_IRUSR | S_IRGRP, + dentry = securityfs_create_file("violations", S_IRUSR | S_IRGRP, ima_dir, NULL, &ima_htable_violations_ops); - if (IS_ERR(violations)) { - ret = PTR_ERR(violations); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); goto out; } - ima_policy = securityfs_create_file("policy", POLICY_FILE_FLAGS, + dentry = securityfs_create_file("policy", POLICY_FILE_FLAGS, ima_dir, NULL, &ima_measure_policy_ops); - if (IS_ERR(ima_policy)) { - ret = PTR_ERR(ima_policy); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); goto out; } return 0; out: - securityfs_remove(ima_policy); - securityfs_remove(violations); - securityfs_remove(runtime_measurements_count); - securityfs_remove(ascii_runtime_measurements); - securityfs_remove(binary_runtime_measurements); - remove_securityfs_measurement_lists(ascii_securityfs_measurement_lists); - remove_securityfs_measurement_lists(binary_securityfs_measurement_lists); - securityfs_measurement_list_count = 0; securityfs_remove(ima_symlink); securityfs_remove(ima_dir); diff --git a/security/ipe/fs.c b/security/ipe/fs.c index f40e50bfd2e7..0bb9468b8026 100644 --- a/security/ipe/fs.c +++ b/security/ipe/fs.c @@ -12,11 +12,8 @@ #include "policy.h" #include "audit.h" -static struct dentry *np __ro_after_init; static struct dentry *root __ro_after_init; struct dentry *policy_root __ro_after_init; -static struct dentry *audit_node __ro_after_init; -static struct dentry *enforce_node __ro_after_init; /** * setaudit() - Write handler for the securityfs node, "ipe/success_audit" @@ -200,27 +197,26 @@ static int __init ipe_init_securityfs(void) { int rc = 0; struct ipe_policy *ap; + struct dentry *dentry; if (!ipe_enabled) return -EOPNOTSUPP; root = securityfs_create_dir("ipe", NULL); - if (IS_ERR(root)) { - rc = PTR_ERR(root); - goto err; - } + if (IS_ERR(root)) + return PTR_ERR(root); - audit_node = securityfs_create_file("success_audit", 0600, root, + dentry = securityfs_create_file("success_audit", 0600, root, NULL, &audit_fops); - if (IS_ERR(audit_node)) { - rc = PTR_ERR(audit_node); + if (IS_ERR(dentry)) { + rc = PTR_ERR(dentry); goto err; } - enforce_node = securityfs_create_file("enforce", 0600, root, NULL, + dentry = securityfs_create_file("enforce", 0600, root, NULL, &enforce_fops); - if (IS_ERR(enforce_node)) { - rc = PTR_ERR(enforce_node); + if (IS_ERR(dentry)) { + rc = PTR_ERR(dentry); goto err; } @@ -237,18 +233,14 @@ static int __init ipe_init_securityfs(void) goto err; } - np = securityfs_create_file("new_policy", 0200, root, NULL, &np_fops); - if (IS_ERR(np)) { - rc = PTR_ERR(np); + dentry = securityfs_create_file("new_policy", 0200, root, NULL, &np_fops); + if (IS_ERR(dentry)) { + rc = PTR_ERR(dentry); goto err; } return 0; err: - securityfs_remove(np); - securityfs_remove(policy_root); - securityfs_remove(enforce_node); - securityfs_remove(audit_node); securityfs_remove(root); return rc; } diff --git a/security/ipe/policy_fs.c b/security/ipe/policy_fs.c index db26032ccbe1..9d92d8a14b13 100644 --- a/security/ipe/policy_fs.c +++ b/security/ipe/policy_fs.c @@ -438,7 +438,7 @@ static const struct ipefs_file policy_subdir[] = { */ void ipe_del_policyfs_node(struct ipe_policy *p) { - securityfs_recursive_remove(p->policyfs); + securityfs_remove(p->policyfs); p->policyfs = NULL; } @@ -485,6 +485,6 @@ int ipe_new_policyfs_node(struct ipe_policy *p) return 0; err: - securityfs_recursive_remove(policyfs); + securityfs_remove(policyfs); return rc; } diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 33eafb71e4f3..0116e9f93ffe 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -303,7 +303,6 @@ static int get_path_from_fd(const s32 fd, struct path *const path) if ((fd_file(f)->f_op == &ruleset_fops) || (fd_file(f)->f_path.mnt->mnt_flags & MNT_INTERNAL) || (fd_file(f)->f_path.dentry->d_sb->s_flags & SB_NOUSER) || - d_is_negative(fd_file(f)->f_path.dentry) || IS_PRIVATE(d_backing_inode(fd_file(f)->f_path.dentry))) return -EBADFD; diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 840bb9cfe789..a66f258cafaa 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -1269,62 +1269,62 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg) stream = &data->stream; guard(mutex)(&stream->device->lock); - switch (_IOC_NR(cmd)) { - case _IOC_NR(SNDRV_COMPRESS_IOCTL_VERSION): + switch (cmd) { + case SNDRV_COMPRESS_IOCTL_VERSION: return put_user(SNDRV_COMPRESS_VERSION, (int __user *)arg) ? -EFAULT : 0; - case _IOC_NR(SNDRV_COMPRESS_GET_CAPS): + case SNDRV_COMPRESS_GET_CAPS: return snd_compr_get_caps(stream, arg); #ifndef COMPR_CODEC_CAPS_OVERFLOW - case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS): + case SNDRV_COMPRESS_GET_CODEC_CAPS: return snd_compr_get_codec_caps(stream, arg); #endif - case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS): + case SNDRV_COMPRESS_SET_PARAMS: return snd_compr_set_params(stream, arg); - case _IOC_NR(SNDRV_COMPRESS_GET_PARAMS): + case SNDRV_COMPRESS_GET_PARAMS: return snd_compr_get_params(stream, arg); - case _IOC_NR(SNDRV_COMPRESS_SET_METADATA): + case SNDRV_COMPRESS_SET_METADATA: return snd_compr_set_metadata(stream, arg); - case _IOC_NR(SNDRV_COMPRESS_GET_METADATA): + case SNDRV_COMPRESS_GET_METADATA: return snd_compr_get_metadata(stream, arg); } if (stream->direction == SND_COMPRESS_ACCEL) { #if IS_ENABLED(CONFIG_SND_COMPRESS_ACCEL) - switch (_IOC_NR(cmd)) { - case _IOC_NR(SNDRV_COMPRESS_TASK_CREATE): + switch (cmd) { + case SNDRV_COMPRESS_TASK_CREATE: return snd_compr_task_create(stream, arg); - case _IOC_NR(SNDRV_COMPRESS_TASK_FREE): + case SNDRV_COMPRESS_TASK_FREE: return snd_compr_task_seq(stream, arg, snd_compr_task_free_one); - case _IOC_NR(SNDRV_COMPRESS_TASK_START): + case SNDRV_COMPRESS_TASK_START: return snd_compr_task_start_ioctl(stream, arg); - case _IOC_NR(SNDRV_COMPRESS_TASK_STOP): + case SNDRV_COMPRESS_TASK_STOP: return snd_compr_task_seq(stream, arg, snd_compr_task_stop_one); - case _IOC_NR(SNDRV_COMPRESS_TASK_STATUS): + case SNDRV_COMPRESS_TASK_STATUS: return snd_compr_task_status_ioctl(stream, arg); } #endif return -ENOTTY; } - switch (_IOC_NR(cmd)) { - case _IOC_NR(SNDRV_COMPRESS_TSTAMP): + switch (cmd) { + case SNDRV_COMPRESS_TSTAMP: return snd_compr_tstamp(stream, arg); - case _IOC_NR(SNDRV_COMPRESS_AVAIL): + case SNDRV_COMPRESS_AVAIL: return snd_compr_ioctl_avail(stream, arg); - case _IOC_NR(SNDRV_COMPRESS_PAUSE): + case SNDRV_COMPRESS_PAUSE: return snd_compr_pause(stream); - case _IOC_NR(SNDRV_COMPRESS_RESUME): + case SNDRV_COMPRESS_RESUME: return snd_compr_resume(stream); - case _IOC_NR(SNDRV_COMPRESS_START): + case SNDRV_COMPRESS_START: return snd_compr_start(stream); - case _IOC_NR(SNDRV_COMPRESS_STOP): + case SNDRV_COMPRESS_STOP: return snd_compr_stop(stream); - case _IOC_NR(SNDRV_COMPRESS_DRAIN): + case SNDRV_COMPRESS_DRAIN: return snd_compr_drain(stream); - case _IOC_NR(SNDRV_COMPRESS_PARTIAL_DRAIN): + case SNDRV_COMPRESS_PARTIAL_DRAIN: return snd_compr_partial_drain(stream); - case _IOC_NR(SNDRV_COMPRESS_NEXT_TRACK): + case SNDRV_COMPRESS_NEXT_TRACK: return snd_compr_next_track(stream); } diff --git a/sound/isa/ad1816a/ad1816a.c b/sound/isa/ad1816a/ad1816a.c index 99006dc4777e..5c9e2d41d900 100644 --- a/sound/isa/ad1816a/ad1816a.c +++ b/sound/isa/ad1816a/ad1816a.c @@ -98,7 +98,7 @@ static int snd_card_ad1816a_pnp(int dev, struct pnp_card_link *card, pdev = pnp_request_card_device(card, id->devs[1].id, NULL); if (pdev == NULL) { mpu_port[dev] = -1; - dev_warn(&pdev->dev, "MPU401 device busy, skipping.\n"); + pr_warn("MPU401 device busy, skipping.\n"); return 0; } diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 3f2fd32f4ad9..886c53184fec 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -873,6 +873,52 @@ static int cs35l56_hda_system_resume(struct device *dev) return 0; } +static int cs35l56_hda_fixup_yoga9(struct cs35l56_hda *cs35l56, int *bus_addr) +{ + /* The cirrus,dev-index property has the wrong values */ + switch (*bus_addr) { + case 0x30: + cs35l56->index = 1; + return 0; + case 0x31: + cs35l56->index = 0; + return 0; + default: + /* There is a pseudo-address for broadcast to both amps - ignore it */ + dev_dbg(cs35l56->base.dev, "Ignoring I2C address %#x\n", *bus_addr); + return 0; + } +} + +static const struct { + const char *sub; + int (*fixup_fn)(struct cs35l56_hda *cs35l56, int *bus_addr); +} cs35l56_hda_fixups[] = { + { + .sub = "17AA390B", /* Lenovo Yoga Book 9i GenX */ + .fixup_fn = cs35l56_hda_fixup_yoga9, + }, +}; + +static int cs35l56_hda_apply_platform_fixups(struct cs35l56_hda *cs35l56, const char *sub, + int *bus_addr) +{ + int i; + + if (IS_ERR(sub)) + return 0; + + for (i = 0; i < ARRAY_SIZE(cs35l56_hda_fixups); i++) { + if (strcasecmp(cs35l56_hda_fixups[i].sub, sub) == 0) { + dev_dbg(cs35l56->base.dev, "Applying fixup for %s\n", + cs35l56_hda_fixups[i].sub); + return (cs35l56_hda_fixups[i].fixup_fn)(cs35l56, bus_addr); + } + } + + return 0; +} + static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int hid, int id) { u32 values[HDA_MAX_COMPONENTS]; @@ -897,39 +943,47 @@ static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int hid, int id) ACPI_COMPANION_SET(cs35l56->base.dev, adev); } - property = "cirrus,dev-index"; - ret = device_property_count_u32(cs35l56->base.dev, property); - if (ret <= 0) - goto err; - - if (ret > ARRAY_SIZE(values)) { - ret = -EINVAL; - goto err; - } - nval = ret; + /* Initialize things that could be overwritten by a fixup */ + cs35l56->index = -1; - ret = device_property_read_u32_array(cs35l56->base.dev, property, values, nval); + sub = acpi_get_subsystem_id(ACPI_HANDLE(cs35l56->base.dev)); + ret = cs35l56_hda_apply_platform_fixups(cs35l56, sub, &id); if (ret) - goto err; + return ret; - cs35l56->index = -1; - for (i = 0; i < nval; i++) { - if (values[i] == id) { - cs35l56->index = i; - break; - } - } - /* - * It's not an error for the ID to be missing: for I2C there can be - * an alias address that is not a real device. So reject silently. - */ if (cs35l56->index == -1) { - dev_dbg(cs35l56->base.dev, "No index found in %s\n", property); - ret = -ENODEV; - goto err; - } + property = "cirrus,dev-index"; + ret = device_property_count_u32(cs35l56->base.dev, property); + if (ret <= 0) + goto err; - sub = acpi_get_subsystem_id(ACPI_HANDLE(cs35l56->base.dev)); + if (ret > ARRAY_SIZE(values)) { + ret = -EINVAL; + goto err; + } + nval = ret; + + ret = device_property_read_u32_array(cs35l56->base.dev, property, values, nval); + if (ret) + goto err; + + for (i = 0; i < nval; i++) { + if (values[i] == id) { + cs35l56->index = i; + break; + } + } + + /* + * It's not an error for the ID to be missing: for I2C there can be + * an alias address that is not a real device. So reject silently. + */ + if (cs35l56->index == -1) { + dev_dbg(cs35l56->base.dev, "No index found in %s\n", property); + ret = -ENODEV; + goto err; + } + } if (IS_ERR(sub)) { dev_info(cs35l56->base.dev, diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 08308231b4ed..9a7793eb16e9 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4551,7 +4551,9 @@ HDA_CODEC_ENTRY(0x10de002e, "Tegra186 HDMI/DP1", patch_tegra_hdmi), HDA_CODEC_ENTRY(0x10de002f, "Tegra194 HDMI/DP2", patch_tegra_hdmi), HDA_CODEC_ENTRY(0x10de0030, "Tegra194 HDMI/DP3", patch_tegra_hdmi), HDA_CODEC_ENTRY(0x10de0031, "Tegra234 HDMI/DP", patch_tegra234_hdmi), +HDA_CODEC_ENTRY(0x10de0033, "SoC 33 HDMI/DP", patch_tegra234_hdmi), HDA_CODEC_ENTRY(0x10de0034, "Tegra264 HDMI/DP", patch_tegra234_hdmi), +HDA_CODEC_ENTRY(0x10de0035, "SoC 35 HDMI/DP", patch_tegra234_hdmi), HDA_CODEC_ENTRY(0x10de0040, "GPU 40 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0041, "GPU 41 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0042, "GPU 42 HDMI/DP", patch_nvhdmi), @@ -4590,15 +4592,32 @@ HDA_CODEC_ENTRY(0x10de0097, "GPU 97 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0098, "GPU 98 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0099, "GPU 99 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009a, "GPU 9a HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009b, "GPU 9b HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009c, "GPU 9c HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009d, "GPU 9d HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009e, "GPU 9e HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009f, "GPU 9f HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a0, "GPU a0 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a1, "GPU a1 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a3, "GPU a3 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a4, "GPU a4 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a5, "GPU a5 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a6, "GPU a6 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a7, "GPU a7 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a8, "GPU a8 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a9, "GPU a9 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00aa, "GPU aa HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00ab, "GPU ab HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00ad, "GPU ad HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00ae, "GPU ae HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00af, "GPU af HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00b0, "GPU b0 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00b1, "GPU b1 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c0, "GPU c0 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c1, "GPU c1 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c3, "GPU c3 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c4, "GPU c4 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c5, "GPU c5 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x10de8067, "MCP67/68 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x67663d82, "Arise 82 HDMI/DP", patch_gf_hdmi), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5d6d01ecfee2..2627e2f49316 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4753,7 +4753,7 @@ static void alc245_fixup_hp_mute_led_v1_coefbit(struct hda_codec *codec, if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->mute_led_polarity = 0; spec->mute_led_coef.idx = 0x0b; - spec->mute_led_coef.mask = 1 << 3; + spec->mute_led_coef.mask = 3 << 2; spec->mute_led_coef.on = 1 << 3; spec->mute_led_coef.off = 0; snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set); @@ -7497,6 +7497,9 @@ static void alc287_fixup_yoga9_14iap7_bass_spk_pin(struct hda_codec *codec, }; struct alc_spec *spec = codec->spec; + /* Support Audio mute LED and Mic mute LED on keyboard */ + hda_fixup_ideapad_acpi(codec, fix, action); + switch (action) { case HDA_FIXUP_ACT_PRE_PROBE: snd_hda_apply_pincfgs(codec, pincfgs); @@ -10691,6 +10694,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x87cc, "HP Pavilion 15-eg0xxx", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87d3, "HP Laptop 15-gw0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87df, "HP ProBook 430 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), @@ -10769,6 +10773,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8a2e, "HP Envy 16", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8a30, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8a31, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8a4f, "HP Victus 15-fa0xxx (MB 8A4F)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8a6e, "HP EDNA 360", ALC287_FIXUP_CS35L41_I2C_4), SND_PCI_QUIRK(0x103c, 0x8a74, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), @@ -10814,6 +10819,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8b97, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8bb3, "HP Slim OMEN", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8bb4, "HP Slim OMEN", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8bbe, "HP Victus 16-r0xxx (MB 8BBE)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bc8, "HP Victus 15-fa1xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bcd, "HP Omen 16-xd0xxx", ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bdd, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), @@ -10881,6 +10887,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d07, "HP Victus 15-fb2xxx (MB 8D07)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8d18, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 14 G12", ALC285_FIXUP_HP_GPIO_LED), @@ -11005,6 +11012,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a63, "ASUS UX3405MA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1a8e, "ASUS G712LWS", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B), SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), @@ -11040,6 +11048,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1e93, "ASUS ExpertBook B9403CVAR", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1eb3, "ASUS Ally RCLA72", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1ed3, "ASUS HN7306W", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2), @@ -11424,6 +11433,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x2782, 0x0228, "Infinix ZERO BOOK 13", ALC269VB_FIXUP_INFINIX_ZERO_BOOK_13), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), SND_PCI_QUIRK(0x2782, 0x1407, "Positivo P15X", ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC), + SND_PCI_QUIRK(0x2782, 0x1409, "Positivo K116J", ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC), SND_PCI_QUIRK(0x2782, 0x1701, "Infinix Y4 Max", ALC269VC_FIXUP_INFINIX_Y4_MAX), SND_PCI_QUIRK(0x2782, 0x1705, "MEDION E15433", ALC269VC_FIXUP_INFINIX_Y4_MAX), SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), diff --git a/sound/pci/hda/tas2781_hda.c b/sound/pci/hda/tas2781_hda.c index 5f1d4b3e9688..34217ce9f28e 100644 --- a/sound/pci/hda/tas2781_hda.c +++ b/sound/pci/hda/tas2781_hda.c @@ -44,7 +44,7 @@ static void tas2781_apply_calib(struct tasdevice_priv *p) TASDEVICE_REG(0, 0x13, 0x70), TASDEVICE_REG(0, 0x18, 0x7c), }; - unsigned int crc, oft; + unsigned int crc, oft, node_num; unsigned char *buf; int i, j, k, l; @@ -80,8 +80,9 @@ static void tas2781_apply_calib(struct tasdevice_priv *p) dev_err(p->dev, "%s: CRC error\n", __func__); return; } + node_num = tmp_val[1]; - for (j = 0, k = 0; j < tmp_val[1]; j++) { + for (j = 0, k = 0; j < node_num; j++) { oft = j * 6 + 3; if (tmp_val[oft] == TASDEV_UEFI_CALI_REG_ADDR_FLG) { for (i = 0; i < TASDEV_CALIB_N; i++) { @@ -99,8 +100,9 @@ static void tas2781_apply_calib(struct tasdevice_priv *p) } data[l] = k; + oft++; for (i = 0; i < TASDEV_CALIB_N * 4; i++) - data[l + i] = data[4 * oft + i]; + data[l + i + 1] = data[4 * oft + i]; k++; } } diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 97e340140d0c..f210a253da9f 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -420,6 +420,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "M6501RM"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "E1404FA"), } }, @@ -539,6 +546,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "Victus by HP Gaming Laptop 15-fb1xxx"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "Victus by HP Gaming Laptop 15-fb2xxx"), } }, @@ -588,6 +602,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8A81"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), DMI_MATCH(DMI_BOARD_NAME, "8B27"), } }, diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index d0831d609584..ba653f6ccfae 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -980,7 +980,7 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) break; default: dev_err(cs35l56_base->dev, "Unknown device %x\n", devid); - return ret; + return -ENODEV; } cs35l56_base->type = devid & 0xFF; diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 08df87238eee..29a403526cd9 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -82,6 +82,7 @@ static const struct reg_sequence rt5650_init_list[] = { {0xf6, 0x0100}, {RT5645_PWR_ANLG1, 0x02}, {RT5645_IL_CMD3, 0x6728}, + {RT5645_PR_BASE + 0x3a, 0x0000}, }; static const struct reg_default rt5645_reg[] = { diff --git a/sound/soc/codecs/rt5660.c b/sound/soc/codecs/rt5660.c index 82b92e83be4c..44c3a3b92f98 100644 --- a/sound/soc/codecs/rt5660.c +++ b/sound/soc/codecs/rt5660.c @@ -1315,14 +1315,17 @@ static int rt5660_i2c_probe(struct i2c_client *i2c) regmap_update_bits(rt5660->regmap, RT5660_GPIO_CTRL1, RT5660_GP1_PIN_MASK, RT5660_GP1_PIN_DMIC1_SCL); - if (rt5660->pdata.dmic1_data_pin == RT5660_DMIC1_DATA_GPIO2) + if (rt5660->pdata.dmic1_data_pin == RT5660_DMIC1_DATA_GPIO2) { regmap_update_bits(rt5660->regmap, RT5660_DMIC_CTRL1, RT5660_SEL_DMIC_DATA_MASK, RT5660_SEL_DMIC_DATA_GPIO2); - else if (rt5660->pdata.dmic1_data_pin == RT5660_DMIC1_DATA_IN1P) + regmap_update_bits(rt5660->regmap, RT5660_GPIO_CTRL1, + RT5660_GP2_PIN_MASK, RT5660_GP2_PIN_DMIC1_SDA); + } else if (rt5660->pdata.dmic1_data_pin == RT5660_DMIC1_DATA_IN1P) { regmap_update_bits(rt5660->regmap, RT5660_DMIC_CTRL1, RT5660_SEL_DMIC_DATA_MASK, RT5660_SEL_DMIC_DATA_IN1P); + } } return devm_snd_soc_register_component(&i2c->dev, diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index 677529916dc0..745532ccbdba 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -517,7 +517,8 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate) regmap_update_bits(asrc->regmap, REG_ASRCTR, ASRCTR_ATSi_MASK(index), ASRCTR_ATS(index)); regmap_update_bits(asrc->regmap, REG_ASRCTR, - ASRCTR_USRi_MASK(index), 0); + ASRCTR_IDRi_MASK(index) | ASRCTR_USRi_MASK(index), + ASRCTR_USR(index)); /* Set the input and output clock sources */ regmap_update_bits(asrc->regmap, REG_ASRCSR, diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index af1a168d35e3..50af6b725670 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -803,13 +803,15 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir) * anymore. Add software reset to fix this issue. * This is a hardware bug, and will be fix in the * next sai version. + * + * In consumer mode, this can happen even after a + * single open/close, especially if both tx and rx + * are running concurrently. */ - if (!sai->is_consumer_mode[tx]) { - /* Software Reset */ - regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR); - /* Clear SR bit to finish the reset */ - regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0); - } + /* Software Reset */ + regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR); + /* Clear SR bit to finish the reset */ + regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0); } static int fsl_sai_trigger(struct snd_pcm_substream *substream, int cmd, diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index ccf90428126d..0efe490024b0 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -1570,11 +1570,13 @@ static void avs_component_hda_unregister_dais(struct snd_soc_component *componen { struct snd_soc_acpi_mach *mach; struct snd_soc_dai *dai, *save; + struct avs_mach_pdata *pdata; struct hda_codec *codec; char name[32]; mach = dev_get_platdata(component->card->dev); - codec = mach->pdata; + pdata = mach->pdata; + codec = pdata->codec; snprintf(name, sizeof(name), "%s-cpu", dev_name(&codec->core.dev)); for_each_component_dais_safe(component, dai, save) { diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index 2df7afa2f469..c23fdb6aad4c 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -11,7 +11,7 @@ menuconfig SND_SOC_INTEL_MACH kernel: saying N will just cause the configurator to skip all the questions about Intel ASoC machine drivers. -if SND_SOC_INTEL_MACH +if SND_SOC_INTEL_MACH && (SND_SOC_SOF_INTEL_COMMON || !SND_SOC_SOF_INTEL_COMMON) config SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES bool "Use more user friendly long card names" @@ -42,6 +42,7 @@ config SND_SOC_INTEL_SOF_NUVOTON_COMMON tristate config SND_SOC_INTEL_SOF_BOARD_HELPERS + select SND_SOC_ACPI_INTEL_MATCH tristate if SND_SOC_INTEL_CATPT diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 81a914bd7ec2..504887505e68 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -783,6 +783,9 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { static const struct snd_pci_quirk sof_sdw_ssid_quirk_table[] = { SND_PCI_QUIRK(0x1043, 0x1e13, "ASUS Zenbook S14", SOC_SDW_CODEC_MIC), SND_PCI_QUIRK(0x1043, 0x1f43, "ASUS Zenbook S16", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x2347, "Lenovo P16", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x2348, "Lenovo P16", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x2349, "Lenovo P1", SOC_SDW_CODEC_MIC), {} }; diff --git a/sound/soc/intel/common/soc-acpi-intel-arl-match.c b/sound/soc/intel/common/soc-acpi-intel-arl-match.c index 73e581e93755..6bf7a6250ddc 100644 --- a/sound/soc/intel/common/soc-acpi-intel-arl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-arl-match.c @@ -238,6 +238,15 @@ static const struct snd_soc_acpi_adr_device rt722_0_single_adr[] = { } }; +static const struct snd_soc_acpi_adr_device rt1316_3_single_adr[] = { + { + .adr = 0x000330025D131601ull, + .num_endpoints = 1, + .endpoints = &single_endpoint, + .name_prefix = "rt1316-1" + } +}; + static const struct snd_soc_acpi_adr_device rt1320_2_single_adr[] = { { .adr = 0x000230025D132001ull, @@ -368,6 +377,20 @@ static const struct snd_soc_acpi_link_adr arl_sdca_rvp[] = { {} }; +static const struct snd_soc_acpi_link_adr arl_rt711_l0_rt1316_l3[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(rt711_sdca_0_adr), + .adr_d = rt711_sdca_0_adr, + }, + { + .mask = BIT(3), + .num_adr = ARRAY_SIZE(rt1316_3_single_adr), + .adr_d = rt1316_3_single_adr, + }, + {} +}; + static const struct snd_soc_acpi_link_adr arl_rt722_l0_rt1320_l2[] = { { .mask = BIT(0), @@ -468,6 +491,13 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = { .get_function_tplg_files = sof_sdw_get_tplg_files, }, { + .link_mask = BIT(2) | BIT(3), + .links = arl_cs42l43_l2_cs35l56_l3, + .drv_name = "sof_sdw", + .sof_tplg_filename = "sof-arl-cs42l43-l2-cs35l56-l3.tplg", + .get_function_tplg_files = sof_sdw_get_tplg_files, + }, + { .link_mask = BIT(2), .links = arl_cs42l43_l2, .drv_name = "sof_sdw", @@ -475,11 +505,10 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = { .get_function_tplg_files = sof_sdw_get_tplg_files, }, { - .link_mask = BIT(2) | BIT(3), - .links = arl_cs42l43_l2_cs35l56_l3, + .link_mask = BIT(0) | BIT(3), + .links = arl_rt711_l0_rt1316_l3, .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-arl-cs42l43-l2-cs35l56-l3.tplg", - .get_function_tplg_files = sof_sdw_get_tplg_files, + .sof_tplg_filename = "sof-arl-rt711-l0-rt1316-l3.tplg", }, { .link_mask = 0x1, /* link0 required */ diff --git a/sound/soc/mediatek/common/mtk-soundcard-driver.c b/sound/soc/mediatek/common/mtk-soundcard-driver.c index 713a368f79cf..95a083939f3e 100644 --- a/sound/soc/mediatek/common/mtk-soundcard-driver.c +++ b/sound/soc/mediatek/common/mtk-soundcard-driver.c @@ -262,9 +262,13 @@ int mtk_soundcard_common_probe(struct platform_device *pdev) soc_card_data->accdet = accdet_comp; else dev_err(&pdev->dev, "No sound component found from mediatek,accdet property\n"); + + put_device(&accdet_pdev->dev); } else { dev_err(&pdev->dev, "No device found from mediatek,accdet property\n"); } + + of_node_put(accdet_node); } platform_node = of_parse_phandle(pdev->dev.of_node, "mediatek,platform", 0); diff --git a/sound/soc/mediatek/mt8365/mt8365-dai-i2s.c b/sound/soc/mediatek/mt8365/mt8365-dai-i2s.c index cae51756cead..cb9beb172ed5 100644 --- a/sound/soc/mediatek/mt8365/mt8365-dai-i2s.c +++ b/sound/soc/mediatek/mt8365/mt8365-dai-i2s.c @@ -812,11 +812,10 @@ static const struct snd_soc_dapm_route mtk_dai_i2s_routes[] = { static int mt8365_dai_i2s_set_priv(struct mtk_base_afe *afe) { int i, ret; - struct mt8365_afe_private *afe_priv = afe->platform_priv; for (i = 0; i < DAI_I2S_NUM; i++) { ret = mt8365_dai_set_priv(afe, mt8365_i2s_priv[i].id, - sizeof(*afe_priv), + sizeof(mt8365_i2s_priv[i]), &mt8365_i2s_priv[i]); if (ret) return ret; diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c index 64ac26443890..de213a69e0da 100644 --- a/sound/soc/sdca/sdca_functions.c +++ b/sound/soc/sdca/sdca_functions.c @@ -211,7 +211,7 @@ static int find_sdca_init_table(struct device *dev, } else if (num_init_writes % sizeof(*raw) != 0) { dev_err(dev, "%pfwP: init table size invalid\n", function_node); return -EINVAL; - } else if (num_init_writes > SDCA_MAX_INIT_COUNT) { + } else if ((num_init_writes / sizeof(*raw)) > SDCA_MAX_INIT_COUNT) { dev_err(dev, "%pfwP: maximum init table size exceeded\n", function_node); return -EINVAL; } diff --git a/sound/soc/sof/intel/ptl.c b/sound/soc/sof/intel/ptl.c index 875d18193b05..1bc1f54c470d 100644 --- a/sound/soc/sof/intel/ptl.c +++ b/sound/soc/sof/intel/ptl.c @@ -117,6 +117,7 @@ const struct sof_intel_dsp_desc ptl_chip_info = { .read_sdw_lcount = hda_sdw_check_lcount_ext, .check_sdw_irq = lnl_dsp_check_sdw_irq, .check_sdw_wakeen_irq = lnl_sdw_check_wakeen_irq, + .sdw_process_wakeen = hda_sdw_process_wakeen_common, .check_ipc_irq = mtl_dsp_check_ipc_irq, .check_mic_privacy_irq = sof_ptl_check_mic_privacy_irq, .process_mic_privacy = sof_ptl_process_mic_privacy, diff --git a/sound/usb/format.c b/sound/usb/format.c index 8cd54f7bf33a..0ee532acbb60 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -310,16 +310,14 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, struct audioformat *fp, unsigned int rate) { - struct usb_interface *iface; struct usb_host_interface *alts; unsigned char *fmt; unsigned int max_rate; - iface = usb_ifnum_to_if(chip->dev, fp->iface); - if (!iface) + alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); + if (!alts) return true; - alts = &iface->altsetting[fp->altset_idx]; fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_FORMAT_TYPE); if (!fmt) @@ -328,20 +326,20 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, if (fmt[0] == 10) { /* bLength */ max_rate = combine_quad(&fmt[6]); - /* Validate max rate */ - if (max_rate != 48000 && - max_rate != 96000 && - max_rate != 192000 && - max_rate != 384000) { - + switch (max_rate) { + case 48000: + return (rate == 44100 || rate == 48000); + case 96000: + return (rate == 88200 || rate == 96000); + case 192000: + return (rate == 176400 || rate == 192000); + default: usb_audio_info(chip, "%u:%d : unexpected max rate: %u\n", fp->iface, fp->altsetting, max_rate); return true; } - - return rate <= max_rate; } return true; diff --git a/sound/usb/qcom/qc_audio_offload.c b/sound/usb/qcom/qc_audio_offload.c index 3543b5a53592..a25c5a531690 100644 --- a/sound/usb/qcom/qc_audio_offload.c +++ b/sound/usb/qcom/qc_audio_offload.c @@ -825,8 +825,8 @@ static int uaudio_sideband_notifier(struct usb_interface *intf, } } - mutex_unlock(&qdev_mutex); mutex_unlock(&chip->mutex); + mutex_unlock(&qdev_mutex); return 0; } @@ -1865,8 +1865,8 @@ static void qc_usb_audio_offload_disconnect(struct snd_usb_audio *chip) /* Device has already been cleaned up, or never populated */ if (!dev->chip) { - mutex_unlock(&qdev_mutex); mutex_unlock(&chip->mutex); + mutex_unlock(&qdev_mutex); return; } @@ -1921,8 +1921,8 @@ static void qc_usb_audio_offload_suspend(struct usb_interface *intf, uaudio_send_disconnect_ind(chip); - mutex_unlock(&qdev_mutex); mutex_unlock(&chip->mutex); + mutex_unlock(&qdev_mutex); } static struct snd_usb_platform_ops offload_ops = { diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index b7dded3c8113..5cfb5d74dd5f 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -628,6 +628,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c index 0198321d14a2..92e8307b2a46 100644 --- a/tools/hv/hv_fcopy_uio_daemon.c +++ b/tools/hv/hv_fcopy_uio_daemon.c @@ -35,7 +35,10 @@ #define WIN8_SRV_MINOR 1 #define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) -#define FCOPY_UIO "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/uio" +#define FCOPY_DEVICE_PATH(subdir) \ + "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/" #subdir +#define FCOPY_UIO_PATH FCOPY_DEVICE_PATH(uio) +#define FCOPY_CHANNELS_PATH FCOPY_DEVICE_PATH(channels) #define FCOPY_VER_COUNT 1 static const int fcopy_versions[] = { @@ -47,9 +50,62 @@ static const int fw_versions[] = { UTIL_FW_VERSION }; -#define HV_RING_SIZE 0x4000 /* 16KB ring buffer size */ +static uint32_t get_ring_buffer_size(void) +{ + char ring_path[PATH_MAX]; + DIR *dir; + struct dirent *entry; + struct stat st; + uint32_t ring_size = 0; + int retry_count = 0; + + /* Find the channel directory */ + dir = opendir(FCOPY_CHANNELS_PATH); + if (!dir) { + usleep(100 * 1000); /* Avoid race with kernel, wait 100ms and retry once */ + dir = opendir(FCOPY_CHANNELS_PATH); + if (!dir) { + syslog(LOG_ERR, "Failed to open channels directory: %s", strerror(errno)); + return 0; + } + } + +retry_once: + while ((entry = readdir(dir)) != NULL) { + if (entry->d_type == DT_DIR && strcmp(entry->d_name, ".") != 0 && + strcmp(entry->d_name, "..") != 0) { + snprintf(ring_path, sizeof(ring_path), "%s/%s/ring", + FCOPY_CHANNELS_PATH, entry->d_name); + + if (stat(ring_path, &st) == 0) { + /* + * stat returns size of Tx, Rx rings combined, + * so take half of it for individual ring size. + */ + ring_size = (uint32_t)st.st_size / 2; + syslog(LOG_INFO, "Ring buffer size from %s: %u bytes", + ring_path, ring_size); + break; + } + } + } + + if (!ring_size && retry_count == 0) { + retry_count = 1; + rewinddir(dir); + usleep(100 * 1000); /* Wait 100ms and retry once */ + goto retry_once; + } + + closedir(dir); -static unsigned char desc[HV_RING_SIZE]; + if (!ring_size) + syslog(LOG_ERR, "Could not determine ring size"); + + return ring_size; +} + +static unsigned char *desc; static int target_fd; static char target_fname[PATH_MAX]; @@ -62,8 +118,11 @@ static int hv_fcopy_create_file(char *file_name, char *path_name, __u32 flags) filesize = 0; p = path_name; - snprintf(target_fname, sizeof(target_fname), "%s/%s", - path_name, file_name); + if (snprintf(target_fname, sizeof(target_fname), "%s/%s", + path_name, file_name) >= sizeof(target_fname)) { + syslog(LOG_ERR, "target file name is too long: %s/%s", path_name, file_name); + goto done; + } /* * Check to see if the path is already in place; if not, @@ -270,7 +329,7 @@ static void wcstoutf8(char *dest, const __u16 *src, size_t dest_size) { size_t len = 0; - while (len < dest_size) { + while (len < dest_size && *src) { if (src[len] < 0x80) dest[len++] = (char)(*src++); else @@ -282,27 +341,15 @@ static void wcstoutf8(char *dest, const __u16 *src, size_t dest_size) static int hv_fcopy_start(struct hv_start_fcopy *smsg_in) { - setlocale(LC_ALL, "en_US.utf8"); - size_t file_size, path_size; - char *file_name, *path_name; - char *in_file_name = (char *)smsg_in->file_name; - char *in_path_name = (char *)smsg_in->path_name; - - file_size = wcstombs(NULL, (const wchar_t *restrict)in_file_name, 0) + 1; - path_size = wcstombs(NULL, (const wchar_t *restrict)in_path_name, 0) + 1; - - file_name = (char *)malloc(file_size * sizeof(char)); - path_name = (char *)malloc(path_size * sizeof(char)); - - if (!file_name || !path_name) { - free(file_name); - free(path_name); - syslog(LOG_ERR, "Can't allocate memory for file name and/or path name"); - return HV_E_FAIL; - } + /* + * file_name and path_name should have same length with appropriate + * member of hv_start_fcopy. + */ + char file_name[W_MAX_PATH], path_name[W_MAX_PATH]; - wcstoutf8(file_name, (__u16 *)in_file_name, file_size); - wcstoutf8(path_name, (__u16 *)in_path_name, path_size); + setlocale(LC_ALL, "en_US.utf8"); + wcstoutf8(file_name, smsg_in->file_name, W_MAX_PATH - 1); + wcstoutf8(path_name, smsg_in->path_name, W_MAX_PATH - 1); return hv_fcopy_create_file(file_name, path_name, smsg_in->copy_flags); } @@ -406,7 +453,7 @@ int main(int argc, char *argv[]) int daemonize = 1, long_index = 0, opt, ret = -EINVAL; struct vmbus_br txbr, rxbr; void *ring; - uint32_t len = HV_RING_SIZE; + uint32_t ring_size, len; char uio_name[NAME_MAX] = {0}; char uio_dev_path[PATH_MAX] = {0}; @@ -437,7 +484,20 @@ int main(int argc, char *argv[]) openlog("HV_UIO_FCOPY", 0, LOG_USER); syslog(LOG_INFO, "starting; pid is:%d", getpid()); - fcopy_get_first_folder(FCOPY_UIO, uio_name); + ring_size = get_ring_buffer_size(); + if (!ring_size) { + ret = -ENODEV; + goto exit; + } + + desc = malloc(ring_size * sizeof(unsigned char)); + if (!desc) { + syslog(LOG_ERR, "malloc failed for desc buffer"); + ret = -ENOMEM; + goto exit; + } + + fcopy_get_first_folder(FCOPY_UIO_PATH, uio_name); snprintf(uio_dev_path, sizeof(uio_dev_path), "/dev/%s", uio_name); fcopy_fd = open(uio_dev_path, O_RDWR); @@ -445,17 +505,17 @@ int main(int argc, char *argv[]) syslog(LOG_ERR, "open %s failed; error: %d %s", uio_dev_path, errno, strerror(errno)); ret = fcopy_fd; - goto exit; + goto free_desc; } - ring = vmbus_uio_map(&fcopy_fd, HV_RING_SIZE); + ring = vmbus_uio_map(&fcopy_fd, ring_size); if (!ring) { ret = errno; syslog(LOG_ERR, "mmap ringbuffer failed; error: %d %s", ret, strerror(ret)); goto close; } - vmbus_br_setup(&txbr, ring, HV_RING_SIZE); - vmbus_br_setup(&rxbr, (char *)ring + HV_RING_SIZE, HV_RING_SIZE); + vmbus_br_setup(&txbr, ring, ring_size); + vmbus_br_setup(&rxbr, (char *)ring + ring_size, ring_size); rxbr.vbr->imask = 0; @@ -472,7 +532,7 @@ int main(int argc, char *argv[]) goto close; } - len = HV_RING_SIZE; + len = ring_size; ret = rte_vmbus_chan_recv_raw(&rxbr, desc, &len); if (unlikely(ret <= 0)) { /* This indicates a failure to communicate (or worse) */ @@ -492,6 +552,8 @@ int main(int argc, char *argv[]) } close: close(fcopy_fd); +free_desc: + free(desc); exit: return ret; } diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index 5a37ccbec54f..f61a01dd7eb7 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -18,6 +18,7 @@ static inline const char *kallsyms_lookup(unsigned long addr, return NULL; } +#ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #include <stdlib.h> static inline void print_ip_sym(const char *loglvl, unsigned long ip) @@ -30,5 +31,8 @@ static inline void print_ip_sym(const char *loglvl, unsigned long ip) free(name); } +#else +static inline void print_ip_sym(const char *loglvl, unsigned long ip) {} +#endif #endif diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 52e353368f58..d41ee26b9443 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -735,7 +735,7 @@ struct bpf_object { struct usdt_manager *usdt_man; - struct bpf_map *arena_map; + int arena_map_idx; void *arena_data; size_t arena_data_sz; @@ -1517,6 +1517,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.btf_maps_shndx = -1; obj->kconfig_map_idx = -1; + obj->arena_map_idx = -1; obj->kern_version = get_kernel_version(); obj->state = OBJ_OPEN; @@ -2964,7 +2965,7 @@ static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, const long page_sz = sysconf(_SC_PAGE_SIZE); size_t mmap_sz; - mmap_sz = bpf_map_mmap_sz(obj->arena_map); + mmap_sz = bpf_map_mmap_sz(map); if (roundup(data_sz, page_sz) > mmap_sz) { pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", sec_name, mmap_sz, data_sz); @@ -3038,12 +3039,12 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, if (map->def.type != BPF_MAP_TYPE_ARENA) continue; - if (obj->arena_map) { + if (obj->arena_map_idx >= 0) { pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", - map->name, obj->arena_map->name); + map->name, obj->maps[obj->arena_map_idx].name); return -EINVAL; } - obj->arena_map = map; + obj->arena_map_idx = i; if (obj->efile.arena_data) { err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, @@ -3053,7 +3054,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return err; } } - if (obj->efile.arena_data && !obj->arena_map) { + if (obj->efile.arena_data && obj->arena_map_idx < 0) { pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n", ARENA_SEC); return -ENOENT; @@ -4583,8 +4584,13 @@ static int bpf_program__record_reloc(struct bpf_program *prog, if (shdr_idx == obj->efile.arena_data_shndx) { reloc_desc->type = RELO_DATA; reloc_desc->insn_idx = insn_idx; - reloc_desc->map_idx = obj->arena_map - obj->maps; + reloc_desc->map_idx = obj->arena_map_idx; reloc_desc->sym_off = sym->st_value; + + map = &obj->maps[obj->arena_map_idx]; + pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n", + prog->name, obj->arena_map_idx, map->name, map->sec_idx, + map->sec_offset, insn_idx); return 0; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index d967ac001498..67d76f3a1dce 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -224,6 +224,7 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking14panic_explicit") || str_ends_with(func->name, "_4core9panicking14panic_nounwind") || str_ends_with(func->name, "_4core9panicking18panic_bounds_check") || + str_ends_with(func->name, "_4core9panicking18panic_nounwind_fmt") || str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c index 8100509e561b..0ffa01d54ce2 100644 --- a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c @@ -149,3 +149,70 @@ close_prog: fentry_recursive_target__destroy(target_skel); fentry_recursive__destroy(tracing_skel); } + +static void *fentry_target_test_run(void *arg) +{ + for (;;) { + int prog_fd = __atomic_load_n((int *)arg, __ATOMIC_SEQ_CST); + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err; + + if (prog_fd == -1) + break; + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "fentry_target test_run")) + break; + } + + return NULL; +} + +void test_fentry_attach_stress(void) +{ + struct fentry_recursive_target *target_skel = NULL; + struct fentry_recursive *tracing_skel = NULL; + struct bpf_program *prog; + int err, i, tgt_prog_fd; + pthread_t thread; + + target_skel = fentry_recursive_target__open_and_load(); + if (!ASSERT_OK_PTR(target_skel, + "fentry_recursive_target__open_and_load")) + goto close_prog; + tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target); + err = pthread_create(&thread, NULL, + fentry_target_test_run, &tgt_prog_fd); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto close_prog; + + for (i = 0; i < 1000; i++) { + tracing_skel = fentry_recursive__open(); + if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open")) + goto stop_thread; + + prog = tracing_skel->progs.recursive_attach; + err = bpf_program__set_attach_target(prog, tgt_prog_fd, + "fentry_target"); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto stop_thread; + + err = fentry_recursive__load(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__load")) + goto stop_thread; + + err = fentry_recursive__attach(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__attach")) + goto stop_thread; + + fentry_recursive__destroy(tracing_skel); + tracing_skel = NULL; + } + +stop_thread: + __atomic_store_n(&tgt_prog_fd, -1, __ATOMIC_SEQ_CST); + err = pthread_join(thread, NULL); + ASSERT_OK(err, "pthread_join"); +close_prog: + fentry_recursive__destroy(tracing_skel); + fentry_recursive_target__destroy(target_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c index 4be6fdb78c6a..594441acb707 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -116,6 +116,8 @@ static void test_snprintf_negative(void) ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7"); ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character"); ASSERT_ERR(load_single_snprintf("\x1"), "non printable character"); + ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8"); + ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9"); } void test_snprintf(void) diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index d9c10613ae67..44151b7b1a24 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +import re import time from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen @@ -10,12 +11,11 @@ class GenerateTraffic: self.env = env - if port is None: - port = rand_port() - self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True) - wait_port_listen(port) + self.port = rand_port() if port is None else port + self._iperf_server = cmd(f"iperf3 -s -1 -p {self.port}", background=True) + wait_port_listen(self.port) time.sleep(0.1) - self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400", + self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {self.port} -t 86400", background=True, host=env.remote) # Wait for traffic to ramp up @@ -56,3 +56,16 @@ class GenerateTraffic: ksft_pr(">> Server:") ksft_pr(self._iperf_server.stdout) ksft_pr(self._iperf_server.stderr) + self._wait_client_stopped() + + def _wait_client_stopped(self, sleep=0.005, timeout=5): + end = time.monotonic() + timeout + + live_port_pattern = re.compile(fr":{self.port:04X} 0[^6] ") + + while time.monotonic() < end: + data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout + if not live_port_pattern.search(data): + return + time.sleep(sleep) + raise Exception(f"Waiting for client to stop timed out after {timeout}s") diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h index ea79662405bc..1f625b39948a 100644 --- a/tools/testing/selftests/futex/include/futex2test.h +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -4,6 +4,7 @@ * * Copyright 2021 Collabora Ltd. */ +#include <linux/time_types.h> #include <stdint.h> #define u64_to_ptr(x) ((void *)(uintptr_t)(x)) @@ -65,7 +66,12 @@ struct futex32_numa { static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters, unsigned long flags, struct timespec *timo, clockid_t clockid) { - return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid); + struct __kernel_timespec ts = { + .tv_sec = timo->tv_sec, + .tv_nsec = timo->tv_nsec, + }; + + return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &ts, clockid); } /* diff --git a/tools/testing/selftests/hid/tests/test_mouse.py b/tools/testing/selftests/hid/tests/test_mouse.py index 66daf7e5975c..eb4e15a0e53b 100644 --- a/tools/testing/selftests/hid/tests/test_mouse.py +++ b/tools/testing/selftests/hid/tests/test_mouse.py @@ -439,6 +439,68 @@ class BadResolutionMultiplierMouse(ResolutionMultiplierMouse): return 32 # EPIPE +class BadReportDescriptorMouse(BaseMouse): + """ + This "device" was one autogenerated by syzbot. There are a lot of issues in + it, and the most problematic is that it declares features that have no + size. + + This leads to report->size being set to 0 and can mess up with usbhid + internals. Fortunately, uhid merely passes the incoming buffer, without + touching it so a buffer of size 0 will be translated to [] without + triggering a kernel oops. + + Because the report descriptor is wrong, no input are created, and we need + to tweak a little bit the parameters to make it look correct. + """ + + # fmt: off + report_descriptor = [ + 0x96, 0x01, 0x00, # Report Count (1) 0 + 0x06, 0x01, 0x00, # Usage Page (Generic Desktop) 3 + # 0x03, 0x00, 0x00, 0x00, 0x00, # Ignored by the kernel somehow + 0x2a, 0x90, 0xa0, # Usage Maximum (41104) 6 + 0x27, 0x00, 0x00, 0x00, 0x00, # Logical Maximum (0) 9 + 0xb3, 0x81, 0x3e, 0x25, 0x03, # Feature (Cnst,Arr,Abs,Vol) 14 + 0x1b, 0xdd, 0xe8, 0x40, 0x50, # Usage Minimum (1346431197) 19 + 0x3b, 0x5d, 0x8c, 0x3d, 0xda, # Designator Index 24 + ] + # fmt: on + + def __init__( + self, rdesc=report_descriptor, name=None, input_info=(3, 0x045E, 0x07DA) + ): + super().__init__(rdesc, name, input_info) + self.high_resolution_report_called = False + + def get_evdev(self, application=None): + assert self._input_nodes is None + return ( + "Ok" # should be a list or None, but both would fail, so abusing the system + ) + + def next_sync_events(self, application=None): + # there are no evdev nodes, so no events + return [] + + def is_ready(self): + # we wait for the SET_REPORT command to come + return self.high_resolution_report_called + + def set_report(self, req, rnum, rtype, data): + if rtype != self.UHID_FEATURE_REPORT: + raise InvalidHIDCommunication(f"Unexpected report type: {rtype}") + if rnum != 0x0: + raise InvalidHIDCommunication(f"Unexpected report number: {rnum}") + + if len(data) != 1: + raise InvalidHIDCommunication(f"Unexpected data: {data}, expected '[0]'") + + self.high_resolution_report_called = True + + return 0 + + class ResolutionMultiplierHWheelMouse(TwoWheelMouse): # fmt: off report_descriptor = [ @@ -975,3 +1037,11 @@ class TestMiMouse(TestWheelMouse): # assert below print out the real error pass assert remaining == [] + + +class TestBadReportDescriptorMouse(base.BaseTestCase.TestUhid): + def create_device(self): + return BadReportDescriptorMouse() + + def assertName(self, uhdev): + pass diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 390ae2d87493..0eb371c62ab8 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -74,6 +74,7 @@ int main(int argc, char *argv[]) int testcase; char test[80]; + TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); ksft_print_header(); diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index aa7400ed0e99..f0d9c035641d 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -31,6 +31,7 @@ uint64_t pmd_pagesize; #define INPUT_MAX 80 #define PID_FMT "%d,0x%lx,0x%lx,%d" +#define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d" #define PATH_FMT "%s,0x%lx,0x%lx,%d" #define PFN_MASK ((1UL<<55)-1) @@ -483,7 +484,7 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); else - write_debugfs(PID_FMT, getpid(), (uint64_t)addr, + write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order, offset); for (i = 0; i < fd_size; i++) diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh index 5b34f6e1f831..48eb999a3120 100755 --- a/tools/testing/selftests/net/gre_ipv6_lladdr.sh +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -24,7 +24,10 @@ setup_basenet() ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad } -# Check if network device has an IPv6 link-local address assigned. +# Check the IPv6 configuration of a network device. +# +# We currently check the generation of the link-local IPv6 address and the +# creation of the ff00::/8 multicast route. # # Parameters: # @@ -35,7 +38,7 @@ setup_basenet() # a link-local address) # * $4: The user visible name for the scenario being tested # -check_ipv6_ll_addr() +check_ipv6_device_config() { local DEV="$1" local EXTRA_MATCH="$2" @@ -45,7 +48,11 @@ check_ipv6_ll_addr() RET=0 set +e ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" - check_err_fail "${XRET}" $? "" + check_err_fail "${XRET}" $? "IPv6 link-local address generation" + + ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel | grep -q "${DEV}" + check_err_fail 0 $? "IPv6 multicast route creation" + log_test "${MSG}" set -e } @@ -102,20 +109,20 @@ test_gre_device() ;; esac - # Check that IPv6 link-local address is generated when device goes up + # Check the IPv6 device configuration when it goes up ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" ip -netns "${NS0}" link set dev gretest up - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" # Now disable link-local address generation ip -netns "${NS0}" link set dev gretest down ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 ip -netns "${NS0}" link set dev gretest up - # Check that link-local address generation works when re-enabled while - # the device is already up + # Check the IPv6 device configuration when link-local address + # generation is re-enabled while the device is already up ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" ip -netns "${NS0}" link del dev gretest } @@ -126,7 +133,7 @@ test_gre4() local MODE for GRE_TYPE in "gre" "gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" for MODE in "eui64" "none" "stable-privacy" "random"; do test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" @@ -142,7 +149,7 @@ test_gre6() local MODE for GRE_TYPE in "ip6gre" "ip6gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" for MODE in "eui64" "none" "stable-privacy" "random"; do test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 006fdadcc4b9..86a216e9aca8 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -312,7 +312,7 @@ log_test_result() local test_name=$1; shift local opt_str=$1; shift local result=$1; shift - local retmsg=$1; shift + local retmsg=$1 printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" if [[ $retmsg ]]; then diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index e47788bfa671..4c7e51336ab2 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -4,7 +4,8 @@ top_srcdir = ../../../../.. CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ +TEST_PROGS := mptcp_connect.sh mptcp_connect_mmap.sh mptcp_connect_sendfile.sh \ + mptcp_connect_checksum.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh new file mode 100755 index 000000000000..ce93ec2f107f --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -C "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh new file mode 100755 index 000000000000..5dd30f9394af --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh new file mode 100755 index 000000000000..1d16fb1cc9bb --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}" diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore index 64c4f8d9aa6c..5d2be9a00627 100644 --- a/tools/testing/selftests/net/netfilter/.gitignore +++ b/tools/testing/selftests/net/netfilter/.gitignore @@ -5,3 +5,4 @@ conntrack_dump_flush conntrack_reverse_clash sctp_collision nf_queue +udpclash diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index e9b2f553588d..a98ed892f55f 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -15,6 +15,7 @@ TEST_PROGS += conntrack_tcp_unreplied.sh TEST_PROGS += conntrack_resize.sh TEST_PROGS += conntrack_sctp_collision.sh TEST_PROGS += conntrack_vrf.sh +TEST_PROGS += conntrack_clash.sh TEST_PROGS += conntrack_reverse_clash.sh TEST_PROGS += ipvs.sh TEST_PROGS += nf_conntrack_packetdrill.sh @@ -44,6 +45,7 @@ TEST_GEN_FILES += connect_close nf_queue TEST_GEN_FILES += conntrack_dump_flush TEST_GEN_FILES += conntrack_reverse_clash TEST_GEN_FILES += sctp_collision +TEST_GEN_FILES += udpclash include ../../lib.mk @@ -52,6 +54,7 @@ $(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS) $(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS) $(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS) +$(OUTPUT)/udpclash: LDLIBS += -lpthread TEST_FILES := lib.sh TEST_FILES += packetdrill diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh new file mode 100755 index 000000000000..606a43a60f73 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -0,0 +1,174 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +clash_resolution_active=0 +dport=22111 +ret=0 + +cleanup() +{ + # netns cleanup also zaps any remaining socat echo server. + cleanup_all_ns +} + +checktool "nft --version" "run test without nft" +checktool "conntrack --version" "run test without conntrack" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT + +setup_ns nsclient1 nsclient2 nsrouter + +ip netns exec "$nsrouter" nft -f -<<EOF +table ip t { + chain lb { + meta l4proto udp dnat to numgen random mod 3 map { 0 : 10.0.2.1 . 9000, 1 : 10.0.2.1 . 9001, 2 : 10.0.2.1 . 9002 } + } + + chain prerouting { + type nat hook prerouting priority dstnat + + udp dport $dport counter jump lb + } + + chain output { + type nat hook output priority dstnat + + udp dport $dport counter jump lb + } +} +EOF + +load_simple_ruleset() +{ +ip netns exec "$1" nft -f -<<EOF +table ip t { + chain forward { + type filter hook forward priority 0 + + ct state new counter + } +} +EOF +} + +spawn_servers() +{ + local ns="$1" + local ports="9000 9001 9002" + + for port in $ports; do + ip netns exec "$ns" socat UDP-RECVFROM:$port,fork PIPE 2>/dev/null & + done + + for port in $ports; do + wait_local_port_listen "$ns" $port udp + done +} + +add_addr() +{ + local ns="$1" + local dev="$2" + local i="$3" + local j="$4" + + ip -net "$ns" link set "$dev" up + ip -net "$ns" addr add "10.0.$i.$j/24" dev "$dev" +} + +ping_test() +{ + local ns="$1" + local daddr="$2" + + if ! ip netns exec "$ns" ping -q -c 1 $daddr > /dev/null;then + echo "FAIL: ping from $ns to $daddr" + exit 1 + fi +} + +run_one_clash_test() +{ + local ns="$1" + local ctns="$2" + local daddr="$3" + local dport="$4" + local entries + local cre + + if ! ip netns exec "$ns" ./udpclash $daddr $dport;then + echo "INFO: did not receive expected number of replies for $daddr:$dport" + ip netns exec "$ctns" conntrack -S + # don't fail: check if clash resolution triggered after all. + fi + + entries=$(ip netns exec "$ctns" conntrack -S | wc -l) + cre=$(ip netns exec "$ctns" conntrack -S | grep "clash_resolve=0" | wc -l) + + if [ "$cre" -ne "$entries" ];then + clash_resolution_active=1 + return 0 + fi + + # not a failure: clash resolution logic did not trigger. + # With right timing, xmit completed sequentially and + # no parallel insertion occurs. + return $ksft_skip +} + +run_clash_test() +{ + local ns="$1" + local ctns="$2" + local daddr="$3" + local dport="$4" + local softerr=0 + + for i in $(seq 1 10);do + run_one_clash_test "$ns" "$ctns" "$daddr" "$dport" + local rv=$? + if [ $rv -eq 0 ];then + echo "PASS: clash resolution test for $daddr:$dport on attempt $i" + return 0 + elif [ $rv -eq $ksft_skip ]; then + softerr=1 + fi + done + + [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger" +} + +ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter" +ip link add veth0 netns "$nsclient2" type veth peer name veth1 netns "$nsrouter" +add_addr "$nsclient1" veth0 1 1 +add_addr "$nsclient2" veth0 2 1 +add_addr "$nsrouter" veth0 1 99 +add_addr "$nsrouter" veth1 2 99 + +ip -net "$nsclient1" route add default via 10.0.1.99 +ip -net "$nsclient2" route add default via 10.0.2.99 +ip netns exec "$nsrouter" sysctl -q net.ipv4.ip_forward=1 + +ping_test "$nsclient1" 10.0.1.99 +ping_test "$nsclient1" 10.0.2.1 +ping_test "$nsclient2" 10.0.1.1 + +spawn_servers "$nsclient2" + +# exercise clash resolution with nat: +# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2,3}. +run_clash_test "$nsclient1" "$nsrouter" 10.0.1.99 "$dport" + +# exercise clash resolution without nat. +load_simple_ruleset "$nsclient2" +run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001 + +if [ $clash_resolution_active -eq 0 ];then + [ "$ret" -eq 0 ] && ret=$ksft_skip + echo "SKIP: Clash resolution did not trigger" +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh index 9e033e80219e..788cd56ea4a0 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -12,6 +12,9 @@ tmpfile="" tmpfile_proc="" tmpfile_uniq="" ret=0 +have_socat=0 + +socat -h > /dev/null && have_socat=1 insert_count=2000 [ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400 @@ -123,7 +126,7 @@ ctflush() { done } -ctflood() +ct_pingflood() { local ns="$1" local duration="$2" @@ -152,6 +155,44 @@ ctflood() wait } +ct_udpflood() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + [ $have_socat -ne "1" ] && return + + while [ $now -lt $end ]; do +ip netns exec "$ns" bash<<"EOF" + for i in $(seq 1 100);do + dport=$(((RANDOM%65536)+1)) + + echo bar | socat -u STDIN UDP:"127.0.0.1:$dport" & + done > /dev/null 2>&1 + wait +EOF + now=$(date +%s) + done +} + +ct_udpclash() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + [ -x udpclash ] || return + + while [ $now -lt $end ]; do + ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 + + now=$(date +%s) + done +} + # dump to /dev/null. We don't want dumps to cause infinite loops # or use-after-free even when conntrack table is altered while dumps # are in progress. @@ -169,6 +210,48 @@ ct_nulldump() wait } +ct_nulldump_loop() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + ct_nulldump "$ns" + sleep $((RANDOM%2)) + now=$(date +%s) + done +} + +change_timeouts() +{ + local ns="$1" + local r1=$((RANDOM%2)) + local r2=$((RANDOM%2)) + + [ "$r1" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5)) + [ "$r2" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5)) +} + +ct_change_timeouts_loop() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + change_timeouts "$ns" + sleep $((RANDOM%2)) + now=$(date +%s) + done + + # restore defaults + ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=30 + ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=30 +} + check_taint() { local tainted_then="$1" @@ -198,10 +281,14 @@ insert_flood() r=$((RANDOM%$insert_count)) - ctflood "$n" "$timeout" "floodresize" & + ct_pingflood "$n" "$timeout" "floodresize" & + ct_udpflood "$n" "$timeout" & + ct_udpclash "$n" "$timeout" & + insert_ctnetlink "$n" "$r" & ctflush "$n" "$timeout" & - ct_nulldump "$n" & + ct_nulldump_loop "$n" "$timeout" & + ct_change_timeouts_loop "$n" "$timeout" & wait } @@ -306,7 +393,7 @@ test_dump_all() ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600 - ctflood "$nsclient1" $timeout "dumpall" & + ct_pingflood "$nsclient1" $timeout "dumpall" & insert_ctnetlink "$nsclient2" $insert_count wait @@ -368,7 +455,7 @@ test_conntrack_disable() ct_flush_once "$nsclient1" ct_flush_once "$nsclient2" - ctflood "$nsclient1" "$timeout" "conntrack disable" + ct_pingflood "$nsclient1" "$timeout" "conntrack disable" ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1 # Disabled, should not have picked up any connection. diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index cd12b8b5ac0e..20e76b395c85 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -1311,6 +1311,9 @@ maybe_send_match() { # - remove some elements, check that packets don't match anymore test_correctness_main() { range_size=1 + + send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1 + for i in $(seq "${start}" $((start + count))); do local elem="" diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c new file mode 100644 index 000000000000..85c7b906ad08 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/udpclash.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Usage: ./udpclash <IP> <PORT> + * + * Emit THREAD_COUNT UDP packets sharing the same saddr:daddr pair. + * + * This mimics DNS resolver libraries that emit A and AAAA requests + * in parallel. + * + * This exercises conntrack clash resolution logic added and later + * refined in + * + * 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race") + * ed07d9a021df ("netfilter: nf_conntrack: resolve clash for matching conntracks") + * 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries") + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <pthread.h> + +#define THREAD_COUNT 128 + +struct thread_args { + const struct sockaddr_in *si_remote; + int sockfd; +}; + +static int wait = 1; + +static void *thread_main(void *varg) +{ + const struct sockaddr_in *si_remote; + const struct thread_args *args = varg; + static const char msg[] = "foo"; + + si_remote = args->si_remote; + + while (wait == 1) + ; + + if (sendto(args->sockfd, msg, strlen(msg), MSG_NOSIGNAL, + (struct sockaddr *)si_remote, sizeof(*si_remote)) < 0) + exit(111); + + return varg; +} + +static int run_test(int fd, const struct sockaddr_in *si_remote) +{ + struct thread_args thread_args = { + .si_remote = si_remote, + .sockfd = fd, + }; + pthread_t *tid = calloc(THREAD_COUNT, sizeof(pthread_t)); + unsigned int repl_count = 0, timeout = 0; + int i; + + if (!tid) { + perror("calloc"); + return 1; + } + + for (i = 0; i < THREAD_COUNT; i++) { + int err = pthread_create(&tid[i], NULL, &thread_main, &thread_args); + + if (err != 0) { + perror("pthread_create"); + exit(1); + } + } + + wait = 0; + + for (i = 0; i < THREAD_COUNT; i++) + pthread_join(tid[i], NULL); + + while (repl_count < THREAD_COUNT) { + struct sockaddr_in si_repl; + socklen_t si_repl_len = sizeof(si_repl); + char repl[512]; + ssize_t ret; + + ret = recvfrom(fd, repl, sizeof(repl), MSG_NOSIGNAL, + (struct sockaddr *) &si_repl, &si_repl_len); + if (ret < 0) { + if (timeout++ > 5000) { + fputs("timed out while waiting for reply from thread\n", stderr); + break; + } + + /* give reply time to pass though the stack */ + usleep(1000); + continue; + } + + if (si_repl_len != sizeof(*si_remote)) { + fprintf(stderr, "warning: reply has unexpected repl_len %d vs %d\n", + (int)si_repl_len, (int)sizeof(si_repl)); + } else if (si_remote->sin_addr.s_addr != si_repl.sin_addr.s_addr || + si_remote->sin_port != si_repl.sin_port) { + char a[64], b[64]; + + inet_ntop(AF_INET, &si_remote->sin_addr, a, sizeof(a)); + inet_ntop(AF_INET, &si_repl.sin_addr, b, sizeof(b)); + + fprintf(stderr, "reply from wrong source: want %s:%d got %s:%d\n", + a, ntohs(si_remote->sin_port), b, ntohs(si_repl.sin_port)); + } + + repl_count++; + } + + printf("got %d of %d replies\n", repl_count, THREAD_COUNT); + + free(tid); + + return repl_count == THREAD_COUNT ? 0 : 1; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_in si_local = { + .sin_family = AF_INET, + }; + struct sockaddr_in si_remote = { + .sin_family = AF_INET, + }; + int fd, ret; + + if (argc < 3) { + fputs("Usage: send_udp <daddr> <dport>\n", stderr); + return 1; + } + + si_remote.sin_port = htons(atoi(argv[2])); + si_remote.sin_addr.s_addr = inet_addr(argv[1]); + + fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_UDP); + if (fd < 0) { + perror("socket"); + return 1; + } + + if (bind(fd, (struct sockaddr *)&si_local, sizeof(si_local)) < 0) { + perror("bind"); + return 1; + } + + ret = run_test(fd, &si_remote); + + close(fd); + + return ret; +} diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt new file mode 100644 index 000000000000..09aabc775e80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + +// Test that a not-yet-accepted socket does not change +// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets. + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + +0 < . 1:1001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001> + +0 < . 1001:2001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 101001 + + +0 accept(3, ..., ...) = 4 + + +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }% + + +0 close(4) = 0 + +0 close(3) = 0 + +// Test that ooo packets for accepted sockets do increase sk_rcvbuf + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + + +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }% + diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 1dc337c709f8..b17e032a6d75 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -48,7 +48,7 @@ run_one() { cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} & local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp @@ -95,7 +95,7 @@ run_one_nat() { # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & local PID1=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${family} -b ${addr2%/*} ${rx_args} & local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp @@ -117,9 +117,9 @@ run_one_2sock() { cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} -p 12345 & local PID1=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 100 ${rx_args} & local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh index 7bc804ffaf7c..0fb56baf28e4 100755 --- a/tools/testing/selftests/net/vlan_hw_filter.sh +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -3,27 +3,101 @@ readonly NETNS="ns-$(mktemp -u XXXXXX)" +ALL_TESTS=" + test_vlan_filter_check + test_vlan0_del_crash_01 + test_vlan0_del_crash_02 + test_vlan0_del_crash_03 + test_vid0_memleak +" + ret=0 +setup() { + ip netns add ${NETNS} +} + cleanup() { - ip netns del $NETNS + ip netns del $NETNS 2>/dev/null } trap cleanup EXIT fail() { - echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 - ret=1 + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + ret=1 +} + +tests_run() +{ + local current_test + for current_test in ${TESTS:-$ALL_TESTS}; do + $current_test + done +} + +test_vlan_filter_check() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 + ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 + ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 + ip netns exec ${NETNS} ip link set bond_slave_1 nomaster + ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" + cleanup } -ip netns add ${NETNS} -ip netns exec ${NETNS} ip link add bond0 type bond mode 0 -ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 -ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 -ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off -ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 -ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 -ip netns exec ${NETNS} ip link set bond_slave_1 nomaster -ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" +#enable vlan_filter feature of real_dev with vlan0 during running time +test_vlan0_del_crash_01() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +#enable vlan_filter feature and add vlan0 for real_dev during running time +test_vlan0_del_crash_02() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +#enable vlan_filter feature of real_dev during running time +#test kernel_bug of vlan unregister +test_vlan0_del_crash_03() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +test_vid0_memleak() { + setup + ip netns exec ${NETNS} ip link add bond0 up type bond mode 0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link del dev bond0 || fail "Please check vlan HW filter function" + cleanup +} +tests_run exit $ret diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c index 9451782689de..ee25824b1cbe 100644 --- a/tools/testing/selftests/sched_ext/exit.c +++ b/tools/testing/selftests/sched_ext/exit.c @@ -22,6 +22,14 @@ static enum scx_test_status run(void *ctx) struct bpf_link *link; char buf[16]; + /* + * On single-CPU systems, ops.select_cpu() is never + * invoked, so skip this test to avoid getting stuck + * indefinitely. + */ + if (tc == EXIT_SELECT_CPU && libbpf_num_possible_cpus() == 1) + continue; + skel = exit__open(); SCX_ENUM_INIT(skel); skel->rodata->exit_point = tc; diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 9aa44d8176d9..c6db7fa94f55 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -128,6 +128,32 @@ ] }, { + "id": "5456", + "name": "Test htb_dequeue_tree with deactivation and row emptying", + "category": [ + "qdisc", + "htb" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: htb default 1", + "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 64bit ", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3: blackhole" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -j qdisc show dev $DUMMY", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] + }, + { "id": "c024", "name": "Test TBF with SKBPRIO - catch qlen corner cases", "category": [ @@ -635,5 +661,108 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "d74b", + "name": "Test use-after-free with DRR/NETEM/BLACKHOLE chain", + "category": [ + "qdisc", + "hfsc", + "drr", + "netem", + "blackhole" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DUMMY parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3: netem", + "$TC qdisc add dev $DUMMY parent 3:1 handle 4: blackhole", + "ping -c1 -W0.01 -I $DUMMY 10.10.11.11 || true", + "$TC class del dev $DUMMY classid 1:1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -j class ls dev $DUMMY classid 1:1", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr" + ] + }, + { + "id": "be28", + "name": "Try to add fq_codel qdisc as a child of an hhf qdisc", + "category": [ + "qdisc", + "fq_codel", + "hhf" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle a: hhf" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a: handle b: fq_codel", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] + }, + { + "id": "fcb5", + "name": "Try to add pie qdisc as a child of a drr qdisc", + "category": [ + "qdisc", + "pie", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle a: drr" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a: handle b: pie", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] + }, + { + "id": "7801", + "name": "Try to add fq qdisc as a child of an inexistent hfsc class", + "category": [ + "qdisc", + "sfq", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle a: hfsc" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a:fff2 sfq limit 4", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] } ] diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index eec82775c5bf..222f0e894a0c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2572,6 +2572,8 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT); if (r) goto out_unlock; + + cond_resched(); } kvm_handle_gfn_range(kvm, &pre_set_range); @@ -2580,6 +2582,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, r = xa_err(xa_store(&kvm->mem_attr_array, i, entry, GFP_KERNEL_ACCOUNT)); KVM_BUG_ON(r, kvm); + cond_resched(); } kvm_handle_gfn_range(kvm, &post_set_range); |