diff options
31 files changed, 783 insertions, 476 deletions
@@ -602,6 +602,12 @@ Paul Mackerras <paulus@ozlabs.org> <paulus@samba.org> Paul Mackerras <paulus@ozlabs.org> <paulus@au1.ibm.com> Paul Moore <paul@paul-moore.com> <paul.moore@hp.com> Paul Moore <paul@paul-moore.com> <pmoore@redhat.com> +Paulo Alcantara <pc@manguebit.org> <pcacjr@zytor.com> +Paulo Alcantara <pc@manguebit.org> <paulo@paulo.ac> +Paulo Alcantara <pc@manguebit.org> <pc@cjr.nz> +Paulo Alcantara <pc@manguebit.org> <palcantara@suse.de> +Paulo Alcantara <pc@manguebit.org> <palcantara@suse.com> +Paulo Alcantara <pc@manguebit.org> <pc@manguebit.com> Pavankumar Kondeti <quic_pkondeti@quicinc.com> <pkondeti@codeaurora.org> Peter A Jonsson <pj@ludd.ltu.se> Peter Oruba <peter.oruba@amd.com> diff --git a/Documentation/filesystems/smb/index.rst b/Documentation/filesystems/smb/index.rst index 1c8597a679ab..6df23b0e45c8 100644 --- a/Documentation/filesystems/smb/index.rst +++ b/Documentation/filesystems/smb/index.rst @@ -8,3 +8,4 @@ CIFS ksmbd cifsroot + smbdirect diff --git a/Documentation/filesystems/smb/smbdirect.rst b/Documentation/filesystems/smb/smbdirect.rst new file mode 100644 index 000000000000..ca6927c0b2c0 --- /dev/null +++ b/Documentation/filesystems/smb/smbdirect.rst @@ -0,0 +1,103 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +SMB Direct - SMB3 over RDMA +=========================== + +This document describes how to set up the Linux SMB client and server to +use RDMA. + +Overview +======== +The Linux SMB kernel client supports SMB Direct, which is a transport +scheme for SMB3 that uses RDMA (Remote Direct Memory Access) to provide +high throughput and low latencies by bypassing the traditional TCP/IP +stack. +SMB Direct on the Linux SMB client can be tested against KSMBD - a +kernel-space SMB server. + +Installation +============= +- Install an RDMA device. As long as the RDMA device driver is supported + by the kernel, it should work. This includes both software emulators (soft + RoCE, soft iWARP) and hardware devices (InfiniBand, RoCE, iWARP). + +- Install a kernel with SMB Direct support. The first kernel release to + support SMB Direct on both the client and server side is 5.15. Therefore, + a distribution compatible with kernel 5.15 or later is required. + +- Install cifs-utils, which provides the `mount.cifs` command to mount SMB + shares. + +- Configure the RDMA stack + + Make sure that your kernel configuration has RDMA support enabled. Under + Device Drivers -> Infiniband support, update the kernel configuration to + enable Infiniband support. + + Enable the appropriate IB HCA support or iWARP adapter support, + depending on your hardware. + + If you are using InfiniBand, enable IP-over-InfiniBand support. + + For soft RDMA, enable either the soft iWARP (`RDMA _SIW`) or soft RoCE + (`RDMA_RXE`) module. Install the `iproute2` package and use the + `rdma link add` command to load the module and create an + RDMA interface. + + e.g. if your local ethernet interface is `eth0`, you can use: + + .. code-block:: bash + + sudo rdma link add siw0 type siw netdev eth0 + +- Enable SMB Direct support for both the server and the client in the kernel + configuration. + + Server Setup + + .. code-block:: text + + Network File Systems ---> + <M> SMB3 server support + [*] Support for SMB Direct protocol + + Client Setup + + .. code-block:: text + + Network File Systems ---> + <M> SMB3 and CIFS support (advanced network filesystem) + [*] SMB Direct support + +- Build and install the kernel. SMB Direct support will be enabled in the + cifs.ko and ksmbd.ko modules. + +Setup and Usage +================ + +- Set up and start a KSMBD server as described in the `KSMBD documentation + <https://www.kernel.org/doc/Documentation/filesystems/smb/ksmbd.rst>`_. + Also add the "server multi channel support = yes" parameter to ksmbd.conf. + +- On the client, mount the share with `rdma` mount option to use SMB Direct + (specify a SMB version 3.0 or higher using `vers`). + + For example: + + .. code-block:: bash + + mount -t cifs //server/share /mnt/point -o vers=3.1.1,rdma + +- To verify that the mount is using SMB Direct, you can check dmesg for the + following log line after mounting: + + .. code-block:: text + + CIFS: VFS: RDMA transport established + + Or, verify `rdma` mount option for the share in `/proc/mounts`: + + .. code-block:: bash + + cat /proc/mounts | grep cifs diff --git a/MAINTAINERS b/MAINTAINERS index 5defb941c141..a92290fffa16 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5986,7 +5986,7 @@ X: drivers/clk/clkdev.c COMMON INTERNET FILE SYSTEM CLIENT (CIFS and SMB3) M: Steve French <sfrench@samba.org> M: Steve French <smfrench@gmail.com> -R: Paulo Alcantara <pc@manguebit.com> (DFS, global name space) +R: Paulo Alcantara <pc@manguebit.org> (DFS, global name space) R: Ronnie Sahlberg <ronniesahlberg@gmail.com> (directory leases, sparse files) R: Shyam Prasad N <sprasad@microsoft.com> (multichannel) R: Tom Talpey <tom@talpey.com> (RDMA, smbdirect) @@ -9280,7 +9280,7 @@ F: include/linux/iomap.h FILESYSTEMS [NETFS LIBRARY] M: David Howells <dhowells@redhat.com> -M: Paulo Alcantara <pc@manguebit.com> +M: Paulo Alcantara <pc@manguebit.org> L: netfs@lists.linux.dev L: linux-fsdevel@vger.kernel.org S: Supported diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 466283326630..741b229f0718 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2900,6 +2900,7 @@ static void intel_pmu_config_acr(int idx, u64 mask, u32 reload) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int msr_b, msr_c; + int msr_offset; if (!mask && !cpuc->acr_cfg_b[idx]) return; @@ -2907,19 +2908,20 @@ static void intel_pmu_config_acr(int idx, u64 mask, u32 reload) if (idx < INTEL_PMC_IDX_FIXED) { msr_b = MSR_IA32_PMC_V6_GP0_CFG_B; msr_c = MSR_IA32_PMC_V6_GP0_CFG_C; + msr_offset = x86_pmu.addr_offset(idx, false); } else { msr_b = MSR_IA32_PMC_V6_FX0_CFG_B; msr_c = MSR_IA32_PMC_V6_FX0_CFG_C; - idx -= INTEL_PMC_IDX_FIXED; + msr_offset = x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false); } if (cpuc->acr_cfg_b[idx] != mask) { - wrmsrl(msr_b + x86_pmu.addr_offset(idx, false), mask); + wrmsrl(msr_b + msr_offset, mask); cpuc->acr_cfg_b[idx] = mask; } /* Only need to update the reload value when there is a valid config value. */ if (mask && cpuc->acr_cfg_c[idx] != reload) { - wrmsrl(msr_c + x86_pmu.addr_offset(idx, false), reload); + wrmsrl(msr_c + msr_offset, reload); cpuc->acr_cfg_c[idx] = reload; } } diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index 0454d5e60e5d..721b408d9a67 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -44,16 +44,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_after_save, TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_before_restore, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - -DEFINE_EVENT(x86_fpu, x86_fpu_after_restore, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) @@ -64,11 +54,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_init_state, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_dropped, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 6290dd120f5e..ff40f09ad911 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -33,8 +33,9 @@ void io_bitmap_share(struct task_struct *tsk) set_tsk_thread_flag(tsk, TIF_IO_BITMAP); } -static void task_update_io_bitmap(struct task_struct *tsk) +static void task_update_io_bitmap(void) { + struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; if (t->iopl_emul == 3 || t->io_bitmap) { @@ -54,7 +55,12 @@ void io_bitmap_exit(struct task_struct *tsk) struct io_bitmap *iobm = tsk->thread.io_bitmap; tsk->thread.io_bitmap = NULL; - task_update_io_bitmap(tsk); + /* + * Don't touch the TSS when invoked on a failed fork(). TSS + * reflects the state of @current and not the state of @tsk. + */ + if (tsk == current) + task_update_io_bitmap(); if (iobm && refcount_dec_and_test(&iobm->refcnt)) kfree(iobm); } @@ -192,8 +198,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) } t->iopl_emul = level; - task_update_io_bitmap(current); - + task_update_io_bitmap(); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c1d2dac72b9c..704883c21f3a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -176,6 +176,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame->ret_addr = (unsigned long) ret_from_fork_asm; p->thread.sp = (unsigned long) fork_frame; p->thread.io_bitmap = NULL; + clear_tsk_thread_flag(p, TIF_IO_BITMAP); p->thread.iopl_warn = 0; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); @@ -464,6 +465,11 @@ void native_tss_update_io_bitmap(void) } else { struct io_bitmap *iobm = t->io_bitmap; + if (WARN_ON_ONCE(!iobm)) { + clear_thread_flag(TIF_IO_BITMAP); + native_tss_invalidate_io_bitmap(); + } + /* * Only copy bitmap data when the sequence number differs. The * update time is accounted to the incoming task. diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index d7ba8795d60f..c05152733993 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -271,6 +271,7 @@ static bool pci_create_device_domain(struct pci_dev *pdev, const struct msi_doma /** * pci_setup_msi_device_domain - Setup a device MSI interrupt domain * @pdev: The PCI device to create the domain on + * @hwsize: The maximum number of MSI vectors * * Return: * True when: @@ -287,7 +288,7 @@ static bool pci_create_device_domain(struct pci_dev *pdev, const struct msi_doma * - The device is removed * - MSI is disabled and a MSI-X domain is created */ -bool pci_setup_msi_device_domain(struct pci_dev *pdev) +bool pci_setup_msi_device_domain(struct pci_dev *pdev, unsigned int hwsize) { if (WARN_ON_ONCE(pdev->msix_enabled)) return false; @@ -297,7 +298,7 @@ bool pci_setup_msi_device_domain(struct pci_dev *pdev) if (pci_match_device_domain(pdev, DOMAIN_BUS_PCI_DEVICE_MSIX)) msi_remove_device_irq_domain(&pdev->dev, MSI_DEFAULT_DOMAIN); - return pci_create_device_domain(pdev, &pci_msi_template, 1); + return pci_create_device_domain(pdev, &pci_msi_template, hwsize); } /** diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index d6ce04054702..6ede55a7c5e6 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -439,16 +439,16 @@ int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, if (nvec < minvec) return -ENOSPC; - if (nvec > maxvec) - nvec = maxvec; - rc = pci_setup_msi_context(dev); if (rc) return rc; - if (!pci_setup_msi_device_domain(dev)) + if (!pci_setup_msi_device_domain(dev, nvec)) return -ENODEV; + if (nvec > maxvec) + nvec = maxvec; + for (;;) { if (affd) { nvec = irq_calc_affinity_vectors(minvec, nvec, affd); diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h index fc70b601e942..0b420b319f50 100644 --- a/drivers/pci/msi/msi.h +++ b/drivers/pci/msi/msi.h @@ -107,7 +107,7 @@ enum support_mode { }; bool pci_msi_domain_supports(struct pci_dev *dev, unsigned int feature_mask, enum support_mode mode); -bool pci_setup_msi_device_domain(struct pci_dev *pdev); +bool pci_setup_msi_device_domain(struct pci_dev *pdev, unsigned int hwsize); bool pci_setup_msix_device_domain(struct pci_dev *pdev, unsigned int hwsize); /* Legacy (!IRQDOMAIN) fallbacks */ diff --git a/fs/namespace.c b/fs/namespace.c index 2f2e93927f46..e13d9ab4f564 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2410,7 +2410,7 @@ void drop_collected_mounts(struct vfsmount *mnt) namespace_unlock(); } -bool has_locked_children(struct mount *mnt, struct dentry *dentry) +static bool __has_locked_children(struct mount *mnt, struct dentry *dentry) { struct mount *child; @@ -2424,6 +2424,16 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry) return false; } +bool has_locked_children(struct mount *mnt, struct dentry *dentry) +{ + bool res; + + read_seqlock_excl(&mount_lock); + res = __has_locked_children(mnt, dentry); + read_sequnlock_excl(&mount_lock); + return res; +} + /* * Check that there aren't references to earlier/same mount namespaces in the * specified subtree. Such references can act as pins for mount namespaces @@ -2468,23 +2478,27 @@ struct vfsmount *clone_private_mount(const struct path *path) if (IS_MNT_UNBINDABLE(old_mnt)) return ERR_PTR(-EINVAL); - if (mnt_has_parent(old_mnt)) { - if (!check_mnt(old_mnt)) - return ERR_PTR(-EINVAL); - } else { - if (!is_mounted(&old_mnt->mnt)) - return ERR_PTR(-EINVAL); - - /* Make sure this isn't something purely kernel internal. */ - if (!is_anon_ns(old_mnt->mnt_ns)) + /* + * Make sure the source mount is acceptable. + * Anything mounted in our mount namespace is allowed. + * Otherwise, it must be the root of an anonymous mount + * namespace, and we need to make sure no namespace + * loops get created. + */ + if (!check_mnt(old_mnt)) { + if (!is_mounted(&old_mnt->mnt) || + !is_anon_ns(old_mnt->mnt_ns) || + mnt_has_parent(old_mnt)) return ERR_PTR(-EINVAL); - /* Make sure we don't create mount namespace loops. */ if (!check_for_nsfs_mounts(old_mnt)) return ERR_PTR(-EINVAL); } - if (has_locked_children(old_mnt, path->dentry)) + if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + if (__has_locked_children(old_mnt, path->dentry)) return ERR_PTR(-EINVAL); new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); @@ -2930,6 +2944,10 @@ static int do_change_type(struct path *path, int ms_flags) return -EINVAL; namespace_lock(); + if (!check_mnt(mnt)) { + err = -EINVAL; + goto out_unlock; + } if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) @@ -3021,7 +3039,7 @@ static struct mount *__do_loopback(struct path *old_path, int recurse) if (!may_copy_tree(old_path)) return mnt; - if (!recurse && has_locked_children(old, old_path->dentry)) + if (!recurse && __has_locked_children(old, old_path->dentry)) return mnt; if (recurse) @@ -3414,7 +3432,7 @@ static int do_set_group(struct path *from_path, struct path *to_path) goto out; /* From mount should not have locked children in place of To's root */ - if (has_locked_children(from, to->mnt.mnt_root)) + if (__has_locked_children(from, to->mnt.mnt_root)) goto out; /* Setting sharing groups is only allowed on private mounts */ @@ -3428,7 +3446,7 @@ static int do_set_group(struct path *from_path, struct path *to_path) if (IS_MNT_SLAVE(from)) { struct mount *m = from->mnt_master; - list_add(&to->mnt_slave, &m->mnt_slave_list); + list_add(&to->mnt_slave, &from->mnt_slave); to->mnt_master = m; } @@ -3453,18 +3471,25 @@ out: * Check if path is overmounted, i.e., if there's a mount on top of * @path->mnt with @path->dentry as mountpoint. * - * Context: This function expects namespace_lock() to be held. + * Context: namespace_sem must be held at least shared. + * MUST NOT be called under lock_mount_hash() (there one should just + * call __lookup_mnt() and check if it returns NULL). * Return: If path is overmounted true is returned, false if not. */ static inline bool path_overmounted(const struct path *path) { + unsigned seq = read_seqbegin(&mount_lock); + bool no_child; + rcu_read_lock(); - if (unlikely(__lookup_mnt(path->mnt, path->dentry))) { - rcu_read_unlock(); - return true; - } + no_child = !__lookup_mnt(path->mnt, path->dentry); rcu_read_unlock(); - return false; + if (need_seqretry(&mount_lock, seq)) { + read_seqlock_excl(&mount_lock); + no_child = !__lookup_mnt(path->mnt, path->dentry); + read_sequnlock_excl(&mount_lock); + } + return unlikely(!no_child); } /** @@ -3623,37 +3648,41 @@ static int do_move_mount(struct path *old_path, ns = old->mnt_ns; err = -EINVAL; - if (!may_use_mount(p)) - goto out; - /* The thing moved must be mounted... */ if (!is_mounted(&old->mnt)) goto out; - /* ... and either ours or the root of anon namespace */ - if (!(attached ? check_mnt(old) : is_anon_ns(ns))) - goto out; - - if (is_anon_ns(ns) && ns == p->mnt_ns) { + if (check_mnt(old)) { + /* if the source is in our namespace... */ + /* ... it should be detachable from parent */ + if (!mnt_has_parent(old) || IS_MNT_LOCKED(old)) + goto out; + /* ... and the target should be in our namespace */ + if (!check_mnt(p)) + goto out; + } else { /* - * Ending up with two files referring to the root of the - * same anonymous mount namespace would cause an error - * as this would mean trying to move the same mount - * twice into the mount tree which would be rejected - * later. But be explicit about it right here. + * otherwise the source must be the root of some anon namespace. + * AV: check for mount being root of an anon namespace is worth + * an inlined predicate... */ - goto out; - } else if (is_anon_ns(p->mnt_ns)) { + if (!is_anon_ns(ns) || mnt_has_parent(old)) + goto out; /* - * Don't allow moving an attached mount tree to an - * anonymous mount tree. + * Bail out early if the target is within the same namespace - + * subsequent checks would've rejected that, but they lose + * some corner cases if we check it early. */ - goto out; + if (ns == p->mnt_ns) + goto out; + /* + * Target should be either in our namespace or in an acceptable + * anon namespace, sensu check_anonymous_mnt(). + */ + if (!may_use_mount(p)) + goto out; } - if (old->mnt.mnt_flags & MNT_LOCKED) - goto out; - if (!path_mounted(old_path)) goto out; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index cc37f58b47dd..1beb124e25f6 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -536,6 +536,8 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, goto unlock; } + rdt_last_cmd_clear(); + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { ret = -EINVAL; @@ -3472,6 +3474,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, goto out_unlock; } + rdt_last_cmd_clear(); + /* * Check that the parent directory for a monitor group is a "mon_groups" * directory. diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index e03c890de0a0..c0196be0e65f 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -362,6 +362,10 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) c = 0; spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { +#ifdef CONFIG_CIFS_SMB_DIRECT + struct smbdirect_socket_parameters *sp; +#endif + /* channel info will be printed as a part of sessions below */ if (SERVER_IS_CHAN(server)) continue; @@ -383,25 +387,26 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\nSMBDirect transport not available"); goto skip_rdma; } + sp = &server->smbd_conn->socket.parameters; seq_printf(m, "\nSMBDirect (in hex) protocol version: %x " "transport status: %x", server->smbd_conn->protocol, - server->smbd_conn->transport_status); + server->smbd_conn->socket.status); seq_printf(m, "\nConn receive_credit_max: %x " "send_credit_target: %x max_send_size: %x", - server->smbd_conn->receive_credit_max, - server->smbd_conn->send_credit_target, - server->smbd_conn->max_send_size); + sp->recv_credit_max, + sp->send_credit_target, + sp->max_send_size); seq_printf(m, "\nConn max_fragmented_recv_size: %x " "max_fragmented_send_size: %x max_receive_size:%x", - server->smbd_conn->max_fragmented_recv_size, - server->smbd_conn->max_fragmented_send_size, - server->smbd_conn->max_receive_size); + sp->max_fragmented_recv_size, + sp->max_fragmented_send_size, + sp->max_recv_size); seq_printf(m, "\nConn keep_alive_interval: %x " "max_readwrite_size: %x rdma_readwrite_threshold: %x", - server->smbd_conn->keep_alive_interval, - server->smbd_conn->max_readwrite_size, + sp->keepalive_interval_msec * 1000, + sp->max_read_write_size, server->smbd_conn->rdma_readwrite_threshold); seq_printf(m, "\nDebug count_get_receive_buffer: %x " "count_put_receive_buffer: %x count_send_empty: %x", diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index ca435a3841b8..b9ec9fe16a98 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -145,6 +145,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 54 -#define CIFS_VERSION "2.54" +#define SMB3_PRODUCT_BUILD 55 +#define CIFS_VERSION "2.55" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index ad7dd16db3e9..45e94e18f4d5 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1085,6 +1085,7 @@ struct cifs_chan { }; #define CIFS_SES_FLAG_SCALE_CHANNELS (0x1) +#define CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES (0x2) /* * Session structure. One of these for each uid session with a particular host diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 024817d40c5f..28bc33496623 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -116,13 +116,9 @@ static void smb2_query_server_interfaces(struct work_struct *work) rc = server->ops->query_server_interfaces(xid, tcon, false); free_xid(xid); - if (rc) { - if (rc == -EOPNOTSUPP) - return; - + if (rc) cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", __func__, rc); - } queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, (SMB_INTERFACE_POLL_INTERVAL * HZ)); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index bab9f567d9b7..1468c16ea9b8 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -504,6 +504,9 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) wsize = min_t(unsigned int, wsize, server->max_write); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { + struct smbdirect_socket_parameters *sp = + &server->smbd_conn->socket.parameters; + if (server->sign) /* * Account for SMB2 data transfer packet header and @@ -511,12 +514,12 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) */ wsize = min_t(unsigned int, wsize, - server->smbd_conn->max_fragmented_send_size - + sp->max_fragmented_send_size - SMB2_READWRITE_PDU_HEADER_SIZE - sizeof(struct smb2_transform_hdr)); else wsize = min_t(unsigned int, - wsize, server->smbd_conn->max_readwrite_size); + wsize, sp->max_read_write_size); } #endif if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) @@ -552,6 +555,9 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) rsize = min_t(unsigned int, rsize, server->max_read); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { + struct smbdirect_socket_parameters *sp = + &server->smbd_conn->socket.parameters; + if (server->sign) /* * Account for SMB2 data transfer packet header and @@ -559,12 +565,12 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) */ rsize = min_t(unsigned int, rsize, - server->smbd_conn->max_fragmented_recv_size - + sp->max_fragmented_recv_size - SMB2_READWRITE_PDU_HEADER_SIZE - sizeof(struct smb2_transform_hdr)); else rsize = min_t(unsigned int, - rsize, server->smbd_conn->max_readwrite_size); + rsize, sp->max_read_write_size); } #endif diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 0c320d06809c..a717be1626a3 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -36,6 +36,7 @@ #include "smb2glob.h" #include "cifspdu.h" #include "cifs_spnego.h" +#include "../common/smbdirect/smbdirect.h" #include "smbdirect.h" #include "trace.h" #ifdef CONFIG_CIFS_DFS_UPCALL @@ -411,14 +412,23 @@ skip_sess_setup: if (!rc && (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL) && server->ops->query_server_interfaces) { - mutex_unlock(&ses->session_mutex); - /* - * query server network interfaces, in case they change + * query server network interfaces, in case they change. + * Also mark the session as pending this update while the query + * is in progress. This will be used to avoid calling + * smb2_reconnect recursively. */ + ses->flags |= CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; xid = get_xid(); rc = server->ops->query_server_interfaces(xid, tcon, false); free_xid(xid); + ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; + + /* regardless of rc value, setup polling */ + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + + mutex_unlock(&ses->session_mutex); if (rc == -EOPNOTSUPP && ses->chan_count > 1) { /* @@ -438,11 +448,8 @@ skip_sess_setup: if (ses->chan_max > ses->chan_count && ses->iface_count && !SERVER_IS_CHAN(server)) { - if (ses->chan_count == 1) { + if (ses->chan_count == 1) cifs_server_dbg(VFS, "supports multichannel now\n"); - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); - } cifs_try_adding_channels(ses); } @@ -560,11 +567,18 @@ static int smb2_ioctl_req_init(u32 opcode, struct cifs_tcon *tcon, struct TCP_Server_Info *server, void **request_buf, unsigned int *total_len) { - /* Skip reconnect only for FSCTL_VALIDATE_NEGOTIATE_INFO IOCTLs */ - if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) { + /* + * Skip reconnect in one of the following cases: + * 1. For FSCTL_VALIDATE_NEGOTIATE_INFO IOCTLs + * 2. For FSCTL_QUERY_NETWORK_INTERFACE_INFO IOCTL when called from + * smb2_reconnect (indicated by CIFS_SES_FLAG_SCALE_CHANNELS ses flag) + */ + if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO || + (opcode == FSCTL_QUERY_NETWORK_INTERFACE_INFO && + (tcon->ses->flags & CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES))) return __smb2_plain_req_init(SMB2_IOCTL, tcon, server, request_buf, total_len); - } + return smb2_plain_req_init(SMB2_IOCTL, tcon, server, request_buf, total_len); } @@ -4449,10 +4463,10 @@ smb2_new_read_req(void **buf, unsigned int *total_len, #ifdef CONFIG_CIFS_SMB_DIRECT /* * If we want to do a RDMA write, fill in and append - * smbd_buffer_descriptor_v1 to the end of read request + * smbdirect_buffer_descriptor_v1 to the end of read request */ if (rdata && smb3_use_rdma_offload(io_parms)) { - struct smbd_buffer_descriptor_v1 *v1; + struct smbdirect_buffer_descriptor_v1 *v1; bool need_invalidate = server->dialect == SMB30_PROT_ID; rdata->mr = smbd_register_mr(server->smbd_conn, &rdata->subreq.io_iter, @@ -4466,8 +4480,8 @@ smb2_new_read_req(void **buf, unsigned int *total_len, req->ReadChannelInfoOffset = cpu_to_le16(offsetof(struct smb2_read_req, Buffer)); req->ReadChannelInfoLength = - cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1)); - v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0]; + cpu_to_le16(sizeof(struct smbdirect_buffer_descriptor_v1)); + v1 = (struct smbdirect_buffer_descriptor_v1 *) &req->Buffer[0]; v1->offset = cpu_to_le64(rdata->mr->mr->iova); v1->token = cpu_to_le32(rdata->mr->mr->rkey); v1->length = cpu_to_le32(rdata->mr->mr->length); @@ -4975,10 +4989,10 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) #ifdef CONFIG_CIFS_SMB_DIRECT /* * If we want to do a server RDMA read, fill in and append - * smbd_buffer_descriptor_v1 to the end of write request + * smbdirect_buffer_descriptor_v1 to the end of write request */ if (smb3_use_rdma_offload(io_parms)) { - struct smbd_buffer_descriptor_v1 *v1; + struct smbdirect_buffer_descriptor_v1 *v1; bool need_invalidate = server->dialect == SMB30_PROT_ID; wdata->mr = smbd_register_mr(server->smbd_conn, &wdata->subreq.io_iter, @@ -4997,8 +5011,8 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) req->WriteChannelInfoOffset = cpu_to_le16(offsetof(struct smb2_write_req, Buffer)); req->WriteChannelInfoLength = - cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1)); - v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0]; + cpu_to_le16(sizeof(struct smbdirect_buffer_descriptor_v1)); + v1 = (struct smbdirect_buffer_descriptor_v1 *) &req->Buffer[0]; v1->offset = cpu_to_le64(wdata->mr->mr->iova); v1->token = cpu_to_le32(wdata->mr->mr->rkey); v1->length = cpu_to_le32(wdata->mr->mr->length); diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index b0b7254661e9..5ae847919da5 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include <linux/highmem.h> #include <linux/folio_queue.h> +#include "../common/smbdirect/smbdirect_pdu.h" #include "smbdirect.h" #include "cifs_debug.h" #include "cifsproto.h" @@ -50,9 +51,6 @@ struct smb_extract_to_rdma { static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, struct smb_extract_to_rdma *rdma); -/* SMBD version number */ -#define SMBD_V1 0x0100 - /* Port numbers for SMBD transport */ #define SMB_PORT 445 #define SMBD_PORT 5445 @@ -165,10 +163,11 @@ static void smbd_disconnect_rdma_work(struct work_struct *work) { struct smbd_connection *info = container_of(work, struct smbd_connection, disconnect_work); + struct smbdirect_socket *sc = &info->socket; - if (info->transport_status == SMBD_CONNECTED) { - info->transport_status = SMBD_DISCONNECTING; - rdma_disconnect(info->id); + if (sc->status == SMBDIRECT_SOCKET_CONNECTED) { + sc->status = SMBDIRECT_SOCKET_DISCONNECTING; + rdma_disconnect(sc->rdma.cm_id); } } @@ -182,6 +181,7 @@ static int smbd_conn_upcall( struct rdma_cm_id *id, struct rdma_cm_event *event) { struct smbd_connection *info = id->context; + struct smbdirect_socket *sc = &info->socket; log_rdma_event(INFO, "event=%d status=%d\n", event->event, event->status); @@ -205,7 +205,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_ESTABLISHED: log_rdma_event(INFO, "connected event=%d\n", event->event); - info->transport_status = SMBD_CONNECTED; + sc->status = SMBDIRECT_SOCKET_CONNECTED; wake_up_interruptible(&info->conn_wait); break; @@ -213,20 +213,20 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: log_rdma_event(INFO, "connecting failed event=%d\n", event->event); - info->transport_status = SMBD_DISCONNECTED; + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up_interruptible(&info->conn_wait); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_DISCONNECTED: /* This happens when we fail the negotiation */ - if (info->transport_status == SMBD_NEGOTIATE_FAILED) { - info->transport_status = SMBD_DISCONNECTED; + if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up(&info->conn_wait); break; } - info->transport_status = SMBD_DISCONNECTED; + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up_interruptible(&info->disconn_wait); wake_up_interruptible(&info->wait_reassembly_queue); wake_up_interruptible_all(&info->wait_send_queue); @@ -275,6 +275,8 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) int i; struct smbd_request *request = container_of(wc->wr_cqe, struct smbd_request, cqe); + struct smbd_connection *info = request->info; + struct smbdirect_socket *sc = &info->socket; log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n", request, wc->status); @@ -286,7 +288,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) } for (i = 0; i < request->num_sge; i++) - ib_dma_unmap_single(request->info->id->device, + ib_dma_unmap_single(sc->ib.dev, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); @@ -299,7 +301,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) mempool_free(request, request->info->request_mempool); } -static void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp) +static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) { log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n", resp->min_version, resp->max_version, @@ -318,15 +320,17 @@ static bool process_negotiation_response( struct smbd_response *response, int packet_length) { struct smbd_connection *info = response->info; - struct smbd_negotiate_resp *packet = smbd_response_payload(response); + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_negotiate_resp *packet = smbd_response_payload(response); - if (packet_length < sizeof(struct smbd_negotiate_resp)) { + if (packet_length < sizeof(struct smbdirect_negotiate_resp)) { log_rdma_event(ERR, "error: packet_length=%d\n", packet_length); return false; } - if (le16_to_cpu(packet->negotiated_version) != SMBD_V1) { + if (le16_to_cpu(packet->negotiated_version) != SMBDIRECT_V1) { log_rdma_event(ERR, "error: negotiated_version=%x\n", le16_to_cpu(packet->negotiated_version)); return false; @@ -347,20 +351,20 @@ static bool process_negotiation_response( atomic_set(&info->receive_credits, 0); - if (le32_to_cpu(packet->preferred_send_size) > info->max_receive_size) { + if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) { log_rdma_event(ERR, "error: preferred_send_size=%d\n", le32_to_cpu(packet->preferred_send_size)); return false; } - info->max_receive_size = le32_to_cpu(packet->preferred_send_size); + sp->max_recv_size = le32_to_cpu(packet->preferred_send_size); if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) { log_rdma_event(ERR, "error: max_receive_size=%d\n", le32_to_cpu(packet->max_receive_size)); return false; } - info->max_send_size = min_t(int, info->max_send_size, - le32_to_cpu(packet->max_receive_size)); + sp->max_send_size = min_t(u32, sp->max_send_size, + le32_to_cpu(packet->max_receive_size)); if (le32_to_cpu(packet->max_fragmented_size) < SMBD_MIN_FRAGMENTED_SIZE) { @@ -368,18 +372,18 @@ static bool process_negotiation_response( le32_to_cpu(packet->max_fragmented_size)); return false; } - info->max_fragmented_send_size = + sp->max_fragmented_send_size = le32_to_cpu(packet->max_fragmented_size); info->rdma_readwrite_threshold = - rdma_readwrite_threshold > info->max_fragmented_send_size ? - info->max_fragmented_send_size : + rdma_readwrite_threshold > sp->max_fragmented_send_size ? + sp->max_fragmented_send_size : rdma_readwrite_threshold; - info->max_readwrite_size = min_t(u32, + sp->max_read_write_size = min_t(u32, le32_to_cpu(packet->max_readwrite_size), info->max_frmr_depth * PAGE_SIZE); - info->max_frmr_depth = info->max_readwrite_size / PAGE_SIZE; + info->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; return true; } @@ -393,8 +397,9 @@ static void smbd_post_send_credits(struct work_struct *work) struct smbd_connection *info = container_of(work, struct smbd_connection, post_send_credits_work); + struct smbdirect_socket *sc = &info->socket; - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { wake_up(&info->wait_receive_queues); return; } @@ -448,7 +453,7 @@ static void smbd_post_send_credits(struct work_struct *work) /* Called from softirq, when recv is done */ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) { - struct smbd_data_transfer *data_transfer; + struct smbdirect_data_transfer *data_transfer; struct smbd_response *response = container_of(wc->wr_cqe, struct smbd_response, cqe); struct smbd_connection *info = response->info; @@ -474,7 +479,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) switch (response->type) { /* SMBD negotiation response */ case SMBD_NEGOTIATE_RESP: - dump_smbd_negotiate_resp(smbd_response_payload(response)); + dump_smbdirect_negotiate_resp(smbd_response_payload(response)); info->full_packet_received = true; info->negotiate_done = process_negotiation_response(response, wc->byte_len); @@ -531,7 +536,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) /* Send a KEEP_ALIVE response right away if requested */ info->keep_alive_requested = KEEP_ALIVE_NONE; if (le16_to_cpu(data_transfer->flags) & - SMB_DIRECT_RESPONSE_REQUESTED) { + SMBDIRECT_FLAG_RESPONSE_REQUESTED) { info->keep_alive_requested = KEEP_ALIVE_PENDING; } @@ -635,32 +640,34 @@ static int smbd_ia_open( struct smbd_connection *info, struct sockaddr *dstaddr, int port) { + struct smbdirect_socket *sc = &info->socket; int rc; - info->id = smbd_create_id(info, dstaddr, port); - if (IS_ERR(info->id)) { - rc = PTR_ERR(info->id); + sc->rdma.cm_id = smbd_create_id(info, dstaddr, port); + if (IS_ERR(sc->rdma.cm_id)) { + rc = PTR_ERR(sc->rdma.cm_id); goto out1; } + sc->ib.dev = sc->rdma.cm_id->device; - if (!frwr_is_supported(&info->id->device->attrs)) { + if (!frwr_is_supported(&sc->ib.dev->attrs)) { log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n"); log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", - info->id->device->attrs.device_cap_flags, - info->id->device->attrs.max_fast_reg_page_list_len); + sc->ib.dev->attrs.device_cap_flags, + sc->ib.dev->attrs.max_fast_reg_page_list_len); rc = -EPROTONOSUPPORT; goto out2; } info->max_frmr_depth = min_t(int, smbd_max_frmr_depth, - info->id->device->attrs.max_fast_reg_page_list_len); + sc->ib.dev->attrs.max_fast_reg_page_list_len); info->mr_type = IB_MR_TYPE_MEM_REG; - if (info->id->device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) + if (sc->ib.dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) info->mr_type = IB_MR_TYPE_SG_GAPS; - info->pd = ib_alloc_pd(info->id->device, 0); - if (IS_ERR(info->pd)) { - rc = PTR_ERR(info->pd); + sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); + if (IS_ERR(sc->ib.pd)) { + rc = PTR_ERR(sc->ib.pd); log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); goto out2; } @@ -668,8 +675,8 @@ static int smbd_ia_open( return 0; out2: - rdma_destroy_id(info->id); - info->id = NULL; + rdma_destroy_id(sc->rdma.cm_id); + sc->rdma.cm_id = NULL; out1: return rc; @@ -683,10 +690,12 @@ out1: */ static int smbd_post_send_negotiate_req(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_send_wr send_wr; int rc = -ENOMEM; struct smbd_request *request; - struct smbd_negotiate_req *packet; + struct smbdirect_negotiate_req *packet; request = mempool_alloc(info->request_mempool, GFP_KERNEL); if (!request) @@ -695,29 +704,29 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->info = info; packet = smbd_request_payload(request); - packet->min_version = cpu_to_le16(SMBD_V1); - packet->max_version = cpu_to_le16(SMBD_V1); + packet->min_version = cpu_to_le16(SMBDIRECT_V1); + packet->max_version = cpu_to_le16(SMBDIRECT_V1); packet->reserved = 0; - packet->credits_requested = cpu_to_le16(info->send_credit_target); - packet->preferred_send_size = cpu_to_le32(info->max_send_size); - packet->max_receive_size = cpu_to_le32(info->max_receive_size); + packet->credits_requested = cpu_to_le16(sp->send_credit_target); + packet->preferred_send_size = cpu_to_le32(sp->max_send_size); + packet->max_receive_size = cpu_to_le32(sp->max_recv_size); packet->max_fragmented_size = - cpu_to_le32(info->max_fragmented_recv_size); + cpu_to_le32(sp->max_fragmented_recv_size); request->num_sge = 1; request->sge[0].addr = ib_dma_map_single( - info->id->device, (void *)packet, + sc->ib.dev, (void *)packet, sizeof(*packet), DMA_TO_DEVICE); - if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { + if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { rc = -EIO; goto dma_mapping_failed; } request->sge[0].length = sizeof(*packet); - request->sge[0].lkey = info->pd->local_dma_lkey; + request->sge[0].lkey = sc->ib.pd->local_dma_lkey; ib_dma_sync_single_for_device( - info->id->device, request->sge[0].addr, + sc->ib.dev, request->sge[0].addr, request->sge[0].length, DMA_TO_DEVICE); request->cqe.done = send_done; @@ -734,14 +743,14 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->sge[0].length, request->sge[0].lkey); atomic_inc(&info->send_pending); - rc = ib_post_send(info->id->qp, &send_wr, NULL); + rc = ib_post_send(sc->ib.qp, &send_wr, NULL); if (!rc) return 0; /* if we reach here, post send failed */ log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); atomic_dec(&info->send_pending); - ib_dma_unmap_single(info->id->device, request->sge[0].addr, + ib_dma_unmap_single(sc->ib.dev, request->sge[0].addr, request->sge[0].length, DMA_TO_DEVICE); smbd_disconnect_rdma_connection(info); @@ -774,10 +783,10 @@ static int manage_credits_prior_sending(struct smbd_connection *info) /* * Check if we need to send a KEEP_ALIVE message * The idle connection timer triggers a KEEP_ALIVE message when expires - * SMB_DIRECT_RESPONSE_REQUESTED is set in the message flag to have peer send + * SMBDIRECT_FLAG_RESPONSE_REQUESTED is set in the message flag to have peer send * back a response. * return value: - * 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set + * 1 if SMBDIRECT_FLAG_RESPONSE_REQUESTED needs to be set * 0: otherwise */ static int manage_keep_alive_before_sending(struct smbd_connection *info) @@ -793,6 +802,8 @@ static int manage_keep_alive_before_sending(struct smbd_connection *info) static int smbd_post_send(struct smbd_connection *info, struct smbd_request *request) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_send_wr send_wr; int rc, i; @@ -801,7 +812,7 @@ static int smbd_post_send(struct smbd_connection *info, "rdma_request sge[%d] addr=0x%llx length=%u\n", i, request->sge[i].addr, request->sge[i].length); ib_dma_sync_single_for_device( - info->id->device, + sc->ib.dev, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); @@ -816,7 +827,7 @@ static int smbd_post_send(struct smbd_connection *info, send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - rc = ib_post_send(info->id->qp, &send_wr, NULL); + rc = ib_post_send(sc->ib.qp, &send_wr, NULL); if (rc) { log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); smbd_disconnect_rdma_connection(info); @@ -824,7 +835,7 @@ static int smbd_post_send(struct smbd_connection *info, } else /* Reset timer for idle connection after packet is sent */ mod_delayed_work(info->workqueue, &info->idle_timer_work, - info->keep_alive_interval*HZ); + msecs_to_jiffies(sp->keepalive_interval_msec)); return rc; } @@ -833,22 +844,24 @@ static int smbd_post_send_iter(struct smbd_connection *info, struct iov_iter *iter, int *_remaining_data_length) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; int i, rc; int header_length; int data_length; struct smbd_request *request; - struct smbd_data_transfer *packet; + struct smbdirect_data_transfer *packet; int new_credits = 0; wait_credit: /* Wait for send credits. A SMBD packet needs one credit */ rc = wait_event_interruptible(info->wait_send_queue, atomic_read(&info->send_credits) > 0 || - info->transport_status != SMBD_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); if (rc) goto err_wait_credit; - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_outgoing(ERR, "disconnected not sending on wait_credit\n"); rc = -EAGAIN; goto err_wait_credit; @@ -860,17 +873,17 @@ wait_credit: wait_send_queue: wait_event(info->wait_post_send, - atomic_read(&info->send_pending) < info->send_credit_target || - info->transport_status != SMBD_CONNECTED); + atomic_read(&info->send_pending) < sp->send_credit_target || + sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_outgoing(ERR, "disconnected not sending on wait_send_queue\n"); rc = -EAGAIN; goto err_wait_send_queue; } if (unlikely(atomic_inc_return(&info->send_pending) > - info->send_credit_target)) { + sp->send_credit_target)) { atomic_dec(&info->send_pending); goto wait_send_queue; } @@ -890,8 +903,8 @@ wait_send_queue: .nr_sge = 1, .max_sge = SMBDIRECT_MAX_SEND_SGE, .sge = request->sge, - .device = info->id->device, - .local_dma_lkey = info->pd->local_dma_lkey, + .device = sc->ib.dev, + .local_dma_lkey = sc->ib.pd->local_dma_lkey, .direction = DMA_TO_DEVICE, }; @@ -909,7 +922,7 @@ wait_send_queue: /* Fill in the packet header */ packet = smbd_request_payload(request); - packet->credits_requested = cpu_to_le16(info->send_credit_target); + packet->credits_requested = cpu_to_le16(sp->send_credit_target); new_credits = manage_credits_prior_sending(info); atomic_add(new_credits, &info->receive_credits); @@ -919,7 +932,7 @@ wait_send_queue: packet->flags = 0; if (manage_keep_alive_before_sending(info)) - packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED); + packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); packet->reserved = 0; if (!data_length) @@ -938,23 +951,23 @@ wait_send_queue: le32_to_cpu(packet->remaining_data_length)); /* Map the packet to DMA */ - header_length = sizeof(struct smbd_data_transfer); + header_length = sizeof(struct smbdirect_data_transfer); /* If this is a packet without payload, don't send padding */ if (!data_length) - header_length = offsetof(struct smbd_data_transfer, padding); + header_length = offsetof(struct smbdirect_data_transfer, padding); - request->sge[0].addr = ib_dma_map_single(info->id->device, + request->sge[0].addr = ib_dma_map_single(sc->ib.dev, (void *)packet, header_length, DMA_TO_DEVICE); - if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { + if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { rc = -EIO; request->sge[0].addr = 0; goto err_dma; } request->sge[0].length = header_length; - request->sge[0].lkey = info->pd->local_dma_lkey; + request->sge[0].lkey = sc->ib.pd->local_dma_lkey; rc = smbd_post_send(info, request); if (!rc) @@ -963,7 +976,7 @@ wait_send_queue: err_dma: for (i = 0; i < request->num_sge; i++) if (request->sge[i].addr) - ib_dma_unmap_single(info->id->device, + ib_dma_unmap_single(sc->ib.dev, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); @@ -1008,17 +1021,19 @@ static int smbd_post_send_empty(struct smbd_connection *info) static int smbd_post_recv( struct smbd_connection *info, struct smbd_response *response) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_recv_wr recv_wr; int rc = -EIO; response->sge.addr = ib_dma_map_single( - info->id->device, response->packet, - info->max_receive_size, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(info->id->device, response->sge.addr)) + sc->ib.dev, response->packet, + sp->max_recv_size, DMA_FROM_DEVICE); + if (ib_dma_mapping_error(sc->ib.dev, response->sge.addr)) return rc; - response->sge.length = info->max_receive_size; - response->sge.lkey = info->pd->local_dma_lkey; + response->sge.length = sp->max_recv_size; + response->sge.lkey = sc->ib.pd->local_dma_lkey; response->cqe.done = recv_done; @@ -1027,9 +1042,9 @@ static int smbd_post_recv( recv_wr.sg_list = &response->sge; recv_wr.num_sge = 1; - rc = ib_post_recv(info->id->qp, &recv_wr, NULL); + rc = ib_post_recv(sc->ib.qp, &recv_wr, NULL); if (rc) { - ib_dma_unmap_single(info->id->device, response->sge.addr, + ib_dma_unmap_single(sc->ib.dev, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); smbd_disconnect_rdma_connection(info); log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); @@ -1187,9 +1202,10 @@ static struct smbd_response *get_receive_buffer(struct smbd_connection *info) static void put_receive_buffer( struct smbd_connection *info, struct smbd_response *response) { + struct smbdirect_socket *sc = &info->socket; unsigned long flags; - ib_dma_unmap_single(info->id->device, response->sge.addr, + ib_dma_unmap_single(sc->ib.dev, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); spin_lock_irqsave(&info->receive_queue_lock, flags); @@ -1264,6 +1280,8 @@ static void idle_connection_timer(struct work_struct *work) struct smbd_connection *info = container_of( work, struct smbd_connection, idle_timer_work.work); + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; if (info->keep_alive_requested != KEEP_ALIVE_NONE) { log_keep_alive(ERR, @@ -1278,7 +1296,7 @@ static void idle_connection_timer(struct work_struct *work) /* Setup the next idle timeout work */ queue_delayed_work(info->workqueue, &info->idle_timer_work, - info->keep_alive_interval*HZ); + msecs_to_jiffies(sp->keepalive_interval_msec)); } /* @@ -1289,6 +1307,8 @@ static void idle_connection_timer(struct work_struct *work) void smbd_destroy(struct TCP_Server_Info *server) { struct smbd_connection *info = server->smbd_conn; + struct smbdirect_socket *sc; + struct smbdirect_socket_parameters *sp; struct smbd_response *response; unsigned long flags; @@ -1296,19 +1316,22 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "rdma session already destroyed\n"); return; } + sc = &info->socket; + sp = &sc->parameters; log_rdma_event(INFO, "destroying rdma session\n"); - if (info->transport_status != SMBD_DISCONNECTED) { - rdma_disconnect(server->smbd_conn->id); + if (sc->status != SMBDIRECT_SOCKET_DISCONNECTED) { + rdma_disconnect(sc->rdma.cm_id); log_rdma_event(INFO, "wait for transport being disconnected\n"); wait_event_interruptible( info->disconn_wait, - info->transport_status == SMBD_DISCONNECTED); + sc->status == SMBDIRECT_SOCKET_DISCONNECTED); } log_rdma_event(INFO, "destroying qp\n"); - ib_drain_qp(info->id->qp); - rdma_destroy_qp(info->id); + ib_drain_qp(sc->ib.qp); + rdma_destroy_qp(sc->rdma.cm_id); + sc->ib.qp = NULL; log_rdma_event(INFO, "cancelling idle timer\n"); cancel_delayed_work_sync(&info->idle_timer_work); @@ -1336,7 +1359,7 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "free receive buffers\n"); wait_event(info->wait_receive_queues, info->count_receive_queue + info->count_empty_packet_queue - == info->receive_credit_max); + == sp->recv_credit_max); destroy_receive_buffers(info); /* @@ -1355,10 +1378,10 @@ void smbd_destroy(struct TCP_Server_Info *server) } destroy_mr_list(info); - ib_free_cq(info->send_cq); - ib_free_cq(info->recv_cq); - ib_dealloc_pd(info->pd); - rdma_destroy_id(info->id); + ib_free_cq(sc->ib.send_cq); + ib_free_cq(sc->ib.recv_cq); + ib_dealloc_pd(sc->ib.pd); + rdma_destroy_id(sc->rdma.cm_id); /* free mempools */ mempool_destroy(info->request_mempool); @@ -1367,7 +1390,7 @@ void smbd_destroy(struct TCP_Server_Info *server) mempool_destroy(info->response_mempool); kmem_cache_destroy(info->response_cache); - info->transport_status = SMBD_DESTROYED; + sc->status = SMBDIRECT_SOCKET_DESTROYED; destroy_workqueue(info->workqueue); log_rdma_event(INFO, "rdma session destroyed\n"); @@ -1392,7 +1415,7 @@ int smbd_reconnect(struct TCP_Server_Info *server) * This is possible if transport is disconnected and we haven't received * notification from RDMA, but upper layer has detected timeout */ - if (server->smbd_conn->transport_status == SMBD_CONNECTED) { + if (server->smbd_conn->socket.status == SMBDIRECT_SOCKET_CONNECTED) { log_rdma_event(INFO, "disconnecting transport\n"); smbd_destroy(server); } @@ -1424,6 +1447,8 @@ static void destroy_caches_and_workqueue(struct smbd_connection *info) #define MAX_NAME_LEN 80 static int allocate_caches_and_workqueue(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; char name[MAX_NAME_LEN]; int rc; @@ -1432,13 +1457,13 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) kmem_cache_create( name, sizeof(struct smbd_request) + - sizeof(struct smbd_data_transfer), + sizeof(struct smbdirect_data_transfer), 0, SLAB_HWCACHE_ALIGN, NULL); if (!info->request_cache) return -ENOMEM; info->request_mempool = - mempool_create(info->send_credit_target, mempool_alloc_slab, + mempool_create(sp->send_credit_target, mempool_alloc_slab, mempool_free_slab, info->request_cache); if (!info->request_mempool) goto out1; @@ -1448,13 +1473,13 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) kmem_cache_create( name, sizeof(struct smbd_response) + - info->max_receive_size, + sp->max_recv_size, 0, SLAB_HWCACHE_ALIGN, NULL); if (!info->response_cache) goto out2; info->response_mempool = - mempool_create(info->receive_credit_max, mempool_alloc_slab, + mempool_create(sp->recv_credit_max, mempool_alloc_slab, mempool_free_slab, info->response_cache); if (!info->response_mempool) goto out3; @@ -1464,7 +1489,7 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (!info->workqueue) goto out4; - rc = allocate_receive_buffers(info, info->receive_credit_max); + rc = allocate_receive_buffers(info, sp->recv_credit_max); if (rc) { log_rdma_event(ERR, "failed to allocate receive buffers\n"); goto out5; @@ -1491,6 +1516,8 @@ static struct smbd_connection *_smbd_get_connection( { int rc; struct smbd_connection *info; + struct smbdirect_socket *sc; + struct smbdirect_socket_parameters *sp; struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; @@ -1500,101 +1527,102 @@ static struct smbd_connection *_smbd_get_connection( info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL); if (!info) return NULL; + sc = &info->socket; + sp = &sc->parameters; - info->transport_status = SMBD_CONNECTING; + sc->status = SMBDIRECT_SOCKET_CONNECTING; rc = smbd_ia_open(info, dstaddr, port); if (rc) { log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); goto create_id_failed; } - if (smbd_send_credit_target > info->id->device->attrs.max_cqe || - smbd_send_credit_target > info->id->device->attrs.max_qp_wr) { + if (smbd_send_credit_target > sc->ib.dev->attrs.max_cqe || + smbd_send_credit_target > sc->ib.dev->attrs.max_qp_wr) { log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", smbd_send_credit_target, - info->id->device->attrs.max_cqe, - info->id->device->attrs.max_qp_wr); + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); goto config_failed; } - if (smbd_receive_credit_max > info->id->device->attrs.max_cqe || - smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) { + if (smbd_receive_credit_max > sc->ib.dev->attrs.max_cqe || + smbd_receive_credit_max > sc->ib.dev->attrs.max_qp_wr) { log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", smbd_receive_credit_max, - info->id->device->attrs.max_cqe, - info->id->device->attrs.max_qp_wr); + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); goto config_failed; } - info->receive_credit_max = smbd_receive_credit_max; - info->send_credit_target = smbd_send_credit_target; - info->max_send_size = smbd_max_send_size; - info->max_fragmented_recv_size = smbd_max_fragmented_recv_size; - info->max_receive_size = smbd_max_receive_size; - info->keep_alive_interval = smbd_keep_alive_interval; + sp->recv_credit_max = smbd_receive_credit_max; + sp->send_credit_target = smbd_send_credit_target; + sp->max_send_size = smbd_max_send_size; + sp->max_fragmented_recv_size = smbd_max_fragmented_recv_size; + sp->max_recv_size = smbd_max_receive_size; + sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; - if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || - info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) { + if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || + sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) { log_rdma_event(ERR, "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", IB_DEVICE_NAME_MAX, - info->id->device->name, - info->id->device->attrs.max_send_sge, - info->id->device->attrs.max_recv_sge); + sc->ib.dev->name, + sc->ib.dev->attrs.max_send_sge, + sc->ib.dev->attrs.max_recv_sge); goto config_failed; } - info->send_cq = NULL; - info->recv_cq = NULL; - info->send_cq = - ib_alloc_cq_any(info->id->device, info, - info->send_credit_target, IB_POLL_SOFTIRQ); - if (IS_ERR(info->send_cq)) { - info->send_cq = NULL; + sc->ib.send_cq = + ib_alloc_cq_any(sc->ib.dev, info, + sp->send_credit_target, IB_POLL_SOFTIRQ); + if (IS_ERR(sc->ib.send_cq)) { + sc->ib.send_cq = NULL; goto alloc_cq_failed; } - info->recv_cq = - ib_alloc_cq_any(info->id->device, info, - info->receive_credit_max, IB_POLL_SOFTIRQ); - if (IS_ERR(info->recv_cq)) { - info->recv_cq = NULL; + sc->ib.recv_cq = + ib_alloc_cq_any(sc->ib.dev, info, + sp->recv_credit_max, IB_POLL_SOFTIRQ); + if (IS_ERR(sc->ib.recv_cq)) { + sc->ib.recv_cq = NULL; goto alloc_cq_failed; } memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smbd_qp_async_error_upcall; qp_attr.qp_context = info; - qp_attr.cap.max_send_wr = info->send_credit_target; - qp_attr.cap.max_recv_wr = info->receive_credit_max; + qp_attr.cap.max_send_wr = sp->send_credit_target; + qp_attr.cap.max_recv_wr = sp->recv_credit_max; qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE; qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE; qp_attr.cap.max_inline_data = 0; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; - qp_attr.send_cq = info->send_cq; - qp_attr.recv_cq = info->recv_cq; + qp_attr.send_cq = sc->ib.send_cq; + qp_attr.recv_cq = sc->ib.recv_cq; qp_attr.port_num = ~0; - rc = rdma_create_qp(info->id, info->pd, &qp_attr); + rc = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); if (rc) { log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc); goto create_qp_failed; } + sc->ib.qp = sc->rdma.cm_id->qp; memset(&conn_param, 0, sizeof(conn_param)); conn_param.initiator_depth = 0; conn_param.responder_resources = - min(info->id->device->attrs.max_qp_rd_atom, + min(sc->ib.dev->attrs.max_qp_rd_atom, SMBD_CM_RESPONDER_RESOURCES); info->responder_resources = conn_param.responder_resources; log_rdma_mr(INFO, "responder_resources=%d\n", info->responder_resources); /* Need to send IRD/ORD in private data for iWARP */ - info->id->device->ops.get_port_immutable( - info->id->device, info->id->port_num, &port_immutable); + sc->ib.dev->ops.get_port_immutable( + sc->ib.dev, sc->rdma.cm_id->port_num, &port_immutable); if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { ird_ord_hdr[0] = info->responder_resources; ird_ord_hdr[1] = 1; @@ -1615,16 +1643,16 @@ static struct smbd_connection *_smbd_get_connection( init_waitqueue_head(&info->conn_wait); init_waitqueue_head(&info->disconn_wait); init_waitqueue_head(&info->wait_reassembly_queue); - rc = rdma_connect(info->id, &conn_param); + rc = rdma_connect(sc->rdma.cm_id, &conn_param); if (rc) { log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); goto rdma_connect_failed; } wait_event_interruptible( - info->conn_wait, info->transport_status != SMBD_CONNECTING); + info->conn_wait, sc->status != SMBDIRECT_SOCKET_CONNECTING); - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); goto rdma_connect_failed; } @@ -1640,7 +1668,7 @@ static struct smbd_connection *_smbd_get_connection( init_waitqueue_head(&info->wait_send_queue); INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); queue_delayed_work(info->workqueue, &info->idle_timer_work, - info->keep_alive_interval*HZ); + msecs_to_jiffies(sp->keepalive_interval_msec)); init_waitqueue_head(&info->wait_send_pending); atomic_set(&info->send_pending, 0); @@ -1675,26 +1703,26 @@ allocate_mr_failed: negotiation_failed: cancel_delayed_work_sync(&info->idle_timer_work); destroy_caches_and_workqueue(info); - info->transport_status = SMBD_NEGOTIATE_FAILED; + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; init_waitqueue_head(&info->conn_wait); - rdma_disconnect(info->id); + rdma_disconnect(sc->rdma.cm_id); wait_event(info->conn_wait, - info->transport_status == SMBD_DISCONNECTED); + sc->status == SMBDIRECT_SOCKET_DISCONNECTED); allocate_cache_failed: rdma_connect_failed: - rdma_destroy_qp(info->id); + rdma_destroy_qp(sc->rdma.cm_id); create_qp_failed: alloc_cq_failed: - if (info->send_cq) - ib_free_cq(info->send_cq); - if (info->recv_cq) - ib_free_cq(info->recv_cq); + if (sc->ib.send_cq) + ib_free_cq(sc->ib.send_cq); + if (sc->ib.recv_cq) + ib_free_cq(sc->ib.recv_cq); config_failed: - ib_dealloc_pd(info->pd); - rdma_destroy_id(info->id); + ib_dealloc_pd(sc->ib.pd); + rdma_destroy_id(sc->rdma.cm_id); create_id_failed: kfree(info); @@ -1734,8 +1762,9 @@ try_again: static int smbd_recv_buf(struct smbd_connection *info, char *buf, unsigned int size) { + struct smbdirect_socket *sc = &info->socket; struct smbd_response *response; - struct smbd_data_transfer *data_transfer; + struct smbdirect_data_transfer *data_transfer; int to_copy, to_read, data_read, offset; u32 data_length, remaining_data_length, data_offset; int rc; @@ -1848,12 +1877,12 @@ read_rfc1002_done: rc = wait_event_interruptible( info->wait_reassembly_queue, info->reassembly_data_length >= size || - info->transport_status != SMBD_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); /* Don't return any data if interrupted */ if (rc) return rc; - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_read(ERR, "disconnected\n"); return -ECONNABORTED; } @@ -1871,6 +1900,7 @@ static int smbd_recv_page(struct smbd_connection *info, struct page *page, unsigned int page_offset, unsigned int to_read) { + struct smbdirect_socket *sc = &info->socket; int ret; char *to_address; void *page_address; @@ -1879,7 +1909,7 @@ static int smbd_recv_page(struct smbd_connection *info, ret = wait_event_interruptible( info->wait_reassembly_queue, info->reassembly_data_length >= to_read || - info->transport_status != SMBD_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); if (ret) return ret; @@ -1954,12 +1984,14 @@ int smbd_send(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst_array) { struct smbd_connection *info = server->smbd_conn; + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct smb_rqst *rqst; struct iov_iter iter; unsigned int remaining_data_length, klen; int rc, i, rqst_idx; - if (info->transport_status != SMBD_CONNECTED) + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) return -EAGAIN; /* @@ -1971,10 +2003,10 @@ int smbd_send(struct TCP_Server_Info *server, for (i = 0; i < num_rqst; i++) remaining_data_length += smb_rqst_len(server, &rqst_array[i]); - if (unlikely(remaining_data_length > info->max_fragmented_send_size)) { + if (unlikely(remaining_data_length > sp->max_fragmented_send_size)) { /* assertion: payload never exceeds negotiated maximum */ log_write(ERR, "payload size %d > max size %d\n", - remaining_data_length, info->max_fragmented_send_size); + remaining_data_length, sp->max_fragmented_send_size); return -EINVAL; } @@ -2053,6 +2085,7 @@ static void smbd_mr_recovery_work(struct work_struct *work) { struct smbd_connection *info = container_of(work, struct smbd_connection, mr_recovery_work); + struct smbdirect_socket *sc = &info->socket; struct smbd_mr *smbdirect_mr; int rc; @@ -2070,7 +2103,7 @@ static void smbd_mr_recovery_work(struct work_struct *work) } smbdirect_mr->mr = ib_alloc_mr( - info->pd, info->mr_type, + sc->ib.pd, info->mr_type, info->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", @@ -2099,12 +2132,13 @@ static void smbd_mr_recovery_work(struct work_struct *work) static void destroy_mr_list(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct smbd_mr *mr, *tmp; cancel_work_sync(&info->mr_recovery_work); list_for_each_entry_safe(mr, tmp, &info->mr_list, list) { if (mr->state == MR_INVALIDATED) - ib_dma_unmap_sg(info->id->device, mr->sgt.sgl, + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); ib_dereg_mr(mr->mr); kfree(mr->sgt.sgl); @@ -2121,6 +2155,7 @@ static void destroy_mr_list(struct smbd_connection *info) */ static int allocate_mr_list(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; int i; struct smbd_mr *smbdirect_mr, *tmp; @@ -2136,7 +2171,7 @@ static int allocate_mr_list(struct smbd_connection *info) smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); if (!smbdirect_mr) goto cleanup_entries; - smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type, + smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, info->mr_type, info->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", @@ -2181,20 +2216,20 @@ cleanup_entries: */ static struct smbd_mr *get_mr(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct smbd_mr *ret; int rc; again: rc = wait_event_interruptible(info->wait_mr, atomic_read(&info->mr_ready_count) || - info->transport_status != SMBD_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); if (rc) { log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc); return NULL; } - if (info->transport_status != SMBD_CONNECTED) { - log_rdma_mr(ERR, "info->transport_status=%x\n", - info->transport_status); + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + log_rdma_mr(ERR, "sc->status=%x\n", sc->status); return NULL; } @@ -2247,6 +2282,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, struct iov_iter *iter, bool writing, bool need_invalidate) { + struct smbdirect_socket *sc = &info->socket; struct smbd_mr *smbdirect_mr; int rc, num_pages; enum dma_data_direction dir; @@ -2276,7 +2312,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, num_pages, iov_iter_count(iter), info->max_frmr_depth); smbd_iter_to_mr(info, iter, &smbdirect_mr->sgt, info->max_frmr_depth); - rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgt.sgl, + rc = ib_dma_map_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, dir); if (!rc) { log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", @@ -2312,7 +2348,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution * on the next ib_post_send when we actually send I/O to remote peer */ - rc = ib_post_send(info->id->qp, ®_wr->wr, NULL); + rc = ib_post_send(sc->ib.qp, ®_wr->wr, NULL); if (!rc) return smbdirect_mr; @@ -2321,7 +2357,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, /* If all failed, attempt to recover this MR by setting it MR_ERROR*/ map_mr_error: - ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgt.sgl, + ib_dma_unmap_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, smbdirect_mr->dir); dma_map_error: @@ -2359,6 +2395,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) { struct ib_send_wr *wr; struct smbd_connection *info = smbdirect_mr->conn; + struct smbdirect_socket *sc = &info->socket; int rc = 0; if (smbdirect_mr->need_invalidate) { @@ -2372,7 +2409,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) wr->send_flags = IB_SEND_SIGNALED; init_completion(&smbdirect_mr->invalidate_done); - rc = ib_post_send(info->id->qp, wr, NULL); + rc = ib_post_send(sc->ib.qp, wr, NULL); if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); smbd_disconnect_rdma_connection(info); @@ -2389,7 +2426,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) if (smbdirect_mr->state == MR_INVALIDATED) { ib_dma_unmap_sg( - info->id->device, smbdirect_mr->sgt.sgl, + sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, smbdirect_mr->dir); smbdirect_mr->state = MR_READY; diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index c08e3665150d..75b3f491c3ad 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -15,6 +15,9 @@ #include <rdma/rdma_cm.h> #include <linux/mempool.h> +#include "../common/smbdirect/smbdirect.h" +#include "../common/smbdirect/smbdirect_socket.h" + extern int rdma_readwrite_threshold; extern int smbd_max_frmr_depth; extern int smbd_keep_alive_interval; @@ -50,14 +53,8 @@ enum smbd_connection_status { * 5. mempools for allocating packets */ struct smbd_connection { - enum smbd_connection_status transport_status; - - /* RDMA related */ - struct rdma_cm_id *id; - struct ib_qp_init_attr qp_attr; - struct ib_pd *pd; - struct ib_cq *send_cq, *recv_cq; - struct ib_device_attr dev_attr; + struct smbdirect_socket socket; + int ri_rc; struct completion ri_done; wait_queue_head_t conn_wait; @@ -72,15 +69,7 @@ struct smbd_connection { spinlock_t lock_new_credits_offered; int new_credits_offered; - /* Connection parameters defined in [MS-SMBD] 3.1.1.1 */ - int receive_credit_max; - int send_credit_target; - int max_send_size; - int max_fragmented_recv_size; - int max_fragmented_send_size; - int max_receive_size; - int keep_alive_interval; - int max_readwrite_size; + /* dynamic connection parameters defined in [MS-SMBD] 3.1.1.1 */ enum keep_alive_status keep_alive_requested; int protocol; atomic_t send_credits; @@ -177,54 +166,6 @@ enum smbd_message_type { SMBD_TRANSFER_DATA, }; -#define SMB_DIRECT_RESPONSE_REQUESTED 0x0001 - -/* SMBD negotiation request packet [MS-SMBD] 2.2.1 */ -struct smbd_negotiate_req { - __le16 min_version; - __le16 max_version; - __le16 reserved; - __le16 credits_requested; - __le32 preferred_send_size; - __le32 max_receive_size; - __le32 max_fragmented_size; -} __packed; - -/* SMBD negotiation response packet [MS-SMBD] 2.2.2 */ -struct smbd_negotiate_resp { - __le16 min_version; - __le16 max_version; - __le16 negotiated_version; - __le16 reserved; - __le16 credits_requested; - __le16 credits_granted; - __le32 status; - __le32 max_readwrite_size; - __le32 preferred_send_size; - __le32 max_receive_size; - __le32 max_fragmented_size; -} __packed; - -/* SMBD data transfer packet with payload [MS-SMBD] 2.2.3 */ -struct smbd_data_transfer { - __le16 credits_requested; - __le16 credits_granted; - __le16 flags; - __le16 reserved; - __le32 remaining_data_length; - __le32 data_offset; - __le32 data_length; - __le32 padding; - __u8 buffer[]; -} __packed; - -/* The packet fields for a registered RDMA buffer */ -struct smbd_buffer_descriptor_v1 { - __le64 offset; - __le32 token; - __le32 length; -} __packed; - /* Maximum number of SGEs used by smbdirect.c in any send work request */ #define SMBDIRECT_MAX_SEND_SGE 6 diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 266af17aa7d9..191783f553ce 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1018,14 +1018,16 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) uint index = 0; unsigned int min_in_flight = UINT_MAX, max_in_flight = 0; struct TCP_Server_Info *server = NULL; - int i; + int i, start, cur; if (!ses) return NULL; spin_lock(&ses->chan_lock); + start = atomic_inc_return(&ses->chan_seq); for (i = 0; i < ses->chan_count; i++) { - server = ses->chans[i].server; + cur = (start + i) % ses->chan_count; + server = ses->chans[cur].server; if (!server || server->terminate) continue; @@ -1042,17 +1044,15 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) */ if (server->in_flight < min_in_flight) { min_in_flight = server->in_flight; - index = i; + index = cur; } if (server->in_flight > max_in_flight) max_in_flight = server->in_flight; } /* if all channels are equally loaded, fall back to round-robin */ - if (min_in_flight == max_in_flight) { - index = (uint)atomic_inc_return(&ses->chan_seq); - index %= ses->chan_count; - } + if (min_in_flight == max_in_flight) + index = (uint)start % ses->chan_count; server = ses->chans[index].server; spin_unlock(&ses->chan_lock); diff --git a/fs/smb/common/smbdirect/smbdirect.h b/fs/smb/common/smbdirect/smbdirect.h new file mode 100644 index 000000000000..b9a385344ff3 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2017, Microsoft Corporation. + * Copyright (C) 2018, LG Electronics. + */ + +#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ +#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ + +/* SMB-DIRECT buffer descriptor V1 structure [MS-SMBD] 2.2.3.1 */ +struct smbdirect_buffer_descriptor_v1 { + __le64 offset; + __le32 token; + __le32 length; +} __packed; + +/* + * Connection parameters mostly from [MS-SMBD] 3.1.1.1 + * + * These are setup and negotiated at the beginning of a + * connection and remain constant unless explicitly changed. + * + * Some values are important for the upper layer. + */ +struct smbdirect_socket_parameters { + __u16 recv_credit_max; + __u16 send_credit_target; + __u32 max_send_size; + __u32 max_fragmented_send_size; + __u32 max_recv_size; + __u32 max_fragmented_recv_size; + __u32 max_read_write_size; + __u32 keepalive_interval_msec; + __u32 keepalive_timeout_msec; +} __packed; + +#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_pdu.h b/fs/smb/common/smbdirect/smbdirect_pdu.h new file mode 100644 index 000000000000..ae9fdb05ce23 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_pdu.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2017 Stefan Metzmacher + */ + +#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PDU_H__ +#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PDU_H__ + +#define SMBDIRECT_V1 0x0100 + +/* SMBD negotiation request packet [MS-SMBD] 2.2.1 */ +struct smbdirect_negotiate_req { + __le16 min_version; + __le16 max_version; + __le16 reserved; + __le16 credits_requested; + __le32 preferred_send_size; + __le32 max_receive_size; + __le32 max_fragmented_size; +} __packed; + +/* SMBD negotiation response packet [MS-SMBD] 2.2.2 */ +struct smbdirect_negotiate_resp { + __le16 min_version; + __le16 max_version; + __le16 negotiated_version; + __le16 reserved; + __le16 credits_requested; + __le16 credits_granted; + __le32 status; + __le32 max_readwrite_size; + __le32 preferred_send_size; + __le32 max_receive_size; + __le32 max_fragmented_size; +} __packed; + +#define SMBDIRECT_DATA_MIN_HDR_SIZE 0x14 +#define SMBDIRECT_DATA_OFFSET 0x18 + +#define SMBDIRECT_FLAG_RESPONSE_REQUESTED 0x0001 + +/* SMBD data transfer packet with payload [MS-SMBD] 2.2.3 */ +struct smbdirect_data_transfer { + __le16 credits_requested; + __le16 credits_granted; + __le16 flags; + __le16 reserved; + __le32 remaining_data_length; + __le32 data_offset; + __le32 data_length; + __le32 padding; + __u8 buffer[]; +} __packed; + +#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PDU_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h new file mode 100644 index 000000000000..e5b15cc44a7b --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2025 Stefan Metzmacher + */ + +#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ +#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ + +enum smbdirect_socket_status { + SMBDIRECT_SOCKET_CREATED, + SMBDIRECT_SOCKET_CONNECTING, + SMBDIRECT_SOCKET_CONNECTED, + SMBDIRECT_SOCKET_NEGOTIATE_FAILED, + SMBDIRECT_SOCKET_DISCONNECTING, + SMBDIRECT_SOCKET_DISCONNECTED, + SMBDIRECT_SOCKET_DESTROYED +}; + +struct smbdirect_socket { + enum smbdirect_socket_status status; + + /* RDMA related */ + struct { + struct rdma_cm_id *cm_id; + } rdma; + + /* IB verbs related */ + struct { + struct ib_pd *pd; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + + /* + * shortcuts for rdma.cm_id->{qp,device}; + */ + struct ib_qp *qp; + struct ib_device *dev; + } ib; + + struct smbdirect_socket_parameters parameters; +}; + +#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ diff --git a/include/linux/mount.h b/include/linux/mount.h index d3ee0e5162f0..4880f434c021 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -65,7 +65,8 @@ enum mount_flags { MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED, + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | + MNT_LOCKED, }; struct vfsmount { diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 0b8b32bf0655..bb2c52f4fc94 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -12,6 +12,7 @@ struct user_namespace; extern struct user_namespace init_user_ns; +struct seq_file; struct vm_area_struct; struct timens_offsets { diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 9253e83b9bb4..6c631eec23e3 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -338,53 +338,6 @@ DEFINE_EVENT(wakeup_source, wakeup_source_deactivate, ); /* - * The clock events are used for clock enable/disable and for - * clock rate change - */ -DECLARE_EVENT_CLASS(clock, - - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), - - TP_ARGS(name, state, cpu_id), - - TP_STRUCT__entry( - __string( name, name ) - __field( u64, state ) - __field( u64, cpu_id ) - ), - - TP_fast_assign( - __assign_str(name); - __entry->state = state; - __entry->cpu_id = cpu_id; - ), - - TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), - (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) -); - -DEFINE_EVENT(clock, clock_enable, - - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), - - TP_ARGS(name, state, cpu_id) -); - -DEFINE_EVENT(clock, clock_disable, - - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), - - TP_ARGS(name, state, cpu_id) -); - -DEFINE_EVENT(clock, clock_set_rate, - - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), - - TP_ARGS(name, state, cpu_id) -); - -/* * The power domain events are used for power domains transitions */ DECLARE_EVENT_CLASS(power_domain, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e24509bd0af5..00fc38d70e86 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -6795,7 +6795,7 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) old_size = buffer->subbuf_size; /* prevent another thread from changing buffer sizes */ - mutex_lock(&buffer->mutex); + guard(mutex)(&buffer->mutex); atomic_inc(&buffer->record_disabled); /* Make sure all commits have finished */ @@ -6900,7 +6900,6 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) } atomic_dec(&buffer->record_disabled); - mutex_unlock(&buffer->mutex); return 0; @@ -6909,7 +6908,6 @@ error: buffer->subbuf_size = old_size; atomic_dec(&buffer->record_disabled); - mutex_unlock(&buffer->mutex); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 2048560264bb..ea8b364b6818 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1250,7 +1250,9 @@ static void append_filter_err(struct trace_array *tr, static inline struct event_filter *event_filter(struct trace_event_file *file) { - return file->filter; + return rcu_dereference_protected(file->filter, + lockdep_is_held(&event_mutex)); + } /* caller must hold event_mutex */ @@ -1320,7 +1322,7 @@ void free_event_filter(struct event_filter *filter) static inline void __remove_filter(struct trace_event_file *file) { filter_disable(file); - remove_filter_string(file->filter); + remove_filter_string(event_filter(file)); } static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir, @@ -1335,22 +1337,139 @@ static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir, } } +struct filter_list { + struct list_head list; + struct event_filter *filter; +}; + +struct filter_head { + struct list_head list; + struct rcu_head rcu; +}; + + +static void free_filter_list(struct rcu_head *rhp) +{ + struct filter_head *filter_list = container_of(rhp, struct filter_head, rcu); + struct filter_list *filter_item, *tmp; + + list_for_each_entry_safe(filter_item, tmp, &filter_list->list, list) { + __free_filter(filter_item->filter); + list_del(&filter_item->list); + kfree(filter_item); + } + kfree(filter_list); +} + +static void free_filter_list_tasks(struct rcu_head *rhp) +{ + call_rcu(rhp, free_filter_list); +} + +/* + * The tracepoint_synchronize_unregister() is a double rcu call. + * It calls synchronize_rcu_tasks_trace() followed by synchronize_rcu(). + * Instead of waiting for it, simply call these via the call_rcu*() + * variants. + */ +static void delay_free_filter(struct filter_head *head) +{ + call_rcu_tasks_trace(&head->rcu, free_filter_list_tasks); +} + +static void try_delay_free_filter(struct event_filter *filter) +{ + struct filter_head *head; + struct filter_list *item; + + head = kmalloc(sizeof(*head), GFP_KERNEL); + if (!head) + goto free_now; + + INIT_LIST_HEAD(&head->list); + + item = kmalloc(sizeof(*item), GFP_KERNEL); + if (!item) { + kfree(head); + goto free_now; + } + + item->filter = filter; + list_add_tail(&item->list, &head->list); + delay_free_filter(head); + return; + + free_now: + /* Make sure the filter is not being used */ + tracepoint_synchronize_unregister(); + __free_filter(filter); +} + static inline void __free_subsystem_filter(struct trace_event_file *file) { - __free_filter(file->filter); + __free_filter(event_filter(file)); file->filter = NULL; } +static inline void event_set_filter(struct trace_event_file *file, + struct event_filter *filter) +{ + rcu_assign_pointer(file->filter, filter); +} + +static inline void event_clear_filter(struct trace_event_file *file) +{ + RCU_INIT_POINTER(file->filter, NULL); +} + static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, - struct trace_array *tr) + struct trace_array *tr, + struct event_filter *filter) { struct trace_event_file *file; + struct filter_head *head; + struct filter_list *item; + + head = kmalloc(sizeof(*head), GFP_KERNEL); + if (!head) + goto free_now; + + INIT_LIST_HEAD(&head->list); + + item = kmalloc(sizeof(*item), GFP_KERNEL); + if (!item) { + kfree(head); + goto free_now; + } + + item->filter = filter; + list_add_tail(&item->list, &head->list); list_for_each_entry(file, &tr->events, list) { if (file->system != dir) continue; + item = kmalloc(sizeof(*item), GFP_KERNEL); + if (!item) + goto free_now; + item->filter = event_filter(file); + list_add_tail(&item->list, &head->list); + event_clear_filter(file); + } + + delay_free_filter(head); + return; + free_now: + tracepoint_synchronize_unregister(); + + if (head) + free_filter_list(&head->rcu); + + list_for_each_entry(file, &tr->events, list) { + if (file->system != dir || !file->filter) + continue; __free_subsystem_filter(file); } + __free_filter(filter); } int filter_assign_type(const char *type) @@ -2120,22 +2239,6 @@ static inline void event_set_filtered_flag(struct trace_event_file *file) trace_buffered_event_enable(); } -static inline void event_set_filter(struct trace_event_file *file, - struct event_filter *filter) -{ - rcu_assign_pointer(file->filter, filter); -} - -static inline void event_clear_filter(struct trace_event_file *file) -{ - RCU_INIT_POINTER(file->filter, NULL); -} - -struct filter_list { - struct list_head list; - struct event_filter *filter; -}; - static int process_system_preds(struct trace_subsystem_dir *dir, struct trace_array *tr, struct filter_parse_error *pe, @@ -2144,11 +2247,16 @@ static int process_system_preds(struct trace_subsystem_dir *dir, struct trace_event_file *file; struct filter_list *filter_item; struct event_filter *filter = NULL; - struct filter_list *tmp; - LIST_HEAD(filter_list); + struct filter_head *filter_list; bool fail = true; int err; + filter_list = kmalloc(sizeof(*filter_list), GFP_KERNEL); + if (!filter_list) + return -ENOMEM; + + INIT_LIST_HEAD(&filter_list->list); + list_for_each_entry(file, &tr->events, list) { if (file->system != dir) @@ -2175,7 +2283,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir, if (!filter_item) goto fail_mem; - list_add_tail(&filter_item->list, &filter_list); + list_add_tail(&filter_item->list, &filter_list->list); /* * Regardless of if this returned an error, we still * replace the filter for the call. @@ -2195,31 +2303,22 @@ static int process_system_preds(struct trace_subsystem_dir *dir, * Do a synchronize_rcu() and to ensure all calls are * done with them before we free them. */ - tracepoint_synchronize_unregister(); - list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { - __free_filter(filter_item->filter); - list_del(&filter_item->list); - kfree(filter_item); - } + delay_free_filter(filter_list); return 0; fail: /* No call succeeded */ - list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { - list_del(&filter_item->list); - kfree(filter_item); - } + free_filter_list(&filter_list->rcu); parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0); return -EINVAL; fail_mem: __free_filter(filter); + /* If any call succeeded, we still need to sync */ if (!fail) - tracepoint_synchronize_unregister(); - list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { - __free_filter(filter_item->filter); - list_del(&filter_item->list); - kfree(filter_item); - } + delay_free_filter(filter_list); + else + free_filter_list(&filter_list->rcu); + return -ENOMEM; } @@ -2361,9 +2460,7 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string) event_clear_filter(file); - /* Make sure the filter is not being used */ - tracepoint_synchronize_unregister(); - __free_filter(filter); + try_delay_free_filter(filter); return 0; } @@ -2387,11 +2484,8 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string) event_set_filter(file, filter); - if (tmp) { - /* Make sure the call is done with the filter */ - tracepoint_synchronize_unregister(); - __free_filter(tmp); - } + if (tmp) + try_delay_free_filter(tmp); } return err; @@ -2417,9 +2511,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, filter = system->filter; system->filter = NULL; /* Ensure all filters are no longer used */ - tracepoint_synchronize_unregister(); - filter_free_subsystem_filters(dir, tr); - __free_filter(filter); + filter_free_subsystem_filters(dir, tr, filter); return 0; } diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index 8b378c91debf..b1e4618399be 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -2079,24 +2079,9 @@ TEST_F(mount_setattr, detached_tree_propagation) * means that the device information will be different for any * statx() that was taken from /mnt/A before the mount compared * to one after the mount. - * - * Since we already now that the device information between the - * stx1 and stx2 samples are identical we also now that stx2 and - * stx3 device information will necessarily differ. */ ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor); - - /* - * If mount propagation worked correctly then the tmpfs mount - * that was created after the mount namespace was unshared will - * have propagated onto /mnt/A in the detached mount tree. - * - * Verify that the device information for stx3 and stx4 are - * identical. It is already established that stx3 is different - * from both stx1 and stx2 sampled before the tmpfs mount was - * done so if stx3 and stx4 are identical the proof is done. - */ - ASSERT_EQ(stx3.stx_dev_minor, stx4.stx_dev_minor); + ASSERT_EQ(stx1.stx_dev_minor, stx4.stx_dev_minor); EXPECT_EQ(close(fd_tree), 0); } |