From f4bd73b5a950866f6c6fc98a7b684d307c5d586a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 28 Sep 2021 09:42:27 +0900 Subject: af_unix: Return errno instead of NULL in unix_create1(). unix_create1() returns NULL on error, and the callers assume that it never fails for reasons other than out of memory. So, the callers always return -ENOMEM when unix_create1() fails. However, it also returns NULL when the number of af_unix sockets exceeds twice the limit controlled by sysctl: fs.file-max. In this case, the callers should return -ENFILE like alloc_empty_file(). This patch changes unix_create1() to return the correct error value instead of NULL on error. Out of curiosity, the assumption has been wrong since 1999 due to this change introduced in 2.2.4 [0]. diff -u --recursive --new-file v2.2.3/linux/net/unix/af_unix.c linux/net/unix/af_unix.c --- v2.2.3/linux/net/unix/af_unix.c Tue Jan 19 11:32:53 1999 +++ linux/net/unix/af_unix.c Sun Mar 21 07:22:00 1999 @@ -388,6 +413,9 @@ { struct sock *sk; + if (atomic_read(&unix_nr_socks) >= 2*max_files) + return NULL; + MOD_INC_USE_COUNT; sk = sk_alloc(PF_UNIX, GFP_KERNEL, 1); if (!sk) { [0]: https://cdn.kernel.org/pub/linux/kernel/v2.2/patch-2.2.4.gz Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/unix/af_unix.c | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 92345c9bb60c..f505b89bda6a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -828,20 +828,25 @@ struct proto unix_stream_proto = { static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) { - struct sock *sk = NULL; struct unix_sock *u; + struct sock *sk; + int err; atomic_long_inc(&unix_nr_socks); - if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) - goto out; + if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { + err = -ENFILE; + goto err; + } if (type == SOCK_STREAM) sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); else /*dgram and seqpacket */ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); - if (!sk) - goto out; + if (!sk) { + err = -ENOMEM; + goto err; + } sock_init_data(sock, sk); @@ -861,20 +866,23 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); memset(&u->scm_stat, 0, sizeof(struct scm_stat)); unix_insert_socket(unix_sockets_unbound(sk), sk); -out: - if (sk == NULL) - atomic_long_dec(&unix_nr_socks); - else { - local_bh_disable(); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - local_bh_enable(); - } + + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + local_bh_enable(); + return sk; + +err: + atomic_long_dec(&unix_nr_socks); + return ERR_PTR(err); } static int unix_create(struct net *net, struct socket *sock, int protocol, int kern) { + struct sock *sk; + if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; @@ -901,7 +909,11 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM; + sk = unix_create1(net, sock, kern, sock->type); + if (IS_ERR(sk)) + return PTR_ERR(sk); + + return 0; } static int unix_release(struct socket *sock) @@ -1314,12 +1326,15 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, we will have to recheck all again in any case. */ - err = -ENOMEM; - /* create new sock for complete connection */ newsk = unix_create1(sock_net(sk), NULL, 0, sock->type); - if (newsk == NULL) + if (IS_ERR(newsk)) { + err = PTR_ERR(newsk); + newsk = NULL; goto out; + } + + err = -ENOMEM; /* Allocate skb for sending to listening sock */ skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); -- cgit v1.2.3 From 35306eb23814444bd4021f8a1c3047d3cb0c8b2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Sep 2021 15:57:50 -0700 Subject: af_unix: fix races in sk_peer_pid and sk_peer_cred accesses Jann Horn reported that SO_PEERCRED and SO_PEERGROUPS implementations are racy, as af_unix can concurrently change sk_peer_pid and sk_peer_cred. In order to fix this issue, this patch adds a new spinlock that needs to be used whenever these fields are read or written. Jann also pointed out that l2cap_sock_get_peer_pid_cb() is currently reading sk->sk_peer_pid which makes no sense, as this field is only possibly set by AF_UNIX sockets. We will have to clean this in a separate patch. This could be done by reverting b48596d1dc25 "Bluetooth: L2CAP: Add get_peer_pid callback" or implementing what was truly expected. Fixes: 109f6e39fa07 ("af_unix: Allow SO_PEERCRED to work across namespaces.") Signed-off-by: Eric Dumazet Reported-by: Jann Horn Cc: Eric W. Biederman Cc: Luiz Augusto von Dentz Cc: Marcel Holtmann Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ net/core/sock.c | 32 ++++++++++++++++++++++++++------ net/unix/af_unix.c | 34 ++++++++++++++++++++++++++++------ 3 files changed, 56 insertions(+), 12 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/include/net/sock.h b/include/net/sock.h index dc3f8169312e..ae929e21a376 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -488,8 +488,10 @@ struct sock { u8 sk_prefer_busy_poll; u16 sk_busy_poll_budget; #endif + spinlock_t sk_peer_lock; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; + long sk_rcvtimeo; ktime_t sk_stamp; #if BITS_PER_LONG==32 diff --git a/net/core/sock.c b/net/core/sock.c index 7060d183216e..c1601f75ec4b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1376,6 +1376,16 @@ set_sndbuf: } EXPORT_SYMBOL(sock_setsockopt); +static const struct cred *sk_get_peer_cred(struct sock *sk) +{ + const struct cred *cred; + + spin_lock(&sk->sk_peer_lock); + cred = get_cred(sk->sk_peer_cred); + spin_unlock(&sk->sk_peer_lock); + + return cred; +} static void cred_to_ucred(struct pid *pid, const struct cred *cred, struct ucred *ucred) @@ -1552,7 +1562,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname, struct ucred peercred; if (len > sizeof(peercred)) len = sizeof(peercred); + + spin_lock(&sk->sk_peer_lock); cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); + spin_unlock(&sk->sk_peer_lock); + if (copy_to_user(optval, &peercred, len)) return -EFAULT; goto lenout; @@ -1560,20 +1574,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_PEERGROUPS: { + const struct cred *cred; int ret, n; - if (!sk->sk_peer_cred) + cred = sk_get_peer_cred(sk); + if (!cred) return -ENODATA; - n = sk->sk_peer_cred->group_info->ngroups; + n = cred->group_info->ngroups; if (len < n * sizeof(gid_t)) { len = n * sizeof(gid_t); + put_cred(cred); return put_user(len, optlen) ? -EFAULT : -ERANGE; } len = n * sizeof(gid_t); - ret = groups_to_user((gid_t __user *)optval, - sk->sk_peer_cred->group_info); + ret = groups_to_user((gid_t __user *)optval, cred->group_info); + put_cred(cred); if (ret) return ret; goto lenout; @@ -1935,9 +1952,10 @@ static void __sk_destruct(struct rcu_head *head) sk->sk_frag.page = NULL; } - if (sk->sk_peer_cred) - put_cred(sk->sk_peer_cred); + /* We do not need to acquire sk->sk_peer_lock, we are the last user. */ + put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); + if (likely(sk->sk_net_refcnt)) put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); @@ -3145,6 +3163,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_peer_pid = NULL; sk->sk_peer_cred = NULL; + spin_lock_init(&sk->sk_peer_lock); + sk->sk_write_pending = 0; sk->sk_rcvlowat = 1; sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f505b89bda6a..efac5989edb5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -608,20 +608,42 @@ static void unix_release_sock(struct sock *sk, int embrion) static void init_peercred(struct sock *sk) { - put_pid(sk->sk_peer_pid); - if (sk->sk_peer_cred) - put_cred(sk->sk_peer_cred); + const struct cred *old_cred; + struct pid *old_pid; + + spin_lock(&sk->sk_peer_lock); + old_pid = sk->sk_peer_pid; + old_cred = sk->sk_peer_cred; sk->sk_peer_pid = get_pid(task_tgid(current)); sk->sk_peer_cred = get_current_cred(); + spin_unlock(&sk->sk_peer_lock); + + put_pid(old_pid); + put_cred(old_cred); } static void copy_peercred(struct sock *sk, struct sock *peersk) { - put_pid(sk->sk_peer_pid); - if (sk->sk_peer_cred) - put_cred(sk->sk_peer_cred); + const struct cred *old_cred; + struct pid *old_pid; + + if (sk < peersk) { + spin_lock(&sk->sk_peer_lock); + spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock(&peersk->sk_peer_lock); + spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); + } + old_pid = sk->sk_peer_pid; + old_cred = sk->sk_peer_cred; sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); + + spin_unlock(&sk->sk_peer_lock); + spin_unlock(&peersk->sk_peer_lock); + + put_pid(old_pid); + put_cred(old_cred); } static int unix_listen(struct socket *sock, int backlog) -- cgit v1.2.3 From d0c6416bd7091647f6041599f396bfa19ae30368 Mon Sep 17 00:00:00 2001 From: Jiang Wang Date: Mon, 4 Oct 2021 23:25:28 +0000 Subject: unix: Fix an issue in unix_shutdown causing the other end read/write failures Commit 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") sets unix domain socket peer state to TCP_CLOSE in unix_shutdown. This could happen when the local end is shutdown but the other end is not. Then, the other end will get read or write failures which is not expected. Fix the issue by setting the local state to shutdown. Fixes: 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") Reported-by: Casey Schaufler Suggested-by: Cong Wang Signed-off-by: Jiang Wang Signed-off-by: Daniel Borkmann Tested-by: Casey Schaufler Reviewed-by: Casey Schaufler Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211004232530.2377085-1-jiang.wang@bytedance.com --- net/unix/af_unix.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f505b89bda6a..915afcae6a12 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2860,6 +2860,9 @@ static int unix_shutdown(struct socket *sock, int mode) unix_state_lock(sk); sk->sk_shutdown |= mode; + if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && + mode == SHUTDOWN_MASK) + sk->sk_state = TCP_CLOSE; other = unix_peer(sk); if (other) sock_hold(other); @@ -2882,12 +2885,10 @@ static int unix_shutdown(struct socket *sock, int mode) other->sk_shutdown |= peer_mode; unix_state_unlock(other); other->sk_state_change(other); - if (peer_mode == SHUTDOWN_MASK) { + if (peer_mode == SHUTDOWN_MASK) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); - other->sk_state = TCP_CLOSE; - } else if (peer_mode & RCV_SHUTDOWN) { + else if (peer_mode & RCV_SHUTDOWN) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); - } } if (other) sock_put(other); -- cgit v1.2.3 From 0edf0824e0dc359ed76bf96af986e6570ca2c0b9 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 8 Oct 2021 14:59:45 -0700 Subject: af_unix: Rename UNIX-DGRAM to UNIX to maintain backwards compatability Then name of this protocol changed in commit 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") because that commit added stream support to the af_unix protocol. Renaming the existing protocol makes a ChromeOS protocol test[1] fail now that the name has changed in /proc/net/protocols from "UNIX" to "UNIX-DGRAM". Let's put the name back to how it was while keeping the stream protocol as "UNIX-STREAM" so that the procfs interface doesn't change. This fixes the test and maintains backwards compatibility in proc. Cc: Jiang Wang Cc: Andrii Nakryiko Cc: Cong Wang Cc: Jakub Sitnicki Cc: John Fastabend Cc: Dmitry Osipenko Link: https://source.chromium.org/chromiumos/chromiumos/codesearch/+/main:src/platform/tast-tests/src/chromiumos/tast/local/bundles/cros/network/supported_protocols.go;l=50;drc=e8b1c3f94cb40a054f4aa1ef1aff61e75dc38f18 [1] Fixes: 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") Signed-off-by: Stephen Boyd Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0878ab86597b..89f9e85ae970 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -828,7 +828,7 @@ static void unix_unhash(struct sock *sk) } struct proto unix_dgram_proto = { - .name = "UNIX-DGRAM", + .name = "UNIX", .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), .close = unix_close, -- cgit v1.2.3