summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDmitry Mishin <dim@openvz.org>2006-08-31 15:28:39 -0700
committerDavid S. Miller <davem@sunset.davemloft.net>2006-09-22 15:18:47 -0700
commitfda9ef5d679b07c9d9097aaf6ef7f069d794a8f9 (patch)
tree6a265dc2038bc5568c5a499e6c8d4733650ed3f7
parentdc435e6dac1439340eaeceef84022c4e4749796d (diff)
[NET]: Fix sk->sk_filter field access
Function sk_filter() is called from tcp_v{4,6}_rcv() functions with arg needlock = 0, while socket is not locked at that moment. In order to avoid this and similar issues in the future, use rcu for sk->sk_filter field read protection. Signed-off-by: Dmitry Mishin <dim@openvz.org> Signed-off-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> Signed-off-by: Kirill Korotaev <dev@openvz.org>
-rw-r--r--include/linux/filter.h13
-rw-r--r--include/net/sock.h34
-rw-r--r--net/core/filter.c8
-rw-r--r--net/core/sock.c22
-rw-r--r--net/dccp/ipv6.c2
-rw-r--r--net/decnet/dn_nsp_in.c2
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/packet/af_packet.c43
-rw-r--r--net/sctp/input.c2
10 files changed, 61 insertions, 71 deletions
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c6cb8f09508..91b2e3b9251 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -25,10 +25,10 @@
struct sock_filter /* Filter block */
{
- __u16 code; /* Actual filter code */
- __u8 jt; /* Jump true */
- __u8 jf; /* Jump false */
- __u32 k; /* Generic multiuse field */
+ __u16 code; /* Actual filter code */
+ __u8 jt; /* Jump true */
+ __u8 jf; /* Jump false */
+ __u32 k; /* Generic multiuse field */
};
struct sock_fprog /* Required for SO_ATTACH_FILTER. */
@@ -41,8 +41,9 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */
struct sk_filter
{
atomic_t refcnt;
- unsigned int len; /* Number of filter blocks */
- struct sock_filter insns[0];
+ unsigned int len; /* Number of filter blocks */
+ struct rcu_head rcu;
+ struct sock_filter insns[0];
};
static inline unsigned int sk_filter_len(struct sk_filter *fp)
diff --git a/include/net/sock.h b/include/net/sock.h
index 337ebec84c7..edd4d73ce7f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -862,30 +862,24 @@ extern void sock_init_data(struct socket *sock, struct sock *sk);
*
*/
-static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
+static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
{
int err;
+ struct sk_filter *filter;
err = security_sock_rcv_skb(sk, skb);
if (err)
return err;
- if (sk->sk_filter) {
- struct sk_filter *filter;
-
- if (needlock)
- bh_lock_sock(sk);
-
- filter = sk->sk_filter;
- if (filter) {
- unsigned int pkt_len = sk_run_filter(skb, filter->insns,
- filter->len);
- err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
- }
-
- if (needlock)
- bh_unlock_sock(sk);
+ rcu_read_lock_bh();
+ filter = sk->sk_filter;
+ if (filter) {
+ unsigned int pkt_len = sk_run_filter(skb, filter->insns,
+ filter->len);
+ err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
+ rcu_read_unlock_bh();
+
return err;
}
@@ -897,6 +891,12 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
* Remove a filter from a socket and release its resources.
*/
+static inline void sk_filter_rcu_free(struct rcu_head *rcu)
+{
+ struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+ kfree(fp);
+}
+
static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
{
unsigned int size = sk_filter_len(fp);
@@ -904,7 +904,7 @@ static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
atomic_sub(size, &sk->sk_omem_alloc);
if (atomic_dec_and_test(&fp->refcnt))
- kfree(fp);
+ call_rcu_bh(&fp->rcu, sk_filter_rcu_free);
}
static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
diff --git a/net/core/filter.c b/net/core/filter.c
index 5b4486a60cf..6732782a5a4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -422,10 +422,10 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (!err) {
struct sk_filter *old_fp;
- spin_lock_bh(&sk->sk_lock.slock);
- old_fp = sk->sk_filter;
- sk->sk_filter = fp;
- spin_unlock_bh(&sk->sk_lock.slock);
+ rcu_read_lock_bh();
+ old_fp = rcu_dereference(sk->sk_filter);
+ rcu_assign_pointer(sk->sk_filter, fp);
+ rcu_read_unlock_bh();
fp = old_fp;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index cfaf09039b0..b77e155cbe6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -247,11 +247,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto out;
}
- /* It would be deadlock, if sock_queue_rcv_skb is used
- with socket lock! We assume that users of this
- function are lock free.
- */
- err = sk_filter(sk, skb, 1);
+ err = sk_filter(sk, skb);
if (err)
goto out;
@@ -278,7 +274,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
{
int rc = NET_RX_SUCCESS;
- if (sk_filter(sk, skb, 0))
+ if (sk_filter(sk, skb))
goto discard_and_relse;
skb->dev = NULL;
@@ -606,15 +602,15 @@ set_rcvbuf:
break;
case SO_DETACH_FILTER:
- spin_lock_bh(&sk->sk_lock.slock);
- filter = sk->sk_filter;
+ rcu_read_lock_bh();
+ filter = rcu_dereference(sk->sk_filter);
if (filter) {
- sk->sk_filter = NULL;
- spin_unlock_bh(&sk->sk_lock.slock);
+ rcu_assign_pointer(sk->sk_filter, NULL);
sk_filter_release(sk, filter);
+ rcu_read_unlock_bh();
break;
}
- spin_unlock_bh(&sk->sk_lock.slock);
+ rcu_read_unlock_bh();
ret = -ENONET;
break;
@@ -884,10 +880,10 @@ void sk_free(struct sock *sk)
if (sk->sk_destruct)
sk->sk_destruct(sk);
- filter = sk->sk_filter;
+ filter = rcu_dereference(sk->sk_filter);
if (filter) {
sk_filter_release(sk, filter);
- sk->sk_filter = NULL;
+ rcu_assign_pointer(sk->sk_filter, NULL);
}
sock_disable_timestamp(sk);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index f9c5e12d703..7a47399cf31 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -970,7 +970,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return dccp_v4_do_rcv(sk, skb);
- if (sk_filter(sk, skb, 0))
+ if (sk_filter(sk, skb))
goto discard;
/*
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 86f7f3b28e7..72ecc6e62ec 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -586,7 +586,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig
goto out;
}
- err = sk_filter(sk, skb, 0);
+ err = sk_filter(sk, skb);
if (err)
goto out;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 23b46e36b14..39b17985608 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1104,7 +1104,7 @@ process:
goto discard_and_relse;
nf_reset(skb);
- if (sk_filter(sk, skb, 0))
+ if (sk_filter(sk, skb))
goto discard_and_relse;
skb->dev = NULL;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2b18918f301..2546fc9f0a7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1075,7 +1075,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
- if (sk_filter(sk, skb, 0))
+ if (sk_filter(sk, skb))
goto discard;
/*
@@ -1232,7 +1232,7 @@ process:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- if (sk_filter(sk, skb, 0))
+ if (sk_filter(sk, skb))
goto discard_and_relse;
skb->dev = NULL;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 300215bdbf4..f4ccb90e673 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -427,21 +427,24 @@ out_unlock:
}
#endif
-static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
+static inline int run_filter(struct sk_buff *skb, struct sock *sk,
+ unsigned *snaplen)
{
struct sk_filter *filter;
+ int err = 0;
- bh_lock_sock(sk);
- filter = sk->sk_filter;
- /*
- * Our caller already checked that filter != NULL but we need to
- * verify that under bh_lock_sock() to be safe
- */
- if (likely(filter != NULL))
- res = sk_run_filter(skb, filter->insns, filter->len);
- bh_unlock_sock(sk);
+ rcu_read_lock_bh();
+ filter = rcu_dereference(sk->sk_filter);
+ if (filter != NULL) {
+ err = sk_run_filter(skb, filter->insns, filter->len);
+ if (!err)
+ err = -EPERM;
+ else if (*snaplen > err)
+ *snaplen = err;
+ }
+ rcu_read_unlock_bh();
- return res;
+ return err;
}
/*
@@ -491,13 +494,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
snaplen = skb->len;
- if (sk->sk_filter) {
- unsigned res = run_filter(skb, sk, snaplen);
- if (res == 0)
- goto drop_n_restore;
- if (snaplen > res)
- snaplen = res;
- }
+ if (run_filter(skb, sk, &snaplen) < 0)
+ goto drop_n_restore;
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned)sk->sk_rcvbuf)
@@ -593,13 +591,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
snaplen = skb->len;
- if (sk->sk_filter) {
- unsigned res = run_filter(skb, sk, snaplen);
- if (res == 0)
- goto drop_n_restore;
- if (snaplen > res)
- snaplen = res;
- }
+ if (run_filter(skb, sk, &snaplen) < 0)
+ goto drop_n_restore;
if (sk->sk_type == SOCK_DGRAM) {
macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 8a34d95602c..03f65de75d8 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -228,7 +228,7 @@ int sctp_rcv(struct sk_buff *skb)
goto discard_release;
nf_reset(skb);
- if (sk_filter(sk, skb, 1))
+ if (sk_filter(sk, skb))
goto discard_release;
/* Create an SCTP packet structure. */