summary | refs | log | tree | commit | diff
path: root/net/core/datagram.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/core/datagram.c
Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'net/core/datagram.c')
-rw-r--r-- net/core/datagram.c 482
1 files changed, 482 insertions, 0 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
new file mode 100644
index 00000000000..d1bfd279cc1
--- /dev/null
+++ b/net/core/datagram.c
@@ -0,0 +1,482 @@
+/*
+ * SUCS NET3:
+ *
+ * Generic datagram handling routines. These are generic for all
+ * protocols. Possibly a generic IP version on top of these would
+ * make sense. Not tonight however 8-).
+ * This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
+ * NetROM layer all have identical poll code and mostly
+ * identical recvmsg() code. So we share it here. The poll was
+ * shared before but buried in udp.c so I moved it.
+ *
+ * Authors: Alan Cox <alan@redhat.com>. (datagram_poll() from old
+ * udp.c code)
+ *
+ * Fixes:
+ * Alan Cox : NULL return from skb_peek_copy()
+ * understood
+ * Alan Cox : Rewrote skb_read_datagram to avoid the
+ * skb_peek_copy stuff.
+ * Alan Cox : Added support for SOCK_SEQPACKET.
+ * IPX can no longer use the SO_TYPE hack
+ * but AX.25 now works right, and SPX is
+ * feasible.
+ * Alan Cox : Fixed write poll of non IP protocol
+ * crash.
+ * Florian La Roche: Changed for my new skbuff handling.
+ * Darryl Miles : Fixed non-blocking SOCK_SEQPACKET.
+ * Linus Torvalds : BSD semantic fixes.
+ * Alan Cox : Datagram iovec handling
+ * Darryl Miles : Fixed non-blocking SOCK_STREAM.
+ * Alan Cox : POSIXisms
+ * Pete Wyckoff : Unconnected accept() fix.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/inet.h>
+#include <linux/tcp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/poll.h>
+#include <linux/highmem.h>
+
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+
+
+/*
+ * Tell whether a socket is of a connection oriented type, i.e.
+ * SOCK_SEQPACKET or SOCK_STREAM. Yields 1 for those, 0 for any other type.
+ */
+static inline int connection_based(struct sock *sk)
+{
+ switch (sk->sk_type) {
+ case SOCK_SEQPACKET:
+ case SOCK_STREAM:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * Wait for a packet..
+ *
+ * Puts the caller on sk->sk_sleep (exclusive, TASK_INTERRUPTIBLE), then
+ * re-checks the socket and only sleeps if none of the checks below fires.
+ *
+ * @sk: socket to wait on
+ * @err: receives the error code when the wait fails, or 0 when the
+ * receive side is shut down; left untouched when 0 is returned
+ * @timeo_p: in: timeout handed to schedule_timeout(); out: the value
+ * schedule_timeout() returned (unchanged if no sleep took place)
+ *
+ * Returns 0 when the caller should look at the receive queue again
+ * (a packet was already queued, or the sleep has finished), 1 when the
+ * receive side is shut down, and otherwise the same error code that was
+ * stored in *err.
+ */
+static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+{
+ int error;
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+ /* Socket errors? */
+ error = sock_error(sk);
+ if (error)
+ goto out_err;
+
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ goto out; /* error is 0 here: return without sleeping */
+
+ /* Socket shut down? */
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ goto out_noerr;
+
+ /* Sequenced packets can come disconnected.
+ * If so we report the problem
+ */
+ error = -ENOTCONN;
+ if (connection_based(sk) &&
+ !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
+ goto out_err;
+
+ /* handle signals */
+ if (signal_pending(current))
+ goto interrupted;
+
+ error = 0; /* drop the -ENOTCONN preset before sleeping */
+ *timeo_p = schedule_timeout(*timeo_p);
+out:
+ finish_wait(sk->sk_sleep, &wait); /* every exit path comes through here */
+ return error;
+interrupted:
+ error = sock_intr_errno(*timeo_p); /* falls through into out_err */
+out_err:
+ *err = error;
+ goto out;
+out_noerr:
+ *err = 0;
+ error = 1; /* non-zero return with *err == 0: stop waiting, no error */
+ goto out;
+}
+
+/**
+ * skb_recv_datagram - Receive a datagram skbuff
+ * @sk: socket
+ * @flags: MSG_ flags; MSG_PEEK selects skb_peek() plus an extra
+ * reference on the skb instead of skb_dequeue()
+ * @noblock: blocking operation? (handed to sock_rcvtimeo() to obtain
+ * the receive timeout)
+ * @err: error code returned; written on every path that returns NULL
+ *
+ * Get a datagram skbuff, understands the peeking, nonblocking wakeups
+ * and possible races. This replaces identical code in packet, raw and
+ * udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
+ * the long standing peek and read race for datagram sockets. If you
+ * alter this routine remember it must be re-entrant.
+ *
+ * The caller releases a returned skb when done with it (usually by
+ * calling skb_free_datagram). Earlier versions of this comment said
+ * the function locks the socket when a skb is returned; as the note
+ * below records it no longer does, and the body takes no socket lock
+ * (only the receive queue spinlock, and only for MSG_PEEK).
+ *
+ * * It does not lock socket since today. This function is
+ * * free of race conditions. This measure should/can improve
+ * * significantly datagram socket latencies at high loads,
+ * * when data copying to user space takes lots of time.
+ * * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
+ * * 8) Great win.)
+ * * --ANK (980729)
+ *
+ * The order of the tests when we find no data waiting are specified
+ * quite explicitly by POSIX 1003.1g, don't change them without having
+ * the standard around please.
+ *
+ * Returns the skb, or NULL with *@err set. *@err is -EAGAIN when the
+ * timeout is zero and nothing is queued, and 0 when wait_for_packet()
+ * finds the receive side shut down.
+ */
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+ int noblock, int *err)
+{
+ struct sk_buff *skb;
+ long timeo;
+ /*
+ * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
+ */
+ int error = sock_error(sk);
+
+ if (error)
+ goto no_packet;
+
+ timeo = sock_rcvtimeo(sk, noblock);
+
+ do {
+ /* Again only user level code calls this function, so nothing
+ * interrupt level will suddenly eat the receive_queue.
+ *
+ * Look at current nfs client by the way...
+ * However, this function was correct in any case. 8)
+ */
+ if (flags & MSG_PEEK) {
+ unsigned long cpu_flags;
+
+ spin_lock_irqsave(&sk->sk_receive_queue.lock,
+ cpu_flags);
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb)
+ atomic_inc(&skb->users); /* reference for the caller, taken under the queue lock */
+ spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
+ cpu_flags);
+ } else
+ skb = skb_dequeue(&sk->sk_receive_queue);
+
+ if (skb)
+ return skb;
+
+ /* User doesn't want to wait */
+ error = -EAGAIN;
+ if (!timeo)
+ goto no_packet;
+
+ } while (!wait_for_packet(sk, err, &timeo)); /* non-zero: *err already written */
+
+ return NULL;
+
+no_packet:
+ *err = error;
+ return NULL;
+}
+
+void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
+{
+ kfree_skb(skb); /* only the skb is handed to kfree_skb(); sk is not used in this function */
+}
+
+/**
+ * skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ * @skb: buffer to copy
+ * @offset: offset in the buffer to start copying from
+ * @to: io vector to copy to
+ * @len: amount of data to copy from buffer to iovec
+ *
+ * Walks the three places an skb keeps data, in order: the linear head,
+ * the page fragments, then the skbs chained on frag_list (handled by
+ * calling this function again on each of them).
+ *
+ * Note: the iovec is modified during the copy.
+ *
+ * Returns 0 once @len bytes have been copied, -EFAULT when a copy
+ * reports an error or when all data has been walked and @len bytes
+ * were not found.
+ */
+int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
+ struct iovec *to, int len)
+{
+ int start = skb_headlen(skb); /* offset at which the page fragments begin */
+ int i, copy = start - offset; /* head bytes lying at or after offset */
+
+ /* Copy header. */
+ if (copy > 0) {
+ if (copy > len)
+ copy = len;
+ if (memcpy_toiovec(to, skb->data + offset, copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ }
+
+ /* Copy paged appendix. Hmm... why does this look so complicated? */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + skb_shinfo(skb)->frags[i].size; /* [start, end) is this fragment's range */
+ if ((copy = end - offset) > 0) {
+ int err;
+ u8 *vaddr;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct page *page = frag->page;
+
+ if (copy > len)
+ copy = len;
+ vaddr = kmap(page);
+ err = memcpy_toiovec(to, vaddr + frag->page_offset +
+ offset - start, copy);
+ kunmap(page); /* unmap before looking at err so the error path does not skip it */
+ if (err)
+ goto fault;
+ if (!(len -= copy))
+ return 0;
+ offset += copy;
+ }
+ start = end; /* the next fragment begins where this one ended */
+ }
+
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list; list = list->next) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + list->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ if (skb_copy_datagram_iovec(list,
+ offset - start,
+ to, copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ }
+ start = end;
+ }
+ }
+ if (!len) /* otherwise fall into fault: fewer than len bytes were found */
+ return 0;
+
+fault:
+ return -EFAULT;
+}
+/*
+ * skb_copy_and_csum_datagram - copy skb data to user space and checksum it
+ * @skb: buffer to copy
+ * @offset: offset in the buffer to start copying from
+ * @to: user-space destination, advanced locally as data is copied
+ * @len: amount of data to copy
+ * @csump: in: checksum to continue from; out: that checksum extended
+ * with the bytes copied here
+ *
+ * Same walk as skb_copy_datagram_iovec() - linear head, page fragments,
+ * then the skbs on frag_list - but each piece goes through
+ * csum_and_copy_to_user(). The head piece continues *@csump directly;
+ * every later piece is summed from 0 and merged with csum_block_add()
+ * at pos, the number of bytes this call has copied so far.
+ *
+ * Returns 0 once @len bytes have been copied, -EFAULT when a copy
+ * reports an error or when all data has been walked and @len bytes
+ * were not found.
+ */
+static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
+ u8 __user *to, int len,
+ unsigned int *csump)
+{
+ int start = skb_headlen(skb); /* offset at which the page fragments begin */
+ int pos = 0; /* bytes copied so far by this call */
+ int i, copy = start - offset;
+
+ /* Copy header. */
+ if (copy > 0) {
+ int err = 0;
+ if (copy > len)
+ copy = len;
+ *csump = csum_and_copy_to_user(skb->data + offset, to, copy,
+ *csump, &err);
+ if (err)
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
+ pos = copy;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ if ((copy = end - offset) > 0) {
+ unsigned int csum2;
+ int err = 0;
+ u8 *vaddr;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct page *page = frag->page;
+
+ if (copy > len)
+ copy = len;
+ vaddr = kmap(page);
+ csum2 = csum_and_copy_to_user(vaddr +
+ frag->page_offset +
+ offset - start,
+ to, copy, 0, &err);
+ kunmap(page); /* unmap before looking at err so the error path does not skip it */
+ if (err)
+ goto fault;
+ *csump = csum_block_add(*csump, csum2, pos);
+ if (!(len -= copy))
+ return 0;
+ offset += copy;
+ to += copy;
+ pos += copy;
+ }
+ start = end; /* the next fragment begins where this one ended */
+ }
+
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list; list=list->next) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + list->len;
+ if ((copy = end - offset) > 0) {
+ unsigned int csum2 = 0; /* the chained skb is summed on its own, starting from 0 */
+ if (copy > len)
+ copy = len;
+ if (skb_copy_and_csum_datagram(list,
+ offset - start,
+ to, copy,
+ &csum2))
+ goto fault;
+ *csump = csum_block_add(*csump, csum2, pos);
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
+ pos += copy;
+ }
+ start = end;
+ }
+ }
+ if (!len) /* otherwise fall into fault: fewer than len bytes were found */
+ return 0;
+
+fault:
+ return -EFAULT;
+}
+
+/**
+ * skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
+ * @skb: skbuff
+ * @hlen: hardware length
+ * @iov: io vector
+ *
+ * Copies the skb->len - @hlen bytes that follow the first @hlen bytes
+ * of @skb into @iov and checks the checksum of the skb, seeded with
+ * skb->csum. If the first non-empty iovec element is too small for the
+ * data, the checksum is verified first and the data is then copied
+ * with skb_copy_datagram_iovec(); otherwise copy and checksum happen
+ * in one pass and the element is advanced past the copied data.
+ *
+ * Caller _must_ check that skb will fit to this iovec.
+ *
+ * An skb with no data beyond @hlen returns 0 without touching @iov and
+ * without any checksum test: there is nothing to copy, and in that case
+ * @iov may hold no non-empty element (or no element at all).
+ *
+ * Returns: 0 - success.
+ * -EINVAL - checksum failure.
+ * -EFAULT - fault during copy. Beware, in this case iovec
+ * can be modified!
+ */
+int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb,
+ int hlen, struct iovec *iov)
+{
+ unsigned int csum;
+ int chunk = skb->len - hlen;
+
+ /* Nothing to copy. Return before the scan below: with every
+ * element empty it would walk off the end of the iovec.
+ */
+ if (!chunk)
+ return 0;
+
+ /* Skip filled elements.
+ * Pretty silly, look at memcpy_toiovec, though 8)
+ */
+ while (!iov->iov_len)
+ iov++;
+
+ if (iov->iov_len < chunk) {
+ /* Data spans several elements: verify first, then copy. */
+ if ((unsigned short)csum_fold(skb_checksum(skb, 0, chunk + hlen,
+ skb->csum)))
+ goto csum_error;
+ if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
+ goto fault;
+ } else {
+ /* Data fits one element: checksum the first hlen bytes,
+ * then copy and checksum the rest in a single pass.
+ */
+ csum = csum_partial(skb->data, hlen, skb->csum);
+ if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
+ chunk, &csum))
+ goto fault;
+ if ((unsigned short)csum_fold(csum))
+ goto csum_error;
+ iov->iov_len -= chunk;
+ iov->iov_base += chunk;
+ }
+ return 0;
+csum_error:
+ return -EINVAL;
+fault:
+ return -EFAULT;
+}
+
+/**
+ * datagram_poll - generic datagram poll
+ * @file: file struct
+ * @sock: socket
+ * @wait: poll table
+ *
+ * Datagram poll: Again totally generic. This also handles
+ * sequenced packet sockets providing the socket receive queue
+ * is only ever holding data ready to receive.
+ *
+ * Note: when you _don't_ use this routine for this protocol,
+ * and you use a different write policy from sock_writeable()
+ * then please supply your own write_space callback.
+ *
+ * Returns the POLL* event mask built from the socket's error state,
+ * shutdown flags, receive queue, connection state and writability.
+ * When the socket is not writable, SOCK_ASYNC_NOSPACE is set in the
+ * socket flags as a side effect.
+ */
+unsigned int datagram_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
+{
+ struct sock *sk = sock->sk;
+ unsigned int mask;
+
+ poll_wait(file, sk->sk_sleep, wait);
+ mask = 0;
+
+ /* exceptional events? */
+ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ mask |= POLLERR;
+ if (sk->sk_shutdown == SHUTDOWN_MASK)
+ mask |= POLLHUP;
+
+ /* readable? */
+ if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ (sk->sk_shutdown & RCV_SHUTDOWN))
+ mask |= POLLIN | POLLRDNORM;
+
+ /* Connection-based need to check for termination and startup */
+ if (connection_based(sk)) {
+ if (sk->sk_state == TCP_CLOSE)
+ mask |= POLLHUP;
+ /* connection hasn't started yet? */
+ if (sk->sk_state == TCP_SYN_SENT)
+ return mask; /* the writability test below is skipped */
+ }
+
+ /* writable? */
+ if (sock_writeable(sk))
+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ else
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); /* record that poll found no write space */
+
+ return mask;
+}
+
+EXPORT_SYMBOL(datagram_poll); /* the five non-static functions of this file are exported; the static helpers are not */
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
+EXPORT_SYMBOL(skb_copy_datagram_iovec);
+EXPORT_SYMBOL(skb_free_datagram);
+EXPORT_SYMBOL(skb_recv_datagram);